diff options
Diffstat (limited to 'arch/arm64/include/asm')
63 files changed, 1201 insertions, 943 deletions
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 6647ae4f0231..880b9054d75c 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -153,7 +153,7 @@ static inline bool gic_prio_masking_enabled(void) static inline void gic_pmr_mask_irqs(void) { - BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF | + BUILD_BUG_ON(GICD_INT_DEF_PRI < (__GIC_PRIO_IRQOFF | GIC_PRIO_PSR_I_SET)); BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON); /* @@ -162,6 +162,12 @@ static inline void gic_pmr_mask_irqs(void) * are applied to IRQ priorities */ BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) >= GIC_PRIO_IRQON); + /* + * Same situation as above, but now we make sure that we can mask + * regular interrupts. + */ + BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) < (__GIC_PRIO_IRQOFF_NS | + GIC_PRIO_PSR_I_SET)); gic_write_pmr(GIC_PRIO_IRQOFF); } diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h index 44209f6146aa..ffb1a40d5475 100644 --- a/arch/arm64/include/asm/archrandom.h +++ b/arch/arm64/include/asm/archrandom.h @@ -79,10 +79,5 @@ arch_get_random_seed_long_early(unsigned long *v) } #define arch_get_random_seed_long_early arch_get_random_seed_long_early -#else - -static inline bool __arm64_rndr(unsigned long *v) { return false; } -static inline bool __init __early_cpu_has_rndr(void) { return false; } - #endif /* CONFIG_ARCH_RANDOM */ #endif /* _ASM_ARCHRANDOM_H */ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 54d181177656..ddbe6bf00e33 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -219,6 +219,23 @@ lr .req x30 // link register .endm /* + * @dst: destination register + */ +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) + .macro this_cpu_offset, dst + mrs \dst, tpidr_el2 + .endm +#else + .macro this_cpu_offset, dst +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN + mrs \dst, tpidr_el1 +alternative_else + mrs \dst, tpidr_el2 +alternative_endif + .endm +#endif + + /* * @dst: Result of per_cpu(sym, smp_processor_id()) (can be SP) * @sym: The name of the per-cpu variable * @tmp: scratch register @@ -226,11 +243,7 @@ lr .req x30 // link register .macro adr_this_cpu, dst, sym, tmp adrp \tmp, \sym add \dst, \tmp, #:lo12:\sym -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - mrs \tmp, tpidr_el1 -alternative_else - mrs \tmp, tpidr_el2 -alternative_endif + this_cpu_offset \tmp add \dst, \dst, \tmp .endm @@ -241,11 +254,7 @@ alternative_endif */ .macro ldr_this_cpu dst, sym, tmp adr_l \dst, \sym -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - mrs \tmp, tpidr_el1 -alternative_else - mrs \tmp, tpidr_el2 -alternative_endif + this_cpu_offset \tmp ldr \dst, [\dst, \tmp] .endm diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index fb4c27506ef4..c3009b0e5239 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -45,6 +45,7 @@ #define rmb() dsb(ld) #define wmb() dsb(st) +#define dma_mb() dmb(osh) #define dma_rmb() dmb(oshld) #define dma_wmb() dmb(oshst) diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h index c7f67da13cd9..3e7943fd17a4 100644 --- a/arch/arm64/include/asm/boot.h +++ b/arch/arm64/include/asm/boot.h @@ -13,8 +13,7 @@ #define MAX_FDT_SIZE SZ_2M /* - * arm64 requires the kernel image to placed - * TEXT_OFFSET bytes beyond a 2 MB aligned base + * arm64 requires the kernel image to placed at a 2 MB aligned base address */ #define MIN_KIMG_ALIGN SZ_2M diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h index e3d47b52161d..ec7720dbe2c8 100644 --- a/arch/arm64/include/asm/brk-imm.h +++ b/arch/arm64/include/asm/brk-imm.h @@ -10,6 +10,7 @@ * #imm16 values used for BRK instruction generation * 0x004: for installing kprobes * 0x005: for installing uprobes + * 0x006: for kprobe software single-step * Allowed values for kgdb are 0x400 - 0x7ff * 0x100: for triggering a fault on purpose (reserved) * 0x400: for dynamic BRK instruction @@ -19,6 +20,7 @@ */ #define KPROBES_BRK_IMM 0x004 #define UPROBES_BRK_IMM 0x005 +#define KPROBES_BRK_SS_IMM 0x006 #define FAULT_BRK_IMM 0x100 #define KGDB_DYN_DBG_BRK_IMM 0x400 #define KGDB_COMPILED_DBG_BRK_IMM 0x401 diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index a4d1b5f771f6..63d43b5f82f6 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -24,6 +24,7 @@ #define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) #define ICACHE_POLICY_VPIPT 0 +#define ICACHE_POLICY_RESERVED 1 #define ICACHE_POLICY_VIPT 2 #define ICACHE_POLICY_PIPT 3 @@ -79,7 +80,7 @@ static inline u32 cache_type_cwg(void) return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK; } -#define __read_mostly __section(.data..read_mostly) +#define __read_mostly __section(".data..read_mostly") static inline int cache_line_size_of_cpu(void) { diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 935d2aa231bf..23a9fb73c04f 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -35,8 +35,6 @@ typedef s32 compat_nlink_t; typedef u16 compat_ipc_pid_t; typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; -typedef s64 compat_s64; -typedef u64 compat_u64; struct compat_stat { #ifdef __AARCH64EB__ diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index d28e8f37d3b4..e95c4df83911 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -21,7 +21,7 @@ * mechanism for doing so, tests whether it is possible to boot * the given CPU. * @cpu_boot: Boots a cpu into the kernel. - * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary + * @cpu_postboot: Optionally, perform any post-boot cleanup or necessary * synchronisation. Called from the cpu being booted. * @cpu_can_disable: Determines whether a CPU can be disabled based on * mechanism-specific information. diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 07b643a70710..e7d98997c09c 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -31,13 +31,13 @@ #define ARM64_HAS_DCPOP 21 #define ARM64_SVE 22 #define ARM64_UNMAP_KERNEL_AT_EL0 23 -#define ARM64_HARDEN_BRANCH_PREDICTOR 24 +#define ARM64_SPECTRE_V2 24 #define ARM64_HAS_RAS_EXTN 25 #define ARM64_WORKAROUND_843419 26 #define ARM64_HAS_CACHE_IDC 27 #define ARM64_HAS_CACHE_DIC 28 #define ARM64_HW_DBM 29 -#define ARM64_SSBD 30 +#define ARM64_SPECTRE_V4 30 #define ARM64_MISMATCHED_CACHE_TYPE 31 #define ARM64_HAS_STAGE2_FWB 32 #define ARM64_HAS_CRC32 33 @@ -64,7 +64,9 @@ #define ARM64_BTI 54 #define ARM64_HAS_ARMv8_4_TTL 55 #define ARM64_HAS_TLB_RANGE 56 +#define ARM64_MTE 57 +#define ARM64_WORKAROUND_1508412 58 -#define ARM64_NCAPS 57 +#define ARM64_NCAPS 59 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 89b4f0142c28..da250e4741bd 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -268,6 +268,8 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; /* * CPU feature detected at boot time based on feature of one or more CPUs. * All possible conflicts for a late CPU are ignored. + * NOTE: this means that a late CPU with the feature will *not* cause the + * capability to be advertised by cpus_have_*cap()! */ #define ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE \ (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ @@ -358,7 +360,7 @@ static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap) } /* - * Generic helper for handling capabilties with multiple (match,enable) pairs + * Generic helper for handling capabilities with multiple (match,enable) pairs * of call backs, sharing the same capability bit. * Iterate over each entry to see if at least one matches. */ @@ -375,6 +377,23 @@ cpucap_multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry, return false; } +static __always_inline bool is_vhe_hyp_code(void) +{ + /* Only defined for code run in VHE hyp context */ + return __is_defined(__KVM_VHE_HYPERVISOR__); +} + +static __always_inline bool is_nvhe_hyp_code(void) +{ + /* Only defined for code run in NVHE hyp context */ + return __is_defined(__KVM_NVHE_HYPERVISOR__); +} + +static __always_inline bool is_hyp_code(void) +{ + return is_vhe_hyp_code() || is_nvhe_hyp_code(); +} + extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; extern struct static_key_false arm64_const_caps_ready; @@ -428,35 +447,40 @@ static __always_inline bool __cpus_have_const_cap(int num) } /* - * Test for a capability, possibly with a runtime check. + * Test for a capability without a runtime check. * - * Before capabilities are finalized, this behaves as cpus_have_cap(). + * Before capabilities are finalized, this will BUG(). * After capabilities are finalized, this is patched to avoid a runtime check. * * @num must be a compile-time constant. */ -static __always_inline bool cpus_have_const_cap(int num) +static __always_inline bool cpus_have_final_cap(int num) { if (system_capabilities_finalized()) return __cpus_have_const_cap(num); else - return cpus_have_cap(num); + BUG(); } /* - * Test for a capability without a runtime check. + * Test for a capability, possibly with a runtime check for non-hyp code. * - * Before capabilities are finalized, this will BUG(). + * For hyp code, this behaves the same as cpus_have_final_cap(). + * + * For non-hyp code: + * Before capabilities are finalized, this behaves as cpus_have_cap(). * After capabilities are finalized, this is patched to avoid a runtime check. * * @num must be a compile-time constant. */ -static __always_inline bool cpus_have_final_cap(int num) +static __always_inline bool cpus_have_const_cap(int num) { - if (system_capabilities_finalized()) + if (is_hyp_code()) + return cpus_have_final_cap(num); + else if (system_capabilities_finalized()) return __cpus_have_const_cap(num); else - BUG(); + return cpus_have_cap(num); } static inline void cpus_set_cap(unsigned int num) @@ -681,6 +705,12 @@ static __always_inline bool system_uses_irq_prio_masking(void) cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); } +static inline bool system_supports_mte(void) +{ + return IS_ENABLED(CONFIG_ARM64_MTE) && + cpus_have_const_cap(ARM64_MTE); +} + static inline bool system_has_prio_mask_debugging(void) { return IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING) && @@ -698,30 +728,6 @@ static inline bool system_supports_tlb_range(void) cpus_have_const_cap(ARM64_HAS_TLB_RANGE); } -#define ARM64_BP_HARDEN_UNKNOWN -1 -#define ARM64_BP_HARDEN_WA_NEEDED 0 -#define ARM64_BP_HARDEN_NOT_REQUIRED 1 - -int get_spectre_v2_workaround_state(void); - -#define ARM64_SSBD_UNKNOWN -1 -#define ARM64_SSBD_FORCE_DISABLE 0 -#define ARM64_SSBD_KERNEL 1 -#define ARM64_SSBD_FORCE_ENABLE 2 -#define ARM64_SSBD_MITIGATED 3 - -static inline int arm64_get_ssbd_state(void) -{ -#ifdef CONFIG_ARM64_SSBD - extern int ssbd_state; - return ssbd_state; -#else - return ARM64_SSBD_UNKNOWN; -#endif -} - -void arm64_set_ssbd_mitigation(bool state); - extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 7219cddeba66..ef5b040dee44 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -71,6 +71,7 @@ #define ARM_CPU_PART_CORTEX_A55 0xD05 #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C +#define ARM_CPU_PART_CORTEX_A77 0xD0D #define APM_CPU_PART_POTENZA 0x000 @@ -85,6 +86,8 @@ #define QCOM_CPU_PART_FALKOR_V1 0x800 #define QCOM_CPU_PART_FALKOR 0xC00 #define QCOM_CPU_PART_KRYO 0x200 +#define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800 +#define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801 #define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 #define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804 #define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 @@ -105,6 +108,7 @@ #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) +#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) @@ -114,6 +118,8 @@ #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) +#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD) +#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER) #define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) #define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD) #define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 0b298f48f5bf..657c921fd784 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -53,6 +53,7 @@ /* kprobes BRK opcodes with ESR encoding */ #define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (KPROBES_BRK_IMM << 5)) +#define BRK64_OPCODE_KPROBES_SS (AARCH64_BREAK_MON | (KPROBES_BRK_SS_IMM << 5)) /* uprobes BRK opcodes with ESR encoding */ #define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (UPROBES_BRK_IMM << 5)) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index d4ab3f73e7a3..973b14415271 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -65,7 +65,7 @@ efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...); (SEGMENT_ALIGN > THREAD_ALIGN ? SEGMENT_ALIGN : THREAD_ALIGN) /* on arm64, the FDT may be located anywhere in system RAM */ -static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base) +static inline unsigned long efi_get_max_fdt_addr(unsigned long image_addr) { return ULONG_MAX; } @@ -80,8 +80,7 @@ static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base) * apply to other bootloaders, and are required for some kernel * configurations. */ -static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, - unsigned long image_addr) +static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr) { return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS_MIN - 1)); } diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 035003acfa87..22c81f1edda2 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -35,7 +35,9 @@ #define ESR_ELx_EC_SYS64 (0x18) #define ESR_ELx_EC_SVE (0x19) #define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ -/* Unallocated EC: 0x1b - 0x1E */ +/* Unallocated EC: 0x1B */ +#define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */ +/* Unallocated EC: 0x1D - 0x1E */ #define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ #define ESR_ELx_EC_IABT_LOW (0x20) #define ESR_ELx_EC_IABT_CUR (0x21) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 7577a754d443..99b9383cd036 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -47,4 +47,5 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr); void do_cp15instr(unsigned int esr, struct pt_regs *regs); void do_el0_svc(struct pt_regs *regs); void do_el0_svc_compat(struct pt_regs *regs); +void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr); #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h index 840a35ed92ec..b15eb4a3e6b2 100644 --- a/arch/arm64/include/asm/extable.h +++ b/arch/arm64/include/asm/extable.h @@ -22,6 +22,15 @@ struct exception_table_entry #define ARCH_HAS_RELATIVE_EXTABLE +static inline bool in_bpf_jit(struct pt_regs *regs) +{ + if (!IS_ENABLED(CONFIG_BPF_JIT)) + return false; + + return regs->pc >= BPF_JIT_REGION_START && + regs->pc < BPF_JIT_REGION_END; +} + #ifdef CONFIG_BPF_JIT int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, struct pt_regs *regs); diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 59f10dd13f12..bec5f14b622a 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -69,6 +69,9 @@ static inline void *sve_pffr(struct thread_struct *thread) extern void sve_save_state(void *state, u32 *pfpsr); extern void sve_load_state(void const *state, u32 const *pfpsr, unsigned long vq_minus_1); +extern void sve_flush_live(void); +extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state, + unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); struct arm64_cpu_capabilities; diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 636e9d9c7929..af43367534c7 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -164,25 +164,59 @@ | ((\np) << 5) .endm +/* PFALSE P\np.B */ +.macro _sve_pfalse np + _sve_check_preg \np + .inst 0x2518e400 \ + | (\np) +.endm + .macro __for from:req, to:req .if (\from) == (\to) - _for__body \from + _for__body %\from .else - __for \from, (\from) + ((\to) - (\from)) / 2 - __for (\from) + ((\to) - (\from)) / 2 + 1, \to + __for %\from, %((\from) + ((\to) - (\from)) / 2) + __for %((\from) + ((\to) - (\from)) / 2 + 1), %\to .endif .endm .macro _for var:req, from:req, to:req, insn:vararg .macro _for__body \var:req + .noaltmacro \insn + .altmacro .endm + .altmacro __for \from, \to + .noaltmacro .purgem _for__body .endm +/* Update ZCR_EL1.LEN with the new VQ */ +.macro sve_load_vq xvqminus1, xtmp, xtmp2 + mrs_s \xtmp, SYS_ZCR_EL1 + bic \xtmp2, \xtmp, ZCR_ELx_LEN_MASK + orr \xtmp2, \xtmp2, \xvqminus1 + cmp \xtmp2, \xtmp + b.eq 921f + msr_s SYS_ZCR_EL1, \xtmp2 //self-synchronising +921: +.endm + +/* Preserve the first 128-bits of Znz and zero the rest. */ +.macro _sve_flush_z nz + _sve_check_zreg \nz + mov v\nz\().16b, v\nz\().16b +.endm + +.macro sve_flush + _for n, 0, 31, _sve_flush_z \n + _for n, 0, 15, _sve_pfalse \n + _sve_wrffr 0 +.endm + .macro sve_save nxbase, xpfpsr, nxtmp _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34 _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16 @@ -197,13 +231,7 @@ .endm .macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2 - mrs_s x\nxtmp, SYS_ZCR_EL1 - bic \xtmp2, x\nxtmp, ZCR_ELx_LEN_MASK - orr \xtmp2, \xtmp2, \xvqminus1 - cmp \xtmp2, x\nxtmp - b.eq 921f - msr_s SYS_ZCR_EL1, \xtmp2 // self-synchronising -921: + sve_load_vq \xvqminus1, x\nxtmp, \xtmp2 _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34 _sve_ldr_p 0, \nxbase _sve_wrffr 0 diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 985493af704b..5ffa4bacdad3 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -13,21 +13,12 @@ #include <asm/kvm_arm.h> #include <asm/sysreg.h> -#define NR_IPI 7 - typedef struct { unsigned int __softirq_pending; - unsigned int ipi_irqs[NR_IPI]; } ____cacheline_aligned irq_cpustat_t; #include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ -#define __inc_irq_stat(cpu, member) __IRQ_STAT(cpu, member)++ -#define __get_irq_stat(cpu, member) __IRQ_STAT(cpu, member) - -u64 smp_irq_stat_cpu(unsigned int cpu); -#define arch_irq_stat_cpu smp_irq_stat_cpu - #define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 struct nmi_ctx { diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 22f73fe09030..9a5498c2c8ee 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -8,18 +8,27 @@ #include <uapi/asm/hwcap.h> #include <asm/cpufeature.h> +#define COMPAT_HWCAP_SWP (1 << 0) #define COMPAT_HWCAP_HALF (1 << 1) #define COMPAT_HWCAP_THUMB (1 << 2) +#define COMPAT_HWCAP_26BIT (1 << 3) #define COMPAT_HWCAP_FAST_MULT (1 << 4) +#define COMPAT_HWCAP_FPA (1 << 5) #define COMPAT_HWCAP_VFP (1 << 6) #define COMPAT_HWCAP_EDSP (1 << 7) +#define COMPAT_HWCAP_JAVA (1 << 8) +#define COMPAT_HWCAP_IWMMXT (1 << 9) +#define COMPAT_HWCAP_CRUNCH (1 << 10) +#define COMPAT_HWCAP_THUMBEE (1 << 11) #define COMPAT_HWCAP_NEON (1 << 12) #define COMPAT_HWCAP_VFPv3 (1 << 13) +#define COMPAT_HWCAP_VFPV3D16 (1 << 14) #define COMPAT_HWCAP_TLS (1 << 15) #define COMPAT_HWCAP_VFPv4 (1 << 16) #define COMPAT_HWCAP_IDIVA (1 << 17) #define COMPAT_HWCAP_IDIVT (1 << 18) #define COMPAT_HWCAP_IDIV (COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT) +#define COMPAT_HWCAP_VFPD32 (1 << 19) #define COMPAT_HWCAP_LPAE (1 << 20) #define COMPAT_HWCAP_EVTSTRM (1 << 21) @@ -95,7 +104,7 @@ #define KERNEL_HWCAP_DGH __khwcap2_feature(DGH) #define KERNEL_HWCAP_RNG __khwcap2_feature(RNG) #define KERNEL_HWCAP_BTI __khwcap2_feature(BTI) -/* reserved for KERNEL_HWCAP_MTE __khwcap2_feature(MTE) */ +#define KERNEL_HWCAP_MTE __khwcap2_feature(MTE) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/hyp_image.h b/arch/arm64/include/asm/hyp_image.h new file mode 100644 index 000000000000..daa1a1da539e --- /dev/null +++ b/arch/arm64/include/asm/hyp_image.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Google LLC. + * Written by David Brazdil <dbrazdil@google.com> + */ + +#ifndef __ARM64_HYP_IMAGE_H__ +#define __ARM64_HYP_IMAGE_H__ + +/* + * KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_, + * to separate it from the kernel proper. + */ +#define kvm_nvhe_sym(sym) __kvm_nvhe_##sym + +#ifdef LINKER_SCRIPT + +/* + * KVM nVHE ELF section names are prefixed with .hyp, to separate them + * from the kernel proper. + */ +#define HYP_SECTION_NAME(NAME) .hyp##NAME + +/* Defines an ELF hyp section from input section @NAME and its subsections. */ +#define HYP_SECTION(NAME) \ + HYP_SECTION_NAME(NAME) : { *(NAME NAME##.*) } + +/* + * Defines a linker script alias of a kernel-proper symbol referenced by + * KVM nVHE hyp code. + */ +#define KVM_NVHE_ALIAS(sym) kvm_nvhe_sym(sym) = sym; + +#endif /* LINKER_SCRIPT */ + +#endif /* __ARM64_HYP_IMAGE_H__ */ diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 0bc46149e491..4b39293d0f72 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -359,9 +359,13 @@ __AARCH64_INSN_FUNCS(brk, 0xFFE0001F, 0xD4200000) __AARCH64_INSN_FUNCS(exception, 0xFF000000, 0xD4000000) __AARCH64_INSN_FUNCS(hint, 0xFFFFF01F, 0xD503201F) __AARCH64_INSN_FUNCS(br, 0xFFFFFC1F, 0xD61F0000) +__AARCH64_INSN_FUNCS(br_auth, 0xFEFFF800, 0xD61F0800) __AARCH64_INSN_FUNCS(blr, 0xFFFFFC1F, 0xD63F0000) +__AARCH64_INSN_FUNCS(blr_auth, 0xFEFFF800, 0xD63F0800) __AARCH64_INSN_FUNCS(ret, 0xFFFFFC1F, 0xD65F0000) +__AARCH64_INSN_FUNCS(ret_auth, 0xFFFFFBFF, 0xD65F0BFF) __AARCH64_INSN_FUNCS(eret, 0xFFFFFFFF, 0xD69F03E0) +__AARCH64_INSN_FUNCS(eret_auth, 0xFFFFFBFF, 0xD69F0BFF) __AARCH64_INSN_FUNCS(mrs, 0xFFF00000, 0xD5300000) __AARCH64_INSN_FUNCS(msr_imm, 0xFFF8F01F, 0xD500401F) __AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000) diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index ff50dd731852..fd172c41df90 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -110,6 +110,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) #define __io_par(v) __iormb(v) #define __iowmb() dma_wmb() +#define __iomb() dma_mb() /* * Relaxed I/O memory access primitives. These follow the Device memory diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h index 8a1ef1907760..a1020285ea75 100644 --- a/arch/arm64/include/asm/irq_work.h +++ b/arch/arm64/include/asm/irq_work.h @@ -2,11 +2,9 @@ #ifndef __ASM_IRQ_WORK_H #define __ASM_IRQ_WORK_H -#include <asm/smp.h> - static inline bool arch_irq_work_has_interrupt(void) { - return !!__smp_cross_call; + return true; } #endif /* __ASM_IRQ_WORK_H */ diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 329fb15f6bac..19ca76ea60d9 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -86,7 +86,7 @@ + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \ + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ -#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) +#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end)) #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) #ifdef CONFIG_ARM64_SW_TTBR0_PAN diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index 97e511d645a2..8699ce30f587 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -16,7 +16,7 @@ #include <linux/percpu.h> #define __ARCH_WANT_KPROBES_INSN_SLOT -#define MAX_INSN_SIZE 1 +#define MAX_INSN_SIZE 2 #define flush_insn_slot(p) do { } while (0) #define kretprobe_blacklist_size 0 diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 1da8e3dc4455..64ce29378467 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -12,6 +12,7 @@ #include <asm/types.h> /* Hyp Configuration Register (HCR) bits */ +#define HCR_ATA (UL(1) << 56) #define HCR_FWB (UL(1) << 46) #define HCR_API (UL(1) << 41) #define HCR_APK (UL(1) << 40) @@ -66,7 +67,7 @@ * TWI: Trap WFI * TIDCP: Trap L2CTLR/L2ECTLR * BSU_IS: Upgrade barriers to the inner shareable domain - * FB: Force broadcast of all maintainance operations + * FB: Force broadcast of all maintenance operations * AMO: Override CPSR.A and enable signaling with VA * IMO: Override CPSR.I and enable signaling with VI * FMO: Override CPSR.F and enable signaling with VF @@ -78,7 +79,7 @@ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ HCR_FMO | HCR_IMO | HCR_PTW ) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) -#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK) +#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) /* TCR_EL2 Registers bits */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 6f98fbd0ac81..54387ccd1ab2 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -7,11 +7,9 @@ #ifndef __ARM_KVM_ASM_H__ #define __ARM_KVM_ASM_H__ +#include <asm/hyp_image.h> #include <asm/virt.h> -#define VCPU_WORKAROUND_2_FLAG_SHIFT 0 -#define VCPU_WORKAROUND_2_FLAG (_AC(1, UL) << VCPU_WORKAROUND_2_FLAG_SHIFT) - #define ARM_EXIT_WITH_SERROR_BIT 31 #define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT)) #define ARM_EXCEPTION_IS_TRAP(x) (ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_TRAP) @@ -38,17 +36,34 @@ #define __SMCCC_WORKAROUND_1_SMC_SZ 36 +#define KVM_HOST_SMCCC_ID(id) \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + (id)) + +#define KVM_HOST_SMCCC_FUNC(name) KVM_HOST_SMCCC_ID(__KVM_HOST_SMCCC_FUNC_##name) + +#define __KVM_HOST_SMCCC_FUNC___kvm_hyp_init 0 +#define __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run 1 +#define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context 2 +#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa 3 +#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid 4 +#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_local_vmid 5 +#define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff 6 +#define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs 7 +#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_ich_vtr_el2 8 +#define __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr 9 +#define __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr 10 +#define __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs 11 +#define __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 12 +#define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs 13 +#define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs 14 + #ifndef __ASSEMBLY__ #include <linux/mm.h> -/* - * Translate name of a symbol defined in nVHE hyp to the name seen - * by kernel proper. All nVHE symbols are prefixed by the build system - * to avoid clashes with the VHE variants. - */ -#define kvm_nvhe_sym(sym) __kvm_nvhe_##sym - #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] #define DECLARE_KVM_NVHE_SYM(sym) extern char kvm_nvhe_sym(sym)[] @@ -60,10 +75,53 @@ DECLARE_KVM_VHE_SYM(sym); \ DECLARE_KVM_NVHE_SYM(sym) +#define DECLARE_KVM_VHE_PER_CPU(type, sym) \ + DECLARE_PER_CPU(type, sym) +#define DECLARE_KVM_NVHE_PER_CPU(type, sym) \ + DECLARE_PER_CPU(type, kvm_nvhe_sym(sym)) + +#define DECLARE_KVM_HYP_PER_CPU(type, sym) \ + DECLARE_KVM_VHE_PER_CPU(type, sym); \ + DECLARE_KVM_NVHE_PER_CPU(type, sym) + +/* + * Compute pointer to a symbol defined in nVHE percpu region. + * Returns NULL if percpu memory has not been allocated yet. + */ +#define this_cpu_ptr_nvhe_sym(sym) per_cpu_ptr_nvhe_sym(sym, smp_processor_id()) +#define per_cpu_ptr_nvhe_sym(sym, cpu) \ + ({ \ + unsigned long base, off; \ + base = kvm_arm_hyp_percpu_base[cpu]; \ + off = (unsigned long)&CHOOSE_NVHE_SYM(sym) - \ + (unsigned long)&CHOOSE_NVHE_SYM(__per_cpu_start); \ + base ? (typeof(CHOOSE_NVHE_SYM(sym))*)(base + off) : NULL; \ + }) + +#if defined(__KVM_NVHE_HYPERVISOR__) + +#define CHOOSE_NVHE_SYM(sym) sym +#define CHOOSE_HYP_SYM(sym) CHOOSE_NVHE_SYM(sym) + +/* The nVHE hypervisor shouldn't even try to access VHE symbols */ +extern void *__nvhe_undefined_symbol; +#define CHOOSE_VHE_SYM(sym) __nvhe_undefined_symbol +#define this_cpu_ptr_hyp_sym(sym) (&__nvhe_undefined_symbol) +#define per_cpu_ptr_hyp_sym(sym, cpu) (&__nvhe_undefined_symbol) + +#elif defined(__KVM_VHE_HYPERVISOR__) + #define CHOOSE_VHE_SYM(sym) sym -#define CHOOSE_NVHE_SYM(sym) kvm_nvhe_sym(sym) +#define CHOOSE_HYP_SYM(sym) CHOOSE_VHE_SYM(sym) + +/* The VHE hypervisor shouldn't even try to access nVHE symbols */ +extern void *__vhe_undefined_symbol; +#define CHOOSE_NVHE_SYM(sym) __vhe_undefined_symbol +#define this_cpu_ptr_hyp_sym(sym) (&__vhe_undefined_symbol) +#define per_cpu_ptr_hyp_sym(sym, cpu) (&__vhe_undefined_symbol) + +#else -#ifndef __KVM_NVHE_HYPERVISOR__ /* * BIG FAT WARNINGS: * @@ -75,12 +133,21 @@ * - Don't let the nVHE hypervisor have access to this, as it will * pick the *wrong* symbol (yes, it runs at EL2...). */ -#define CHOOSE_HYP_SYM(sym) (is_kernel_in_hyp_mode() ? CHOOSE_VHE_SYM(sym) \ +#define CHOOSE_HYP_SYM(sym) (is_kernel_in_hyp_mode() \ + ? CHOOSE_VHE_SYM(sym) \ : CHOOSE_NVHE_SYM(sym)) -#else -/* The nVHE hypervisor shouldn't even try to access anything */ -extern void *__nvhe_undefined_symbol; -#define CHOOSE_HYP_SYM(sym) __nvhe_undefined_symbol + +#define this_cpu_ptr_hyp_sym(sym) (is_kernel_in_hyp_mode() \ + ? this_cpu_ptr(&sym) \ + : this_cpu_ptr_nvhe_sym(sym)) + +#define per_cpu_ptr_hyp_sym(sym, cpu) (is_kernel_in_hyp_mode() \ + ? per_cpu_ptr(&sym, cpu) \ + : per_cpu_ptr_nvhe_sym(sym, cpu)) + +#define CHOOSE_VHE_SYM(sym) sym +#define CHOOSE_NVHE_SYM(sym) kvm_nvhe_sym(sym) + #endif /* Translate a kernel address @ptr into its equivalent linear mapping */ @@ -98,15 +165,19 @@ struct kvm_vcpu; struct kvm_s2_mmu; DECLARE_KVM_NVHE_SYM(__kvm_hyp_init); +DECLARE_KVM_NVHE_SYM(__kvm_hyp_host_vector); DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); #define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init) +#define __kvm_hyp_host_vector CHOOSE_NVHE_SYM(__kvm_hyp_host_vector) #define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) -#ifdef CONFIG_KVM_INDIRECT_VECTORS +extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; +DECLARE_KVM_NVHE_SYM(__per_cpu_start); +DECLARE_KVM_NVHE_SYM(__per_cpu_end); + extern atomic_t arm64_el2_vector_last_slot; DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); #define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs) -#endif extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, @@ -149,26 +220,6 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; addr; \ }) -/* - * Home-grown __this_cpu_{ptr,read} variants that always work at HYP, - * provided that sym is really a *symbol* and not a pointer obtained from - * a data structure. As for SHIFT_PERCPU_PTR(), the creative casting keeps - * sparse quiet. - */ -#define __hyp_this_cpu_ptr(sym) \ - ({ \ - void *__ptr; \ - __verify_pcpu_ptr(&sym); \ - __ptr = hyp_symbol_addr(sym); \ - __ptr += read_sysreg(tpidr_el2); \ - (typeof(sym) __kernel __force *)__ptr; \ - }) - -#define __hyp_this_cpu_read(sym) \ - ({ \ - *__hyp_this_cpu_ptr(sym); \ - }) - #define __KVM_EXTABLE(from, to) \ " .pushsection __kvm_ex_table, \"a\"\n" \ " .align 3\n" \ @@ -199,20 +250,8 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; #else /* __ASSEMBLY__ */ -.macro hyp_adr_this_cpu reg, sym, tmp - adr_l \reg, \sym - mrs \tmp, tpidr_el2 - add \reg, \reg, \tmp -.endm - -.macro hyp_ldr_this_cpu reg, sym, tmp - adr_l \reg, \sym - mrs \tmp, tpidr_el2 - ldr \reg, [\reg, \tmp] -.endm - .macro get_host_ctxt reg, tmp - hyp_adr_this_cpu \reg, kvm_host_data, \tmp + adr_this_cpu \reg, kvm_host_data, \tmp add \reg, \reg, #HOST_DATA_CONTEXT .endm @@ -221,6 +260,16 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] .endm +.macro get_loaded_vcpu vcpu, ctxt + adr_this_cpu \ctxt, kvm_hyp_ctxt, \vcpu + ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] +.endm + +.macro set_loaded_vcpu vcpu, ctxt, tmp + adr_this_cpu \ctxt, kvm_hyp_ctxt, \tmp + str \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] +.endm + /* * KVM extable for unexpected exceptions. * In the same format _asm_extable, but output to a different section so that @@ -236,6 +285,45 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; .popsection .endm +#define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x) +#define CPU_LR_OFFSET CPU_XREG_OFFSET(30) +#define CPU_SP_EL0_OFFSET (CPU_LR_OFFSET + 8) + +/* + * We treat x18 as callee-saved as the host may use it as a platform + * register (e.g. for shadow call stack). + */ +.macro save_callee_saved_regs ctxt + str x18, [\ctxt, #CPU_XREG_OFFSET(18)] + stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] + stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] + stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] + stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] + stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] + stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] +.endm + +.macro restore_callee_saved_regs ctxt + // We require \ctxt is not x18-x28 + ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)] + ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] + ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] + ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] + ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] + ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] + ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] +.endm + +.macro save_sp_el0 ctxt, tmp + mrs \tmp, sp_el0 + str \tmp, [\ctxt, #CPU_SP_EL0_OFFSET] +.endm + +.macro restore_sp_el0 ctxt, tmp + ldr \tmp, [\ctxt, #CPU_SP_EL0_OFFSET] + msr sp_el0, \tmp +.endm + #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 1cc5f5f72d0b..5ef2669ccd6c 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -391,20 +391,6 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; } -static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG; -} - -static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu, - bool flag) -{ - if (flag) - vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG; - else - vcpu->arch.workaround_flags &= ~VCPU_WORKAROUND_2_FLAG; -} - static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) { diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 905c2b87e05a..0cd9f0f75c13 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -11,6 +11,7 @@ #ifndef __ARM64_KVM_HOST_H__ #define __ARM64_KVM_HOST_H__ +#include <linux/arm-smccc.h> #include <linux/bitmap.h> #include <linux/types.h> #include <linux/jump_label.h> @@ -79,8 +80,8 @@ struct kvm_s2_mmu { * for vEL1/EL0 with vHCR_EL2.VM == 0. In that case, we use the * canonical stage-2 page tables. */ - pgd_t *pgd; phys_addr_t pgd_phys; + struct kvm_pgtable *pgt; /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; @@ -110,6 +111,15 @@ struct kvm_arch { * supported. */ bool return_nisv_io_abort_to_user; + + /* + * VM-wide PMU filter, implemented as a bitmap and big enough for + * up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+). + */ + unsigned long *pmu_filter; + unsigned int pmuver; + + u8 pfr0_csv2; }; struct kvm_vcpu_fault_info { @@ -231,6 +241,7 @@ enum vcpu_sysreg { #define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) #define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) #define cp14_DBGDCCINT (MDCCINT_EL1 * 2) +#define cp14_DBGVCR (DBGVCR32_EL2 * 2) #define NR_COPRO_REGS (NR_SYS_REGS * 2) @@ -262,8 +273,6 @@ struct kvm_host_data { struct kvm_pmu_events pmu_events; }; -typedef struct kvm_host_data kvm_host_data_t; - struct vcpu_reset_state { unsigned long pc; unsigned long r0; @@ -480,18 +489,15 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); -u64 __kvm_call_hyp(void *hypfn, ...); - -#define kvm_call_hyp_nvhe(f, ...) \ - do { \ - DECLARE_KVM_NVHE_SYM(f); \ - __kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \ - } while(0) - -#define kvm_call_hyp_nvhe_ret(f, ...) \ +#define kvm_call_hyp_nvhe(f, ...) \ ({ \ - DECLARE_KVM_NVHE_SYM(f); \ - __kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \ + struct arm_smccc_res res; \ + \ + arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(f), \ + ##__VA_ARGS__, &res); \ + WARN_ON(res.a0 != SMCCC_RET_SUCCESS); \ + \ + res.a1; \ }) /* @@ -517,7 +523,7 @@ u64 __kvm_call_hyp(void *hypfn, ...); ret = f(__VA_ARGS__); \ isb(); \ } else { \ - ret = kvm_call_hyp_nvhe_ret(f, ##__VA_ARGS__); \ + ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \ } \ \ ret; \ @@ -565,7 +571,7 @@ void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); -DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data); +DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data); static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { @@ -631,46 +637,6 @@ static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} static inline void kvm_clr_pmu_events(u32 clr) {} #endif -#define KVM_BP_HARDEN_UNKNOWN -1 -#define KVM_BP_HARDEN_WA_NEEDED 0 -#define KVM_BP_HARDEN_NOT_REQUIRED 1 - -static inline int kvm_arm_harden_branch_predictor(void) -{ - switch (get_spectre_v2_workaround_state()) { - case ARM64_BP_HARDEN_WA_NEEDED: - return KVM_BP_HARDEN_WA_NEEDED; - case ARM64_BP_HARDEN_NOT_REQUIRED: - return KVM_BP_HARDEN_NOT_REQUIRED; - case ARM64_BP_HARDEN_UNKNOWN: - default: - return KVM_BP_HARDEN_UNKNOWN; - } -} - -#define KVM_SSBD_UNKNOWN -1 -#define KVM_SSBD_FORCE_DISABLE 0 -#define KVM_SSBD_KERNEL 1 -#define KVM_SSBD_FORCE_ENABLE 2 -#define KVM_SSBD_MITIGATED 3 - -static inline int kvm_arm_have_ssbd(void) -{ - switch (arm64_get_ssbd_state()) { - case ARM64_SSBD_FORCE_DISABLE: - return KVM_SSBD_FORCE_DISABLE; - case ARM64_SSBD_KERNEL: - return KVM_SSBD_KERNEL; - case ARM64_SSBD_FORCE_ENABLE: - return KVM_SSBD_FORCE_ENABLE; - case ARM64_SSBD_MITIGATED: - return KVM_SSBD_MITIGATED; - case ARM64_SSBD_UNKNOWN: - default: - return KVM_SSBD_UNKNOWN; - } -} - void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu); void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 46689e7db46c..6b664de5ec1f 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -12,6 +12,9 @@ #include <asm/alternative.h> #include <asm/sysreg.h> +DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); +DECLARE_PER_CPU(unsigned long, kvm_hyp_vector); + #define read_sysreg_elx(r,nvh,vh) \ ({ \ u64 reg; \ @@ -87,11 +90,11 @@ void activate_traps_vhe_load(struct kvm_vcpu *vcpu); void deactivate_traps_vhe_put(void); #endif -u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); +u64 __guest_enter(struct kvm_vcpu *vcpu); -void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt); +void __noreturn hyp_panic(void); #ifdef __KVM_NVHE_HYPERVISOR__ -void __noreturn __hyp_do_panic(unsigned long, ...); +void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); #endif #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 189839c3706a..331394306cce 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -9,6 +9,7 @@ #include <asm/page.h> #include <asm/memory.h> +#include <asm/mmu.h> #include <asm/cpufeature.h> /* @@ -43,16 +44,6 @@ * HYP_VA_MIN = 1 << (VA_BITS - 1) * HYP_VA_MAX = HYP_VA_MIN + (1 << (VA_BITS - 1)) - 1 * - * This of course assumes that the trampoline page exists within the - * VA_BITS range. If it doesn't, then it means we're in the odd case - * where the kernel idmap (as well as HYP) uses more levels than the - * kernel runtime page tables (as seen when the kernel is configured - * for 4k pages, 39bits VA, and yet memory lives just above that - * limit, forcing the idmap to use 4 levels of page tables while the - * kernel itself only uses 3). In this particular case, it doesn't - * matter which side of VA_BITS we use, as we're guaranteed not to - * conflict with anything. - * * When using VHE, there are no separate hyp mappings and all KVM * functionality is already mapped as part of the main kernel * mappings, and none of this applies in that case. @@ -117,15 +108,10 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) #define kvm_phys_size(kvm) (_AC(1, ULL) << kvm_phys_shift(kvm)) #define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL)) -static inline bool kvm_page_empty(void *ptr) -{ - struct page *ptr_page = virt_to_page(ptr); - return page_count(ptr_page) == 1; -} - +#include <asm/kvm_pgtable.h> #include <asm/stage2_pgtable.h> -int create_hyp_mappings(void *from, void *to, pgprot_t prot); +int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot); int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, void __iomem **kaddr, void __iomem **haddr); @@ -141,149 +127,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, int kvm_handle_guest_abort(struct kvm_vcpu *vcpu); -void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); - phys_addr_t kvm_mmu_get_httbr(void); phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); -void kvm_clear_hyp_idmap(void); - -#define kvm_mk_pmd(ptep) \ - __pmd(__phys_to_pmd_val(__pa(ptep)) | PMD_TYPE_TABLE) -#define kvm_mk_pud(pmdp) \ - __pud(__phys_to_pud_val(__pa(pmdp)) | PMD_TYPE_TABLE) -#define kvm_mk_p4d(pmdp) \ - __p4d(__phys_to_p4d_val(__pa(pmdp)) | PUD_TYPE_TABLE) - -#define kvm_set_pud(pudp, pud) set_pud(pudp, pud) - -#define kvm_pfn_pte(pfn, prot) pfn_pte(pfn, prot) -#define kvm_pfn_pmd(pfn, prot) pfn_pmd(pfn, prot) -#define kvm_pfn_pud(pfn, prot) pfn_pud(pfn, prot) - -#define kvm_pud_pfn(pud) pud_pfn(pud) - -#define kvm_pmd_mkhuge(pmd) pmd_mkhuge(pmd) -#define kvm_pud_mkhuge(pud) pud_mkhuge(pud) - -static inline pte_t kvm_s2pte_mkwrite(pte_t pte) -{ - pte_val(pte) |= PTE_S2_RDWR; - return pte; -} - -static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd) -{ - pmd_val(pmd) |= PMD_S2_RDWR; - return pmd; -} - -static inline pud_t kvm_s2pud_mkwrite(pud_t pud) -{ - pud_val(pud) |= PUD_S2_RDWR; - return pud; -} - -static inline pte_t kvm_s2pte_mkexec(pte_t pte) -{ - pte_val(pte) &= ~PTE_S2_XN; - return pte; -} - -static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd) -{ - pmd_val(pmd) &= ~PMD_S2_XN; - return pmd; -} - -static inline pud_t kvm_s2pud_mkexec(pud_t pud) -{ - pud_val(pud) &= ~PUD_S2_XN; - return pud; -} - -static inline void kvm_set_s2pte_readonly(pte_t *ptep) -{ - pteval_t old_pteval, pteval; - - pteval = READ_ONCE(pte_val(*ptep)); - do { - old_pteval = pteval; - pteval &= ~PTE_S2_RDWR; - pteval |= PTE_S2_RDONLY; - pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval); - } while (pteval != old_pteval); -} - -static inline bool kvm_s2pte_readonly(pte_t *ptep) -{ - return (READ_ONCE(pte_val(*ptep)) & PTE_S2_RDWR) == PTE_S2_RDONLY; -} - -static inline bool kvm_s2pte_exec(pte_t *ptep) -{ - return !(READ_ONCE(pte_val(*ptep)) & PTE_S2_XN); -} - -static inline void kvm_set_s2pmd_readonly(pmd_t *pmdp) -{ - kvm_set_s2pte_readonly((pte_t *)pmdp); -} - -static inline bool kvm_s2pmd_readonly(pmd_t *pmdp) -{ - return kvm_s2pte_readonly((pte_t *)pmdp); -} - -static inline bool kvm_s2pmd_exec(pmd_t *pmdp) -{ - return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN); -} - -static inline void kvm_set_s2pud_readonly(pud_t *pudp) -{ - kvm_set_s2pte_readonly((pte_t *)pudp); -} - -static inline bool kvm_s2pud_readonly(pud_t *pudp) -{ - return kvm_s2pte_readonly((pte_t *)pudp); -} - -static inline bool kvm_s2pud_exec(pud_t *pudp) -{ - return !(READ_ONCE(pud_val(*pudp)) & PUD_S2_XN); -} - -static inline pud_t kvm_s2pud_mkyoung(pud_t pud) -{ - return pud_mkyoung(pud); -} - -static inline bool kvm_s2pud_young(pud_t pud) -{ - return pud_young(pud); -} - -#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep) - -#ifdef __PAGETABLE_PMD_FOLDED -#define hyp_pmd_table_empty(pmdp) (0) -#else -#define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp) -#endif - -#ifdef __PAGETABLE_PUD_FOLDED -#define hyp_pud_table_empty(pudp) (0) -#else -#define hyp_pud_table_empty(pudp) kvm_page_empty(pudp) -#endif - -#ifdef __PAGETABLE_P4D_FOLDED -#define hyp_p4d_table_empty(p4dp) (0) -#else -#define hyp_p4d_table_empty(p4dp) kvm_page_empty(p4dp) -#endif struct kvm; @@ -325,77 +171,9 @@ static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn, } } -static inline void __kvm_flush_dcache_pte(pte_t pte) -{ - if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) { - struct page *page = pte_page(pte); - kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE); - } -} - -static inline void __kvm_flush_dcache_pmd(pmd_t pmd) -{ - if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) { - struct page *page = pmd_page(pmd); - kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE); - } -} - -static inline void __kvm_flush_dcache_pud(pud_t pud) -{ - if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) { - struct page *page = pud_page(pud); - kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE); - } -} - void kvm_set_way_flush(struct kvm_vcpu *vcpu); void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); -static inline bool __kvm_cpu_uses_extended_idmap(void) -{ - return __cpu_uses_extended_idmap_level(); -} - -static inline unsigned long __kvm_idmap_ptrs_per_pgd(void) -{ - return idmap_ptrs_per_pgd; -} - -/* - * Can't use pgd_populate here, because the extended idmap adds an extra level - * above CONFIG_PGTABLE_LEVELS (which is 2 or 3 if we're using the extended - * idmap), and pgd_populate is only available if CONFIG_PGTABLE_LEVELS = 4. - */ -static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, - pgd_t *hyp_pgd, - pgd_t *merged_hyp_pgd, - unsigned long hyp_idmap_start) -{ - int idmap_idx; - u64 pgd_addr; - - /* - * Use the first entry to access the HYP mappings. It is - * guaranteed to be free, otherwise we wouldn't use an - * extended idmap. - */ - VM_BUG_ON(pgd_val(merged_hyp_pgd[0])); - pgd_addr = __phys_to_pgd_val(__pa(hyp_pgd)); - merged_hyp_pgd[0] = __pgd(pgd_addr | PMD_TYPE_TABLE); - - /* - * Create another extended level entry that points to the boot HYP map, - * which contains an ID mapping of the HYP init code. We essentially - * merge the boot and runtime HYP maps by doing so, but they don't - * overlap anyway, so this is fine. - */ - idmap_idx = hyp_idmap_start >> VA_BITS; - VM_BUG_ON(pgd_val(merged_hyp_pgd[idmap_idx])); - pgd_addr = __phys_to_pgd_val(__pa(boot_hyp_pgd)); - merged_hyp_pgd[idmap_idx] = __pgd(pgd_addr | PMD_TYPE_TABLE); -} - static inline unsigned int kvm_get_vmid_bits(void) { int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); @@ -430,19 +208,17 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, return ret; } -#ifdef CONFIG_KVM_INDIRECT_VECTORS /* * EL2 vectors can be mapped and rerouted in a number of ways, * depending on the kernel configuration and CPU present: * - * - If the CPU has the ARM64_HARDEN_BRANCH_PREDICTOR cap, the - * hardening sequence is placed in one of the vector slots, which is - * executed before jumping to the real vectors. + * - If the CPU is affected by Spectre-v2, the hardening sequence is + * placed in one of the vector slots, which is executed before jumping + * to the real vectors. * - * - If the CPU has both the ARM64_HARDEN_EL2_VECTORS cap and the - * ARM64_HARDEN_BRANCH_PREDICTOR cap, the slot containing the - * hardening sequence is mapped next to the idmap page, and executed - * before jumping to the real vectors. + * - If the CPU also has the ARM64_HARDEN_EL2_VECTORS cap, the slot + * containing the hardening sequence is mapped next to the idmap page, + * and executed before jumping to the real vectors. * * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an * empty slot is selected, mapped next to the idmap page, and @@ -452,19 +228,16 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, * VHE, as we don't have hypervisor-specific mappings. If the system * is VHE and yet selects this capability, it will be ignored. */ -#include <asm/mmu.h> - extern void *__kvm_bp_vect_base; extern int __kvm_harden_el2_vector_slot; -/* This is called on both VHE and !VHE systems */ static inline void *kvm_get_hyp_vector(void) { struct bp_hardening_data *data = arm64_get_bp_hardening_data(); void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); int slot = -1; - if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) && data->fn) { + if (cpus_have_const_cap(ARM64_SPECTRE_V2) && data->fn) { vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); slot = data->hyp_vectors_slot; } @@ -481,102 +254,8 @@ static inline void *kvm_get_hyp_vector(void) return vect; } -/* This is only called on a !VHE system */ -static inline int kvm_map_vectors(void) -{ - /* - * HBP = ARM64_HARDEN_BRANCH_PREDICTOR - * HEL2 = ARM64_HARDEN_EL2_VECTORS - * - * !HBP + !HEL2 -> use direct vectors - * HBP + !HEL2 -> use hardened vectors in place - * !HBP + HEL2 -> allocate one vector slot and use exec mapping - * HBP + HEL2 -> use hardened vertors and use exec mapping - */ - if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) { - __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs); - __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base); - } - - if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { - phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs); - unsigned long size = __BP_HARDEN_HYP_VECS_SZ; - - /* - * Always allocate a spare vector slot, as we don't - * know yet which CPUs have a BP hardening slot that - * we can reuse. - */ - __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot); - BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS); - return create_hyp_exec_mappings(vect_pa, size, - &__kvm_bp_vect_base); - } - - return 0; -} -#else -static inline void *kvm_get_hyp_vector(void) -{ - return kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); -} - -static inline int kvm_map_vectors(void) -{ - return 0; -} -#endif - -#ifdef CONFIG_ARM64_SSBD -DECLARE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); - -static inline int hyp_map_aux_data(void) -{ - int cpu, err; - - for_each_possible_cpu(cpu) { - u64 *ptr; - - ptr = per_cpu_ptr(&arm64_ssbd_callback_required, cpu); - err = create_hyp_mappings(ptr, ptr + 1, PAGE_HYP); - if (err) - return err; - } - return 0; -} -#else -static inline int hyp_map_aux_data(void) -{ - return 0; -} -#endif - #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) -/* - * Get the magic number 'x' for VTTBR:BADDR of this KVM instance. - * With v8.2 LVA extensions, 'x' should be a minimum of 6 with - * 52bit IPS. - */ -static inline int arm64_vttbr_x(u32 ipa_shift, u32 levels) -{ - int x = ARM64_VTTBR_X(ipa_shift, levels); - - return (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && x < 6) ? 6 : x; -} - -static inline u64 vttbr_baddr_mask(u32 ipa_shift, u32 levels) -{ - unsigned int x = arm64_vttbr_x(ipa_shift, levels); - - return GENMASK_ULL(PHYS_MASK_SHIFT - 1, x); -} - -static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm) -{ - return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm)); -} - static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) { struct kvm_vmid *vmid = &mmu->vmid; diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h new file mode 100644 index 000000000000..52ab38db04c7 --- /dev/null +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Google LLC + * Author: Will Deacon <will@kernel.org> + */ + +#ifndef __ARM64_KVM_PGTABLE_H__ +#define __ARM64_KVM_PGTABLE_H__ + +#include <linux/bits.h> +#include <linux/kvm_host.h> +#include <linux/types.h> + +typedef u64 kvm_pte_t; + +/** + * struct kvm_pgtable - KVM page-table. + * @ia_bits: Maximum input address size, in bits. + * @start_level: Level at which the page-table walk starts. + * @pgd: Pointer to the first top-level entry of the page-table. + * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. + */ +struct kvm_pgtable { + u32 ia_bits; + u32 start_level; + kvm_pte_t *pgd; + + /* Stage-2 only */ + struct kvm_s2_mmu *mmu; +}; + +/** + * enum kvm_pgtable_prot - Page-table permissions and attributes. + * @KVM_PGTABLE_PROT_X: Execute permission. + * @KVM_PGTABLE_PROT_W: Write permission. + * @KVM_PGTABLE_PROT_R: Read permission. + * @KVM_PGTABLE_PROT_DEVICE: Device attributes. + */ +enum kvm_pgtable_prot { + KVM_PGTABLE_PROT_X = BIT(0), + KVM_PGTABLE_PROT_W = BIT(1), + KVM_PGTABLE_PROT_R = BIT(2), + + KVM_PGTABLE_PROT_DEVICE = BIT(3), +}; + +#define PAGE_HYP (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W) +#define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X) +#define PAGE_HYP_RO (KVM_PGTABLE_PROT_R) +#define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE) + +/** + * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk. + * @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid + * entries. + * @KVM_PGTABLE_WALK_TABLE_PRE: Visit table entries before their + * children. + * @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their + * children. + */ +enum kvm_pgtable_walk_flags { + KVM_PGTABLE_WALK_LEAF = BIT(0), + KVM_PGTABLE_WALK_TABLE_PRE = BIT(1), + KVM_PGTABLE_WALK_TABLE_POST = BIT(2), +}; + +typedef int (*kvm_pgtable_visitor_fn_t)(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg); + +/** + * struct kvm_pgtable_walker - Hook into a page-table walk. + * @cb: Callback function to invoke during the walk. + * @arg: Argument passed to the callback function. + * @flags: Bitwise-OR of flags to identify the entry types on which to + * invoke the callback function. + */ +struct kvm_pgtable_walker { + const kvm_pgtable_visitor_fn_t cb; + void * const arg; + const enum kvm_pgtable_walk_flags flags; +}; + +/** + * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table. + * @pgt: Uninitialised page-table structure to initialise. + * @va_bits: Maximum virtual address bits. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits); + +/** + * kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init(). + * + * The page-table is assumed to be unreachable by any hardware walkers prior + * to freeing and therefore no TLB invalidation is performed. + */ +void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt); + +/** + * kvm_pgtable_hyp_map() - Install a mapping in a hypervisor stage-1 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init(). + * @addr: Virtual address at which to place the mapping. + * @size: Size of the mapping. + * @phys: Physical address of the memory to map. + * @prot: Permissions and attributes for the mapping. + * + * The offset of @addr within a page is ignored, @size is rounded-up to + * the next page boundary and @phys is rounded-down to the previous page + * boundary. + * + * If device attributes are not explicitly requested in @prot, then the + * mapping will be normal, cacheable. Attempts to install a new mapping + * for a virtual address that is already mapped will be rejected with an + * error and a WARN(). + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, + enum kvm_pgtable_prot prot); + +/** + * kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table. + * @pgt: Uninitialised page-table structure to initialise. + * @kvm: KVM structure representing the guest virtual machine. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm); + +/** + * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * + * The page-table is assumed to be unreachable by any hardware walkers prior + * to freeing and therefore no TLB invalidation is performed. + */ +void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); + +/** + * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Intermediate physical address at which to place the mapping. + * @size: Size of the mapping. + * @phys: Physical address of the memory to map. + * @prot: Permissions and attributes for the mapping. + * @mc: Cache of pre-allocated GFP_PGTABLE_USER memory from which to + * allocate page-table pages. + * + * The offset of @addr within a page is ignored, @size is rounded-up to + * the next page boundary and @phys is rounded-down to the previous page + * boundary. + * + * If device attributes are not explicitly requested in @prot, then the + * mapping will be normal, cacheable. + * + * Note that this function will both coalesce existing table entries and split + * existing block mappings, relying on page-faults to fault back areas outside + * of the new mapping lazily. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, + u64 phys, enum kvm_pgtable_prot prot, + struct kvm_mmu_memory_cache *mc); + +/** + * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Intermediate physical address from which to remove the mapping. + * @size: Size of the mapping. + * + * The offset of @addr within a page is ignored and @size is rounded-up to + * the next page boundary. + * + * TLB invalidation is performed for each page-table entry cleared during the + * unmapping operation and the reference count for the page-table page + * containing the cleared entry is decremented, with unreferenced pages being + * freed. Unmapping a cacheable page will ensure that it is clean to the PoC if + * FWB is not supported by the CPU. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); + +/** + * kvm_pgtable_stage2_wrprotect() - Write-protect guest stage-2 address range + * without TLB invalidation. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Intermediate physical address from which to write-protect, + * @size: Size of the range. + * + * The offset of @addr within a page is ignored and @size is rounded-up to + * the next page boundary. + * + * Note that it is the caller's responsibility to invalidate the TLB after + * calling this function to ensure that the updated permissions are visible + * to the CPUs. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size); + +/** + * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Intermediate physical address to identify the page-table entry. + * + * The offset of @addr within a page is ignored. + * + * If there is a valid, leaf page-table entry used to translate @addr, then + * set the access flag in that entry. + * + * Return: The old page-table entry prior to setting the flag, 0 on failure. + */ +kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr); + +/** + * kvm_pgtable_stage2_mkold() - Clear the access flag in a page-table entry. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Intermediate physical address to identify the page-table entry. + * + * The offset of @addr within a page is ignored. + * + * If there is a valid, leaf page-table entry used to translate @addr, then + * clear the access flag in that entry. + * + * Note that it is the caller's responsibility to invalidate the TLB after + * calling this function to ensure that the updated permissions are visible + * to the CPUs. + * + * Return: The old page-table entry prior to clearing the flag, 0 on failure. + */ +kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr); + +/** + * kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a + * page-table entry. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Intermediate physical address to identify the page-table entry. + * @prot: Additional permissions to grant for the mapping. + * + * The offset of @addr within a page is ignored. + * + * If there is a valid, leaf page-table entry used to translate @addr, then + * relax the permissions in that entry according to the read, write and + * execute permissions specified by @prot. No permissions are removed, and + * TLB invalidation is performed after updating the entry. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, + enum kvm_pgtable_prot prot); + +/** + * kvm_pgtable_stage2_is_young() - Test whether a page-table entry has the + * access flag set. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Intermediate physical address to identify the page-table entry. + * + * The offset of @addr within a page is ignored. + * + * Return: True if the page-table entry has the access flag set, false otherwise. + */ +bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr); + +/** + * kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point + * of Coherency for guest stage-2 address + * range. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Intermediate physical address from which to flush. + * @size: Size of the range. + * + * The offset of @addr within a page is ignored and @size is rounded-up to + * the next page boundary. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size); + +/** + * kvm_pgtable_walk() - Walk a page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_*_init(). + * @addr: Input address for the start of the walk. + * @size: Size of the range to walk. + * @walker: Walker callback description. + * + * The offset of @addr within a page is ignored and @size is rounded-up to + * the next page boundary. + * + * The walker will walk the page-table entries corresponding to the input + * address range specified, visiting entries according to the walker flags. + * Invalid entries are treated as leaf entries. Leaf entries are reloaded + * after invoking the walker callback, allowing the walker to descend into + * a newly installed table. + * + * Returning a negative error code from the walker callback function will + * terminate the walk immediately with the same error code. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, + struct kvm_pgtable_walker *walker); + +#endif /* __ARM64_KVM_PGTABLE_H__ */ diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h index 0ddf98c3ba9f..0cd0965255d2 100644 --- a/arch/arm64/include/asm/kvm_ptrauth.h +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -60,7 +60,7 @@ .endm /* - * Both ptrauth_switch_to_guest and ptrauth_switch_to_host macros will + * Both ptrauth_switch_to_guest and ptrauth_switch_to_hyp macros will * check for the presence ARM64_HAS_ADDRESS_AUTH, which is defined as * (ARM64_HAS_ADDRESS_AUTH_ARCH || ARM64_HAS_ADDRESS_AUTH_IMP_DEF) and * then proceed ahead with the save/restore of Pointer Authentication @@ -78,7 +78,7 @@ alternative_else_nop_endif .L__skip_switch\@: .endm -.macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3 +.macro ptrauth_switch_to_hyp g_ctxt, h_ctxt, reg1, reg2, reg3 alternative_if_not ARM64_HAS_ADDRESS_AUTH b .L__skip_switch\@ alternative_else_nop_endif @@ -96,7 +96,7 @@ alternative_else_nop_endif #else /* !CONFIG_ARM64_PTR_AUTH */ .macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3 .endm -.macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3 +.macro ptrauth_switch_to_hyp g_ctxt, h_ctxt, reg1, reg2, reg3 .endm #endif /* CONFIG_ARM64_PTR_AUTH */ #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index afa722504bfd..cd61239bae8c 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -126,13 +126,18 @@ /* * Memory types available. + * + * IMPORTANT: MT_NORMAL must be index 0 since vm_get_page_prot() may 'or' in + * the MT_NORMAL_TAGGED memory type for PROT_MTE mappings. Note + * that protection_map[] only contains MT_NORMAL attributes. */ -#define MT_DEVICE_nGnRnE 0 -#define MT_DEVICE_nGnRE 1 -#define MT_DEVICE_GRE 2 -#define MT_NORMAL_NC 3 -#define MT_NORMAL 4 -#define MT_NORMAL_WT 5 +#define MT_NORMAL 0 +#define MT_NORMAL_TAGGED 1 +#define MT_NORMAL_NC 2 +#define MT_NORMAL_WT 3 +#define MT_DEVICE_nGnRnE 4 +#define MT_DEVICE_nGnRE 5 +#define MT_DEVICE_GRE 6 /* * Memory types for Stage-2 translation @@ -164,12 +169,11 @@ extern u64 vabits_actual; #define PAGE_END (_PAGE_END(vabits_actual)) -extern s64 physvirt_offset; extern s64 memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) -/* the virtual base of the kernel image (minus TEXT_OFFSET) */ +/* the virtual base of the kernel image */ extern u64 kimage_vaddr; /* the offset between the kernel virtual and physical mappings */ @@ -240,7 +244,7 @@ static inline const void *__tag_set(const void *addr, u8 tag) */ #define __is_lm_address(addr) (!(((u64)addr) & BIT(vabits_actual - 1))) -#define __lm_to_phys(addr) (((addr) + physvirt_offset)) +#define __lm_to_phys(addr) (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET) #define __kimg_to_phys(addr) ((addr) - kimage_voffset) #define __virt_to_phys_nodebug(x) ({ \ @@ -258,7 +262,7 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); #define __phys_addr_symbol(x) __pa_symbol_nodebug(x) #endif /* CONFIG_DEBUG_VIRTUAL */ -#define __phys_to_virt(x) ((unsigned long)((x) - physvirt_offset)) +#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET) #define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset)) /* diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h index 081ec8de9ea6..e3e28f7daf62 100644 --- a/arch/arm64/include/asm/mman.h +++ b/arch/arm64/include/asm/mman.h @@ -9,16 +9,53 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, unsigned long pkey __always_unused) { + unsigned long ret = 0; + if (system_supports_bti() && (prot & PROT_BTI)) - return VM_ARM64_BTI; + ret |= VM_ARM64_BTI; - return 0; + if (system_supports_mte() && (prot & PROT_MTE)) + ret |= VM_MTE; + + return ret; } #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) +static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags) +{ + /* + * Only allow MTE on anonymous mappings as these are guaranteed to be + * backed by tags-capable memory. The vm_flags may be overridden by a + * filesystem supporting MTE (RAM-based). + */ + if (system_supports_mte() && (flags & MAP_ANONYMOUS)) + return VM_MTE_ALLOWED; + + return 0; +} +#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags) + static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) { - return (vm_flags & VM_ARM64_BTI) ? __pgprot(PTE_GP) : __pgprot(0); + pteval_t prot = 0; + + if (vm_flags & VM_ARM64_BTI) + prot |= PTE_GP; + + /* + * There are two conditions required for returning a Normal Tagged + * memory type: (1) the user requested it via PROT_MTE passed to + * mmap() or mprotect() and (2) the corresponding vma supports MTE. We + * register (1) as VM_MTE in the vma->vm_flags and (2) as + * VM_MTE_ALLOWED. Note that the latter can only be set during the + * mmap() call since mprotect() does not accept MAP_* flags. + * Checking for VM_MTE only is sufficient since arch_validate_flags() + * does not permit (VM_MTE & !VM_MTE_ALLOWED). + */ + if (vm_flags & VM_MTE) + prot |= PTE_ATTRINDX(MT_NORMAL_TAGGED); + + return __pgprot(prot); } #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags) @@ -30,8 +67,21 @@ static inline bool arch_validate_prot(unsigned long prot, if (system_supports_bti()) supported |= PROT_BTI; + if (system_supports_mte()) + supported |= PROT_MTE; + return (prot & ~supported) == 0; } #define arch_validate_prot(prot, addr) arch_validate_prot(prot, addr) +static inline bool arch_validate_flags(unsigned long vm_flags) +{ + if (!system_supports_mte()) + return true; + + /* only allow VM_MTE if VM_MTE_ALLOWED has been set previously */ + return !(vm_flags & VM_MTE) || (vm_flags & VM_MTE_ALLOWED); +} +#define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags) + #endif /* ! __ASM_MMAN_H__ */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index a7a5ecaa2e83..b2e91c187e2a 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -17,11 +17,14 @@ #ifndef __ASSEMBLY__ +#include <linux/refcount.h> + typedef struct { atomic64_t id; #ifdef CONFIG_COMPAT void *sigpage; #endif + refcount_t pinned; void *vdso; unsigned long flags; } mm_context_t; @@ -45,7 +48,6 @@ struct bp_hardening_data { bp_hardening_cb_t fn; }; -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) @@ -57,21 +59,13 @@ static inline void arm64_apply_bp_hardening(void) { struct bp_hardening_data *d; - if (!cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) + if (!cpus_have_const_cap(ARM64_SPECTRE_V2)) return; d = arm64_get_bp_hardening_data(); if (d->fn) d->fn(); } -#else -static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) -{ - return NULL; -} - -static inline void arm64_apply_bp_hardening(void) { } -#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ extern void arm64_memblock_init(void); extern void paging_init(void); diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index f2d7537d6f83..0672236e1aea 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -177,7 +177,13 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp) #define destroy_context(mm) do { } while(0) void check_and_switch_context(struct mm_struct *mm); -#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) +static inline int +init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + atomic64_set(&mm->context.id, 0); + refcount_set(&mm->context.pinned, 0); + return 0; +} #ifdef CONFIG_ARM64_SW_TTBR0_PAN static inline void update_saved_ttbr0(struct task_struct *tsk, @@ -248,6 +254,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, void verify_cpu_asid_bits(void); void post_ttbr_update_workaround(void); +unsigned long arm64_mm_context_get(struct mm_struct *mm); +void arm64_mm_context_put(struct mm_struct *mm); + #endif /* !__ASSEMBLY__ */ #endif /* !__ASM_MMU_CONTEXT_H */ diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h new file mode 100644 index 000000000000..691f15af788e --- /dev/null +++ b/arch/arm64/include/asm/module.lds.h @@ -0,0 +1,7 @@ +#ifdef CONFIG_ARM64_MODULE_PLTS +SECTIONS { + .plt (NOLOAD) : { BYTE(0) } + .init.plt (NOLOAD) : { BYTE(0) } + .text.ftrace_trampoline (NOLOAD) : { BYTE(0) } +} +#endif diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h new file mode 100644 index 000000000000..1c99fcadb58c --- /dev/null +++ b/arch/arm64/include/asm/mte.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 ARM Ltd. + */ +#ifndef __ASM_MTE_H +#define __ASM_MTE_H + +#define MTE_GRANULE_SIZE UL(16) +#define MTE_GRANULE_MASK (~(MTE_GRANULE_SIZE - 1)) +#define MTE_TAG_SHIFT 56 +#define MTE_TAG_SIZE 4 + +#ifndef __ASSEMBLY__ + +#include <linux/page-flags.h> + +#include <asm/pgtable-types.h> + +void mte_clear_page_tags(void *addr); +unsigned long mte_copy_tags_from_user(void *to, const void __user *from, + unsigned long n); +unsigned long mte_copy_tags_to_user(void __user *to, void *from, + unsigned long n); +int mte_save_tags(struct page *page); +void mte_save_page_tags(const void *page_addr, void *tag_storage); +bool mte_restore_tags(swp_entry_t entry, struct page *page); +void mte_restore_page_tags(void *page_addr, const void *tag_storage); +void mte_invalidate_tags(int type, pgoff_t offset); +void mte_invalidate_tags_area(int type); +void *mte_allocate_tag_storage(void); +void mte_free_tag_storage(char *storage); + +#ifdef CONFIG_ARM64_MTE + +/* track which pages have valid allocation tags */ +#define PG_mte_tagged PG_arch_2 + +void mte_sync_tags(pte_t *ptep, pte_t pte); +void mte_copy_page_tags(void *kto, const void *kfrom); +void flush_mte_state(void); +void mte_thread_switch(struct task_struct *next); +void mte_suspend_exit(void); +long set_mte_ctrl(struct task_struct *task, unsigned long arg); +long get_mte_ctrl(struct task_struct *task); +int mte_ptrace_copy_tags(struct task_struct *child, long request, + unsigned long addr, unsigned long data); + +#else + +/* unused if !CONFIG_ARM64_MTE, silence the compiler */ +#define PG_mte_tagged 0 + +static inline void mte_sync_tags(pte_t *ptep, pte_t pte) +{ +} +static inline void mte_copy_page_tags(void *kto, const void *kfrom) +{ +} +static inline void flush_mte_state(void) +{ +} +static inline void mte_thread_switch(struct task_struct *next) +{ +} +static inline void mte_suspend_exit(void) +{ +} +static inline long set_mte_ctrl(struct task_struct *task, unsigned long arg) +{ + return 0; +} +static inline long get_mte_ctrl(struct task_struct *task) +{ + return 0; +} +static inline int mte_ptrace_copy_tags(struct task_struct *child, + long request, unsigned long addr, + unsigned long data) +{ + return -EIO; +} + +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_MTE_H */ diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h index 626ad01e83bf..dd870390d639 100644 --- a/arch/arm64/include/asm/numa.h +++ b/arch/arm64/include/asm/numa.h @@ -25,6 +25,9 @@ const struct cpumask *cpumask_of_node(int node); /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ static inline const struct cpumask *cpumask_of_node(int node) { + if (node == NUMA_NO_NODE) + return cpu_all_mask; + return node_to_cpumask_map[node]; } #endif diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h index f99d48ecbeef..2403f7b4cdbf 100644 --- a/arch/arm64/include/asm/page-def.h +++ b/arch/arm64/include/asm/page-def.h @@ -11,13 +11,8 @@ #include <linux/const.h> /* PAGE_SHIFT determines the page size */ -/* CONT_SHIFT determines the number of pages which can be tracked together */ #define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT -#define CONT_SHIFT CONFIG_ARM64_CONT_SHIFT #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT)) -#define CONT_MASK (~(CONT_SIZE-1)) - #endif /* __ASM_PAGE_DEF_H */ diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index c01b52add377..012cffc574e8 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -15,18 +15,25 @@ #include <linux/personality.h> /* for READ_IMPLIES_EXEC */ #include <asm/pgtable-types.h> -extern void __cpu_clear_user_page(void *p, unsigned long user); -extern void __cpu_copy_user_page(void *to, const void *from, - unsigned long user); +struct page; +struct vm_area_struct; + extern void copy_page(void *to, const void *from); extern void clear_page(void *to); +void copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma); +#define __HAVE_ARCH_COPY_USER_HIGHPAGE + +void copy_highpage(struct page *to, struct page *from); +#define __HAVE_ARCH_COPY_HIGHPAGE + #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE -#define clear_user_page(addr,vaddr,pg) __cpu_clear_user_page(addr, vaddr) -#define copy_user_page(to,from,vaddr,pg) __cpu_copy_user_page(to, from, vaddr) +#define clear_user_page(page, vaddr, pg) clear_page(page) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) typedef struct page *pgtable_t; @@ -36,7 +43,7 @@ extern int pfn_valid(unsigned long); #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC +#define VM_DATA_DEFAULT_FLAGS (VM_DATA_FLAGS_TSK_EXEC | VM_MTE_ALLOWED) #include <asm-generic/getorder.h> diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h index 70b323cf8300..b33ca260e3c9 100644 --- a/arch/arm64/include/asm/pci.h +++ b/arch/arm64/include/asm/pci.h @@ -17,6 +17,7 @@ #define pcibios_assign_all_busses() \ (pci_has_flag(PCI_REASSIGN_ALL_BUS)) +#define arch_can_pci_mmap_wc() 1 #define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 extern int isa_dma_bridge_buggy; diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 0b6409b89e5e..1599e17379d8 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -19,7 +19,16 @@ static inline void set_my_cpu_offset(unsigned long off) :: "r" (off) : "memory"); } -static inline unsigned long __my_cpu_offset(void) +static inline unsigned long __hyp_my_cpu_offset(void) +{ + /* + * Non-VHE hyp code runs with preemption disabled. No need to hazard + * the register access against barrier() as in __kern_my_cpu_offset. + */ + return read_sysreg(tpidr_el2); +} + +static inline unsigned long __kern_my_cpu_offset(void) { unsigned long off; @@ -35,7 +44,12 @@ static inline unsigned long __my_cpu_offset(void) return off; } -#define __my_cpu_offset __my_cpu_offset() + +#ifdef __KVM_NVHE_HYPERVISOR__ +#define __my_cpu_offset __hyp_my_cpu_offset() +#else +#define __my_cpu_offset __kern_my_cpu_offset() +#endif #define PERCPU_RW_OPS(sz) \ static inline unsigned long __percpu_read_##sz(void *ptr) \ @@ -227,4 +241,14 @@ PERCPU_RET_OP(add, add, ldadd) #include <asm-generic/percpu.h> +/* Redefine macros for nVHE hyp under DEBUG_PREEMPT to avoid its dependencies. */ +#if defined(__KVM_NVHE_HYPERVISOR__) && defined(CONFIG_DEBUG_PREEMPT) +#undef this_cpu_ptr +#define this_cpu_ptr raw_cpu_ptr +#undef __this_cpu_read +#define __this_cpu_read raw_cpu_read +#undef __this_cpu_write +#define __this_cpu_write raw_cpu_write +#endif + #endif /* __ASM_PERCPU_H */ diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 2c2d7dbe8a02..60731f602d3e 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -236,6 +236,9 @@ #define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ #define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ +/* PMMIR_EL1.SLOTS mask */ +#define ARMV8_PMU_SLOTS_MASK 0xff + #ifdef CONFIG_PERF_EVENTS struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index d400a4d9aee2..01a96d07ae74 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -81,25 +81,15 @@ /* * Contiguous page definitions. */ -#ifdef CONFIG_ARM64_64K_PAGES -#define CONT_PTE_SHIFT (5 + PAGE_SHIFT) -#define CONT_PMD_SHIFT (5 + PMD_SHIFT) -#elif defined(CONFIG_ARM64_16K_PAGES) -#define CONT_PTE_SHIFT (7 + PAGE_SHIFT) -#define CONT_PMD_SHIFT (5 + PMD_SHIFT) -#else -#define CONT_PTE_SHIFT (4 + PAGE_SHIFT) -#define CONT_PMD_SHIFT (4 + PMD_SHIFT) -#endif - +#define CONT_PTE_SHIFT (CONFIG_ARM64_CONT_PTE_SHIFT + PAGE_SHIFT) #define CONT_PTES (1 << (CONT_PTE_SHIFT - PAGE_SHIFT)) #define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE) #define CONT_PTE_MASK (~(CONT_PTE_SIZE - 1)) + +#define CONT_PMD_SHIFT (CONFIG_ARM64_CONT_PMD_SHIFT + PMD_SHIFT) #define CONT_PMDS (1 << (CONT_PMD_SHIFT - PMD_SHIFT)) #define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE) #define CONT_PMD_MASK (~(CONT_PMD_SIZE - 1)) -/* the numerical offset of the PTE within a range of CONT_PTES */ -#define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1)) /* * Hardware page table definitions. @@ -156,7 +146,6 @@ #define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ -#define PTE_HYP_XN (_AT(pteval_t, 1) << 54) /* HYP XN */ #define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) #ifdef CONFIG_ARM64_PA_BITS_52 @@ -173,34 +162,11 @@ #define PTE_ATTRINDX_MASK (_AT(pteval_t, 7) << 2) /* - * 2nd stage PTE definitions - */ -#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ -#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ -#define PTE_S2_XN (_AT(pteval_t, 2) << 53) /* XN[1:0] */ -#define PTE_S2_SW_RESVD (_AT(pteval_t, 15) << 55) /* Reserved for SW */ - -#define PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[2:1] */ -#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ -#define PMD_S2_XN (_AT(pmdval_t, 2) << 53) /* XN[1:0] */ -#define PMD_S2_SW_RESVD (_AT(pmdval_t, 15) << 55) /* Reserved for SW */ - -#define PUD_S2_RDONLY (_AT(pudval_t, 1) << 6) /* HAP[2:1] */ -#define PUD_S2_RDWR (_AT(pudval_t, 3) << 6) /* HAP[2:1] */ -#define PUD_S2_XN (_AT(pudval_t, 2) << 53) /* XN[1:0] */ - -/* * Memory Attribute override for Stage-2 (MemAttr[3:0]) */ #define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2) /* - * EL2/HYP PTE/PMD definitions - */ -#define PMD_HYP PMD_SECT_USER -#define PTE_HYP PTE_USER - -/* * Highest possible physical address supported. */ #define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 4d867c6446c4..046be789fbb4 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -19,6 +19,13 @@ #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ +/* + * This bit indicates that the entry is present i.e. pmd_page() + * still points to a valid huge page in memory even if the pmd + * has been invalidated. + */ +#define PMD_PRESENT_INVALID (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */ + #ifndef __ASSEMBLY__ #include <asm/cpufeature.h> @@ -50,25 +57,21 @@ extern bool arm64_use_ng_mappings; #define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) #define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) #define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) +#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED)) #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) #define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) #define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) -#define _HYP_PAGE_DEFAULT _PAGE_DEFAULT #define PAGE_KERNEL __pgprot(PROT_NORMAL) +#define PAGE_KERNEL_TAGGED __pgprot(PROT_NORMAL_TAGGED) #define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) #define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) #define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN) #define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT) -#define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) -#define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) -#define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) -#define PAGE_HYP_DEVICE __pgprot(_PROT_DEFAULT | PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_HYP | PTE_HYP_XN) - #define PAGE_S2_MEMATTR(attr) \ ({ \ u64 __val; \ @@ -79,19 +82,6 @@ extern bool arm64_use_ng_mappings; __val; \ }) -#define PAGE_S2_XN \ - ({ \ - u64 __val; \ - if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) \ - __val = 0; \ - else \ - __val = PTE_S2_XN; \ - __val; \ - }) - -#define PAGE_S2 __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(NORMAL) | PTE_S2_RDONLY | PAGE_S2_XN) -#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN) - #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) /* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */ #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d5d3fbe73953..4ff12a7adcfd 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -9,6 +9,7 @@ #include <asm/proc-fns.h> #include <asm/memory.h> +#include <asm/mte.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable-prot.h> #include <asm/tlbflush.h> @@ -23,6 +24,8 @@ #define VMALLOC_START (MODULES_END) #define VMALLOC_END (- PUD_SIZE - VMEMMAP_SIZE - SZ_64K) +#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) + #define FIRST_USER_ADDRESS 0UL #ifndef __ASSEMBLY__ @@ -33,13 +36,6 @@ #include <linux/mm_types.h> #include <linux/sched.h> -extern struct page *vmemmap; - -extern void __pte_error(const char *file, int line, unsigned long val); -extern void __pmd_error(const char *file, int line, unsigned long val); -extern void __pud_error(const char *file, int line, unsigned long val); -extern void __pgd_error(const char *file, int line, unsigned long val); - #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE @@ -51,13 +47,22 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* + * Outside of a few very special situations (e.g. hibernation), we always + * use broadcast TLB invalidation instructions, therefore a spurious page + * fault on one CPU which has been handled concurrently by another CPU + * does not need to perform additional invalidation. + */ +#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0) + +/* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page)) -#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) +#define pte_ERROR(e) \ + pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e)) /* * Macros to convert between a physical address and its placement in a @@ -90,6 +95,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) #define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP)) +#define pte_tagged(pte) ((pte_val(pte) & PTE_ATTRINDX_MASK) == \ + PTE_ATTRINDX(MT_NORMAL_TAGGED)) #define pte_cont_addr_end(addr, end) \ ({ unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK; \ @@ -145,6 +152,18 @@ static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot) return pte; } +static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot) +{ + pmd_val(pmd) &= ~pgprot_val(prot); + return pmd; +} + +static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) +{ + pmd_val(pmd) |= pgprot_val(prot); + return pmd; +} + static inline pte_t pte_wrprotect(pte_t pte) { pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); @@ -284,6 +303,10 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) __sync_icache_dcache(pte); + if (system_supports_mte() && + pte_present(pte) && pte_tagged(pte) && !pte_special(pte)) + mte_sync_tags(ptep, pte); + __check_racy_pte_update(mm, ptep, pte); set_pte(ptep, pte); @@ -363,15 +386,24 @@ static inline int pmd_protnone(pmd_t pmd) } #endif +#define pmd_present_invalid(pmd) (!!(pmd_val(pmd) & PMD_PRESENT_INVALID)) + +static inline int pmd_present(pmd_t pmd) +{ + return pte_present(pmd_pte(pmd)) || pmd_present_invalid(pmd); +} + /* * THP definitions. */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) +static inline int pmd_trans_huge(pmd_t pmd) +{ + return pmd_val(pmd) && pmd_present(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -#define pmd_present(pmd) pte_present(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) #define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) @@ -381,7 +413,14 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) -#define pmd_mkinvalid(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID)) + +static inline pmd_t pmd_mkinvalid(pmd_t pmd) +{ + pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID)); + pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID)); + + return pmd; +} #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) @@ -541,7 +580,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #if CONFIG_PGTABLE_LEVELS > 2 -#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) +#define pmd_ERROR(e) \ + pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e)) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) @@ -608,7 +648,8 @@ static inline unsigned long pud_page_vaddr(pud_t pud) #if CONFIG_PGTABLE_LEVELS > 3 -#define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud)) +#define pud_ERROR(e) \ + pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e)) #define p4d_none(p4d) (!p4d_val(p4d)) #define p4d_bad(p4d) (!(p4d_val(p4d) & 2)) @@ -667,15 +708,21 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d) #endif /* CONFIG_PGTABLE_LEVELS > 3 */ -#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) +#define pgd_ERROR(e) \ + pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e)) #define pgd_set_fixmap(addr) ((pgd_t *)set_fixmap_offset(FIX_PGD, addr)) #define pgd_clear_fixmap() clear_fixmap(FIX_PGD) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { + /* + * Normal and Normal-Tagged are two different memory types and indices + * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK. + */ const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP; + PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP | + PTE_ATTRINDX_MASK; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) pte = pte_mkdirty(pte); @@ -847,6 +894,11 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(swp) ((pte_t) { (swp).val }) +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) }) +#define __swp_entry_to_pmd(swp) __pmd((swp).val) +#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ + /* * Ensure that there are not more swap files than can be encoded in the kernel * PTEs. @@ -855,6 +907,38 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, extern int kern_addr_valid(unsigned long addr); +#ifdef CONFIG_ARM64_MTE + +#define __HAVE_ARCH_PREPARE_TO_SWAP +static inline int arch_prepare_to_swap(struct page *page) +{ + if (system_supports_mte()) + return mte_save_tags(page); + return 0; +} + +#define __HAVE_ARCH_SWAP_INVALIDATE +static inline void arch_swap_invalidate_page(int type, pgoff_t offset) +{ + if (system_supports_mte()) + mte_invalidate_tags(type, offset); +} + +static inline void arch_swap_invalidate_area(int type) +{ + if (system_supports_mte()) + mte_invalidate_tags_area(type); +} + +#define __HAVE_ARCH_SWAP_RESTORE +static inline void arch_swap_restore(swp_entry_t entry, struct page *page) +{ + if (system_supports_mte() && mte_restore_tags(entry, page)) + set_bit(PG_mte_tagged, &page->flags); +} + +#endif /* CONFIG_ARM64_MTE */ + /* * On AArch64, the cache coherency is handled via the set_pte_at() function. */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 240fe5e5b720..fce8cbecd6bc 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -38,6 +38,7 @@ #include <asm/pgtable-hwdef.h> #include <asm/pointer_auth.h> #include <asm/ptrace.h> +#include <asm/spectre.h> #include <asm/types.h> /* @@ -151,6 +152,10 @@ struct thread_struct { struct ptrauth_keys_user keys_user; struct ptrauth_keys_kernel keys_kernel; #endif +#ifdef CONFIG_ARM64_MTE + u64 sctlr_tcf0; + u64 gcr_user_incl; +#endif }; static inline void arch_thread_struct_whitelist(unsigned long *offset, @@ -197,40 +202,15 @@ static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) regs->pmr_save = GIC_PRIO_IRQON; } -static inline void set_ssbs_bit(struct pt_regs *regs) -{ - regs->pstate |= PSR_SSBS_BIT; -} - -static inline void set_compat_ssbs_bit(struct pt_regs *regs) -{ - regs->pstate |= PSR_AA32_SSBS_BIT; -} - static inline void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { start_thread_common(regs, pc); regs->pstate = PSR_MODE_EL0t; - - if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) - set_ssbs_bit(regs); - + spectre_v4_enable_task_mitigation(current); regs->sp = sp; } -static inline bool is_ttbr0_addr(unsigned long addr) -{ - /* entry assembly clears tags for TTBR0 addrs */ - return addr < TASK_SIZE; -} - -static inline bool is_ttbr1_addr(unsigned long addr) -{ - /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ - return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; -} - #ifdef CONFIG_COMPAT static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) @@ -244,13 +224,23 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, regs->pstate |= PSR_AA32_E_BIT; #endif - if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) - set_compat_ssbs_bit(regs); - + spectre_v4_enable_task_mitigation(current); regs->compat_sp = sp; } #endif +static inline bool is_ttbr0_addr(unsigned long addr) +{ + /* entry assembly clears tags for TTBR0 addrs */ + return addr < TASK_SIZE; +} + +static inline bool is_ttbr1_addr(unsigned long addr) +{ + /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ + return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; +} + /* Forward declaration, a strange C thing */ struct task_struct; @@ -315,10 +305,10 @@ extern void __init minsigstksz_setup(void); #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI /* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */ -long set_tagged_addr_ctrl(unsigned long arg); -long get_tagged_addr_ctrl(void); -#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(arg) -#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl() +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg); +long get_tagged_addr_ctrl(struct task_struct *task); +#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(current, arg) +#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl(current) #endif /* diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 966ed30ed5f7..997cf8c8cd52 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -31,9 +31,21 @@ * interrupt disabling temporarily does not rely on IRQ priorities. */ #define GIC_PRIO_IRQON 0xe0 -#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) +#define __GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) +#define __GIC_PRIO_IRQOFF_NS 0xa0 #define GIC_PRIO_PSR_I_SET (1 << 4) +#define GIC_PRIO_IRQOFF \ + ({ \ + extern struct static_key_false gic_nonsecure_priorities;\ + u8 __prio = __GIC_PRIO_IRQOFF; \ + \ + if (static_branch_unlikely(&gic_nonsecure_priorities)) \ + __prio = __GIC_PRIO_IRQOFF_NS; \ + \ + __prio; \ + }) + /* Additional SPSR bits not exposed in the UABI */ #define PSR_MODE_THREAD_BIT (1 << 0) #define PSR_IL_BIT (1 << 20) diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 0eadbf933e35..2e7f529ec5a6 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -56,27 +56,15 @@ static inline void set_cpu_logical_map(int cpu, u64 hwid) struct seq_file; /* - * generate IPI list text - */ -extern void show_ipi_list(struct seq_file *p, int prec); - -/* - * Called from C code, this handles an IPI. - */ -extern void handle_IPI(int ipinr, struct pt_regs *regs); - -/* * Discover the set of possible CPUs and determine their * SMP operations. */ extern void smp_init_cpus(void); /* - * Provide a function to raise an IPI cross call on CPUs in callmap. + * Register IPI interrupts with the arch SMP code */ -extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int)); - -extern void (*__smp_cross_call)(const struct cpumask *, unsigned int); +extern void set_smp_ipi_range(int ipi_base, int nr_ipi); /* * Called from the secondary holding pen, this is the secondary CPU entry point. diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h new file mode 100644 index 000000000000..fcdfbce302bd --- /dev/null +++ b/arch/arm64/include/asm/spectre.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Interface for managing mitigations for Spectre vulnerabilities. + * + * Copyright (C) 2020 Google LLC + * Author: Will Deacon <will@kernel.org> + */ + +#ifndef __ASM_SPECTRE_H +#define __ASM_SPECTRE_H + +#include <asm/cpufeature.h> + +/* Watch out, ordering is important here. */ +enum mitigation_state { + SPECTRE_UNAFFECTED, + SPECTRE_MITIGATED, + SPECTRE_VULNERABLE, +}; + +struct task_struct; + +enum mitigation_state arm64_get_spectre_v2_state(void); +bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope); +void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused); + +enum mitigation_state arm64_get_spectre_v4_state(void); +bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope); +void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused); +void spectre_v4_enable_task_mitigation(struct task_struct *tsk); + +#endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index fc7613023c19..eb29b1fe8255 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -63,7 +63,7 @@ struct stackframe { extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, - int (*fn)(struct stackframe *, void *), void *data); + bool (*fn)(void *, unsigned long), void *data); extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, const char *loglvl); diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h index 996bf98f0cab..fe341a6578c3 100644 --- a/arch/arm64/include/asm/stage2_pgtable.h +++ b/arch/arm64/include/asm/stage2_pgtable.h @@ -8,7 +8,6 @@ #ifndef __ARM64_S2_PGTABLE_H_ #define __ARM64_S2_PGTABLE_H_ -#include <linux/hugetlb.h> #include <linux/pgtable.h> /* @@ -37,217 +36,12 @@ #define stage2_pgdir_mask(kvm) ~(stage2_pgdir_size(kvm) - 1) /* - * The number of PTRS across all concatenated stage2 tables given by the - * number of bits resolved at the initial level. - * If we force more levels than necessary, we may have (stage2_pgdir_shift > IPA), - * in which case, stage2_pgd_ptrs will have one entry. - */ -#define pgd_ptrs_shift(ipa, pgdir_shift) \ - ((ipa) > (pgdir_shift) ? ((ipa) - (pgdir_shift)) : 0) -#define __s2_pgd_ptrs(ipa, lvls) \ - (1 << (pgd_ptrs_shift((ipa), pt_levels_pgdir_shift(lvls)))) -#define __s2_pgd_size(ipa, lvls) (__s2_pgd_ptrs((ipa), (lvls)) * sizeof(pgd_t)) - -#define stage2_pgd_ptrs(kvm) __s2_pgd_ptrs(kvm_phys_shift(kvm), kvm_stage2_levels(kvm)) -#define stage2_pgd_size(kvm) __s2_pgd_size(kvm_phys_shift(kvm), kvm_stage2_levels(kvm)) - -/* * kvm_mmmu_cache_min_pages() is the number of pages required to install * a stage-2 translation. We pre-allocate the entry level page table at * the VM creation. */ #define kvm_mmu_cache_min_pages(kvm) (kvm_stage2_levels(kvm) - 1) -/* Stage2 PUD definitions when the level is present */ -static inline bool kvm_stage2_has_pud(struct kvm *kvm) -{ - return (CONFIG_PGTABLE_LEVELS > 3) && (kvm_stage2_levels(kvm) > 3); -} - -#define S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) -#define S2_PUD_SIZE (1UL << S2_PUD_SHIFT) -#define S2_PUD_MASK (~(S2_PUD_SIZE - 1)) - -#define stage2_pgd_none(kvm, pgd) pgd_none(pgd) -#define stage2_pgd_clear(kvm, pgd) pgd_clear(pgd) -#define stage2_pgd_present(kvm, pgd) pgd_present(pgd) -#define stage2_pgd_populate(kvm, pgd, p4d) pgd_populate(NULL, pgd, p4d) - -static inline p4d_t *stage2_p4d_offset(struct kvm *kvm, - pgd_t *pgd, unsigned long address) -{ - return p4d_offset(pgd, address); -} - -static inline void stage2_p4d_free(struct kvm *kvm, p4d_t *p4d) -{ -} - -static inline bool stage2_p4d_table_empty(struct kvm *kvm, p4d_t *p4dp) -{ - return false; -} - -static inline phys_addr_t stage2_p4d_addr_end(struct kvm *kvm, - phys_addr_t addr, phys_addr_t end) -{ - return end; -} - -static inline bool stage2_p4d_none(struct kvm *kvm, p4d_t p4d) -{ - if (kvm_stage2_has_pud(kvm)) - return p4d_none(p4d); - else - return 0; -} - -static inline void stage2_p4d_clear(struct kvm *kvm, p4d_t *p4dp) -{ - if (kvm_stage2_has_pud(kvm)) - p4d_clear(p4dp); -} - -static inline bool stage2_p4d_present(struct kvm *kvm, p4d_t p4d) -{ - if (kvm_stage2_has_pud(kvm)) - return p4d_present(p4d); - else - return 1; -} - -static inline void stage2_p4d_populate(struct kvm *kvm, p4d_t *p4d, pud_t *pud) -{ - if (kvm_stage2_has_pud(kvm)) - p4d_populate(NULL, p4d, pud); -} - -static inline pud_t *stage2_pud_offset(struct kvm *kvm, - p4d_t *p4d, unsigned long address) -{ - if (kvm_stage2_has_pud(kvm)) - return pud_offset(p4d, address); - else - return (pud_t *)p4d; -} - -static inline void stage2_pud_free(struct kvm *kvm, pud_t *pud) -{ - if (kvm_stage2_has_pud(kvm)) - free_page((unsigned long)pud); -} - -static inline bool stage2_pud_table_empty(struct kvm *kvm, pud_t *pudp) -{ - if (kvm_stage2_has_pud(kvm)) - return kvm_page_empty(pudp); - else - return false; -} - -static inline phys_addr_t -stage2_pud_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) -{ - if (kvm_stage2_has_pud(kvm)) { - phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK; - - return (boundary - 1 < end - 1) ? boundary : end; - } else { - return end; - } -} - -/* Stage2 PMD definitions when the level is present */ -static inline bool kvm_stage2_has_pmd(struct kvm *kvm) -{ - return (CONFIG_PGTABLE_LEVELS > 2) && (kvm_stage2_levels(kvm) > 2); -} - -#define S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) -#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT) -#define S2_PMD_MASK (~(S2_PMD_SIZE - 1)) - -static inline bool stage2_pud_none(struct kvm *kvm, pud_t pud) -{ - if (kvm_stage2_has_pmd(kvm)) - return pud_none(pud); - else - return 0; -} - -static inline void stage2_pud_clear(struct kvm *kvm, pud_t *pud) -{ - if (kvm_stage2_has_pmd(kvm)) - pud_clear(pud); -} - -static inline bool stage2_pud_present(struct kvm *kvm, pud_t pud) -{ - if (kvm_stage2_has_pmd(kvm)) - return pud_present(pud); - else - return 1; -} - -static inline void stage2_pud_populate(struct kvm *kvm, pud_t *pud, pmd_t *pmd) -{ - if (kvm_stage2_has_pmd(kvm)) - pud_populate(NULL, pud, pmd); -} - -static inline pmd_t *stage2_pmd_offset(struct kvm *kvm, - pud_t *pud, unsigned long address) -{ - if (kvm_stage2_has_pmd(kvm)) - return pmd_offset(pud, address); - else - return (pmd_t *)pud; -} - -static inline void stage2_pmd_free(struct kvm *kvm, pmd_t *pmd) -{ - if (kvm_stage2_has_pmd(kvm)) - free_page((unsigned long)pmd); -} - -static inline bool stage2_pud_huge(struct kvm *kvm, pud_t pud) -{ - if (kvm_stage2_has_pmd(kvm)) - return pud_huge(pud); - else - return 0; -} - -static inline bool stage2_pmd_table_empty(struct kvm *kvm, pmd_t *pmdp) -{ - if (kvm_stage2_has_pmd(kvm)) - return kvm_page_empty(pmdp); - else - return 0; -} - -static inline phys_addr_t -stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) -{ - if (kvm_stage2_has_pmd(kvm)) { - phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK; - - return (boundary - 1 < end - 1) ? boundary : end; - } else { - return end; - } -} - -static inline bool stage2_pte_table_empty(struct kvm *kvm, pte_t *ptep) -{ - return kvm_page_empty(ptep); -} - -static inline unsigned long stage2_pgd_index(struct kvm *kvm, phys_addr_t addr) -{ - return (((addr) >> stage2_pgdir_shift(kvm)) & (stage2_pgd_ptrs(kvm) - 1)); -} - static inline phys_addr_t stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) { @@ -256,13 +50,4 @@ stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) return (boundary - 1 < end - 1) ? boundary : end; } -/* - * Level values for the ARMv8.4-TTL extension, mapping PUD/PMD/PTE and - * the architectural page-table level. - */ -#define S2_NO_LEVEL_HINT 0 -#define S2_PUD_LEVEL 1 -#define S2_PMD_LEVEL 2 -#define S2_PTE_LEVEL 3 - #endif /* __ARM64_S2_PGTABLE_H_ */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 554a7e8ecb07..e2ef4c2edf06 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -91,10 +91,12 @@ #define PSTATE_PAN pstate_field(0, 4) #define PSTATE_UAO pstate_field(0, 3) #define PSTATE_SSBS pstate_field(3, 1) +#define PSTATE_TCO pstate_field(3, 4) #define SET_PSTATE_PAN(x) __emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift)) #define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift)) #define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_TCO(x) __emit_inst(0xd500401f | PSTATE_TCO | ((!!x) << PSTATE_Imm_shift)) #define __SYS_BARRIER_INSN(CRm, op2, Rt) \ __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f)) @@ -181,6 +183,8 @@ #define SYS_SCTLR_EL1 sys_reg(3, 0, 1, 0, 0) #define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) #define SYS_CPACR_EL1 sys_reg(3, 0, 1, 0, 2) +#define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) +#define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6) #define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0) @@ -218,6 +222,8 @@ #define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3) #define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0) #define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1) +#define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0) +#define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1) #define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0) #define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) @@ -321,6 +327,8 @@ #define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) #define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) +#define SYS_PMMIR_EL1 sys_reg(3, 0, 9, 14, 6) + #define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0) #define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0) @@ -364,10 +372,13 @@ #define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1) #define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4) +#define SYS_SCXTNUM_EL1 sys_reg(3, 0, 13, 0, 7) + #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) #define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1) +#define SYS_GMID_EL1 sys_reg(3, 1, 0, 0, 4) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) #define SYS_CSSELR_EL1 sys_reg(3, 2, 0, 0, 0) @@ -395,6 +406,8 @@ #define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2) #define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3) +#define SYS_SCXTNUM_EL0 sys_reg(3, 3, 13, 0, 7) + /* Definitions for system register interface to AMU for ARMv8.4 onwards */ #define SYS_AM_EL0(crm, op2) sys_reg(3, 3, 13, (crm), (op2)) #define SYS_AMCR_EL0 SYS_AM_EL0(2, 0) @@ -460,6 +473,7 @@ #define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0) #define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3) #define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0) +#define SYS_TFSR_EL2 sys_reg(3, 4, 5, 6, 0) #define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0) #define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1) @@ -516,6 +530,7 @@ #define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0) #define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1) #define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0) +#define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0) #define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0) #define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0) #define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0) @@ -531,6 +546,15 @@ /* Common SCTLR_ELx flags. */ #define SCTLR_ELx_DSSBS (BIT(44)) +#define SCTLR_ELx_ATA (BIT(43)) + +#define SCTLR_ELx_TCF_SHIFT 40 +#define SCTLR_ELx_TCF_NONE (UL(0x0) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_SYNC (UL(0x1) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_ASYNC (UL(0x2) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_MASK (UL(0x3) << SCTLR_ELx_TCF_SHIFT) + +#define SCTLR_ELx_ITFSB (BIT(37)) #define SCTLR_ELx_ENIA (BIT(31)) #define SCTLR_ELx_ENIB (BIT(30)) #define SCTLR_ELx_ENDA (BIT(27)) @@ -559,6 +583,14 @@ #endif /* SCTLR_EL1 specific flags. */ +#define SCTLR_EL1_ATA0 (BIT(42)) + +#define SCTLR_EL1_TCF0_SHIFT 38 +#define SCTLR_EL1_TCF0_NONE (UL(0x0) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_SYNC (UL(0x1) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_ASYNC (UL(0x2) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_MASK (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) + #define SCTLR_EL1_BT1 (BIT(36)) #define SCTLR_EL1_BT0 (BIT(35)) #define SCTLR_EL1_UCI (BIT(26)) @@ -587,6 +619,7 @@ SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\ SCTLR_EL1_DZE | SCTLR_EL1_UCT |\ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\ + SCTLR_ELx_ITFSB| SCTLR_ELx_ATA | SCTLR_EL1_ATA0 |\ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1) /* MAIR_ELx memory attributes (used by Linux) */ @@ -595,6 +628,7 @@ #define MAIR_ATTR_DEVICE_GRE UL(0x0c) #define MAIR_ATTR_NORMAL_NC UL(0x44) #define MAIR_ATTR_NORMAL_WT UL(0xbb) +#define MAIR_ATTR_NORMAL_TAGGED UL(0xf0) #define MAIR_ATTR_NORMAL UL(0xff) #define MAIR_ATTR_MASK UL(0xff) @@ -636,14 +670,22 @@ #define ID_AA64ISAR1_APA_SHIFT 4 #define ID_AA64ISAR1_DPB_SHIFT 0 -#define ID_AA64ISAR1_APA_NI 0x0 -#define ID_AA64ISAR1_APA_ARCHITECTED 0x1 -#define ID_AA64ISAR1_API_NI 0x0 -#define ID_AA64ISAR1_API_IMP_DEF 0x1 -#define ID_AA64ISAR1_GPA_NI 0x0 -#define ID_AA64ISAR1_GPA_ARCHITECTED 0x1 -#define ID_AA64ISAR1_GPI_NI 0x0 -#define ID_AA64ISAR1_GPI_IMP_DEF 0x1 +#define ID_AA64ISAR1_APA_NI 0x0 +#define ID_AA64ISAR1_APA_ARCHITECTED 0x1 +#define ID_AA64ISAR1_APA_ARCH_EPAC 0x2 +#define ID_AA64ISAR1_APA_ARCH_EPAC2 0x3 +#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC 0x4 +#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC_CMB 0x5 +#define ID_AA64ISAR1_API_NI 0x0 +#define ID_AA64ISAR1_API_IMP_DEF 0x1 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC 0x2 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2 0x3 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC 0x4 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC_CMB 0x5 +#define ID_AA64ISAR1_GPA_NI 0x0 +#define ID_AA64ISAR1_GPA_ARCHITECTED 0x1 +#define ID_AA64ISAR1_GPI_NI 0x0 +#define ID_AA64ISAR1_GPI_IMP_DEF 0x1 /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 @@ -686,6 +728,10 @@ #define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 #define ID_AA64PFR1_BT_BTI 0x1 +#define ID_AA64PFR1_MTE_NI 0x0 +#define ID_AA64PFR1_MTE_EL0 0x1 +#define ID_AA64PFR1_MTE 0x2 + /* id_aa64zfr0 */ #define ID_AA64ZFR0_F64MM_SHIFT 56 #define ID_AA64ZFR0_F32MM_SHIFT 52 @@ -920,6 +966,28 @@ #define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ #define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) +/* TCR EL1 Bit Definitions */ +#define SYS_TCR_EL1_TCMA1 (BIT(58)) +#define SYS_TCR_EL1_TCMA0 (BIT(57)) + +/* GCR_EL1 Definitions */ +#define SYS_GCR_EL1_RRND (BIT(16)) +#define SYS_GCR_EL1_EXCL_MASK 0xffffUL + +/* RGSR_EL1 Definitions */ +#define SYS_RGSR_EL1_TAG_MASK 0xfUL +#define SYS_RGSR_EL1_SEED_SHIFT 8 +#define SYS_RGSR_EL1_SEED_MASK 0xffffUL + +/* GMID_EL1 field definitions */ +#define SYS_GMID_EL1_BS_SHIFT 0 +#define SYS_GMID_EL1_BS_SIZE 4 + +/* TFSR{,E0}_EL1 bit definitions */ +#define SYS_TFSR_EL1_TF0_SHIFT 0 +#define SYS_TFSR_EL1_TF1_SHIFT 1 +#define SYS_TFSR_EL1_TF0 (UL(1) << SYS_TFSR_EL1_TF0_SHIFT) +#define SYS_TFSR_EL1_TF1 (UK(2) << SYS_TFSR_EL1_TF1_SHIFT) /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ #define SYS_MPIDR_SAFE_VAL (BIT(31)) @@ -943,6 +1011,7 @@ #include <linux/build_bug.h> #include <linux/types.h> +#include <asm/alternative.h> #define __DEFINE_MRS_MSR_S_REGNUM \ " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ @@ -1024,6 +1093,21 @@ write_sysreg(__scs_new, sysreg); \ } while (0) +#define sysreg_clear_set_s(sysreg, clear, set) do { \ + u64 __scs_val = read_sysreg_s(sysreg); \ + u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \ + if (__scs_new != __scs_val) \ + write_sysreg_s(__scs_new, sysreg); \ +} while (0) + +#define read_sysreg_par() ({ \ + u64 par; \ + asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \ + par = read_sysreg(par_el1); \ + asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \ + par; \ +}) + #endif #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 5e784e16ee89..1fbab854a51b 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -67,6 +67,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ #define TIF_FSCHECK 5 /* Check FS is USER_DS on return */ +#define TIF_MTE_ASYNC_FAULT 6 /* MTE Asynchronous Tag Check Fault */ #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ @@ -96,10 +97,11 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_SVE (1 << TIF_SVE) +#define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ - _TIF_UPROBE | _TIF_FSCHECK) + _TIF_UPROBE | _TIF_FSCHECK | _TIF_MTE_ASYNC_FAULT) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index e042f6527981..11a465243f66 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -26,7 +26,9 @@ void topology_scale_freq_tick(void); #endif /* CONFIG_ARM64_AMU_EXTN */ /* Replace task scheduler's default frequency-invariant accounting */ +#define arch_set_freq_scale topology_set_freq_scale #define arch_scale_freq_capacity topology_get_freq_scale +#define arch_scale_freq_invariant topology_scale_freq_invariant /* Replace task scheduler's default cpu-invariant accounting */ #define arch_scale_cpu_capacity topology_get_cpu_scale diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index cee5928e1b7d..d96dc2c7c09d 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -24,7 +24,7 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); -void force_signal_inject(int signal, int code, unsigned long address); +void force_signal_inject(int signal, int code, unsigned long address, unsigned int err); void arm64_notify_segfault(unsigned long addr); void arm64_force_sig_fault(int signo, int code, void __user *addr, const char *str); void arm64_force_sig_mceerr(int code, void __user *addr, short lsb, const char *str); diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 3b859596840d..b3b2019f8d16 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 440 +#define __NR_compat_syscalls 441 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 734860ac7cf9..107f08e03b9f 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -53,7 +53,7 @@ __SYSCALL(__NR_lseek, compat_sys_lseek) #define __NR_getpid 20 __SYSCALL(__NR_getpid, sys_getpid) #define __NR_mount 21 -__SYSCALL(__NR_mount, compat_sys_mount) +__SYSCALL(__NR_mount, sys_mount) /* 22 was sys_umount */ __SYSCALL(22, sys_ni_syscall) #define __NR_setuid 23 @@ -301,9 +301,9 @@ __SYSCALL(__NR_flock, sys_flock) #define __NR_msync 144 __SYSCALL(__NR_msync, sys_msync) #define __NR_readv 145 -__SYSCALL(__NR_readv, compat_sys_readv) +__SYSCALL(__NR_readv, sys_readv) #define __NR_writev 146 -__SYSCALL(__NR_writev, compat_sys_writev) +__SYSCALL(__NR_writev, sys_writev) #define __NR_getsid 147 __SYSCALL(__NR_getsid, sys_getsid) #define __NR_fdatasync 148 @@ -697,7 +697,7 @@ __SYSCALL(__NR_sync_file_range2, compat_sys_aarch32_sync_file_range2) #define __NR_tee 342 __SYSCALL(__NR_tee, sys_tee) #define __NR_vmsplice 343 -__SYSCALL(__NR_vmsplice, compat_sys_vmsplice) +__SYSCALL(__NR_vmsplice, sys_vmsplice) #define __NR_move_pages 344 __SYSCALL(__NR_move_pages, compat_sys_move_pages) #define __NR_getcpu 345 @@ -763,9 +763,9 @@ __SYSCALL(__NR_sendmmsg, compat_sys_sendmmsg) #define __NR_setns 375 __SYSCALL(__NR_setns, sys_setns) #define __NR_process_vm_readv 376 -__SYSCALL(__NR_process_vm_readv, compat_sys_process_vm_readv) +__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv) #define __NR_process_vm_writev 377 -__SYSCALL(__NR_process_vm_writev, compat_sys_process_vm_writev) +__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev) #define __NR_kcmp 378 __SYSCALL(__NR_kcmp, sys_kcmp) #define __NR_finit_module 379 @@ -887,6 +887,8 @@ __SYSCALL(__NR_openat2, sys_openat2) __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) #define __NR_faccessat2 439 __SYSCALL(__NR_faccessat2, sys_faccessat2) +#define __NR_process_madvise 440 +__SYSCALL(__NR_process_madvise, sys_process_madvise) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 09977acc007d..6069be50baf9 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -86,13 +86,12 @@ static inline bool is_kernel_in_hyp_mode(void) static __always_inline bool has_vhe(void) { /* - * The following macros are defined for code specic to VHE/nVHE. - * If has_vhe() is inlined into those compilation units, it can - * be determined statically. Otherwise fall back to caps. + * Code only run in VHE/NVHE hyp context can assume VHE is present or + * absent. Otherwise fall back to caps. */ - if (__is_defined(__KVM_VHE_HYPERVISOR__)) + if (is_vhe_hyp_code()) return true; - else if (__is_defined(__KVM_NVHE_HYPERVISOR__)) + else if (is_nvhe_hyp_code()) return false; else return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN); |