diff options
Diffstat (limited to 'arch/arm64')
78 files changed, 1853 insertions, 1461 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1652a9800ebe..340e61199057 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -101,6 +101,7 @@ config ARM64 select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANTS_NO_INSTR + select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES select ARCH_HAS_UBSAN_SANITIZE_ALL select ARM_AMBA select ARM_ARCH_TIMER @@ -126,6 +127,7 @@ config ARM64 select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_IDLE_POLL_SETUP + select GENERIC_IOREMAP select GENERIC_IRQ_IPI select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW @@ -188,6 +190,7 @@ config ARM64 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS + select HAVE_IOREMAP_PROT select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KVM select HAVE_NMI @@ -226,6 +229,7 @@ config ARM64 select THREAD_INFO_IN_TASK select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD select TRACE_IRQFLAGS_SUPPORT + select TRACE_IRQFLAGS_NMI_SUPPORT help ARM 64-bit (AArch64) Linux support. @@ -503,6 +507,22 @@ config ARM64_ERRATUM_834220 If unsure, say Y. +config ARM64_ERRATUM_1742098 + bool "Cortex-A57/A72: 1742098: ELR recorded incorrectly on interrupt taken between cryptographic instructions in a sequence" + depends on COMPAT + default y + help + This option removes the AES hwcap for aarch32 user-space to + workaround erratum 1742098 on Cortex-A57 and Cortex-A72. + + Affected parts may corrupt the AES state if an interrupt is + taken between a pair of AES instructions. These instructions + are only present if the cryptography extensions are present. + All software should have a fallback implementation for CPUs + that don't implement the cryptography extensions. + + If unsure, say Y. + config ARM64_ERRATUM_845719 bool "Cortex-A53: 845719: a load might read incorrect data" depends on COMPAT @@ -821,6 +841,23 @@ config ARM64_ERRATUM_2224489 If unsure, say Y. +config ARM64_ERRATUM_2441009 + bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI" + default y + select ARM64_WORKAROUND_REPEAT_TLBI + help + This option adds a workaround for ARM Cortex-A510 erratum #2441009. + + Under very rare circumstances, affected Cortex-A510 CPUs + may not handle a race between a break-before-make sequence on one + CPU, and another CPU accessing the same page. This could allow a + store to a page that has been unmapped. + + Work around this by adding the affected CPUs to the list that needs + TLB sequences to be done twice. + + If unsure, say Y. + config ARM64_ERRATUM_2064142 bool "Cortex-A510: 2064142: workaround TRBE register writes while disabled" depends on CORESIGHT_TRBE diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index ebe80faab883..a0e3dedd2883 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -16,7 +16,7 @@ OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S -targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo +targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo Image.zst $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) @@ -35,3 +35,6 @@ $(obj)/Image.lzma: $(obj)/Image FORCE $(obj)/Image.lzo: $(obj)/Image FORCE $(call if_changed,lzo) + +$(obj)/Image.zst: $(obj)/Image FORCE + $(call if_changed,zstd) diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h index c39f2437e08e..980d1dd8e1a3 100644 --- a/arch/arm64/include/asm/asm-extable.h +++ b/arch/arm64/include/asm/asm-extable.h @@ -2,12 +2,27 @@ #ifndef __ASM_ASM_EXTABLE_H #define __ASM_ASM_EXTABLE_H +#include <linux/bits.h> +#include <asm/gpr-num.h> + #define EX_TYPE_NONE 0 -#define EX_TYPE_FIXUP 1 -#define EX_TYPE_BPF 2 -#define EX_TYPE_UACCESS_ERR_ZERO 3 +#define EX_TYPE_BPF 1 +#define EX_TYPE_UACCESS_ERR_ZERO 2 +#define EX_TYPE_KACCESS_ERR_ZERO 3 #define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4 +/* Data fields for EX_TYPE_UACCESS_ERR_ZERO */ +#define EX_DATA_REG_ERR_SHIFT 0 +#define EX_DATA_REG_ERR GENMASK(4, 0) +#define EX_DATA_REG_ZERO_SHIFT 5 +#define EX_DATA_REG_ZERO GENMASK(9, 5) + +/* Data fields for EX_TYPE_LOAD_UNALIGNED_ZEROPAD */ +#define EX_DATA_REG_DATA_SHIFT 0 +#define EX_DATA_REG_DATA GENMASK(4, 0) +#define EX_DATA_REG_ADDR_SHIFT 5 +#define EX_DATA_REG_ADDR GENMASK(9, 5) + #ifdef __ASSEMBLY__ #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ @@ -19,31 +34,45 @@ .short (data); \ .popsection; +#define EX_DATA_REG(reg, gpr) \ + (.L__gpr_num_##gpr << EX_DATA_REG_##reg##_SHIFT) + +#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \ + __ASM_EXTABLE_RAW(insn, fixup, \ + EX_TYPE_UACCESS_ERR_ZERO, \ + ( \ + EX_DATA_REG(ERR, err) | \ + EX_DATA_REG(ZERO, zero) \ + )) + +#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr) + +#define _ASM_EXTABLE_UACCESS(insn, fixup) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, wzr, wzr) + /* - * Create an exception table entry for `insn`, which will branch to `fixup` + * Create an exception table entry for uaccess `insn`, which will branch to `fixup` * when an unhandled fault is taken. */ - .macro _asm_extable, insn, fixup - __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0) + .macro _asm_extable_uaccess, insn, fixup + _ASM_EXTABLE_UACCESS(\insn, \fixup) .endm /* * Create an exception table entry for `insn` if `fixup` is provided. Otherwise * do nothing. */ - .macro _cond_extable, insn, fixup - .ifnc \fixup, - _asm_extable \insn, \fixup + .macro _cond_uaccess_extable, insn, fixup + .ifnc \fixup, + _asm_extable_uaccess \insn, \fixup .endif .endm #else /* __ASSEMBLY__ */ -#include <linux/bits.h> #include <linux/stringify.h> -#include <asm/gpr-num.h> - #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ ".pushsection __ex_table, \"a\"\n" \ ".align 2\n" \ @@ -53,14 +82,6 @@ ".short (" data ")\n" \ ".popsection\n" -#define _ASM_EXTABLE(insn, fixup) \ - __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0") - -#define EX_DATA_REG_ERR_SHIFT 0 -#define EX_DATA_REG_ERR GENMASK(4, 0) -#define EX_DATA_REG_ZERO_SHIFT 5 -#define EX_DATA_REG_ZERO GENMASK(9, 5) - #define EX_DATA_REG(reg, gpr) \ "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")" @@ -73,13 +94,23 @@ EX_DATA_REG(ZERO, zero) \ ")") +#define _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, zero) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_KACCESS_ERR_ZERO), \ + "(" \ + EX_DATA_REG(ERR, err) " | " \ + EX_DATA_REG(ZERO, zero) \ + ")") + #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr) -#define EX_DATA_REG_DATA_SHIFT 0 -#define EX_DATA_REG_DATA GENMASK(4, 0) -#define EX_DATA_REG_ADDR_SHIFT 5 -#define EX_DATA_REG_ADDR GENMASK(9, 5) +#define _ASM_EXTABLE_UACCESS(insn, fixup) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, wzr, wzr) + +#define _ASM_EXTABLE_KACCESS_ERR(insn, fixup, err) \ + _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, wzr) #define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \ __DEFINE_ASM_GPR_NUMS \ diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index 0557af834e03..75b211c98dea 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -61,7 +61,7 @@ alternative_else_nop_endif #define USER(l, x...) \ 9999: x; \ - _asm_extable 9999b, l + _asm_extable_uaccess 9999b, l /* * Generate the assembly for LDTR/STTR with exception table entries. @@ -73,8 +73,8 @@ alternative_else_nop_endif 8889: ldtr \reg2, [\addr, #8]; add \addr, \addr, \post_inc; - _asm_extable 8888b,\l; - _asm_extable 8889b,\l; + _asm_extable_uaccess 8888b, \l; + _asm_extable_uaccess 8889b, \l; .endm .macro user_stp l, reg1, reg2, addr, post_inc @@ -82,14 +82,14 @@ alternative_else_nop_endif 8889: sttr \reg2, [\addr, #8]; add \addr, \addr, \post_inc; - _asm_extable 8888b,\l; - _asm_extable 8889b,\l; + _asm_extable_uaccess 8888b,\l; + _asm_extable_uaccess 8889b,\l; .endm .macro user_ldst l, inst, reg, addr, post_inc 8888: \inst \reg, [\addr]; add \addr, \addr, \post_inc; - _asm_extable 8888b,\l; + _asm_extable_uaccess 8888b, \l; .endm #endif diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h index ead62f7dd269..13ecc79854ee 100644 --- a/arch/arm64/include/asm/asm_pointer_auth.h +++ b/arch/arm64/include/asm/asm_pointer_auth.h @@ -59,9 +59,9 @@ alternative_else_nop_endif .macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3 mrs \tmp1, id_aa64isar1_el1 - ubfx \tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8 + ubfx \tmp1, \tmp1, #ID_AA64ISAR1_EL1_APA_SHIFT, #8 mrs_s \tmp2, SYS_ID_AA64ISAR2_EL1 - ubfx \tmp2, \tmp2, #ID_AA64ISAR2_APA3_SHIFT, #4 + ubfx \tmp2, \tmp2, #ID_AA64ISAR2_EL1_APA3_SHIFT, #4 orr \tmp1, \tmp1, \tmp2 cbz \tmp1, .Lno_addr_auth\@ mov_q \tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 8c5a61aeaf8e..5846145be523 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -360,6 +360,20 @@ alternative_cb_end .endm /* + * idmap_get_t0sz - get the T0SZ value needed to cover the ID map + * + * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the + * entire ID map region can be mapped. As T0SZ == (64 - #bits used), + * this number conveniently equals the number of leading zeroes in + * the physical address of _end. + */ + .macro idmap_get_t0sz, reg + adrp \reg, _end + orr \reg, \reg, #(1 << VA_BITS_MIN) - 1 + clz \reg, \reg + .endm + +/* * tcr_compute_pa_size - set TCR.(I)PS to the highest supported * ID_AA64MMFR0_EL1.PARange value * @@ -423,7 +437,7 @@ alternative_endif b.lo .Ldcache_op\@ dsb \domain - _cond_extable .Ldcache_op\@, \fixup + _cond_uaccess_extable .Ldcache_op\@, \fixup .endm /* @@ -462,7 +476,19 @@ alternative_endif dsb ish isb - _cond_extable .Licache_op\@, \fixup + _cond_uaccess_extable .Licache_op\@, \fixup + .endm + +/* + * load_ttbr1 - install @pgtbl as a TTBR1 page table + * pgtbl preserved + * tmp1/tmp2 clobbered, either may overlap with pgtbl + */ + .macro load_ttbr1, pgtbl, tmp1, tmp2 + phys_to_ttbr \tmp1, \pgtbl + offset_ttbr1 \tmp1, \tmp2 + msr ttbr1_el1, \tmp1 + isb .endm /* @@ -478,10 +504,7 @@ alternative_endif isb tlbi vmalle1 dsb nsh - phys_to_ttbr \tmp, \page_table - offset_ttbr1 \tmp, \tmp2 - msr ttbr1_el1, \tmp - isb + load_ttbr1 \page_table, \tmp, \tmp2 .endm /* diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 9f3e2c3d2ca0..2cfc4245d2e2 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -50,13 +50,13 @@ #define pmr_sync() do {} while (0) #endif -#define mb() dsb(sy) -#define rmb() dsb(ld) -#define wmb() dsb(st) +#define __mb() dsb(sy) +#define __rmb() dsb(ld) +#define __wmb() dsb(st) -#define dma_mb() dmb(osh) -#define dma_rmb() dmb(oshld) -#define dma_wmb() dmb(oshst) +#define __dma_mb() dmb(osh) +#define __dma_rmb() dmb(oshld) +#define __dma_wmb() dmb(oshst) #define io_stop_wc() dgh() diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 7c2181c72116..ca9b487112cc 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -5,34 +5,9 @@ #ifndef __ASM_CACHE_H #define __ASM_CACHE_H -#include <asm/cputype.h> -#include <asm/mte-def.h> - -#define CTR_L1IP_SHIFT 14 -#define CTR_L1IP_MASK 3 -#define CTR_DMINLINE_SHIFT 16 -#define CTR_IMINLINE_SHIFT 0 -#define CTR_IMINLINE_MASK 0xf -#define CTR_ERG_SHIFT 20 -#define CTR_CWG_SHIFT 24 -#define CTR_CWG_MASK 15 -#define CTR_IDC_SHIFT 28 -#define CTR_DIC_SHIFT 29 - -#define CTR_CACHE_MINLINE_MASK \ - (0xf << CTR_DMINLINE_SHIFT | CTR_IMINLINE_MASK << CTR_IMINLINE_SHIFT) - -#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) - -#define ICACHE_POLICY_VPIPT 0 -#define ICACHE_POLICY_RESERVED 1 -#define ICACHE_POLICY_VIPT 2 -#define ICACHE_POLICY_PIPT 3 - #define L1_CACHE_SHIFT (6) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) - #define CLIDR_LOUU_SHIFT 27 #define CLIDR_LOC_SHIFT 24 #define CLIDR_LOUIS_SHIFT 21 @@ -55,6 +30,10 @@ #include <linux/bitops.h> #include <linux/kasan-enabled.h> +#include <asm/cputype.h> +#include <asm/mte-def.h> +#include <asm/sysreg.h> + #ifdef CONFIG_KASAN_SW_TAGS #define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT) #elif defined(CONFIG_KASAN_HW_TAGS) @@ -66,6 +45,12 @@ static inline unsigned int arch_slab_minalign(void) #define arch_slab_minalign() arch_slab_minalign() #endif +#define CTR_CACHE_MINLINE_MASK \ + (0xf << CTR_EL0_DMINLINE_SHIFT | \ + CTR_EL0_IMINLINE_MASK << CTR_EL0_IMINLINE_SHIFT) + +#define CTR_L1IP(ctr) SYS_FIELD_GET(CTR_EL0, L1Ip, ctr) + #define ICACHEF_ALIASING 0 #define ICACHEF_VPIPT 1 extern unsigned long __icache_flags; @@ -86,7 +71,7 @@ static __always_inline int icache_is_vpipt(void) static inline u32 cache_type_cwg(void) { - return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK; + return (read_cpuid_cachetype() >> CTR_EL0_CWG_SHIFT) & CTR_EL0_CWG_MASK; } #define __read_mostly __section(".data..read_mostly") @@ -120,12 +105,12 @@ static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void) { u32 ctr = read_cpuid_cachetype(); - if (!(ctr & BIT(CTR_IDC_SHIFT))) { + if (!(ctr & BIT(CTR_EL0_IDC_SHIFT))) { u64 clidr = read_sysreg(clidr_el1); if (CLIDR_LOC(clidr) == 0 || (CLIDR_LOUIS(clidr) == 0 && CLIDR_LOUU(clidr) == 0)) - ctr |= BIT(CTR_IDC_SHIFT); + ctr |= BIT(CTR_EL0_IDC_SHIFT); } return ctr; diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 5a228e203ef9..37185e978aeb 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -105,13 +105,6 @@ static inline void flush_icache_range(unsigned long start, unsigned long end) #define flush_icache_range flush_icache_range /* - * Cache maintenance functions used by the DMA API. No to be used directly. - */ -extern void __dma_map_area(const void *, size_t, int); -extern void __dma_unmap_area(const void *, size_t, int); -extern void __dma_flush_area(const void *, size_t); - -/* * Copy user data from/to a page which is mapped into a different * processes address space. Really, we want to allow our "user * space" model to handle this. diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 115cdec1ae87..fd7a92219eea 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -46,6 +46,7 @@ struct cpuinfo_arm64 { u64 reg_midr; u64 reg_revidr; u64 reg_gmid; + u64 reg_smidr; u64 reg_id_aa64dfr0; u64 reg_id_aa64dfr1; diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index e95c4df83911..a444c8915e88 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -31,11 +31,6 @@ * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the * cpu being killed. * @cpu_kill: Ensures a cpu has left the kernel. Called from another cpu. - * @cpu_init_idle: Reads any data necessary to initialize CPU idle states for - * a proposed logical id. - * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing - * to wrong parameters or error conditions. Called from the - * CPU being suspended. Must be called with IRQs disabled. */ struct cpu_operations { const char *name; @@ -49,10 +44,6 @@ struct cpu_operations { void (*cpu_die)(unsigned int cpu); int (*cpu_kill)(unsigned int cpu); #endif -#ifdef CONFIG_CPU_IDLE - int (*cpu_init_idle)(unsigned int); - int (*cpu_suspend)(unsigned long); -#endif }; int __init init_cpu_ops(int cpu); diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 14a8f3d93add..fd7d75a275f6 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -11,7 +11,7 @@ #include <asm/hwcap.h> #include <asm/sysreg.h> -#define MAX_CPU_FEATURES 64 +#define MAX_CPU_FEATURES 128 #define cpu_feature(x) KERNEL_HWCAP_ ## x #ifndef __ASSEMBLY__ @@ -673,7 +673,7 @@ static inline bool supports_clearbhb(int scope) isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); return cpuid_feature_extract_unsigned_field(isar2, - ID_AA64ISAR2_CLEARBHB_SHIFT); + ID_AA64ISAR2_EL1_BC_SHIFT); } const struct cpumask *system_32bit_el0_cpumask(void); @@ -908,7 +908,10 @@ static inline unsigned int get_vmid_bits(u64 mmfr1) } extern struct arm64_ftr_override id_aa64mmfr1_override; +extern struct arm64_ftr_override id_aa64pfr0_override; extern struct arm64_ftr_override id_aa64pfr1_override; +extern struct arm64_ftr_override id_aa64zfr0_override; +extern struct arm64_ftr_override id_aa64smfr0_override; extern struct arm64_ftr_override id_aa64isar1_override; extern struct arm64_ftr_override id_aa64isar2_override; diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h index 14a19d1141bd..2047713e097d 100644 --- a/arch/arm64/include/asm/cpuidle.h +++ b/arch/arm64/include/asm/cpuidle.h @@ -4,21 +4,6 @@ #include <asm/proc-fns.h> -#ifdef CONFIG_CPU_IDLE -extern int arm_cpuidle_init(unsigned int cpu); -extern int arm_cpuidle_suspend(int index); -#else -static inline int arm_cpuidle_init(unsigned int cpu) -{ - return -EOPNOTSUPP; -} - -static inline int arm_cpuidle_suspend(int index) -{ - return -EOPNOTSUPP; -} -#endif - #ifdef CONFIG_ARM64_PSEUDO_NMI #include <asm/arch_gicv3.h> diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 34ceff08cac4..2630faa5bc08 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -129,64 +129,6 @@ msr cptr_el2, x0 // Disable copro. traps to EL2 .endm -/* SVE register access */ -.macro __init_el2_nvhe_sve - mrs x1, id_aa64pfr0_el1 - ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4 - cbz x1, .Lskip_sve_\@ - - bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps - msr cptr_el2, x0 // Disable copro. traps to EL2 - isb - mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector - msr_s SYS_ZCR_EL2, x1 // length for EL1. -.Lskip_sve_\@: -.endm - -/* SME register access and priority mapping */ -.macro __init_el2_nvhe_sme - mrs x1, id_aa64pfr1_el1 - ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4 - cbz x1, .Lskip_sme_\@ - - bic x0, x0, #CPTR_EL2_TSM // Also disable SME traps - msr cptr_el2, x0 // Disable copro. traps to EL2 - isb - - mrs x1, sctlr_el2 - orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps - msr sctlr_el2, x1 - isb - - mov x1, #0 // SMCR controls - - mrs_s x2, SYS_ID_AA64SMFR0_EL1 - ubfx x2, x2, #ID_AA64SMFR0_FA64_SHIFT, #1 // Full FP in SM? - cbz x2, .Lskip_sme_fa64_\@ - - orr x1, x1, SMCR_ELx_FA64_MASK -.Lskip_sme_fa64_\@: - - orr x1, x1, #SMCR_ELx_LEN_MASK // Enable full SME vector - msr_s SYS_SMCR_EL2, x1 // length for EL1. - - mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported? - ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1 - cbz x1, .Lskip_sme_\@ - - msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal - - mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present? - ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4 - cbz x1, .Lskip_sme_\@ - - mrs_s x1, SYS_HCRX_EL2 - orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping - msr_s SYS_HCRX_EL2, x1 - -.Lskip_sme_\@: -.endm - /* Disable any fine grained traps */ .macro __init_el2_fgt mrs x1, id_aa64mmfr0_el1 @@ -250,8 +192,6 @@ __init_el2_hstr __init_el2_nvhe_idregs __init_el2_nvhe_cptr - __init_el2_nvhe_sve - __init_el2_nvhe_sme __init_el2_fgt __init_el2_nvhe_prepare_eret .endm diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index daff882883f9..71ed5fdf718b 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -62,10 +62,12 @@ enum fixed_addresses { #endif /* CONFIG_ACPI_APEI_GHES */ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#ifdef CONFIG_RELOCATABLE + FIX_ENTRY_TRAMP_TEXT4, /* one extra slot for the data page */ +#endif FIX_ENTRY_TRAMP_TEXT3, FIX_ENTRY_TRAMP_TEXT2, FIX_ENTRY_TRAMP_TEXT1, - FIX_ENTRY_TRAMP_DATA, #define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1)) #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ __end_of_permanent_fixed_addresses, diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index aa443d8f8cfb..cef4ae7a3d8b 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -85,7 +85,7 @@ #define KERNEL_HWCAP_PACA __khwcap_feature(PACA) #define KERNEL_HWCAP_PACG __khwcap_feature(PACG) -#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 32) +#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 64) #define KERNEL_HWCAP_DCPODP __khwcap2_feature(DCPODP) #define KERNEL_HWCAP_SVE2 __khwcap2_feature(SVE2) #define KERNEL_HWCAP_SVEAES __khwcap2_feature(SVEAES) @@ -118,6 +118,7 @@ #define KERNEL_HWCAP_SME_F32F32 __khwcap2_feature(SME_F32F32) #define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64) #define KERNEL_HWCAP_WFXT __khwcap2_feature(WFXT) +#define KERNEL_HWCAP_EBF16 __khwcap2_feature(EBF16) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 3995652daf81..87dd42d74afe 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -163,13 +163,16 @@ extern void __memset_io(volatile void __iomem *, int, size_t); /* * I/O memory mapping functions. */ -extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot); -extern void iounmap(volatile void __iomem *addr); -extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); -#define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) -#define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC)) -#define ioremap_np(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRnE)) +bool ioremap_allowed(phys_addr_t phys_addr, size_t size, unsigned long prot); +#define ioremap_allowed ioremap_allowed + +#define _PAGE_IOREMAP PROT_DEVICE_nGnRE + +#define ioremap_wc(addr, size) \ + ioremap_prot((addr), (size), PROT_NORMAL_NC) +#define ioremap_np(addr, size) \ + ioremap_prot((addr), (size), PROT_DEVICE_nGnRnE) /* * io{read,write}{16,32,64}be() macros @@ -184,6 +187,15 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); #include <asm-generic/io.h> +#define ioremap_cache ioremap_cache +static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size) +{ + if (pfn_is_map_memory(__phys_to_pfn(addr))) + return (void __iomem *)__phys_to_virt(addr); + + return ioremap_prot(addr, size, PROT_NORMAL); +} + /* * More restrictive address range checking than the default implementation * (PHYS_OFFSET and PHYS_MASK taken into account). diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 96dc0f7da258..02e59fa8f293 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -8,6 +8,7 @@ #ifndef __ASM_KERNEL_PGTABLE_H #define __ASM_KERNEL_PGTABLE_H +#include <asm/boot.h> #include <asm/pgtable-hwdef.h> #include <asm/sparsemem.h> @@ -35,10 +36,8 @@ */ #if ARM64_KERNEL_USES_PMD_MAPS #define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) -#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1) #else #define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) -#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT)) #endif @@ -87,7 +86,14 @@ + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ #define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end)) -#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) + +/* the initial ID map may need two extra pages if it needs to be extended */ +#if VA_BITS < 48 +#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE) +#else +#define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * PAGE_SIZE) +#endif +#define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE) /* Initial memory map size */ #if ARM64_KERNEL_USES_PMD_MAPS @@ -107,9 +113,11 @@ #define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) #if ARM64_KERNEL_USES_PMD_MAPS -#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) +#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) +#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY) #else -#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) +#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) +#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY) #endif /* diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 0af70d9abede..227d256cd4b9 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -174,7 +174,11 @@ #include <linux/types.h> #include <asm/bug.h> +#if VA_BITS > 48 extern u64 vabits_actual; +#else +#define vabits_actual ((u64)VA_BITS) +#endif extern s64 memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ @@ -351,6 +355,11 @@ static inline void *phys_to_virt(phys_addr_t x) }) void dump_mem_limit(void); + +static inline bool defer_reserve_crashkernel(void) +{ + return IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32); +} #endif /* !ASSEMBLY */ /* diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 6770667b34a3..c7ccd82db1d2 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -60,8 +60,7 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in * physical memory, in which case it will be smaller. */ -extern u64 idmap_t0sz; -extern u64 idmap_ptrs_per_pgd; +extern int idmap_t0sz; /* * Ensure TCR.T0SZ is set to the provided value. @@ -106,13 +105,18 @@ static inline void cpu_uninstall_idmap(void) cpu_switch_mm(mm->pgd, mm); } -static inline void cpu_install_idmap(void) +static inline void __cpu_install_idmap(pgd_t *idmap) { cpu_set_reserved_ttbr0(); local_flush_tlb_all(); cpu_set_idmap_tcr_t0sz(); - cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm); + cpu_switch_mm(lm_alias(idmap), &init_mm); +} + +static inline void cpu_install_idmap(void) +{ + __cpu_install_idmap(idmap_pg_dir); } /* @@ -143,7 +147,7 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz) * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, * avoiding the possibility of conflicting TLB entries being allocated. */ -static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp) +static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) { typedef void (ttbr_replace_func)(phys_addr_t); extern ttbr_replace_func idmap_cpu_replace_ttbr1; @@ -166,7 +170,7 @@ static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp) replace_phys = (void *)__pa_symbol(function_nocfi(idmap_cpu_replace_ttbr1)); - cpu_install_idmap(); + __cpu_install_idmap(idmap); replace_phys(ttbr1); cpu_uninstall_idmap(); } diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index dd3d12bce07b..5ab8d163198f 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -281,10 +281,9 @@ */ #ifdef CONFIG_ARM64_PA_BITS_52 /* - * This should be GENMASK_ULL(47, 2). * TTBR_ELx[1] is RES0 in this configuration. */ -#define TTBR_BADDR_MASK_52 (((UL(1) << 46) - 1) << 2) +#define TTBR_BADDR_MASK_52 GENMASK_ULL(47, 2) #endif #ifdef CONFIG_ARM64_VA_BITS_52 diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0b6632f18364..b5df82aa99e6 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -45,6 +45,12 @@ __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline bool arch_thp_swp_supported(void) +{ + return !system_supports_mte(); +} +#define arch_thp_swp_supported arch_thp_swp_supported + /* * Outside of a few very special situations (e.g. hibernation), we always * use broadcast TLB invalidation instructions, therefore a spurious page @@ -427,6 +433,16 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); } +/* + * Select all bits except the pfn + */ +static inline pgprot_t pte_pgprot(pte_t pte) +{ + unsigned long pfn = pte_pfn(pte); + + return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); +} + #ifdef CONFIG_NUMA_BALANCING /* * See the comment in include/linux/pgtable.h diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 9e58749db21d..86eb0bfe3b38 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -272,8 +272,9 @@ void tls_preserve_current_state(void); static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) { + s32 previous_syscall = regs->syscallno; memset(regs, 0, sizeof(*regs)); - forget_syscall(regs); + regs->syscallno = previous_syscall; regs->pc = pc; if (system_uses_irq_prio_masking()) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 42ff95dba6da..7c71358d44c4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -192,8 +192,6 @@ #define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0) #define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1) -#define SYS_ID_AA64ZFR0_EL1 sys_reg(3, 0, 0, 4, 4) -#define SYS_ID_AA64SMFR0_EL1 sys_reg(3, 0, 0, 4, 5) #define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) #define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) @@ -201,9 +199,6 @@ #define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4) #define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5) -#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1) -#define SYS_ID_AA64ISAR2_EL1 sys_reg(3, 0, 0, 6, 2) - #define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) #define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) #define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) @@ -410,12 +405,6 @@ #define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0) #define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0) -#define SYS_LORSA_EL1 sys_reg(3, 0, 10, 4, 0) -#define SYS_LOREA_EL1 sys_reg(3, 0, 10, 4, 1) -#define SYS_LORN_EL1 sys_reg(3, 0, 10, 4, 2) -#define SYS_LORC_EL1 sys_reg(3, 0, 10, 4, 3) -#define SYS_LORID_EL1 sys_reg(3, 0, 10, 4, 7) - #define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) #define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1) @@ -454,16 +443,12 @@ #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) -#define SYS_GMID_EL1 sys_reg(3, 1, 0, 0, 4) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) #define SMIDR_EL1_IMPLEMENTER_SHIFT 24 #define SMIDR_EL1_SMPS_SHIFT 15 #define SMIDR_EL1_AFFINITY_SHIFT 0 -#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) -#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) - #define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0) #define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1) @@ -704,66 +689,6 @@ /* Position the attr at the correct index */ #define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8)) -/* id_aa64isar1 */ -#define ID_AA64ISAR1_I8MM_SHIFT 52 -#define ID_AA64ISAR1_DGH_SHIFT 48 -#define ID_AA64ISAR1_BF16_SHIFT 44 -#define ID_AA64ISAR1_SPECRES_SHIFT 40 -#define ID_AA64ISAR1_SB_SHIFT 36 -#define ID_AA64ISAR1_FRINTTS_SHIFT 32 -#define ID_AA64ISAR1_GPI_SHIFT 28 -#define ID_AA64ISAR1_GPA_SHIFT 24 -#define ID_AA64ISAR1_LRCPC_SHIFT 20 -#define ID_AA64ISAR1_FCMA_SHIFT 16 -#define ID_AA64ISAR1_JSCVT_SHIFT 12 -#define ID_AA64ISAR1_API_SHIFT 8 -#define ID_AA64ISAR1_APA_SHIFT 4 -#define ID_AA64ISAR1_DPB_SHIFT 0 - -#define ID_AA64ISAR1_APA_NI 0x0 -#define ID_AA64ISAR1_APA_ARCHITECTED 0x1 -#define ID_AA64ISAR1_APA_ARCH_EPAC 0x2 -#define ID_AA64ISAR1_APA_ARCH_EPAC2 0x3 -#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC 0x4 -#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC_CMB 0x5 -#define ID_AA64ISAR1_API_NI 0x0 -#define ID_AA64ISAR1_API_IMP_DEF 0x1 -#define ID_AA64ISAR1_API_IMP_DEF_EPAC 0x2 -#define ID_AA64ISAR1_API_IMP_DEF_EPAC2 0x3 -#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC 0x4 -#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC_CMB 0x5 -#define ID_AA64ISAR1_GPA_NI 0x0 -#define ID_AA64ISAR1_GPA_ARCHITECTED 0x1 -#define ID_AA64ISAR1_GPI_NI 0x0 -#define ID_AA64ISAR1_GPI_IMP_DEF 0x1 - -/* id_aa64isar2 */ -#define ID_AA64ISAR2_CLEARBHB_SHIFT 28 -#define ID_AA64ISAR2_APA3_SHIFT 12 -#define ID_AA64ISAR2_GPA3_SHIFT 8 -#define ID_AA64ISAR2_RPRES_SHIFT 4 -#define ID_AA64ISAR2_WFXT_SHIFT 0 - -#define ID_AA64ISAR2_RPRES_8BIT 0x0 -#define ID_AA64ISAR2_RPRES_12BIT 0x1 -/* - * Value 0x1 has been removed from the architecture, and is - * reserved, but has not yet been removed from the ARM ARM - * as of ARM DDI 0487G.b. - */ -#define ID_AA64ISAR2_WFXT_NI 0x0 -#define ID_AA64ISAR2_WFXT_SUPPORTED 0x2 - -#define ID_AA64ISAR2_APA3_NI 0x0 -#define ID_AA64ISAR2_APA3_ARCHITECTED 0x1 -#define ID_AA64ISAR2_APA3_ARCH_EPAC 0x2 -#define ID_AA64ISAR2_APA3_ARCH_EPAC2 0x3 -#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC 0x4 -#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC_CMB 0x5 - -#define ID_AA64ISAR2_GPA3_NI 0x0 -#define ID_AA64ISAR2_GPA3_ARCHITECTED 0x1 - /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV2_SHIFT 56 @@ -811,45 +736,6 @@ #define ID_AA64PFR1_MTE 0x2 #define ID_AA64PFR1_MTE_ASYMM 0x3 -/* id_aa64zfr0 */ -#define ID_AA64ZFR0_F64MM_SHIFT 56 -#define ID_AA64ZFR0_F32MM_SHIFT 52 -#define ID_AA64ZFR0_I8MM_SHIFT 44 -#define ID_AA64ZFR0_SM4_SHIFT 40 -#define ID_AA64ZFR0_SHA3_SHIFT 32 -#define ID_AA64ZFR0_BF16_SHIFT 20 -#define ID_AA64ZFR0_BITPERM_SHIFT 16 -#define ID_AA64ZFR0_AES_SHIFT 4 -#define ID_AA64ZFR0_SVEVER_SHIFT 0 - -#define ID_AA64ZFR0_F64MM 0x1 -#define ID_AA64ZFR0_F32MM 0x1 -#define ID_AA64ZFR0_I8MM 0x1 -#define ID_AA64ZFR0_BF16 0x1 -#define ID_AA64ZFR0_SM4 0x1 -#define ID_AA64ZFR0_SHA3 0x1 -#define ID_AA64ZFR0_BITPERM 0x1 -#define ID_AA64ZFR0_AES 0x1 -#define ID_AA64ZFR0_AES_PMULL 0x2 -#define ID_AA64ZFR0_SVEVER_SVE2 0x1 - -/* id_aa64smfr0 */ -#define ID_AA64SMFR0_FA64_SHIFT 63 -#define ID_AA64SMFR0_I16I64_SHIFT 52 -#define ID_AA64SMFR0_F64F64_SHIFT 48 -#define ID_AA64SMFR0_I8I32_SHIFT 36 -#define ID_AA64SMFR0_F16F32_SHIFT 35 -#define ID_AA64SMFR0_B16F32_SHIFT 34 -#define ID_AA64SMFR0_F32F32_SHIFT 32 - -#define ID_AA64SMFR0_FA64 0x1 -#define ID_AA64SMFR0_I16I64 0xf -#define ID_AA64SMFR0_F64F64 0x1 -#define ID_AA64SMFR0_I8I32 0xf -#define ID_AA64SMFR0_F16F32 0x1 -#define ID_AA64SMFR0_B16F32 0x1 -#define ID_AA64SMFR0_F32F32 0x1 - /* id_aa64mmfr0 */ #define ID_AA64MMFR0_ECV_SHIFT 60 #define ID_AA64MMFR0_FGT_SHIFT 56 @@ -902,6 +788,7 @@ /* id_aa64mmfr1 */ #define ID_AA64MMFR1_ECBHB_SHIFT 60 +#define ID_AA64MMFR1_TIDCP1_SHIFT 52 #define ID_AA64MMFR1_HCX_SHIFT 40 #define ID_AA64MMFR1_AFP_SHIFT 44 #define ID_AA64MMFR1_ETS_SHIFT 36 @@ -918,6 +805,9 @@ #define ID_AA64MMFR1_VMIDBITS_8 0 #define ID_AA64MMFR1_VMIDBITS_16 2 +#define ID_AA64MMFR1_TIDCP1_NI 0 +#define ID_AA64MMFR1_TIDCP1_IMP 1 + /* id_aa64mmfr2 */ #define ID_AA64MMFR2_E0PD_SHIFT 60 #define ID_AA64MMFR2_EVT_SHIFT 56 @@ -1084,9 +974,6 @@ #define MVFR2_FPMISC_SHIFT 4 #define MVFR2_SIMDMISC_SHIFT 0 -#define DCZID_DZP_SHIFT 4 -#define DCZID_BS_SHIFT 0 - #define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ #define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ @@ -1121,8 +1008,8 @@ #define SYS_RGSR_EL1_SEED_MASK 0xffffUL /* GMID_EL1 field definitions */ -#define SYS_GMID_EL1_BS_SHIFT 0 -#define SYS_GMID_EL1_BS_SIZE 4 +#define GMID_EL1_BS_SHIFT 0 +#define GMID_EL1_BS_SIZE 4 /* TFSR{,E0}_EL1 bit definitions */ #define SYS_TFSR_EL1_TF0_SHIFT 0 @@ -1324,6 +1211,9 @@ #endif +#define SYS_FIELD_GET(reg, field, val) \ + FIELD_GET(reg##_##field##_MASK, val) + #define SYS_FIELD_PREP(reg, field, val) \ FIELD_PREP(reg##_##field##_MASK, val) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 63f9c828f1a7..2fc9f0861769 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -232,34 +232,34 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) * The "__xxx_error" versions set the third argument to -EFAULT if an error * occurs, and leave it unchanged on success. */ -#define __get_mem_asm(load, reg, x, addr, err) \ +#define __get_mem_asm(load, reg, x, addr, err, type) \ asm volatile( \ "1: " load " " reg "1, [%2]\n" \ "2:\n" \ - _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \ + _ASM_EXTABLE_##type##ACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \ : "+r" (err), "=&r" (x) \ : "r" (addr)) -#define __raw_get_mem(ldr, x, ptr, err) \ -do { \ - unsigned long __gu_val; \ - switch (sizeof(*(ptr))) { \ - case 1: \ - __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err)); \ - break; \ - case 2: \ - __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err)); \ - break; \ - case 4: \ - __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err)); \ - break; \ - case 8: \ - __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err)); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ +#define __raw_get_mem(ldr, x, ptr, err, type) \ +do { \ + unsigned long __gu_val; \ + switch (sizeof(*(ptr))) { \ + case 1: \ + __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err), type); \ + break; \ + case 2: \ + __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err), type); \ + break; \ + case 4: \ + __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err), type); \ + break; \ + case 8: \ + __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err), type); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) /* @@ -274,7 +274,7 @@ do { \ __chk_user_ptr(ptr); \ \ uaccess_ttbr0_enable(); \ - __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err); \ + __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err, U); \ uaccess_ttbr0_disable(); \ \ (x) = __rgu_val; \ @@ -314,40 +314,40 @@ do { \ \ __uaccess_enable_tco_async(); \ __raw_get_mem("ldr", *((type *)(__gkn_dst)), \ - (__force type *)(__gkn_src), __gkn_err); \ + (__force type *)(__gkn_src), __gkn_err, K); \ __uaccess_disable_tco_async(); \ \ if (unlikely(__gkn_err)) \ goto err_label; \ } while (0) -#define __put_mem_asm(store, reg, x, addr, err) \ +#define __put_mem_asm(store, reg, x, addr, err, type) \ asm volatile( \ "1: " store " " reg "1, [%2]\n" \ "2:\n" \ - _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) \ + _ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0) \ : "+r" (err) \ : "r" (x), "r" (addr)) -#define __raw_put_mem(str, x, ptr, err) \ -do { \ - __typeof__(*(ptr)) __pu_val = (x); \ - switch (sizeof(*(ptr))) { \ - case 1: \ - __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err)); \ - break; \ - case 2: \ - __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err)); \ - break; \ - case 4: \ - __put_mem_asm(str, "%w", __pu_val, (ptr), (err)); \ - break; \ - case 8: \ - __put_mem_asm(str, "%x", __pu_val, (ptr), (err)); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ +#define __raw_put_mem(str, x, ptr, err, type) \ +do { \ + __typeof__(*(ptr)) __pu_val = (x); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err), type); \ + break; \ + case 2: \ + __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err), type); \ + break; \ + case 4: \ + __put_mem_asm(str, "%w", __pu_val, (ptr), (err), type); \ + break; \ + case 8: \ + __put_mem_asm(str, "%x", __pu_val, (ptr), (err), type); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ } while (0) /* @@ -362,7 +362,7 @@ do { \ __chk_user_ptr(__rpu_ptr); \ \ uaccess_ttbr0_enable(); \ - __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err); \ + __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err, U); \ uaccess_ttbr0_disable(); \ } while (0) @@ -400,7 +400,7 @@ do { \ \ __uaccess_enable_tco_async(); \ __raw_put_mem("str", *((type *)(__pkn_src)), \ - (__force type *)(__pkn_dst), __pkn_err); \ + (__force type *)(__pkn_dst), __pkn_err, K); \ __uaccess_disable_tco_async(); \ \ if (unlikely(__pkn_err)) \ diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 0e80db4327b6..4eb601e7de50 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -36,9 +36,9 @@ #define HVC_RESET_VECTORS 2 /* - * HVC_VHE_RESTART - Upgrade the CPU from EL1 to EL2, if possible + * HVC_FINALISE_EL2 - Upgrade the CPU from EL1 to EL2, if possible */ -#define HVC_VHE_RESTART 3 +#define HVC_FINALISE_EL2 3 /* Max number of HYP stub hypercalls */ #define HVC_STUB_HCALL_NR 4 @@ -49,6 +49,13 @@ #define BOOT_CPU_MODE_EL1 (0xe11) #define BOOT_CPU_MODE_EL2 (0xe12) +/* + * Flags returned together with the boot mode, but not preserved in + * __boot_cpu_mode. Used by the idreg override code to work out the + * boot state. + */ +#define BOOT_CPU_FLAG_E2H BIT_ULL(32) + #ifndef __ASSEMBLY__ #include <asm/ptrace.h> diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 4bb2cc8ac446..1ad2568a2569 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -19,6 +19,9 @@ /* * HWCAP flags - for AT_HWCAP + * + * Bits 62 and 63 are reserved for use by libc. + * Bits 32-61 are unallocated for potential use by libc. */ #define HWCAP_FP (1 << 0) #define HWCAP_ASIMD (1 << 1) @@ -88,5 +91,6 @@ #define HWCAP2_SME_F32F32 (1 << 29) #define HWCAP2_SME_FA64 (1 << 30) #define HWCAP2_WFXT (1UL << 31) +#define HWCAP2_EBF16 (1UL << 32) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index fa7981d0d917..1add7b01efa7 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -14,6 +14,11 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong CFLAGS_syscall.o += -fno-stack-protector +# When KASAN is enabled, a stack trace is recorded for every alloc/free, which +# can significantly impact performance. Avoid instrumenting the stack trace +# collection code to minimize this impact. +KASAN_SANITIZE_stacktrace.o := n + # It's not safe to invoke KCOV when portions of the kernel environment aren't # available or are out-of-sync with HW state. Since `noinstr` doesn't always # inhibit KCOV instrumentation, disable it for the entire compilation unit. @@ -59,7 +64,7 @@ obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o obj-$(CONFIG_PARAVIRT) += paravirt.o -obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o +obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o pi/ obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o obj-$(CONFIG_ELF_CORE) += elfcore.o obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \ diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index e4dea8db6924..a5a256e3f9fe 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -351,7 +351,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) prot = __acpi_get_writethrough_mem_attribute(); } } - return __ioremap(phys, size, prot); + return ioremap_prot(phys, size, pgprot_val(prot)); } /* diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index fdfecf0991ce..e51535a5f939 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -109,7 +109,7 @@ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) pxm = pa->proximity_domain; node = acpi_map_pxm_to_node(pxm); - if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) { + if (node == NUMA_NO_NODE) { pr_err("SRAT: Too many proximity domains %d\n", pxm); bad_srat(); return; diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 7bbf5104b7b7..9bcaa5eacf16 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -121,7 +121,7 @@ static void clean_dcache_range_nopatch(u64 start, u64 end) ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0); d_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0, - CTR_DMINLINE_SHIFT); + CTR_EL0_DminLine_SHIFT); cur = start & ~(d_size - 1); do { /* diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 6875a16b09d2..fb0e7c7b2e20 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -59,6 +59,7 @@ struct insn_emulation { static LIST_HEAD(insn_emulation); static int nr_insn_emulated __initdata; static DEFINE_RAW_SPINLOCK(insn_emulation_lock); +static DEFINE_MUTEX(insn_emulation_mutex); static void register_emulation_hooks(struct insn_emulation_ops *ops) { @@ -207,10 +208,10 @@ static int emulation_proc_handler(struct ctl_table *table, int write, loff_t *ppos) { int ret = 0; - struct insn_emulation *insn = (struct insn_emulation *) table->data; + struct insn_emulation *insn = container_of(table->data, struct insn_emulation, current_mode); enum insn_emulation_mode prev_mode = insn->current_mode; - table->data = &insn->current_mode; + mutex_lock(&insn_emulation_mutex); ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write || prev_mode == insn->current_mode) @@ -223,7 +224,7 @@ static int emulation_proc_handler(struct ctl_table *table, int write, update_insn_emulation_mode(insn, INSN_UNDEF); } ret: - table->data = insn; + mutex_unlock(&insn_emulation_mutex); return ret; } @@ -247,7 +248,7 @@ static void __init register_insn_emulation_sysctl(void) sysctl->maxlen = sizeof(int); sysctl->procname = insn->ops->name; - sysctl->data = insn; + sysctl->data = &insn->current_mode; sysctl->extra1 = &insn->min; sysctl->extra2 = &insn->max; sysctl->proc_handler = emulation_proc_handler; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index c05cc3b6162e..7e6289e709fc 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -187,7 +187,7 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry, int scope) { u32 midr = read_cpuid_id(); - bool has_dic = read_cpuid_cachetype() & BIT(CTR_DIC_SHIFT); + bool has_dic = read_cpuid_cachetype() & BIT(CTR_EL0_DIC_SHIFT); const struct midr_range range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1); WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); @@ -212,6 +212,12 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe), }, #endif +#ifdef CONFIG_ARM64_ERRATUM_2441009 + { + /* Cortex-A510 r0p0 -> r1p1. Fixed in r1p2 */ + ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1), + }, +#endif {}, }; #endif @@ -395,6 +401,14 @@ static struct midr_range trbe_write_out_of_range_cpus[] = { }; #endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */ +#ifdef CONFIG_ARM64_ERRATUM_1742098 +static struct midr_range broken_aarch32_aes[] = { + MIDR_RANGE(MIDR_CORTEX_A57, 0, 1, 0xf, 0xf), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */ + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -480,7 +494,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI { - .desc = "Qualcomm erratum 1009, or ARM erratum 1286807", + .desc = "Qualcomm erratum 1009, or ARM erratum 1286807, 2441009", .capability = ARM64_WORKAROUND_REPEAT_TLBI, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = cpucap_multi_entry_cap_matches, @@ -658,6 +672,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 1) }, #endif +#ifdef CONFIG_ARM64_ERRATUM_1742098 + { + .desc = "ARM erratum 1742098", + .capability = ARM64_WORKAROUND_1742098, + CAP_MIDR_RANGE_LIST(broken_aarch32_aes), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 8d88433de81d..ad64cab0a2ba 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -79,6 +79,7 @@ #include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/fpsimd.h> +#include <asm/hwcap.h> #include <asm/insn.h> #include <asm/kvm_host.h> #include <asm/mmu_context.h> @@ -91,7 +92,7 @@ #include <asm/virt.h> /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ -static unsigned long elf_hwcap __read_mostly; +static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; #ifdef CONFIG_COMPAT #define COMPAT_ELF_HWCAP_DEFAULT \ @@ -209,35 +210,35 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_I8MM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DGH_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_BF16_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SPECRES_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FRINTTS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_SPECRES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_SB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_FRINTTS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPI_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_GPI_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_GPA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_FCMA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_JSCVT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_API_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_EL1_API_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_APA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_EL1_APA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_APA3_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_GPA3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_WFXT_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_GPA3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -276,41 +277,41 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F64MM_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F32MM_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_I8MM_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BF16_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_AES_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_FA64_SHIFT, 1, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I16I64_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F64F64_SHIFT, 1, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I8I32_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F16F32_SHIFT, 1, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_B16F32_SHIFT, 1, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F32F32_SHIFT, 1, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0), ARM64_FTR_END, }; @@ -361,6 +362,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TIDCP1_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_AFP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_ETS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TWED_SHIFT, 4, 0), @@ -396,18 +398,18 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DIC_SHIFT, 1, 1), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IDC_SHIFT, 1, 1), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_CWG_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_ERG_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DMINLINE_SHIFT, 4, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DIC_SHIFT, 1, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_IDC_SHIFT, 1, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_EL0_CWG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_EL0_ERG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DminLine_SHIFT, 4, 1), /* * Linux can handle differing I-cache policies. Userspace JITs will * make use of *minLine. * If we have differing I-cache policies, report it as the weakest - VIPT. */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, CTR_L1IP_SHIFT, 2, ICACHE_POLICY_VIPT), /* L1Ip */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, CTR_EL0_L1Ip_SHIFT, 2, CTR_EL0_L1Ip_VIPT), /* L1Ip */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_IminLine_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -453,13 +455,13 @@ static const struct arm64_ftr_bits ftr_mvfr2[] = { }; static const struct arm64_ftr_bits ftr_dczid[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, DCZID_DZP_SHIFT, 1, 1), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, DCZID_BS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, DCZID_EL0_DZP_SHIFT, 1, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, DCZID_EL0_BS_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_gmid[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, SYS_GMID_EL1_BS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, GMID_EL1_BS_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -561,7 +563,7 @@ static const struct arm64_ftr_bits ftr_id_pfr2[] = { static const struct arm64_ftr_bits ftr_id_dfr0[] = { /* [31:28] TraceFilt */ - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_PERFMON_SHIFT, 4, 0xf), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_DFR0_PERFMON_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MPROFDBG_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPTRC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPTRC_SHIFT, 4, 0), @@ -631,7 +633,10 @@ static const struct arm64_ftr_bits ftr_raz[] = { __ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override) struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override; +struct arm64_ftr_override __ro_after_init id_aa64pfr0_override; struct arm64_ftr_override __ro_after_init id_aa64pfr1_override; +struct arm64_ftr_override __ro_after_init id_aa64zfr0_override; +struct arm64_ftr_override __ro_after_init id_aa64smfr0_override; struct arm64_ftr_override __ro_after_init id_aa64isar1_override; struct arm64_ftr_override __ro_after_init id_aa64isar2_override; @@ -668,11 +673,14 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_MMFR5_EL1, ftr_id_mmfr5), /* Op1 = 0, CRn = 0, CRm = 4 */ - ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0, + &id_aa64pfr0_override), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1, &id_aa64pfr1_override), - ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0), - ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0), + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0, + &id_aa64zfr0_override), + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0, + &id_aa64smfr0_override), /* Op1 = 0, CRn = 0, CRm = 5 */ ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0), @@ -993,15 +1001,24 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) init_32bit_cpu_features(&info->aarch32); - if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { + if (IS_ENABLED(CONFIG_ARM64_SVE) && + id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) { + info->reg_zcr = read_zcr_features(); init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr); vec_init_vq_map(ARM64_VEC_SVE); } - if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) { + if (IS_ENABLED(CONFIG_ARM64_SME) && + id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) { + info->reg_smcr = read_smcr_features(); + /* + * We mask out SMPS since even if the hardware + * supports priorities the kernel does not at present + * and we block access to them. + */ + info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS; init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr); - if (IS_ENABLED(CONFIG_ARM64_SME)) - vec_init_vq_map(ARM64_VEC_SME); + vec_init_vq_map(ARM64_VEC_SME); } if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) @@ -1233,23 +1250,31 @@ void update_cpu_features(int cpu, taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu, info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0); - if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { + if (IS_ENABLED(CONFIG_ARM64_SVE) && + id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) { + info->reg_zcr = read_zcr_features(); taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu, info->reg_zcr, boot->reg_zcr); - /* Probe vector lengths, unless we already gave up on SVE */ - if (id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) && - !system_capabilities_finalized()) + /* Probe vector lengths */ + if (!system_capabilities_finalized()) vec_update_vq_map(ARM64_VEC_SVE); } - if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) { + if (IS_ENABLED(CONFIG_ARM64_SME) && + id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) { + info->reg_smcr = read_smcr_features(); + /* + * We mask out SMPS since even if the hardware + * supports priorities the kernel does not at present + * and we block access to them. + */ + info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS; taint |= check_update_ftr_reg(SYS_SMCR_EL1, cpu, info->reg_smcr, boot->reg_smcr); - /* Probe vector lengths, unless we already gave up on SME */ - if (id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1)) && - !system_capabilities_finalized()) + /* Probe vector lengths */ + if (!system_capabilities_finalized()) vec_update_vq_map(ARM64_VEC_SME); } @@ -1480,7 +1505,7 @@ static bool has_cache_idc(const struct arm64_cpu_capabilities *entry, else ctr = read_cpuid_effective_cachetype(); - return ctr & BIT(CTR_IDC_SHIFT); + return ctr & BIT(CTR_EL0_IDC_SHIFT); } static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unused) @@ -1491,7 +1516,7 @@ static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unu * to the CTR_EL0 on this CPU and emulate it with the real/safe * value. */ - if (!(read_cpuid_cachetype() & BIT(CTR_IDC_SHIFT))) + if (!(read_cpuid_cachetype() & BIT(CTR_EL0_IDC_SHIFT))) sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } @@ -1505,7 +1530,7 @@ static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, else ctr = read_cpuid_cachetype(); - return ctr & BIT(CTR_DIC_SHIFT); + return ctr & BIT(CTR_EL0_DIC_SHIFT); } static bool __maybe_unused @@ -1645,14 +1670,34 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, } #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT)) + +extern +void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), int flags); + +static phys_addr_t kpti_ng_temp_alloc; + +static phys_addr_t kpti_ng_pgd_alloc(int shift) +{ + kpti_ng_temp_alloc -= PAGE_SIZE; + return kpti_ng_temp_alloc; +} + static void __nocfi kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) { - typedef void (kpti_remap_fn)(int, int, phys_addr_t); + typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long); extern kpti_remap_fn idmap_kpti_install_ng_mappings; kpti_remap_fn *remap_fn; int cpu = smp_processor_id(); + int levels = CONFIG_PGTABLE_LEVELS; + int order = order_base_2(levels); + u64 kpti_ng_temp_pgd_pa = 0; + pgd_t *kpti_ng_temp_pgd; + u64 alloc = 0; if (__this_cpu_read(this_cpu_vector) == vectors) { const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI); @@ -1670,12 +1715,40 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) remap_fn = (void *)__pa_symbol(function_nocfi(idmap_kpti_install_ng_mappings)); + if (!cpu) { + alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order); + kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE); + kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd); + + // + // Create a minimal page table hierarchy that permits us to map + // the swapper page tables temporarily as we traverse them. + // + // The physical pages are laid out as follows: + // + // +--------+-/-------+-/------ +-\\--------+ + // : PTE[] : | PMD[] : | PUD[] : || PGD[] : + // +--------+-\-------+-\------ +-//--------+ + // ^ + // The first page is mapped into this hierarchy at a PMD_SHIFT + // aligned virtual address, so that we can manipulate the PTE + // level entries while the mapping is active. The first entry + // covers the PTE[] page itself, the remaining entries are free + // to be used as a ad-hoc fixmap. + // + create_kpti_ng_temp_pgd(kpti_ng_temp_pgd, __pa(alloc), + KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL, + kpti_ng_pgd_alloc, 0); + } + cpu_install_idmap(); - remap_fn(cpu, num_online_cpus(), __pa_symbol(swapper_pg_dir)); + remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA); cpu_uninstall_idmap(); - if (!cpu) + if (!cpu) { + free_pages(alloc, order); arm64_use_ng_mappings = true; + } } #else static void @@ -1971,6 +2044,14 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_MTE */ +static void elf_hwcap_fixup(void) +{ +#ifdef CONFIG_ARM64_ERRATUM_1742098 + if (cpus_have_const_cap(ARM64_WORKAROUND_1742098)) + compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES; +#endif /* ARM64_ERRATUM_1742098 */ +} + #ifdef CONFIG_KVM static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused) { @@ -1978,6 +2059,11 @@ static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, in } #endif /* CONFIG_KVM */ +static void cpu_trap_el0_impdef(const struct arm64_cpu_capabilities *__unused) +{ + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_TIDCP); +} + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -2132,7 +2218,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, - .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_DPB_SHIFT, .field_width = 4, .min_field_value = 1, }, @@ -2143,7 +2229,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_DPB_SHIFT, .field_width = 4, .min_field_value = 2, }, @@ -2303,7 +2389,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, - .field_pos = ID_AA64ISAR1_SB_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_SB_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, @@ -2315,9 +2401,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_APA_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_APA_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR1_APA_ARCHITECTED, + .min_field_value = ID_AA64ISAR1_EL1_APA_PAuth, .matches = has_address_auth_cpucap, }, { @@ -2326,9 +2412,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .sys_reg = SYS_ID_AA64ISAR2_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR2_APA3_SHIFT, + .field_pos = ID_AA64ISAR2_EL1_APA3_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR2_APA3_ARCHITECTED, + .min_field_value = ID_AA64ISAR2_EL1_APA3_PAuth, .matches = has_address_auth_cpucap, }, { @@ -2337,9 +2423,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_API_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_API_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR1_API_IMP_DEF, + .min_field_value = ID_AA64ISAR1_EL1_API_PAuth, .matches = has_address_auth_cpucap, }, { @@ -2353,9 +2439,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_GPA_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_GPA_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR1_GPA_ARCHITECTED, + .min_field_value = ID_AA64ISAR1_EL1_GPA_IMP, .matches = has_cpuid_feature, }, { @@ -2364,9 +2450,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR2_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR2_GPA3_SHIFT, + .field_pos = ID_AA64ISAR2_EL1_GPA3_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR2_GPA3_ARCHITECTED, + .min_field_value = ID_AA64ISAR2_EL1_GPA3_IMP, .matches = has_cpuid_feature, }, { @@ -2375,9 +2461,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_GPI_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_GPI_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR1_GPI_IMP_DEF, + .min_field_value = ID_AA64ISAR1_EL1_GPI_IMP, .matches = has_cpuid_feature, }, { @@ -2478,7 +2564,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_LRCPC_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_LRCPC_SHIFT, .field_width = 4, .matches = has_cpuid_feature, .min_field_value = 1, @@ -2503,9 +2589,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_SME_FA64, .sys_reg = SYS_ID_AA64SMFR0_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64SMFR0_FA64_SHIFT, + .field_pos = ID_AA64SMFR0_EL1_FA64_SHIFT, .field_width = 1, - .min_field_value = ID_AA64SMFR0_FA64, + .min_field_value = ID_AA64SMFR0_EL1_FA64_IMP, .matches = has_cpuid_feature, .cpu_enable = fa64_kernel_enable, }, @@ -2516,10 +2602,22 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR2_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR2_WFXT_SHIFT, + .field_pos = ID_AA64ISAR2_EL1_WFxT_SHIFT, .field_width = 4, .matches = has_cpuid_feature, - .min_field_value = ID_AA64ISAR2_WFXT_SUPPORTED, + .min_field_value = ID_AA64ISAR2_EL1_WFxT_IMP, + }, + { + .desc = "Trap EL0 IMPLEMENTATION DEFINED functionality", + .capability = ARM64_HAS_TIDCP1, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .sys_reg = SYS_ID_AA64MMFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR1_TIDCP1_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64MMFR1_TIDCP1_IMP, + .matches = has_cpuid_feature, + .cpu_enable = cpu_trap_el0_impdef, }, {}, }; @@ -2560,33 +2658,33 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #ifdef CONFIG_ARM64_PTR_AUTH static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_APA_SHIFT, + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_APA_SHIFT, 4, FTR_UNSIGNED, - ID_AA64ISAR1_APA_ARCHITECTED) + ID_AA64ISAR1_EL1_APA_PAuth) }, { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_SHIFT, - 4, FTR_UNSIGNED, ID_AA64ISAR2_APA3_ARCHITECTED) + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_APA3_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_APA3_PAuth) }, { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_SHIFT, - 4, FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF) + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_API_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_API_PAuth) }, {}, }; static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = { { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPA_SHIFT, - 4, FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED) + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_GPA_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_GPA_IMP) }, { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_GPA3_SHIFT, - 4, FTR_UNSIGNED, ID_AA64ISAR2_GPA3_ARCHITECTED) + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_GPA3_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_GPA3_IMP) }, { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPI_SHIFT, - 4, FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF) + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_GPI_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_GPI_IMP) }, {}, }; @@ -2614,30 +2712,31 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_EBF16), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SVEver_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BitPerm_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BF16_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SHA3_IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SM4_IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_I8MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F32MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F64MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), #endif HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_BTI @@ -2653,17 +2752,17 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #endif /* CONFIG_ARM64_MTE */ HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), - HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), - HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_WFXT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_WFXT_SUPPORTED, CAP_HWCAP, KERNEL_HWCAP_WFXT), + HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), + HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_WFxT_IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT), #ifdef CONFIG_ARM64_SME HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SME, CAP_HWCAP, KERNEL_HWCAP_SME), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_FA64, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I16I64, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F64F64, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I8I32, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F16F32, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_B16F32, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F32F32, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_FA64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F64F64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I8I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F32F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), #endif /* CONFIG_ARM64_SME */ {}, }; @@ -3098,14 +3197,12 @@ static bool __maybe_unused __system_matches_cap(unsigned int n) void cpu_set_feature(unsigned int num) { - WARN_ON(num >= MAX_CPU_FEATURES); - elf_hwcap |= BIT(num); + set_bit(num, elf_hwcap); } bool cpu_have_feature(unsigned int num) { - WARN_ON(num >= MAX_CPU_FEATURES); - return elf_hwcap & BIT(num); + return test_bit(num, elf_hwcap); } EXPORT_SYMBOL_GPL(cpu_have_feature); @@ -3116,12 +3213,12 @@ unsigned long cpu_get_elf_hwcap(void) * note that for userspace compatibility we guarantee that bits 62 * and 63 will always be returned as 0. */ - return lower_32_bits(elf_hwcap); + return elf_hwcap[0]; } unsigned long cpu_get_elf_hwcap2(void) { - return upper_32_bits(elf_hwcap); + return elf_hwcap[1]; } static void __init setup_system_capabilities(void) @@ -3143,8 +3240,10 @@ void __init setup_cpu_features(void) setup_system_capabilities(); setup_elf_hwcaps(arm64_elf_hwcaps); - if (system_supports_32bit_el0()) + if (system_supports_32bit_el0()) { setup_elf_hwcaps(compat_elf_hwcaps); + elf_hwcap_fixup(); + } if (system_uses_ttbr0_pan()) pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); @@ -3197,6 +3296,7 @@ static int enable_mismatched_32bit_el0(unsigned int cpu) cpu_active_mask); get_cpu_device(lucky_winner)->offline_disabled = true; setup_elf_hwcaps(compat_elf_hwcaps); + elf_hwcap_fixup(); pr_info("Asymmetric 32-bit EL0 support detected on CPU %u; CPU hot-unplug disabled on CPU %u\n", cpu, lucky_winner); return 0; @@ -3218,7 +3318,7 @@ subsys_initcall_sync(init_32bit_el0_mask); static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap) { - cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir); } /* diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 3006f4324808..4150e308e99c 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -13,35 +13,6 @@ #include <linux/of_device.h> #include <linux/psci.h> -#include <asm/cpuidle.h> -#include <asm/cpu_ops.h> - -int arm_cpuidle_init(unsigned int cpu) -{ - const struct cpu_operations *ops = get_cpu_ops(cpu); - int ret = -EOPNOTSUPP; - - if (ops && ops->cpu_suspend && ops->cpu_init_idle) - ret = ops->cpu_init_idle(cpu); - - return ret; -} - -/** - * arm_cpuidle_suspend() - function to enter a low-power idle state - * @index: argument to pass to CPU suspend operations - * - * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU - * operations back-end error code otherwise. - */ -int arm_cpuidle_suspend(int index) -{ - int cpu = smp_processor_id(); - const struct cpu_operations *ops = get_cpu_ops(cpu); - - return ops->cpu_suspend(index); -} - #ifdef CONFIG_ACPI #include <acpi/processor.h> diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 8eff0a34ffd4..d7702f39b4d3 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -33,12 +33,19 @@ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); static struct cpuinfo_arm64 boot_cpu_data; -static const char *icache_policy_str[] = { - [ICACHE_POLICY_VPIPT] = "VPIPT", - [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", - [ICACHE_POLICY_VIPT] = "VIPT", - [ICACHE_POLICY_PIPT] = "PIPT", -}; +static inline const char *icache_policy_str(int l1ip) +{ + switch (l1ip) { + case CTR_EL0_L1Ip_VPIPT: + return "VPIPT"; + case CTR_EL0_L1Ip_VIPT: + return "VIPT"; + case CTR_EL0_L1Ip_PIPT: + return "PIPT"; + default: + return "RESERVED/UNKNOWN"; + } +} unsigned long __icache_flags; @@ -107,6 +114,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_SME_F32F32] = "smef32f32", [KERNEL_HWCAP_SME_FA64] = "smefa64", [KERNEL_HWCAP_WFXT] = "wfxt", + [KERNEL_HWCAP_EBF16] = "ebf16", }; #ifdef CONFIG_COMPAT @@ -267,6 +275,7 @@ static struct kobj_type cpuregs_kobj_type = { CPUREGS_ATTR_RO(midr_el1, midr); CPUREGS_ATTR_RO(revidr_el1, revidr); +CPUREGS_ATTR_RO(smidr_el1, smidr); static struct attribute *cpuregs_id_attrs[] = { &cpuregs_attr_midr_el1.attr, @@ -279,6 +288,16 @@ static const struct attribute_group cpuregs_attr_group = { .name = "identification" }; +static struct attribute *sme_cpuregs_id_attrs[] = { + &cpuregs_attr_smidr_el1.attr, + NULL +}; + +static const struct attribute_group sme_cpuregs_attr_group = { + .attrs = sme_cpuregs_id_attrs, + .name = "identification" +}; + static int cpuid_cpu_online(unsigned int cpu) { int rc; @@ -296,6 +315,8 @@ static int cpuid_cpu_online(unsigned int cpu) rc = sysfs_create_group(&info->kobj, &cpuregs_attr_group); if (rc) kobject_del(&info->kobj); + if (system_supports_sme()) + rc = sysfs_merge_group(&info->kobj, &sme_cpuregs_attr_group); out: return rc; } @@ -342,19 +363,19 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) u32 l1ip = CTR_L1IP(info->reg_ctr); switch (l1ip) { - case ICACHE_POLICY_PIPT: + case CTR_EL0_L1Ip_PIPT: break; - case ICACHE_POLICY_VPIPT: + case CTR_EL0_L1Ip_VPIPT: set_bit(ICACHEF_VPIPT, &__icache_flags); break; - case ICACHE_POLICY_RESERVED: - case ICACHE_POLICY_VIPT: + case CTR_EL0_L1Ip_VIPT: + default: /* Assume aliasing */ set_bit(ICACHEF_ALIASING, &__icache_flags); break; } - pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); + pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str(l1ip), cpu); } static void __cpuinfo_store_cpu_32bit(struct cpuinfo_32bit *info) @@ -418,14 +439,6 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) __cpuinfo_store_cpu_32bit(&info->aarch32); - if (IS_ENABLED(CONFIG_ARM64_SVE) && - id_aa64pfr0_sve(info->reg_id_aa64pfr0)) - info->reg_zcr = read_zcr_features(); - - if (IS_ENABLED(CONFIG_ARM64_SME) && - id_aa64pfr1_sme(info->reg_id_aa64pfr1)) - info->reg_smcr = read_smcr_features(); - cpuinfo_detect_icache_policy(info); } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5b82b9292400..254fe31c03a0 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -636,18 +636,28 @@ alternative_else_nop_endif */ .endm - .macro tramp_data_page dst - adr_l \dst, .entry.tramp.text - sub \dst, \dst, PAGE_SIZE - .endm - - .macro tramp_data_read_var dst, var -#ifdef CONFIG_RANDOMIZE_BASE - tramp_data_page \dst - add \dst, \dst, #:lo12:__entry_tramp_data_\var - ldr \dst, [\dst] + .macro tramp_data_read_var dst, var +#ifdef CONFIG_RELOCATABLE + ldr \dst, .L__tramp_data_\var + .ifndef .L__tramp_data_\var + .pushsection ".entry.tramp.rodata", "a", %progbits + .align 3 +.L__tramp_data_\var: + .quad \var + .popsection + .endif #else - ldr \dst, =\var + /* + * As !RELOCATABLE implies !RANDOMIZE_BASE the address is always a + * compile time constant (and hence not secret and not worth hiding). + * + * As statically allocated kernel code and data always live in the top + * 47 bits of the address space we can sign-extend bit 47 and avoid an + * instruction to load the upper 16 bits (which must be 0xFFFF). + */ + movz \dst, :abs_g2_s:\var + movk \dst, :abs_g1_nc:\var + movk \dst, :abs_g0_nc:\var #endif .endm @@ -695,7 +705,7 @@ alternative_else_nop_endif msr vbar_el1, x30 isb .else - ldr x30, =vectors + adr_l x30, vectors .endif // \kpti == 1 .if \bhb == BHB_MITIGATION_FW @@ -764,24 +774,7 @@ SYM_CODE_END(tramp_exit_native) SYM_CODE_START(tramp_exit_compat) tramp_exit 32 SYM_CODE_END(tramp_exit_compat) - - .ltorg .popsection // .entry.tramp.text -#ifdef CONFIG_RANDOMIZE_BASE - .pushsection ".rodata", "a" - .align PAGE_SHIFT -SYM_DATA_START(__entry_tramp_data_start) -__entry_tramp_data_vectors: - .quad vectors -#ifdef CONFIG_ARM_SDE_INTERFACE -__entry_tramp_data___sdei_asm_handler: - .quad __sdei_asm_handler -#endif /* CONFIG_ARM_SDE_INTERFACE */ -__entry_tramp_data_this_cpu_vector: - .quad this_cpu_vector -SYM_DATA_END(__entry_tramp_data_start) - .popsection // .rodata -#endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ /* @@ -932,7 +925,6 @@ NOKPROBE(call_on_irq_stack) * This clobbers x4, __sdei_handler() will restore this from firmware's * copy. */ -.ltorg .pushsection ".entry.tramp.text", "ax" SYM_CODE_START(__sdei_asm_entry_trampoline) mrs x4, ttbr1_el1 @@ -967,7 +959,6 @@ SYM_CODE_START(__sdei_asm_exit_trampoline) 1: sdei_handler_exit exit_mode=x2 SYM_CODE_END(__sdei_asm_exit_trampoline) NOKPROBE(__sdei_asm_exit_trampoline) - .ltorg .popsection // .entry.tramp.text #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index aecf3071efdd..dd63ffc3a2fa 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -445,7 +445,6 @@ static void fpsimd_save(void) if (system_supports_sme()) { u64 *svcr = last->svcr; - *svcr = read_sysreg_s(SYS_SVCR); *svcr = read_sysreg_s(SYS_SVCR); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 6a98f1a38c29..cefe6a73ee54 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -37,8 +37,6 @@ #include "efi-header.S" -#define __PHYS_OFFSET KERNEL_START - #if (PAGE_OFFSET & 0x1fffff) != 0 #error PAGE_OFFSET must be at least 2MB aligned #endif @@ -51,9 +49,6 @@ * MMU = off, D-cache = off, I-cache = on or off, * x0 = physical address to the FDT blob. * - * This code is mostly position independent so you call this at - * __pa(PAGE_OFFSET). - * * Note that the callee-saved registers are used for storing variables * that are useful before the MMU is enabled. The allocations are described * in the entry routines. @@ -82,25 +77,34 @@ * primary lowlevel boot path: * * Register Scope Purpose + * x20 primary_entry() .. __primary_switch() CPU boot mode * x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0 + * x22 create_idmap() .. start_kernel() ID map VA of the DT blob * x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset - * x28 __create_page_tables() callee preserved temp register - * x19/x20 __primary_switch() callee preserved temp registers - * x24 __primary_switch() .. relocate_kernel() current RELR displacement + * x24 __primary_switch() linear map KASLR seed + * x25 primary_entry() .. start_kernel() supported VA size + * x28 create_idmap() callee preserved temp register */ SYM_CODE_START(primary_entry) bl preserve_boot_args bl init_kernel_el // w0=cpu_boot_mode - adrp x23, __PHYS_OFFSET - and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 - bl set_cpu_boot_mode_flag - bl __create_page_tables + mov x20, x0 + bl create_idmap + /* * The following calls CPU setup code, see arch/arm64/mm/proc.S for * details. * On return, the CPU will be ready for the MMU to be turned on and * the TCR will have been set. */ +#if VA_BITS > 48 + mrs_s x0, SYS_ID_AA64MMFR2_EL1 + tst x0, #0xf << ID_AA64MMFR2_LVA_SHIFT + mov x0, #VA_BITS + mov x25, #VA_BITS_MIN + csel x25, x25, x0, eq + mov x0, x25 +#endif bl __cpu_setup // initialise processor b __primary_switch SYM_CODE_END(primary_entry) @@ -122,28 +126,16 @@ SYM_CODE_START_LOCAL(preserve_boot_args) b dcache_inval_poc // tail call SYM_CODE_END(preserve_boot_args) -/* - * Macro to create a table entry to the next page. - * - * tbl: page table address - * virt: virtual address - * shift: #imm page table shift - * ptrs: #imm pointers per table page - * - * Preserves: virt - * Corrupts: ptrs, tmp1, tmp2 - * Returns: tbl -> next level table page address - */ - .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 - add \tmp1, \tbl, #PAGE_SIZE - phys_to_pte \tmp2, \tmp1 - orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type - lsr \tmp1, \virt, #\shift - sub \ptrs, \ptrs, #1 - and \tmp1, \tmp1, \ptrs // table index - str \tmp2, [\tbl, \tmp1, lsl #3] - add \tbl, \tbl, #PAGE_SIZE // next level table page - .endm +SYM_FUNC_START_LOCAL(clear_page_tables) + /* + * Clear the init page tables. + */ + adrp x0, init_pg_dir + adrp x1, init_pg_end + sub x2, x1, x0 + mov x1, xzr + b __pi_memset // tail call +SYM_FUNC_END(clear_page_tables) /* * Macro to populate page table entries, these entries can be pointers to the next level @@ -179,31 +171,20 @@ SYM_CODE_END(preserve_boot_args) * vstart: virtual address of start of range * vend: virtual address of end of range - we map [vstart, vend] * shift: shift used to transform virtual address into index - * ptrs: number of entries in page table + * order: #imm 2log(number of entries in page table) * istart: index in table corresponding to vstart * iend: index in table corresponding to vend * count: On entry: how many extra entries were required in previous level, scales * our end index. * On exit: returns how many extra entries required for next page table level * - * Preserves: vstart, vend, shift, ptrs + * Preserves: vstart, vend * Returns: istart, iend, count */ - .macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count - lsr \iend, \vend, \shift - mov \istart, \ptrs - sub \istart, \istart, #1 - and \iend, \iend, \istart // iend = (vend >> shift) & (ptrs - 1) - mov \istart, \ptrs - mul \istart, \istart, \count - add \iend, \iend, \istart // iend += count * ptrs - // our entries span multiple tables - - lsr \istart, \vstart, \shift - mov \count, \ptrs - sub \count, \count, #1 - and \istart, \istart, \count - + .macro compute_indices, vstart, vend, shift, order, istart, iend, count + ubfx \istart, \vstart, \shift, \order + ubfx \iend, \vend, \shift, \order + add \iend, \iend, \count, lsl \order sub \count, \iend, \istart .endm @@ -218,119 +199,116 @@ SYM_CODE_END(preserve_boot_args) * vend: virtual address of end of range - we map [vstart, vend - 1] * flags: flags to use to map last level entries * phys: physical address corresponding to vstart - physical memory is contiguous - * pgds: the number of pgd entries + * order: #imm 2log(number of entries in PGD table) + * + * If extra_shift is set, an extra level will be populated if the end address does + * not fit in 'extra_shift' bits. This assumes vend is in the TTBR0 range. * * Temporaries: istart, iend, tmp, count, sv - these need to be different registers * Preserves: vstart, flags * Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv */ - .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv + .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv, extra_shift sub \vend, \vend, #1 add \rtbl, \tbl, #PAGE_SIZE - mov \sv, \rtbl mov \count, #0 - compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count + + .ifnb \extra_shift + tst \vend, #~((1 << (\extra_shift)) - 1) + b.eq .L_\@ + compute_indices \vstart, \vend, #\extra_shift, #(PAGE_SHIFT - 3), \istart, \iend, \count + mov \sv, \rtbl populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp mov \tbl, \sv + .endif +.L_\@: + compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count mov \sv, \rtbl + populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp + mov \tbl, \sv #if SWAPPER_PGTABLE_LEVELS > 3 - compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count + compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count + mov \sv, \rtbl populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp mov \tbl, \sv - mov \sv, \rtbl #endif #if SWAPPER_PGTABLE_LEVELS > 2 - compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count + compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count + mov \sv, \rtbl populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp mov \tbl, \sv #endif - compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count - bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1 - populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp + compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count + bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1 + populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp .endm /* - * Setup the initial page tables. We only setup the barest amount which is - * required to get the kernel running. The following sections are required: - * - identity mapping to enable the MMU (low address, TTBR0) - * - first few MB of the kernel linear mapping to jump to once the MMU has - * been enabled + * Remap a subregion created with the map_memory macro with modified attributes + * or output address. The entire remapped region must have been covered in the + * invocation of map_memory. + * + * x0: last level table address (returned in first argument to map_memory) + * x1: start VA of the existing mapping + * x2: start VA of the region to update + * x3: end VA of the region to update (exclusive) + * x4: start PA associated with the region to update + * x5: attributes to set on the updated region + * x6: order of the last level mappings */ -SYM_FUNC_START_LOCAL(__create_page_tables) - mov x28, lr +SYM_FUNC_START_LOCAL(remap_region) + sub x3, x3, #1 // make end inclusive - /* - * Invalidate the init page tables to avoid potential dirty cache lines - * being evicted. Other page tables are allocated in rodata as part of - * the kernel image, and thus are clean to the PoC per the boot - * protocol. - */ - adrp x0, init_pg_dir - adrp x1, init_pg_end - bl dcache_inval_poc + // Get the index offset for the start of the last level table + lsr x1, x1, x6 + bfi x1, xzr, #0, #PAGE_SHIFT - 3 - /* - * Clear the init page tables. - */ - adrp x0, init_pg_dir - adrp x1, init_pg_end - sub x1, x1, x0 -1: stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - subs x1, x1, #64 - b.ne 1b + // Derive the start and end indexes into the last level table + // associated with the provided region + lsr x2, x2, x6 + lsr x3, x3, x6 + sub x2, x2, x1 + sub x3, x3, x1 - mov x7, SWAPPER_MM_MMUFLAGS + mov x1, #1 + lsl x6, x1, x6 // block size at this level - /* - * Create the identity mapping. - */ - adrp x0, idmap_pg_dir - adrp x3, __idmap_text_start // __pa(__idmap_text_start) - -#ifdef CONFIG_ARM64_VA_BITS_52 - mrs_s x6, SYS_ID_AA64MMFR2_EL1 - and x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT) - mov x5, #52 - cbnz x6, 1f -#endif - mov x5, #VA_BITS_MIN -1: - adr_l x6, vabits_actual - str x5, [x6] - dmb sy - dc ivac, x6 // Invalidate potentially stale cache line + populate_entries x0, x4, x2, x3, x5, x6, x7 + ret +SYM_FUNC_END(remap_region) +SYM_FUNC_START_LOCAL(create_idmap) + mov x28, lr /* - * VA_BITS may be too small to allow for an ID mapping to be created - * that covers system RAM if that is located sufficiently high in the - * physical address space. So for the ID map, use an extended virtual - * range in that case, and configure an additional translation level - * if needed. + * The ID map carries a 1:1 mapping of the physical address range + * covered by the loaded image, which could be anywhere in DRAM. This + * means that the required size of the VA (== PA) space is decided at + * boot time, and could be more than the configured size of the VA + * space for ordinary kernel and user space mappings. + * + * There are three cases to consider here: + * - 39 <= VA_BITS < 48, and the ID map needs up to 48 VA bits to cover + * the placement of the image. In this case, we configure one extra + * level of translation on the fly for the ID map only. (This case + * also covers 42-bit VA/52-bit PA on 64k pages). * - * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the - * entire ID map region can be mapped. As T0SZ == (64 - #bits used), - * this number conveniently equals the number of leading zeroes in - * the physical address of __idmap_text_end. + * - VA_BITS == 48, and the ID map needs more than 48 VA bits. This can + * only happen when using 64k pages, in which case we need to extend + * the root level table rather than add a level. Note that we can + * treat this case as 'always extended' as long as we take care not + * to program an unsupported T0SZ value into the TCR register. + * + * - Combinations that would require two additional levels of + * translation are not supported, e.g., VA_BITS==36 on 16k pages, or + * VA_BITS==39/4k pages with 5-level paging, where the input address + * requires more than 47 or 48 bits, respectively. */ - adrp x5, __idmap_text_end - clz x5, x5 - cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough? - b.ge 1f // .. then skip VA range extension - - adr_l x6, idmap_t0sz - str x5, [x6] - dmb sy - dc ivac, x6 // Invalidate potentially stale cache line - #if (VA_BITS < 48) +#define IDMAP_PGD_ORDER (VA_BITS - PGDIR_SHIFT) #define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) -#define EXTRA_PTRS (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT)) /* * If VA_BITS < 48, we have to configure an additional table level. @@ -342,36 +320,40 @@ SYM_FUNC_START_LOCAL(__create_page_tables) #if VA_BITS != EXTRA_SHIFT #error "Mismatch between VA_BITS and page size/number of translation levels" #endif - - mov x4, EXTRA_PTRS - create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6 #else +#define IDMAP_PGD_ORDER (PHYS_MASK_SHIFT - PGDIR_SHIFT) +#define EXTRA_SHIFT /* * If VA_BITS == 48, we don't have to configure an additional * translation level, but the top-level table has more entries. */ - mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT) - str_l x4, idmap_ptrs_per_pgd, x5 #endif -1: - ldr_l x4, idmap_ptrs_per_pgd - adr_l x6, __idmap_text_end // __pa(__idmap_text_end) - - map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14 - - /* - * Map the kernel image (starting with PHYS_OFFSET). - */ - adrp x0, init_pg_dir - mov_q x5, KIMAGE_VADDR // compile time __va(_text) - add x5, x5, x23 // add KASLR displacement - mov x4, PTRS_PER_PGD - adrp x6, _end // runtime __pa(_end) - adrp x3, _text // runtime __pa(_text) - sub x6, x6, x3 // _end - _text - add x6, x6, x5 // runtime __va(_end) - - map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14 + adrp x0, init_idmap_pg_dir + adrp x3, _text + adrp x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE + mov x7, SWAPPER_RX_MMUFLAGS + + map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT + + /* Remap the kernel page tables r/w in the ID map */ + adrp x1, _text + adrp x2, init_pg_dir + adrp x3, init_pg_end + bic x4, x2, #SWAPPER_BLOCK_SIZE - 1 + mov x5, SWAPPER_RW_MMUFLAGS + mov x6, #SWAPPER_BLOCK_SHIFT + bl remap_region + + /* Remap the FDT after the kernel image */ + adrp x1, _text + adrp x22, _end + SWAPPER_BLOCK_SIZE + bic x2, x22, #SWAPPER_BLOCK_SIZE - 1 + bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address + add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE + bic x4, x21, #SWAPPER_BLOCK_SIZE - 1 + mov x5, SWAPPER_RW_MMUFLAGS + mov x6, #SWAPPER_BLOCK_SHIFT + bl remap_region /* * Since the page tables have been populated with non-cacheable @@ -380,16 +362,27 @@ SYM_FUNC_START_LOCAL(__create_page_tables) */ dmb sy - adrp x0, idmap_pg_dir - adrp x1, idmap_pg_end + adrp x0, init_idmap_pg_dir + adrp x1, init_idmap_pg_end bl dcache_inval_poc + ret x28 +SYM_FUNC_END(create_idmap) +SYM_FUNC_START_LOCAL(create_kernel_mapping) adrp x0, init_pg_dir - adrp x1, init_pg_end - bl dcache_inval_poc + mov_q x5, KIMAGE_VADDR // compile time __va(_text) + add x5, x5, x23 // add KASLR displacement + adrp x6, _end // runtime __pa(_end) + adrp x3, _text // runtime __pa(_text) + sub x6, x6, x3 // _end - _text + add x6, x6, x5 // runtime __va(_end) + mov x7, SWAPPER_RW_MMUFLAGS - ret x28 -SYM_FUNC_END(__create_page_tables) + map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14 + + dsb ishst // sync with page table walker + ret +SYM_FUNC_END(create_kernel_mapping) /* * Initialize CPU registers with task-specific and cpu-specific context. @@ -420,7 +413,7 @@ SYM_FUNC_END(__create_page_tables) /* * The following fragment of code is executed with the MMU enabled. * - * x0 = __PHYS_OFFSET + * x0 = __pa(KERNEL_START) */ SYM_FUNC_START_LOCAL(__primary_switched) adr_l x4, init_task @@ -439,6 +432,9 @@ SYM_FUNC_START_LOCAL(__primary_switched) sub x4, x4, x0 // the kernel virtual and str_l x4, kimage_voffset, x5 // physical mappings + mov x0, x20 + bl set_cpu_boot_mode_flag + // Clear BSS adr_l x0, __bss_start mov x1, xzr @@ -447,35 +443,30 @@ SYM_FUNC_START_LOCAL(__primary_switched) bl __pi_memset dsb ishst // Make zero page visible to PTW +#if VA_BITS > 48 + adr_l x8, vabits_actual // Set this early so KASAN early init + str x25, [x8] // ... observes the correct value + dc civac, x8 // Make visible to booting secondaries +#endif + +#ifdef CONFIG_RANDOMIZE_BASE + adrp x5, memstart_offset_seed // Save KASLR linear map seed + strh w24, [x5, :lo12:memstart_offset_seed] +#endif #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) bl kasan_early_init #endif mov x0, x21 // pass FDT address in x0 bl early_fdt_map // Try mapping the FDT early + mov x0, x20 // pass the full boot status bl init_feature_override // Parse cpu feature overrides -#ifdef CONFIG_RANDOMIZE_BASE - tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized? - b.ne 0f - bl kaslr_early_init // parse FDT for KASLR options - cbz x0, 0f // KASLR disabled? just proceed - orr x23, x23, x0 // record KASLR offset - ldp x29, x30, [sp], #16 // we must enable KASLR, return - ret // to __primary_switch() -0: -#endif - bl switch_to_vhe // Prefer VHE if possible + mov x0, x20 + bl finalise_el2 // Prefer VHE if possible ldp x29, x30, [sp], #16 bl start_kernel ASM_BUG() SYM_FUNC_END(__primary_switched) - .pushsection ".rodata", "a" -SYM_DATA_START(kimage_vaddr) - .quad _text -SYM_DATA_END(kimage_vaddr) -EXPORT_SYMBOL(kimage_vaddr) - .popsection - /* * end early head section, begin head code that is also used for * hotplug and needs to have the same protections as the text region @@ -490,8 +481,9 @@ EXPORT_SYMBOL(kimage_vaddr) * Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if * SCTLR_ELx.EOS is clear), we place an ISB prior to ERET. * - * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if - * booted in EL1 or EL2 respectively. + * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x0 if + * booted in EL1 or EL2 respectively, with the top 32 bits containing + * potential context flags. These flags are *not* stored in __boot_cpu_mode. */ SYM_FUNC_START(init_kernel_el) mrs x0, CurrentEL @@ -520,6 +512,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) msr vbar_el2, x0 isb + mov_q x1, INIT_SCTLR_EL1_MMU_OFF + /* * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, * making it impossible to start in nVHE mode. Is that @@ -529,34 +523,19 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) and x0, x0, #HCR_E2H cbz x0, 1f - /* Switching to VHE requires a sane SCTLR_EL1 as a start */ - mov_q x0, INIT_SCTLR_EL1_MMU_OFF - msr_s SYS_SCTLR_EL12, x0 - - /* - * Force an eret into a helper "function", and let it return - * to our original caller... This makes sure that we have - * initialised the basic PSTATE state. - */ - mov x0, #INIT_PSTATE_EL2 - msr spsr_el1, x0 - adr x0, __cpu_stick_to_vhe - msr elr_el1, x0 - eret + /* Set a sane SCTLR_EL1, the VHE way */ + msr_s SYS_SCTLR_EL12, x1 + mov x2, #BOOT_CPU_FLAG_E2H + b 2f 1: - mov_q x0, INIT_SCTLR_EL1_MMU_OFF - msr sctlr_el1, x0 - + msr sctlr_el1, x1 + mov x2, xzr +2: msr elr_el2, lr mov w0, #BOOT_CPU_MODE_EL2 + orr x0, x0, x2 eret - -__cpu_stick_to_vhe: - mov x0, #HVC_VHE_RESTART - hvc #0 - mov x0, #BOOT_CPU_MODE_EL2 - ret SYM_FUNC_END(init_kernel_el) /* @@ -569,52 +548,21 @@ SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag) b.ne 1f add x1, x1, #4 1: str w0, [x1] // Save CPU boot mode - dmb sy - dc ivac, x1 // Invalidate potentially stale cache line ret SYM_FUNC_END(set_cpu_boot_mode_flag) -/* - * These values are written with the MMU off, but read with the MMU on. - * Writers will invalidate the corresponding address, discarding up to a - * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures - * sufficient alignment that the CWG doesn't overlap another section. - */ - .pushsection ".mmuoff.data.write", "aw" -/* - * We need to find out the CPU boot mode long after boot, so we need to - * store it in a writable variable. - * - * This is not in .bss, because we set it sufficiently early that the boot-time - * zeroing of .bss would clobber it. - */ -SYM_DATA_START(__boot_cpu_mode) - .long BOOT_CPU_MODE_EL2 - .long BOOT_CPU_MODE_EL1 -SYM_DATA_END(__boot_cpu_mode) -/* - * The booting CPU updates the failed status @__early_cpu_boot_status, - * with MMU turned off. - */ -SYM_DATA_START(__early_cpu_boot_status) - .quad 0 -SYM_DATA_END(__early_cpu_boot_status) - - .popsection - /* * This provides a "holding pen" for platforms to hold all secondary * cores are held until we're ready for them to initialise. */ SYM_FUNC_START(secondary_holding_pen) bl init_kernel_el // w0=cpu_boot_mode - bl set_cpu_boot_mode_flag - mrs x0, mpidr_el1 + mrs x2, mpidr_el1 mov_q x1, MPIDR_HWID_BITMASK - and x0, x0, x1 + and x2, x2, x1 adr_l x3, secondary_holding_pen_release pen: ldr x4, [x3] - cmp x4, x0 + cmp x4, x2 b.eq secondary_startup wfe b pen @@ -626,7 +574,6 @@ SYM_FUNC_END(secondary_holding_pen) */ SYM_FUNC_START(secondary_entry) bl init_kernel_el // w0=cpu_boot_mode - bl set_cpu_boot_mode_flag b secondary_startup SYM_FUNC_END(secondary_entry) @@ -634,16 +581,24 @@ SYM_FUNC_START_LOCAL(secondary_startup) /* * Common entry point for secondary CPUs. */ - bl switch_to_vhe + mov x20, x0 // preserve boot mode + bl finalise_el2 bl __cpu_secondary_check52bitva +#if VA_BITS > 48 + ldr_l x0, vabits_actual +#endif bl __cpu_setup // initialise processor adrp x1, swapper_pg_dir + adrp x2, idmap_pg_dir bl __enable_mmu ldr x8, =__secondary_switched br x8 SYM_FUNC_END(secondary_startup) SYM_FUNC_START_LOCAL(__secondary_switched) + mov x0, x20 + bl set_cpu_boot_mode_flag + str_l xzr, __early_cpu_boot_status, x3 adr_l x5, vectors msr vbar_el1, x5 isb @@ -691,6 +646,7 @@ SYM_FUNC_END(__secondary_too_slow) * * x0 = SCTLR_EL1 value for turning on the MMU. * x1 = TTBR1_EL1 value + * x2 = ID map root table address * * Returns to the caller via x30/lr. This requires the caller to be covered * by the .idmap.text section. @@ -699,20 +655,15 @@ SYM_FUNC_END(__secondary_too_slow) * If it isn't, park the CPU */ SYM_FUNC_START(__enable_mmu) - mrs x2, ID_AA64MMFR0_EL1 - ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 - cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN + mrs x3, ID_AA64MMFR0_EL1 + ubfx x3, x3, #ID_AA64MMFR0_TGRAN_SHIFT, 4 + cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN b.lt __no_granule_support - cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX + cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX b.gt __no_granule_support - update_early_cpu_boot_status 0, x2, x3 - adrp x2, idmap_pg_dir - phys_to_ttbr x1, x1 phys_to_ttbr x2, x2 msr ttbr0_el1, x2 // load TTBR0 - offset_ttbr1 x1, x3 - msr ttbr1_el1, x1 // load TTBR1 - isb + load_ttbr1 x1, x1, x3 set_sctlr_el1 x0 @@ -720,7 +671,7 @@ SYM_FUNC_START(__enable_mmu) SYM_FUNC_END(__enable_mmu) SYM_FUNC_START(__cpu_secondary_check52bitva) -#ifdef CONFIG_ARM64_VA_BITS_52 +#if VA_BITS > 48 ldr_l x0, vabits_actual cmp x0, #52 b.ne 2f @@ -755,13 +706,10 @@ SYM_FUNC_START_LOCAL(__relocate_kernel) * Iterate over each entry in the relocation table, and apply the * relocations in place. */ - ldr w9, =__rela_offset // offset to reloc table - ldr w10, =__rela_size // size of reloc table - + adr_l x9, __rela_start + adr_l x10, __rela_end mov_q x11, KIMAGE_VADDR // default virtual offset add x11, x11, x23 // actual virtual offset - add x9, x9, x11 // __va(.rela) - add x10, x9, x10 // __va(.rela) + sizeof(.rela) 0: cmp x9, x10 b.hs 1f @@ -804,21 +752,9 @@ SYM_FUNC_START_LOCAL(__relocate_kernel) * entry in x9, the address being relocated by the current address or * bitmap entry in x13 and the address being relocated by the current * bit in x14. - * - * Because addends are stored in place in the binary, RELR relocations - * cannot be applied idempotently. We use x24 to keep track of the - * currently applied displacement so that we can correctly relocate if - * __relocate_kernel is called twice with non-zero displacements (i.e. - * if there is both a physical misalignment and a KASLR displacement). */ - ldr w9, =__relr_offset // offset to reloc table - ldr w10, =__relr_size // size of reloc table - add x9, x9, x11 // __va(.relr) - add x10, x9, x10 // __va(.relr) + sizeof(.relr) - - sub x15, x23, x24 // delta from previous offset - cbz x15, 7f // nothing to do if unchanged - mov x24, x23 // save new offset + adr_l x9, __relr_start + adr_l x10, __relr_end 2: cmp x9, x10 b.hs 7f @@ -826,7 +762,7 @@ SYM_FUNC_START_LOCAL(__relocate_kernel) tbnz x11, #0, 3f // branch to handle bitmaps add x13, x11, x23 ldr x12, [x13] // relocate address entry - add x12, x12, x15 + add x12, x12, x23 str x12, [x13], #8 // adjust to start of bitmap b 2b @@ -835,7 +771,7 @@ SYM_FUNC_START_LOCAL(__relocate_kernel) cbz x11, 6f tbz x11, #0, 5f // skip bit if not set ldr x12, [x14] // relocate bit - add x12, x12, x15 + add x12, x12, x23 str x12, [x14] 5: add x14, x14, #8 // move to next bit's address @@ -856,43 +792,32 @@ SYM_FUNC_END(__relocate_kernel) #endif SYM_FUNC_START_LOCAL(__primary_switch) + adrp x1, reserved_pg_dir + adrp x2, init_idmap_pg_dir + bl __enable_mmu +#ifdef CONFIG_RELOCATABLE + adrp x23, KERNEL_START + and x23, x23, MIN_KIMG_ALIGN - 1 #ifdef CONFIG_RANDOMIZE_BASE - mov x19, x0 // preserve new SCTLR_EL1 value - mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value + mov x0, x22 + adrp x1, init_pg_end + mov sp, x1 + mov x29, xzr + bl __pi_kaslr_early_init + and x24, x0, #SZ_2M - 1 // capture memstart offset seed + bic x0, x0, #SZ_2M - 1 + orr x23, x23, x0 // record kernel offset +#endif #endif + bl clear_page_tables + bl create_kernel_mapping adrp x1, init_pg_dir - bl __enable_mmu + load_ttbr1 x1, x1, x2 #ifdef CONFIG_RELOCATABLE -#ifdef CONFIG_RELR - mov x24, #0 // no RELR displacement yet -#endif bl __relocate_kernel -#ifdef CONFIG_RANDOMIZE_BASE - ldr x8, =__primary_switched - adrp x0, __PHYS_OFFSET - blr x8 - - /* - * If we return here, we have a KASLR displacement in x23 which we need - * to take into account by discarding the current kernel mapping and - * creating a new one. - */ - pre_disable_mmu_workaround - msr sctlr_el1, x20 // disable the MMU - isb - bl __create_page_tables // recreate kernel mapping - - tlbi vmalle1 // Remove any stale TLB entries - dsb nsh - isb - - set_sctlr_el1 x19 // re-enable the MMU - - bl __relocate_kernel -#endif #endif ldr x8, =__primary_switched - adrp x0, __PHYS_OFFSET + adrp x0, KERNEL_START // __pa(KERNEL_START) br x8 SYM_FUNC_END(__primary_switch) diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 2e248342476e..af5df48ba915 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -300,11 +300,6 @@ static void swsusp_mte_restore_tags(void) unsigned long pfn = xa_state.xa_index; struct page *page = pfn_to_online_page(pfn); - /* - * It is not required to invoke page_kasan_tag_reset(page) - * at this point since the tags stored in page->flags are - * already restored. - */ mte_restore_page_tags(page_address(page), tags); mte_free_tag_storage(tags); diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 43d212618834..12c7fad02ae5 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -16,6 +16,30 @@ #include <asm/ptrace.h> #include <asm/virt.h> +// Warning, hardcoded register allocation +// This will clobber x1 and x2, and expect x1 to contain +// the id register value as read from the HW +.macro __check_override idreg, fld, width, pass, fail + ubfx x1, x1, #\fld, #\width + cbz x1, \fail + + adr_l x1, \idreg\()_override + ldr x2, [x1, FTR_OVR_VAL_OFFSET] + ldr x1, [x1, FTR_OVR_MASK_OFFSET] + ubfx x2, x2, #\fld, #\width + ubfx x1, x1, #\fld, #\width + cmp x1, xzr + and x2, x2, x1 + csinv x2, x2, xzr, ne + cbnz x2, \pass + b \fail +.endm + +.macro check_override idreg, fld, pass, fail + mrs x1, \idreg\()_el1 + __check_override \idreg \fld 4 \pass \fail +.endm + .text .pushsection .hyp.text, "ax" @@ -51,8 +75,8 @@ SYM_CODE_START_LOCAL(elx_sync) msr vbar_el2, x1 b 9f -1: cmp x0, #HVC_VHE_RESTART - b.eq mutate_to_vhe +1: cmp x0, #HVC_FINALISE_EL2 + b.eq __finalise_el2 2: cmp x0, #HVC_SOFT_RESTART b.ne 3f @@ -73,27 +97,67 @@ SYM_CODE_START_LOCAL(elx_sync) eret SYM_CODE_END(elx_sync) -// nVHE? No way! Give me the real thing! -SYM_CODE_START_LOCAL(mutate_to_vhe) +SYM_CODE_START_LOCAL(__finalise_el2) + check_override id_aa64pfr0 ID_AA64PFR0_SVE_SHIFT .Linit_sve .Lskip_sve + +.Linit_sve: /* SVE register access */ + mrs x0, cptr_el2 // Disable SVE traps + bic x0, x0, #CPTR_EL2_TZ + msr cptr_el2, x0 + isb + mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector + msr_s SYS_ZCR_EL2, x1 // length for EL1. + +.Lskip_sve: + check_override id_aa64pfr1 ID_AA64PFR1_SME_SHIFT .Linit_sme .Lskip_sme + +.Linit_sme: /* SME register access and priority mapping */ + mrs x0, cptr_el2 // Disable SME traps + bic x0, x0, #CPTR_EL2_TSM + msr cptr_el2, x0 + isb + + mrs x1, sctlr_el2 + orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps + msr sctlr_el2, x1 + isb + + mov x0, #0 // SMCR controls + + // Full FP in SM? + mrs_s x1, SYS_ID_AA64SMFR0_EL1 + __check_override id_aa64smfr0 ID_AA64SMFR0_EL1_FA64_SHIFT 1 .Linit_sme_fa64 .Lskip_sme_fa64 + +.Linit_sme_fa64: + orr x0, x0, SMCR_ELx_FA64_MASK +.Lskip_sme_fa64: + + orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector + msr_s SYS_SMCR_EL2, x0 // length for EL1. + + mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported? + ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1 + cbz x1, .Lskip_sme + + msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal + + mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present? + ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4 + cbz x1, .Lskip_sme + + mrs_s x1, SYS_HCRX_EL2 + orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping + msr_s SYS_HCRX_EL2, x1 + +.Lskip_sme: + + // nVHE? No way! Give me the real thing! // Sanity check: MMU *must* be off mrs x1, sctlr_el2 tbnz x1, #0, 1f // Needs to be VHE capable, obviously - mrs x1, id_aa64mmfr1_el1 - ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4 - cbz x1, 1f - - // Check whether VHE is disabled from the command line - adr_l x1, id_aa64mmfr1_override - ldr x2, [x1, FTR_OVR_VAL_OFFSET] - ldr x1, [x1, FTR_OVR_MASK_OFFSET] - ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 - ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4 - cmp x1, xzr - and x2, x2, x1 - csinv x2, x2, xzr, ne - cbnz x2, 2f + check_override id_aa64mmfr1 ID_AA64MMFR1_VHE_SHIFT 2f 1f 1: mov_q x0, HVC_STUB_ERR eret @@ -140,10 +204,10 @@ SYM_CODE_START_LOCAL(mutate_to_vhe) msr spsr_el1, x0 b enter_vhe -SYM_CODE_END(mutate_to_vhe) +SYM_CODE_END(__finalise_el2) // At the point where we reach enter_vhe(), we run with - // the MMU off (which is enforced by mutate_to_vhe()). + // the MMU off (which is enforced by __finalise_el2()). // We thus need to be in the idmap, or everything will // explode when enabling the MMU. @@ -222,12 +286,12 @@ SYM_FUNC_START(__hyp_reset_vectors) SYM_FUNC_END(__hyp_reset_vectors) /* - * Entry point to switch to VHE if deemed capable + * Entry point to finalise EL2 and switch to VHE if deemed capable + * + * w0: boot mode, as returned by init_kernel_el() */ -SYM_FUNC_START(switch_to_vhe) +SYM_FUNC_START(finalise_el2) // Need to have booted at EL2 - adr_l x1, __boot_cpu_mode - ldr w0, [x1] cmp w0, #BOOT_CPU_MODE_EL2 b.ne 1f @@ -236,9 +300,8 @@ SYM_FUNC_START(switch_to_vhe) cmp x0, #CurrentEL_EL1 b.ne 1f - // Turn the world upside down - mov x0, #HVC_VHE_RESTART + mov x0, #HVC_FINALISE_EL2 hvc #0 1: ret -SYM_FUNC_END(switch_to_vhe) +SYM_FUNC_END(finalise_el2) diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 8a2ceb591686..1b0542c69738 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -19,16 +19,21 @@ #define FTR_ALIAS_NAME_LEN 30 #define FTR_ALIAS_OPTION_LEN 116 +static u64 __boot_status __initdata; + struct ftr_set_desc { char name[FTR_DESC_NAME_LEN]; struct arm64_ftr_override *override; struct { char name[FTR_DESC_FIELD_LEN]; u8 shift; + u8 width; bool (*filter)(u64 val); } fields[]; }; +#define FIELD(n, s, f) { .name = n, .shift = s, .width = 4, .filter = f } + static bool __init mmfr1_vh_filter(u64 val) { /* @@ -37,24 +42,65 @@ static bool __init mmfr1_vh_filter(u64 val) * the user was trying to force nVHE on us, proceed with * attitude adjustment. */ - return !(is_kernel_in_hyp_mode() && val == 0); + return !(__boot_status == (BOOT_CPU_FLAG_E2H | BOOT_CPU_MODE_EL2) && + val == 0); } static const struct ftr_set_desc mmfr1 __initconst = { .name = "id_aa64mmfr1", .override = &id_aa64mmfr1_override, .fields = { - { "vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter }, + FIELD("vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter), + {} + }, +}; + +static bool __init pfr0_sve_filter(u64 val) +{ + /* + * Disabling SVE also means disabling all the features that + * are associated with it. The easiest way to do it is just to + * override id_aa64zfr0_el1 to be 0. + */ + if (!val) { + id_aa64zfr0_override.val = 0; + id_aa64zfr0_override.mask = GENMASK(63, 0); + } + + return true; +} + +static const struct ftr_set_desc pfr0 __initconst = { + .name = "id_aa64pfr0", + .override = &id_aa64pfr0_override, + .fields = { + FIELD("sve", ID_AA64PFR0_SVE_SHIFT, pfr0_sve_filter), {} }, }; +static bool __init pfr1_sme_filter(u64 val) +{ + /* + * Similarly to SVE, disabling SME also means disabling all + * the features that are associated with it. Just set + * id_aa64smfr0_el1 to 0 and don't look back. + */ + if (!val) { + id_aa64smfr0_override.val = 0; + id_aa64smfr0_override.mask = GENMASK(63, 0); + } + + return true; +} + static const struct ftr_set_desc pfr1 __initconst = { .name = "id_aa64pfr1", .override = &id_aa64pfr1_override, .fields = { - { "bt", ID_AA64PFR1_BT_SHIFT }, - { "mte", ID_AA64PFR1_MTE_SHIFT}, + FIELD("bt", ID_AA64PFR1_BT_SHIFT, NULL ), + FIELD("mte", ID_AA64PFR1_MTE_SHIFT, NULL), + FIELD("sme", ID_AA64PFR1_SME_SHIFT, pfr1_sme_filter), {} }, }; @@ -63,10 +109,10 @@ static const struct ftr_set_desc isar1 __initconst = { .name = "id_aa64isar1", .override = &id_aa64isar1_override, .fields = { - { "gpi", ID_AA64ISAR1_GPI_SHIFT }, - { "gpa", ID_AA64ISAR1_GPA_SHIFT }, - { "api", ID_AA64ISAR1_API_SHIFT }, - { "apa", ID_AA64ISAR1_APA_SHIFT }, + FIELD("gpi", ID_AA64ISAR1_EL1_GPI_SHIFT, NULL), + FIELD("gpa", ID_AA64ISAR1_EL1_GPA_SHIFT, NULL), + FIELD("api", ID_AA64ISAR1_EL1_API_SHIFT, NULL), + FIELD("apa", ID_AA64ISAR1_EL1_APA_SHIFT, NULL), {} }, }; @@ -75,8 +121,18 @@ static const struct ftr_set_desc isar2 __initconst = { .name = "id_aa64isar2", .override = &id_aa64isar2_override, .fields = { - { "gpa3", ID_AA64ISAR2_GPA3_SHIFT }, - { "apa3", ID_AA64ISAR2_APA3_SHIFT }, + FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL), + FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL), + {} + }, +}; + +static const struct ftr_set_desc smfr0 __initconst = { + .name = "id_aa64smfr0", + .override = &id_aa64smfr0_override, + .fields = { + /* FA64 is a one bit field... :-/ */ + { "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, }, {} }, }; @@ -89,16 +145,18 @@ static const struct ftr_set_desc kaslr __initconst = { .override = &kaslr_feature_override, #endif .fields = { - { "disabled", 0 }, + FIELD("disabled", 0, NULL), {} }, }; static const struct ftr_set_desc * const regs[] __initconst = { &mmfr1, + &pfr0, &pfr1, &isar1, &isar2, + &smfr0, &kaslr, }; @@ -108,6 +166,8 @@ static const struct { } aliases[] __initconst = { { "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" }, { "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" }, + { "arm64.nosve", "id_aa64pfr0.sve=0 id_aa64pfr1.sme=0" }, + { "arm64.nosme", "id_aa64pfr1.sme=0" }, { "arm64.nobti", "id_aa64pfr1.bt=0" }, { "arm64.nopauth", "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 " @@ -144,7 +204,8 @@ static void __init match_options(const char *cmdline) for (f = 0; strlen(regs[i]->fields[f].name); f++) { u64 shift = regs[i]->fields[f].shift; - u64 mask = 0xfUL << shift; + u64 width = regs[i]->fields[f].width ?: 4; + u64 mask = GENMASK_ULL(shift + width - 1, shift); u64 v; if (find_field(cmdline, regs[i], f, &v)) @@ -152,7 +213,7 @@ static void __init match_options(const char *cmdline) /* * If an override gets filtered out, advertise - * it by setting the value to 0xf, but + * it by setting the value to the all-ones while * clearing the mask... Yes, this is fragile. */ if (regs[i]->fields[f].filter && @@ -234,9 +295,9 @@ static __init void parse_cmdline(void) } /* Keep checkers quiet */ -void init_feature_override(void); +void init_feature_override(u64 boot_status); -asmlinkage void __init init_feature_override(void) +asmlinkage void __init init_feature_override(u64 boot_status) { int i; @@ -247,6 +308,8 @@ asmlinkage void __init init_feature_override(void) } } + __boot_status = boot_status; + parse_cmdline(); for (i = 0; i < ARRAY_SIZE(regs); i++) { diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 241c86b67d01..afa69e04e75e 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -10,11 +10,8 @@ #error This file should only be included in vmlinux.lds.S #endif -#ifdef CONFIG_EFI - -__efistub_kernel_size = _edata - _text; -__efistub_primary_entry_offset = primary_entry - _text; - +PROVIDE(__efistub_kernel_size = _edata - _text); +PROVIDE(__efistub_primary_entry_offset = primary_entry - _text); /* * The EFI stub has its own symbol namespace prefixed by __efistub_, to @@ -25,31 +22,37 @@ __efistub_primary_entry_offset = primary_entry - _text; * linked at. The routines below are all implemented in assembler in a * position independent manner */ -__efistub_memcmp = __pi_memcmp; -__efistub_memchr = __pi_memchr; -__efistub_memcpy = __pi_memcpy; -__efistub_memmove = __pi_memmove; -__efistub_memset = __pi_memset; -__efistub_strlen = __pi_strlen; -__efistub_strnlen = __pi_strnlen; -__efistub_strcmp = __pi_strcmp; -__efistub_strncmp = __pi_strncmp; -__efistub_strrchr = __pi_strrchr; -__efistub_dcache_clean_poc = __pi_dcache_clean_poc; - -#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) -__efistub___memcpy = __pi_memcpy; -__efistub___memmove = __pi_memmove; -__efistub___memset = __pi_memset; -#endif +PROVIDE(__efistub_memcmp = __pi_memcmp); +PROVIDE(__efistub_memchr = __pi_memchr); +PROVIDE(__efistub_memcpy = __pi_memcpy); +PROVIDE(__efistub_memmove = __pi_memmove); +PROVIDE(__efistub_memset = __pi_memset); +PROVIDE(__efistub_strlen = __pi_strlen); +PROVIDE(__efistub_strnlen = __pi_strnlen); +PROVIDE(__efistub_strcmp = __pi_strcmp); +PROVIDE(__efistub_strncmp = __pi_strncmp); +PROVIDE(__efistub_strrchr = __pi_strrchr); +PROVIDE(__efistub_dcache_clean_poc = __pi_dcache_clean_poc); + +PROVIDE(__efistub__text = _text); +PROVIDE(__efistub__end = _end); +PROVIDE(__efistub__edata = _edata); +PROVIDE(__efistub_screen_info = screen_info); +PROVIDE(__efistub__ctype = _ctype); -__efistub__text = _text; -__efistub__end = _end; -__efistub__edata = _edata; -__efistub_screen_info = screen_info; -__efistub__ctype = _ctype; +/* + * The __ prefixed memcpy/memset/memmove symbols are provided by KASAN, which + * instruments the conventional ones. Therefore, any references from the EFI + * stub or other position independent, low level C code should be redirected to + * the non-instrumented versions as well. + */ +PROVIDE(__efistub___memcpy = __pi_memcpy); +PROVIDE(__efistub___memmove = __pi_memmove); +PROVIDE(__efistub___memset = __pi_memset); -#endif +PROVIDE(__pi___memcpy = __pi_memcpy); +PROVIDE(__pi___memmove = __pi_memmove); +PROVIDE(__pi___memset = __pi_memset); #ifdef CONFIG_KVM diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 418b2bba1521..325455d16dbc 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -13,7 +13,6 @@ #include <linux/pgtable.h> #include <linux/random.h> -#include <asm/cacheflush.h> #include <asm/fixmap.h> #include <asm/kernel-pgtable.h> #include <asm/memory.h> @@ -21,128 +20,45 @@ #include <asm/sections.h> #include <asm/setup.h> -enum kaslr_status { - KASLR_ENABLED, - KASLR_DISABLED_CMDLINE, - KASLR_DISABLED_NO_SEED, - KASLR_DISABLED_FDT_REMAP, -}; - -static enum kaslr_status __initdata kaslr_status; u64 __ro_after_init module_alloc_base; u16 __initdata memstart_offset_seed; -static __init u64 get_kaslr_seed(void *fdt) -{ - int node, len; - fdt64_t *prop; - u64 ret; - - node = fdt_path_offset(fdt, "/chosen"); - if (node < 0) - return 0; - - prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len); - if (!prop || len != sizeof(u64)) - return 0; - - ret = fdt64_to_cpu(*prop); - *prop = 0; - return ret; -} - struct arm64_ftr_override kaslr_feature_override __initdata; -/* - * This routine will be executed with the kernel mapped at its default virtual - * address, and if it returns successfully, the kernel will be remapped, and - * start_kernel() will be executed from a randomized virtual offset. The - * relocation will result in all absolute references (e.g., static variables - * containing function pointers) to be reinitialized, and zero-initialized - * .bss variables will be reset to 0. - */ -u64 __init kaslr_early_init(void) +static int __init kaslr_init(void) { - void *fdt; - u64 seed, offset, mask, module_range; - unsigned long raw; + u64 module_range; + u32 seed; /* * Set a reasonable default for module_alloc_base in case * we end up running with module randomization disabled. */ module_alloc_base = (u64)_etext - MODULES_VSIZE; - dcache_clean_inval_poc((unsigned long)&module_alloc_base, - (unsigned long)&module_alloc_base + - sizeof(module_alloc_base)); - - /* - * Try to map the FDT early. If this fails, we simply bail, - * and proceed with KASLR disabled. We will make another - * attempt at mapping the FDT in setup_machine() - */ - fdt = get_early_fdt_ptr(); - if (!fdt) { - kaslr_status = KASLR_DISABLED_FDT_REMAP; - return 0; - } - /* - * Retrieve (and wipe) the seed from the FDT - */ - seed = get_kaslr_seed(fdt); - - /* - * Check if 'nokaslr' appears on the command line, and - * return 0 if that is the case. - */ if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) { - kaslr_status = KASLR_DISABLED_CMDLINE; + pr_info("KASLR disabled on command line\n"); return 0; } - /* - * Mix in any entropy obtainable architecturally if enabled - * and supported. - */ - - if (arch_get_random_seed_long_early(&raw)) - seed ^= raw; - - if (!seed) { - kaslr_status = KASLR_DISABLED_NO_SEED; + if (!kaslr_offset()) { + pr_warn("KASLR disabled due to lack of seed\n"); return 0; } + pr_info("KASLR enabled\n"); + /* - * OK, so we are proceeding with KASLR enabled. Calculate a suitable - * kernel image offset from the seed. Let's place the kernel in the - * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of - * the lower and upper quarters to avoid colliding with other - * allocations. - * Even if we could randomize at page granularity for 16k and 64k pages, - * let's always round to 2 MB so we don't interfere with the ability to - * map using contiguous PTEs + * KASAN without KASAN_VMALLOC does not expect the module region to + * intersect the vmalloc region, since shadow memory is allocated for + * each module at load time, whereas the vmalloc region will already be + * shadowed by KASAN zero pages. */ - mask = ((1UL << (VA_BITS_MIN - 2)) - 1) & ~(SZ_2M - 1); - offset = BIT(VA_BITS_MIN - 3) + (seed & mask); + BUILD_BUG_ON((IS_ENABLED(CONFIG_KASAN_GENERIC) || + IS_ENABLED(CONFIG_KASAN_SW_TAGS)) && + !IS_ENABLED(CONFIG_KASAN_VMALLOC)); - /* use the top 16 bits to randomize the linear region */ - memstart_offset_seed = seed >> 48; - - if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) && - (IS_ENABLED(CONFIG_KASAN_GENERIC) || - IS_ENABLED(CONFIG_KASAN_SW_TAGS))) - /* - * KASAN without KASAN_VMALLOC does not expect the module region - * to intersect the vmalloc region, since shadow memory is - * allocated for each module at load time, whereas the vmalloc - * region is shadowed by KASAN zero pages. So keep modules - * out of the vmalloc region if KASAN is enabled without - * KASAN_VMALLOC, and put the kernel well within 4 GB of the - * module region. - */ - return offset % SZ_2G; + seed = get_random_u32(); if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { /* @@ -154,8 +70,7 @@ u64 __init kaslr_early_init(void) * resolved normally.) */ module_range = SZ_2G - (u64)(_end - _stext); - module_alloc_base = max((u64)_end + offset - SZ_2G, - (u64)MODULES_VADDR); + module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR); } else { /* * Randomize the module region by setting module_alloc_base to @@ -167,40 +82,12 @@ u64 __init kaslr_early_init(void) * when ARM64_MODULE_PLTS is enabled. */ module_range = MODULES_VSIZE - (u64)(_etext - _stext); - module_alloc_base = (u64)_etext + offset - MODULES_VSIZE; } /* use the lower 21 bits to randomize the base of the module region */ module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; module_alloc_base &= PAGE_MASK; - dcache_clean_inval_poc((unsigned long)&module_alloc_base, - (unsigned long)&module_alloc_base + - sizeof(module_alloc_base)); - dcache_clean_inval_poc((unsigned long)&memstart_offset_seed, - (unsigned long)&memstart_offset_seed + - sizeof(memstart_offset_seed)); - - return offset; -} - -static int __init kaslr_init(void) -{ - switch (kaslr_status) { - case KASLR_ENABLED: - pr_info("KASLR enabled\n"); - break; - case KASLR_DISABLED_CMDLINE: - pr_info("KASLR disabled on command line\n"); - break; - case KASLR_DISABLED_NO_SEED: - pr_warn("KASLR disabled due to lack of seed\n"); - break; - case KASLR_DISABLED_FDT_REMAP: - pr_warn("KASLR disabled due to FDT remapping failure\n"); - break; - } - return 0; } -core_initcall(kaslr_init) +subsys_initcall(kaslr_init) diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 42bd8c0c60e0..692e9d2e31e5 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -15,6 +15,7 @@ #include <asm/unistd.h> + .section .rodata .align 5 .globl __kuser_helper_start __kuser_helper_start: diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index f6b00743c399..b2b730233274 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -48,15 +48,6 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte, if (!pte_is_tagged) return; - page_kasan_tag_reset(page); - /* - * We need smp_wmb() in between setting the flags and clearing the - * tags because if another thread reads page->flags and builds a - * tagged address out of it, there is an actual dependency to the - * memory access, but on the current thread we do not guarantee that - * the new page->flags are visible before the tags were updated. - */ - smp_wmb(); mte_clear_page_tags(page_address(page)); } diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile new file mode 100644 index 000000000000..839291430cb3 --- /dev/null +++ b/arch/arm64/kernel/pi/Makefile @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright 2022 Google LLC + +KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \ + -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \ + $(call cc-option,-mbranch-protection=none) \ + -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \ + -include $(srctree)/include/linux/hidden.h \ + -D__DISABLE_EXPORTS -ffreestanding -D__NO_FORTIFY \ + $(call cc-option,-fno-addrsig) + +# remove SCS flags from all objects in this directory +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) +# disable LTO +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS)) + +GCOV_PROFILE := n +KASAN_SANITIZE := n +KCSAN_SANITIZE := n +UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n + +$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \ + --remove-section=.note.gnu.property \ + --prefix-alloc-sections=.init +$(obj)/%.pi.o: $(obj)/%.o FORCE + $(call if_changed,objcopy) + +$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE + $(call if_changed_rule,cc_o_c) + +obj-y := kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o +extra-y := $(patsubst %.pi.o,%.o,$(obj-y)) diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c new file mode 100644 index 000000000000..6c3855e69395 --- /dev/null +++ b/arch/arm64/kernel/pi/kaslr_early.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright 2022 Google LLC +// Author: Ard Biesheuvel <ardb@google.com> + +// NOTE: code in this file runs *very* early, and is not permitted to use +// global variables or anything that relies on absolute addressing. + +#include <linux/libfdt.h> +#include <linux/init.h> +#include <linux/linkage.h> +#include <linux/types.h> +#include <linux/sizes.h> +#include <linux/string.h> + +#include <asm/archrandom.h> +#include <asm/memory.h> + +/* taken from lib/string.c */ +static char *__strstr(const char *s1, const char *s2) +{ + size_t l1, l2; + + l2 = strlen(s2); + if (!l2) + return (char *)s1; + l1 = strlen(s1); + while (l1 >= l2) { + l1--; + if (!memcmp(s1, s2, l2)) + return (char *)s1; + s1++; + } + return NULL; +} +static bool cmdline_contains_nokaslr(const u8 *cmdline) +{ + const u8 *str; + + str = __strstr(cmdline, "nokaslr"); + return str == cmdline || (str > cmdline && *(str - 1) == ' '); +} + +static bool is_kaslr_disabled_cmdline(void *fdt) +{ + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) { + int node; + const u8 *prop; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + goto out; + + prop = fdt_getprop(fdt, node, "bootargs", NULL); + if (!prop) + goto out; + + if (cmdline_contains_nokaslr(prop)) + return true; + + if (IS_ENABLED(CONFIG_CMDLINE_EXTEND)) + goto out; + + return false; + } +out: + return cmdline_contains_nokaslr(CONFIG_CMDLINE); +} + +static u64 get_kaslr_seed(void *fdt) +{ + int node, len; + fdt64_t *prop; + u64 ret; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + return 0; + + prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len); + if (!prop || len != sizeof(u64)) + return 0; + + ret = fdt64_to_cpu(*prop); + *prop = 0; + return ret; +} + +asmlinkage u64 kaslr_early_init(void *fdt) +{ + u64 seed; + + if (is_kaslr_disabled_cmdline(fdt)) + return 0; + + seed = get_kaslr_seed(fdt); + if (!seed) { +#ifdef CONFIG_ARCH_RANDOM + if (!__early_cpu_has_rndr() || + !__arm64_rndr((unsigned long *)&seed)) +#endif + return 0; + } + + /* + * OK, so we are proceeding with KASLR enabled. Calculate a suitable + * kernel image offset from the seed. Let's place the kernel in the + * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of + * the lower and upper quarters to avoid colliding with other + * allocations. + */ + return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0)); +} diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index b0980fbb6bc7..3e6d0352d7d3 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -280,6 +280,9 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) vl = task_get_sme_vl(current); } else { + if (!system_supports_sve()) + return -EINVAL; + vl = task_get_sve_vl(current); } @@ -342,9 +345,14 @@ fpsimd_only: #else /* ! CONFIG_ARM64_SVE */ -/* Turn any non-optimised out attempts to use these into a link error: */ +static int restore_sve_fpsimd_context(struct user_ctxs *user) +{ + WARN_ON_ONCE(1); + return -EINVAL; +} + +/* Turn any non-optimised out attempts to use this into a link error: */ extern int preserve_sve_context(void __user *ctx); -extern int restore_sve_fpsimd_context(struct user_ctxs *user); #endif /* ! CONFIG_ARM64_SVE */ @@ -649,14 +657,10 @@ static int restore_sigframe(struct pt_regs *regs, if (!user.fpsimd) return -EINVAL; - if (user.sve) { - if (!system_supports_sve()) - return -EINVAL; - + if (user.sve) err = restore_sve_fpsimd_context(&user); - } else { + else err = restore_fpsimd_context(user.fpsimd); - } } if (err == 0 && system_supports_sme() && user.za) diff --git a/arch/arm64/kernel/sigreturn32.S b/arch/arm64/kernel/sigreturn32.S index 475d30d471ac..ccbd4aab4ba4 100644 --- a/arch/arm64/kernel/sigreturn32.S +++ b/arch/arm64/kernel/sigreturn32.S @@ -15,6 +15,7 @@ #include <asm/unistd.h> + .section .rodata .globl __aarch32_sigret_code_start __aarch32_sigret_code_start: diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 4ea9392f86e0..617f78ad43a1 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -100,10 +100,11 @@ SYM_FUNC_END(__cpu_suspend_enter) .pushsection ".idmap.text", "awx" SYM_CODE_START(cpu_resume) bl init_kernel_el - bl switch_to_vhe + bl finalise_el2 bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ adrp x1, swapper_pg_dir + adrp x2, idmap_pg_dir bl __enable_mmu ldr x8, =_cpu_resume br x8 diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 0467cb79f080..fcaa151b81f1 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -38,6 +38,8 @@ * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance * associated with the most recently encountered replacement lr * value. + * + * @task: The task being unwound. */ struct unwind_state { unsigned long fp; @@ -48,13 +50,13 @@ struct unwind_state { #ifdef CONFIG_KRETPROBES struct llist_node *kr_cur; #endif + struct task_struct *task; }; -static notrace void unwind_init(struct unwind_state *state, unsigned long fp, - unsigned long pc) +static void unwind_init_common(struct unwind_state *state, + struct task_struct *task) { - state->fp = fp; - state->pc = pc; + state->task = task; #ifdef CONFIG_KRETPROBES state->kr_cur = NULL; #endif @@ -72,7 +74,57 @@ static notrace void unwind_init(struct unwind_state *state, unsigned long fp, state->prev_fp = 0; state->prev_type = STACK_TYPE_UNKNOWN; } -NOKPROBE_SYMBOL(unwind_init); + +/* + * Start an unwind from a pt_regs. + * + * The unwind will begin at the PC within the regs. + * + * The regs must be on a stack currently owned by the calling task. + */ +static inline void unwind_init_from_regs(struct unwind_state *state, + struct pt_regs *regs) +{ + unwind_init_common(state, current); + + state->fp = regs->regs[29]; + state->pc = regs->pc; +} + +/* + * Start an unwind from a caller. + * + * The unwind will begin at the caller of whichever function this is inlined + * into. + * + * The function which invokes this must be noinline. + */ +static __always_inline void unwind_init_from_caller(struct unwind_state *state) +{ + unwind_init_common(state, current); + + state->fp = (unsigned long)__builtin_frame_address(1); + state->pc = (unsigned long)__builtin_return_address(0); +} + +/* + * Start an unwind from a blocked task. + * + * The unwind will begin at the blocked tasks saved PC (i.e. the caller of + * cpu_switch_to()). + * + * The caller should ensure the task is blocked in cpu_switch_to() for the + * duration of the unwind, or the unwind will be bogus. It is never valid to + * call this for the current task. + */ +static inline void unwind_init_from_task(struct unwind_state *state, + struct task_struct *task) +{ + unwind_init_common(state, task); + + state->fp = thread_saved_fp(task); + state->pc = thread_saved_pc(task); +} /* * Unwind from one frame record (A) to the next frame record (B). @@ -81,9 +133,9 @@ NOKPROBE_SYMBOL(unwind_init); * records (e.g. a cycle), determined based on the location and fp value of A * and the location (but not the fp value) of B. */ -static int notrace unwind_next(struct task_struct *tsk, - struct unwind_state *state) +static int notrace unwind_next(struct unwind_state *state) { + struct task_struct *tsk = state->task; unsigned long fp = state->fp; struct stack_info info; @@ -117,15 +169,15 @@ static int notrace unwind_next(struct task_struct *tsk, if (fp <= state->prev_fp) return -EINVAL; } else { - set_bit(state->prev_type, state->stacks_done); + __set_bit(state->prev_type, state->stacks_done); } /* * Record this frame record's values and location. The prev_fp and * prev_type are only meaningful to the next unwind_next() invocation. */ - state->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); - state->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); + state->fp = READ_ONCE(*(unsigned long *)(fp)); + state->pc = READ_ONCE(*(unsigned long *)(fp + 8)); state->prev_fp = fp; state->prev_type = info.type; @@ -157,8 +209,7 @@ static int notrace unwind_next(struct task_struct *tsk, } NOKPROBE_SYMBOL(unwind_next); -static void notrace unwind(struct task_struct *tsk, - struct unwind_state *state, +static void notrace unwind(struct unwind_state *state, stack_trace_consume_fn consume_entry, void *cookie) { while (1) { @@ -166,7 +217,7 @@ static void notrace unwind(struct task_struct *tsk, if (!consume_entry(cookie, state->pc)) break; - ret = unwind_next(tsk, state); + ret = unwind_next(state); if (ret < 0) break; } @@ -212,15 +263,15 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry, { struct unwind_state state; - if (regs) - unwind_init(&state, regs->regs[29], regs->pc); - else if (task == current) - unwind_init(&state, - (unsigned long)__builtin_frame_address(1), - (unsigned long)__builtin_return_address(0)); - else - unwind_init(&state, thread_saved_fp(task), - thread_saved_pc(task)); - - unwind(task, &state, consume_entry, cookie); + if (regs) { + if (task != current) + return; + unwind_init_from_regs(&state, regs); + } else if (task == current) { + unwind_init_from_caller(&state); + } else { + unwind_init_from_task(&state, task); + } + + unwind(&state, consume_entry, cookie); } diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 2b0887e58a7c..9135fe0f3df5 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -52,7 +52,7 @@ void notrace __cpu_suspend_exit(void) /* Restore CnP bit in TTBR1_EL1 */ if (system_supports_cnp()) - cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir); /* * PSTATE was not saved over suspend/resume, re-enable any detected diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 9ac7a81b79be..b7fed33981f7 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -579,11 +579,11 @@ static void ctr_read_handler(unsigned long esr, struct pt_regs *regs) if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) { /* Hide DIC so that we can trap the unnecessary maintenance...*/ - val &= ~BIT(CTR_DIC_SHIFT); + val &= ~BIT(CTR_EL0_DIC_SHIFT); /* ... and fake IminLine to reduce the number of traps. */ - val &= ~CTR_IMINLINE_MASK; - val |= (PAGE_SHIFT - 2) & CTR_IMINLINE_MASK; + val &= ~CTR_EL0_IminLine_MASK; + val |= (PAGE_SHIFT - 2) & CTR_EL0_IminLine_MASK; } pt_regs_write_reg(regs, rt, val); diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index f6e25d7c346a..bafbf78fab77 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -24,7 +24,13 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so # preparation in build-time C")). ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \ - -Bsymbolic --build-id=sha1 -n $(btildflags-y) -T + -Bsymbolic --build-id=sha1 -n $(btildflags-y) + +ifdef CONFIG_LD_ORPHAN_WARN + ldflags-y += --orphan-handling=warn +endif + +ldflags-y += -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index a5e61e09ea92..e69fb4aaaf3e 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -11,6 +11,7 @@ #include <linux/const.h> #include <asm/page.h> #include <asm/vdso.h> +#include <asm-generic/vmlinux.lds.h> OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64") OUTPUT_ARCH(aarch64) @@ -49,11 +50,24 @@ SECTIONS .dynamic : { *(.dynamic) } :text :dynamic - .rodata : { *(.rodata*) } :text + .rela.dyn : ALIGN(8) { *(.rela .rela*) } + + .rodata : { + *(.rodata*) + *(.got) + *(.got.plt) + *(.plt) + *(.plt.*) + *(.iplt) + *(.igot .igot.plt) + } :text _end = .; PROVIDE(end = .); + DWARF_DEBUG + ELF_DETAILS + /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 05ba1aae1b6f..36c8f66cad25 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -104,6 +104,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__ VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1 VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096 VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1 +VDSO_LDFLAGS += --orphan-handling=warn # Borrow vdsomunge.c from the arm vDSO diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S index 3348ce5ea306..8d95d7d35057 100644 --- a/arch/arm64/kernel/vdso32/vdso.lds.S +++ b/arch/arm64/kernel/vdso32/vdso.lds.S @@ -11,6 +11,7 @@ #include <linux/const.h> #include <asm/page.h> #include <asm/vdso.h> +#include <asm-generic/vmlinux.lds.h> OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm") OUTPUT_ARCH(arm) @@ -35,12 +36,30 @@ SECTIONS .dynamic : { *(.dynamic) } :text :dynamic - .rodata : { *(.rodata*) } :text + .rodata : { + *(.rodata*) + *(.got) + *(.got.plt) + *(.plt) + *(.rel.iplt) + *(.iplt) + *(.igot.plt) + } :text - .text : { *(.text*) } :text =0xe7f001f2 + .text : { + *(.text*) + *(.glue_7) + *(.glue_7t) + *(.vfp11_veneer) + *(.v4_bx) + } :text =0xe7f001f2 - .got : { *(.got) } - .rel.plt : { *(.rel.plt) } + .rel.dyn : { *(.rel*) } + + .ARM.exidx : { *(.ARM.exidx*) } + DWARF_DEBUG + ELF_DETAILS + .ARM.attributes 0 : { *(.ARM.attributes) } /DISCARD/ : { *(.note.GNU-stack) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 2d4a8f995175..45131e354e27 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -115,7 +115,8 @@ jiffies = jiffies_64; __entry_tramp_text_start = .; \ *(.entry.tramp.text) \ . = ALIGN(PAGE_SIZE); \ - __entry_tramp_text_end = .; + __entry_tramp_text_end = .; \ + *(.entry.tramp.rodata) #else #define TRAMP_TEXT #endif @@ -198,8 +199,7 @@ SECTIONS } idmap_pg_dir = .; - . += IDMAP_DIR_SIZE; - idmap_pg_end = .; + . += PAGE_SIZE; #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 tramp_pg_dir = .; @@ -235,6 +235,10 @@ SECTIONS __inittext_end = .; __initdata_begin = .; + init_idmap_pg_dir = .; + . += INIT_IDMAP_DIR_SIZE; + init_idmap_pg_end = .; + .init.data : { INIT_DATA INIT_SETUP(16) @@ -253,21 +257,17 @@ SECTIONS HYPERVISOR_RELOC_SECTION .rela.dyn : ALIGN(8) { + __rela_start = .; *(.rela .rela*) + __rela_end = .; } - __rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR); - __rela_size = SIZEOF(.rela.dyn); - -#ifdef CONFIG_RELR .relr.dyn : ALIGN(8) { + __relr_start = .; *(.relr.dyn) + __relr_end = .; } - __relr_offset = ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR); - __relr_size = SIZEOF(.relr.dyn); -#endif - . = ALIGN(SEGMENT_ALIGN); __initdata_end = .; __init_end = .; diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index fd55014b3497..fa6e466ed57f 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -176,25 +176,25 @@ ) #define PVM_ID_AA64ISAR1_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM) \ ) #define PVM_ID_AA64ISAR2_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) \ + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) \ ) u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 35a4331ba5f3..6b94c3e6ff26 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -173,10 +173,10 @@ static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW; if (!vcpu_has_ptrauth(vcpu)) - allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI)); + allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI)); return id_aa64isar1_el1_sys_val & allow_mask; } @@ -186,8 +186,8 @@ static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu) u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW; if (!vcpu_has_ptrauth(vcpu)) - allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) | - ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3)); + allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3)); return id_aa64isar2_el1_sys_val & allow_mask; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c06c0477fab5..c4fb3874b5e2 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1136,17 +1136,17 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI)); + val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI)); break; case SYS_ID_AA64ISAR2_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) | - ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3)); + val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3)); if (!cpus_have_final_cap(ARM64_HAS_WFXT)) - val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_WFXT); + val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT); break; case SYS_ID_AA64DFR0_EL1: /* Limit debug to ARMv8.0 */ diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S index eeb9e45bcce8..1b7c93ae7e63 100644 --- a/arch/arm64/lib/mte.S +++ b/arch/arm64/lib/mte.S @@ -18,7 +18,7 @@ */ .macro multitag_transfer_size, reg, tmp mrs_s \reg, SYS_GMID_EL1 - ubfx \reg, \reg, #SYS_GMID_EL1_BS_SHIFT, #SYS_GMID_EL1_BS_SIZE + ubfx \reg, \reg, #GMID_EL1_BS_SHIFT, #GMID_EL1_BS_SIZE mov \tmp, #4 lsl \reg, \tmp, \reg .endm diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 21c907987080..081058d4e436 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -194,44 +194,3 @@ SYM_FUNC_START(__pi_dcache_clean_pop) ret SYM_FUNC_END(__pi_dcache_clean_pop) SYM_FUNC_ALIAS(dcache_clean_pop, __pi_dcache_clean_pop) - -/* - * __dma_flush_area(start, size) - * - * clean & invalidate D / U line - * - * - start - virtual start address of region - * - size - size in question - */ -SYM_FUNC_START(__pi___dma_flush_area) - add x1, x0, x1 - dcache_by_line_op civac, sy, x0, x1, x2, x3 - ret -SYM_FUNC_END(__pi___dma_flush_area) -SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area) - -/* - * __dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -SYM_FUNC_START(__pi___dma_map_area) - add x1, x0, x1 - b __pi_dcache_clean_poc -SYM_FUNC_END(__pi___dma_map_area) -SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area) - -/* - * __dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -SYM_FUNC_START(__pi___dma_unmap_area) - add x1, x0, x1 - cmp w2, #DMA_TO_DEVICE - b.ne __pi_dcache_inval_poc - ret -SYM_FUNC_END(__pi___dma_unmap_area) -SYM_FUNC_ALIAS(__dma_unmap_area, __pi___dma_unmap_area) diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 0dea80bf6de4..24913271e898 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -23,15 +23,6 @@ void copy_highpage(struct page *to, struct page *from) if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) { set_bit(PG_mte_tagged, &to->flags); - page_kasan_tag_reset(to); - /* - * We need smp_wmb() in between setting the flags and clearing the - * tags because if another thread reads page->flags and builds a - * tagged address out of it, there is an actual dependency to the - * memory access, but on the current thread we do not guarantee that - * the new page->flags are visible before the tags were updated. - */ - smp_wmb(); mte_copy_page_tags(kto, kfrom); } } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 6099c81b9322..599cf81f5685 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -14,20 +14,29 @@ #include <asm/xen/xen-ops.h> void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) + enum dma_data_direction dir) { - __dma_map_area(phys_to_virt(paddr), size, dir); + unsigned long start = (unsigned long)phys_to_virt(paddr); + + dcache_clean_poc(start, start + size); } void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) + enum dma_data_direction dir) { - __dma_unmap_area(phys_to_virt(paddr), size, dir); + unsigned long start = (unsigned long)phys_to_virt(paddr); + + if (dir == DMA_TO_DEVICE) + return; + + dcache_inval_poc(start, start + size); } void arch_dma_prep_coherent(struct page *page, size_t size) { - __dma_flush_area(page_address(page), size); + unsigned long start = (unsigned long)page_address(page); + + dcache_clean_inval_poc(start, start + size); } #ifdef CONFIG_IOMMU_DMA diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index 489455309695..228d681a8715 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -16,13 +16,6 @@ get_ex_fixup(const struct exception_table_entry *ex) return ((unsigned long)&ex->fixup + ex->fixup); } -static bool ex_handler_fixup(const struct exception_table_entry *ex, - struct pt_regs *regs) -{ - regs->pc = get_ex_fixup(ex); - return true; -} - static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, struct pt_regs *regs) { @@ -72,11 +65,10 @@ bool fixup_exception(struct pt_regs *regs) return false; switch (ex->type) { - case EX_TYPE_FIXUP: - return ex_handler_fixup(ex, regs); case EX_TYPE_BPF: return ex_handler_bpf(ex, regs); case EX_TYPE_UACCESS_ERR_ZERO: + case EX_TYPE_KACCESS_ERR_ZERO: return ex_handler_uaccess_err_zero(ex, regs); case EX_TYPE_LOAD_UNALIGNED_ZEROPAD: return ex_handler_load_unaligned_zeropad(ex, regs); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c5e11768e5c1..cdf3ffa0c223 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -927,6 +927,5 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, void tag_clear_highpage(struct page *page) { mte_zero_clear_page_tags(page_address(page)); - page_kasan_tag_reset(page); set_bit(PG_mte_tagged, &page->flags); } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 3618ef3f6d81..5307ffdefb8d 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -100,16 +100,6 @@ int pud_huge(pud_t pud) #endif } -/* - * Select all bits except the pfn - */ -static inline pgprot_t pte_pgprot(pte_t pte) -{ - unsigned long pfn = pte_pfn(pte); - - return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); -} - static int find_num_contig(struct mm_struct *mm, unsigned long addr, pte_t *ptep, size_t *pgsize) { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 339ee84e5a61..b6ef26fc8ebe 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -389,7 +389,7 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); - if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) + if (!defer_reserve_crashkernel()) reserve_crashkernel(); high_memory = __va(memblock_end_of_DRAM() - 1) + 1; @@ -438,7 +438,7 @@ void __init bootmem_init(void) * request_standard_resources() depends on crashkernel's memory being * reserved, so do it here. */ - if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)) + if (defer_reserve_crashkernel()) reserve_crashkernel(); memblock_dump_all(); diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index b21f91cd830d..c5af103d4ad4 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -1,96 +1,22 @@ // SPDX-License-Identifier: GPL-2.0-only -/* - * Based on arch/arm/mm/ioremap.c - * - * (C) Copyright 1995 1996 Linus Torvalds - * Hacked for ARM by Phil Blundell <philb@gnu.org> - * Hacked to allow all architectures to build, and various cleanups - * by Russell King - * Copyright (C) 2012 ARM Ltd. - */ -#include <linux/export.h> #include <linux/mm.h> -#include <linux/vmalloc.h> #include <linux/io.h> -#include <asm/fixmap.h> -#include <asm/tlbflush.h> - -static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size, - pgprot_t prot, void *caller) +bool ioremap_allowed(phys_addr_t phys_addr, size_t size, unsigned long prot) { - unsigned long last_addr; - unsigned long offset = phys_addr & ~PAGE_MASK; - int err; - unsigned long addr; - struct vm_struct *area; + unsigned long last_addr = phys_addr + size - 1; - /* - * Page align the mapping address and size, taking account of any - * offset. - */ - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(size + offset); + /* Don't allow outside PHYS_MASK */ + if (last_addr & ~PHYS_MASK) + return false; - /* - * Don't allow wraparound, zero size or outside PHYS_MASK. - */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr || (last_addr & ~PHYS_MASK)) - return NULL; - - /* - * Don't allow RAM to be mapped. - */ + /* Don't allow RAM to be mapped. */ if (WARN_ON(pfn_is_map_memory(__phys_to_pfn(phys_addr)))) - return NULL; - - area = get_vm_area_caller(size, VM_IOREMAP, caller); - if (!area) - return NULL; - addr = (unsigned long)area->addr; - area->phys_addr = phys_addr; - - err = ioremap_page_range(addr, addr + size, phys_addr, prot); - if (err) { - vunmap((void *)addr); - return NULL; - } - - return (void __iomem *)(offset + addr); -} - -void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot) -{ - return __ioremap_caller(phys_addr, size, prot, - __builtin_return_address(0)); -} -EXPORT_SYMBOL(__ioremap); - -void iounmap(volatile void __iomem *io_addr) -{ - unsigned long addr = (unsigned long)io_addr & PAGE_MASK; - - /* - * We could get an address outside vmalloc range in case - * of ioremap_cache() reusing a RAM mapping. - */ - if (is_vmalloc_addr((void *)addr)) - vunmap((void *)addr); -} -EXPORT_SYMBOL(iounmap); - -void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) -{ - /* For normal memory we already have a cacheable mapping. */ - if (pfn_is_map_memory(__phys_to_pfn(phys_addr))) - return (void __iomem *)__phys_to_virt(phys_addr); + return false; - return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL), - __builtin_return_address(0)); + return true; } -EXPORT_SYMBOL(ioremap_cache); /* * Must be called after early_fixmap_init diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index c12cd700598f..e969e68de005 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -236,7 +236,7 @@ static void __init kasan_init_shadow(void) */ memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir)); dsb(ishst); - cpu_replace_ttbr1(lm_alias(tmp_pg_dir)); + cpu_replace_ttbr1(lm_alias(tmp_pg_dir), idmap_pg_dir); clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); @@ -280,7 +280,7 @@ static void __init kasan_init_shadow(void) PAGE_KERNEL_RO)); memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE); - cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir); } static void __init kasan_init_depth(void) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 626ec32873c6..db7c4e6ae57b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -43,15 +43,27 @@ #define NO_CONT_MAPPINGS BIT(1) #define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */ -u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN); -u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; +int idmap_t0sz __ro_after_init; -u64 __section(".mmuoff.data.write") vabits_actual; +#if VA_BITS > 48 +u64 vabits_actual __ro_after_init = VA_BITS_MIN; EXPORT_SYMBOL(vabits_actual); +#endif + +u64 kimage_vaddr __ro_after_init = (u64)&_text; +EXPORT_SYMBOL(kimage_vaddr); u64 kimage_voffset __ro_after_init; EXPORT_SYMBOL(kimage_voffset); +u32 __boot_cpu_mode[] = { BOOT_CPU_MODE_EL2, BOOT_CPU_MODE_EL1 }; + +/* + * The booting CPU updates the failed status @__early_cpu_boot_status, + * with MMU turned off. + */ +long __section(".mmuoff.data.write") __early_cpu_boot_status; + /* * Empty_zero_page is a special page that is used for zero-initialized data * and COW. @@ -388,6 +400,13 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, } while (pgdp++, addr = next, addr != end); } +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +extern __alias(__create_pgd_mapping) +void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), int flags); +#endif + static phys_addr_t __pgd_pgtable_alloc(int shift) { void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL); @@ -529,8 +548,7 @@ static void __init map_mem(pgd_t *pgdp) #ifdef CONFIG_KEXEC_CORE if (crash_mem_map) { - if (IS_ENABLED(CONFIG_ZONE_DMA) || - IS_ENABLED(CONFIG_ZONE_DMA32)) + if (defer_reserve_crashkernel()) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; else if (crashk_res.end) memblock_mark_nomap(crashk_res.start, @@ -571,8 +589,7 @@ static void __init map_mem(pgd_t *pgdp) * through /sys/kernel/kexec_crash_size interface. */ #ifdef CONFIG_KEXEC_CORE - if (crash_mem_map && - !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) { + if (crash_mem_map && !defer_reserve_crashkernel()) { if (crashk_res.end) { __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1, @@ -665,13 +682,9 @@ static int __init map_entry_trampoline(void) __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, pa_start + i * PAGE_SIZE, prot); - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { - extern char __entry_tramp_data_start[]; - - __set_fixmap(FIX_ENTRY_TRAMP_DATA, - __pa_symbol(__entry_tramp_data_start), - PAGE_KERNEL_RO); - } + if (IS_ENABLED(CONFIG_RELOCATABLE)) + __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, + pa_start + i * PAGE_SIZE, PAGE_KERNEL_RO); return 0; } @@ -762,22 +775,57 @@ static void __init map_kernel(pgd_t *pgdp) kasan_copy_shadow(pgdp); } +static void __init create_idmap(void) +{ + u64 start = __pa_symbol(__idmap_text_start); + u64 size = __pa_symbol(__idmap_text_end) - start; + pgd_t *pgd = idmap_pg_dir; + u64 pgd_phys; + + /* check if we need an additional level of translation */ + if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) { + pgd_phys = early_pgtable_alloc(PAGE_SHIFT); + set_pgd(&idmap_pg_dir[start >> VA_BITS], + __pgd(pgd_phys | P4D_TYPE_TABLE)); + pgd = __va(pgd_phys); + } + __create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX, + early_pgtable_alloc, 0); + + if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { + extern u32 __idmap_kpti_flag; + u64 pa = __pa_symbol(&__idmap_kpti_flag); + + /* + * The KPTI G-to-nG conversion code needs a read-write mapping + * of its synchronization flag in the ID map. + */ + __create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL, + early_pgtable_alloc, 0); + } +} + void __init paging_init(void) { pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir)); + extern pgd_t init_idmap_pg_dir[]; + + idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0)); map_kernel(pgdp); map_mem(pgdp); pgd_clear_fixmap(); - cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir); init_mm.pgd = swapper_pg_dir; memblock_phys_free(__pa_symbol(init_pg_dir), __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); memblock_allow_resize(); + + create_idmap(); } /* diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index a9e50e930484..4334dec93bd4 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -53,15 +53,6 @@ bool mte_restore_tags(swp_entry_t entry, struct page *page) if (!tags) return false; - page_kasan_tag_reset(page); - /* - * We need smp_wmb() in between setting the flags and clearing the - * tags because if another thread reads page->flags and builds a - * tagged address out of it, there is an actual dependency to the - * memory access, but on the current thread we do not guarantee that - * the new page->flags are visible before the tags were updated. - */ - smp_wmb(); mte_restore_page_tags(page_address(page), tags); return true; diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 50bbed947bec..7837a69524c5 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -14,6 +14,7 @@ #include <asm/asm-offsets.h> #include <asm/asm_pointer_auth.h> #include <asm/hwcap.h> +#include <asm/kernel-pgtable.h> #include <asm/pgtable-hwdef.h> #include <asm/cpufeature.h> #include <asm/alternative.h> @@ -200,34 +201,64 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1) .popsection #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + +#define KPTI_NG_PTE_FLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) + .pushsection ".idmap.text", "awx" - .macro __idmap_kpti_get_pgtable_ent, type - dc cvac, cur_\()\type\()p // Ensure any existing dirty - dmb sy // lines are written back before - ldr \type, [cur_\()\type\()p] // loading the entry - tbz \type, #0, skip_\()\type // Skip invalid and - tbnz \type, #11, skip_\()\type // non-global entries + .macro kpti_mk_tbl_ng, type, num_entries + add end_\type\()p, cur_\type\()p, #\num_entries * 8 +.Ldo_\type: + ldr \type, [cur_\type\()p] // Load the entry + tbz \type, #0, .Lnext_\type // Skip invalid and + tbnz \type, #11, .Lnext_\type // non-global entries + orr \type, \type, #PTE_NG // Same bit for blocks and pages + str \type, [cur_\type\()p] // Update the entry + .ifnc \type, pte + tbnz \type, #1, .Lderef_\type + .endif +.Lnext_\type: + add cur_\type\()p, cur_\type\()p, #8 + cmp cur_\type\()p, end_\type\()p + b.ne .Ldo_\type .endm - .macro __idmap_kpti_put_pgtable_ent_ng, type - orr \type, \type, #PTE_NG // Same bit for blocks and pages - str \type, [cur_\()\type\()p] // Update the entry and ensure - dmb sy // that it is visible to all - dc civac, cur_\()\type\()p // CPUs. + /* + * Dereference the current table entry and map it into the temporary + * fixmap slot associated with the current level. + */ + .macro kpti_map_pgtbl, type, level + str xzr, [temp_pte, #8 * (\level + 1)] // break before make + dsb nshst + add pte, temp_pte, #PAGE_SIZE * (\level + 1) + lsr pte, pte, #12 + tlbi vaae1, pte + dsb nsh + isb + + phys_to_pte pte, cur_\type\()p + add cur_\type\()p, temp_pte, #PAGE_SIZE * (\level + 1) + orr pte, pte, pte_flags + str pte, [temp_pte, #8 * (\level + 1)] + dsb nshst .endm /* - * void __kpti_install_ng_mappings(int cpu, int num_cpus, phys_addr_t swapper) + * void __kpti_install_ng_mappings(int cpu, int num_secondaries, phys_addr_t temp_pgd, + * unsigned long temp_pte_va) * * Called exactly once from stop_machine context by each CPU found during boot. */ -__idmap_kpti_flag: - .long 1 + .pushsection ".data", "aw", %progbits +SYM_DATA(__idmap_kpti_flag, .long 1) + .popsection + SYM_FUNC_START(idmap_kpti_install_ng_mappings) cpu .req w0 + temp_pte .req x0 num_cpus .req w1 - swapper_pa .req x2 + pte_flags .req x1 + temp_pgd_phys .req x2 swapper_ttb .req x3 flag_ptr .req x4 cur_pgdp .req x5 @@ -235,17 +266,16 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings) pgd .req x7 cur_pudp .req x8 end_pudp .req x9 - pud .req x10 cur_pmdp .req x11 end_pmdp .req x12 - pmd .req x13 cur_ptep .req x14 end_ptep .req x15 pte .req x16 + valid .req x17 + mov x5, x3 // preserve temp_pte arg mrs swapper_ttb, ttbr1_el1 - restore_ttbr1 swapper_ttb - adr flag_ptr, __idmap_kpti_flag + adr_l flag_ptr, __idmap_kpti_flag cbnz cpu, __idmap_kpti_secondary @@ -256,98 +286,71 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings) eor w17, w17, num_cpus cbnz w17, 1b - /* We need to walk swapper, so turn off the MMU. */ - pre_disable_mmu_workaround - mrs x17, sctlr_el1 - bic x17, x17, #SCTLR_ELx_M - msr sctlr_el1, x17 + /* Switch to the temporary page tables on this CPU only */ + __idmap_cpu_set_reserved_ttbr1 x8, x9 + offset_ttbr1 temp_pgd_phys, x8 + msr ttbr1_el1, temp_pgd_phys isb + mov temp_pte, x5 + mov pte_flags, #KPTI_NG_PTE_FLAGS + /* Everybody is enjoying the idmap, so we can rewrite swapper. */ /* PGD */ - mov cur_pgdp, swapper_pa - add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8) -do_pgd: __idmap_kpti_get_pgtable_ent pgd - tbnz pgd, #1, walk_puds -next_pgd: - __idmap_kpti_put_pgtable_ent_ng pgd -skip_pgd: - add cur_pgdp, cur_pgdp, #8 - cmp cur_pgdp, end_pgdp - b.ne do_pgd - - /* Publish the updated tables and nuke all the TLBs */ - dsb sy - tlbi vmalle1is - dsb ish - isb + adrp cur_pgdp, swapper_pg_dir + kpti_map_pgtbl pgd, 0 + kpti_mk_tbl_ng pgd, PTRS_PER_PGD - /* We're done: fire up the MMU again */ - mrs x17, sctlr_el1 - orr x17, x17, #SCTLR_ELx_M - set_sctlr_el1 x17 + /* Ensure all the updated entries are visible to secondary CPUs */ + dsb ishst + + /* We're done: fire up swapper_pg_dir again */ + __idmap_cpu_set_reserved_ttbr1 x8, x9 + msr ttbr1_el1, swapper_ttb + isb /* Set the flag to zero to indicate that we're all done */ str wzr, [flag_ptr] ret +.Lderef_pgd: /* PUD */ -walk_puds: - .if CONFIG_PGTABLE_LEVELS > 3 + .if CONFIG_PGTABLE_LEVELS > 3 + pud .req x10 pte_to_phys cur_pudp, pgd - add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8) -do_pud: __idmap_kpti_get_pgtable_ent pud - tbnz pud, #1, walk_pmds -next_pud: - __idmap_kpti_put_pgtable_ent_ng pud -skip_pud: - add cur_pudp, cur_pudp, 8 - cmp cur_pudp, end_pudp - b.ne do_pud - b next_pgd - .else /* CONFIG_PGTABLE_LEVELS <= 3 */ - mov pud, pgd - b walk_pmds -next_pud: - b next_pgd + kpti_map_pgtbl pud, 1 + kpti_mk_tbl_ng pud, PTRS_PER_PUD + b .Lnext_pgd + .else /* CONFIG_PGTABLE_LEVELS <= 3 */ + pud .req pgd + .set .Lnext_pud, .Lnext_pgd .endif +.Lderef_pud: /* PMD */ -walk_pmds: - .if CONFIG_PGTABLE_LEVELS > 2 + .if CONFIG_PGTABLE_LEVELS > 2 + pmd .req x13 pte_to_phys cur_pmdp, pud - add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8) -do_pmd: __idmap_kpti_get_pgtable_ent pmd - tbnz pmd, #1, walk_ptes -next_pmd: - __idmap_kpti_put_pgtable_ent_ng pmd -skip_pmd: - add cur_pmdp, cur_pmdp, #8 - cmp cur_pmdp, end_pmdp - b.ne do_pmd - b next_pud - .else /* CONFIG_PGTABLE_LEVELS <= 2 */ - mov pmd, pud - b walk_ptes -next_pmd: - b next_pud + kpti_map_pgtbl pmd, 2 + kpti_mk_tbl_ng pmd, PTRS_PER_PMD + b .Lnext_pud + .else /* CONFIG_PGTABLE_LEVELS <= 2 */ + pmd .req pgd + .set .Lnext_pmd, .Lnext_pgd .endif +.Lderef_pmd: /* PTE */ -walk_ptes: pte_to_phys cur_ptep, pmd - add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8) -do_pte: __idmap_kpti_get_pgtable_ent pte - __idmap_kpti_put_pgtable_ent_ng pte -skip_pte: - add cur_ptep, cur_ptep, #8 - cmp cur_ptep, end_ptep - b.ne do_pte - b next_pmd + kpti_map_pgtbl pte, 3 + kpti_mk_tbl_ng pte, PTRS_PER_PTE + b .Lnext_pmd .unreq cpu + .unreq temp_pte .unreq num_cpus - .unreq swapper_pa + .unreq pte_flags + .unreq temp_pgd_phys .unreq cur_pgdp .unreq end_pgdp .unreq pgd @@ -360,6 +363,7 @@ skip_pte: .unreq cur_ptep .unreq end_ptep .unreq pte + .unreq valid /* Secondary CPUs end up here */ __idmap_kpti_secondary: @@ -379,7 +383,6 @@ __idmap_kpti_secondary: cbnz w16, 1b /* All done, act like nothing happened */ - offset_ttbr1 swapper_ttb, x16 msr ttbr1_el1, swapper_ttb isb ret @@ -395,6 +398,8 @@ SYM_FUNC_END(idmap_kpti_install_ng_mappings) * * Initialise the processor for turning the MMU on. * + * Input: + * x0 - actual number of VA bits (ignored unless VA_BITS > 48) * Output: * Return in x0 the value of the SCTLR_EL1 register. */ @@ -464,12 +469,11 @@ SYM_FUNC_START(__cpu_setup) tcr_clear_errata_bits tcr, x9, x5 #ifdef CONFIG_ARM64_VA_BITS_52 - ldr_l x9, vabits_actual - sub x9, xzr, x9 + sub x9, xzr, x0 add x9, x9, #64 tcr_set_t1sz tcr, x9 #else - ldr_l x9, idmap_t0sz + idmap_get_t0sz x9 #endif tcr_set_t0sz tcr, x9 diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 507b20373953..779653771507 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -36,6 +36,7 @@ HAS_RNG HAS_SB HAS_STAGE2_FWB HAS_SYSREG_GIC_CPUIF +HAS_TIDCP1 HAS_TLB_RANGE HAS_VIRT_HOST_EXTN HAS_WFXT @@ -61,6 +62,7 @@ WORKAROUND_1418040 WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 +WORKAROUND_1742098 WORKAROUND_1902691 WORKAROUND_2038923 WORKAROUND_2064142 diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk index 5c55509eb43f..db461921d256 100755 --- a/arch/arm64/tools/gen-sysreg.awk +++ b/arch/arm64/tools/gen-sysreg.awk @@ -88,7 +88,7 @@ END { # skip blank lines and comment lines /^$/ { next } -/^#/ { next } +/^[\t ]*#/ { next } /^SysregFields/ { change_block("SysregFields", "None", "SysregFields") diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index ff5e552f7420..9ae483ec1e56 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -46,6 +46,89 @@ # feature that introduces them (eg, FEAT_LS64_ACCDATA introduces enumeration # item ACCDATA) though it may be more taseful to do something else. +Sysreg ID_AA64ZFR0_EL1 3 0 0 4 4 +Res0 63:60 +Enum 59:56 F64MM + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 55:52 F32MM + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 51:48 +Enum 47:44 I8MM + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 43:40 SM4 + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 39:36 +Enum 35:32 SHA3 + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 31:24 +Enum 23:20 BF16 + 0b0000 NI + 0b0001 IMP + 0b0010 EBF16 +EndEnum +Enum 19:16 BitPerm + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 15:8 +Enum 7:4 AES + 0b0000 NI + 0b0001 IMP + 0b0010 PMULL128 +EndEnum +Enum 3:0 SVEver + 0b0000 IMP + 0b0001 SVE2 +EndEnum +EndSysreg + +Sysreg ID_AA64SMFR0_EL1 3 0 0 4 5 +Enum 63 FA64 + 0b0 NI + 0b1 IMP +EndEnum +Res0 62:60 +Field 59:56 SMEver +Enum 55:52 I16I64 + 0b0000 NI + 0b1111 IMP +EndEnum +Res0 51:49 +Enum 48 F64F64 + 0b0 NI + 0b1 IMP +EndEnum +Res0 47:40 +Enum 39:36 I8I32 + 0b0000 NI + 0b1111 IMP +EndEnum +Enum 35 F16F32 + 0b0 NI + 0b1 IMP +EndEnum +Enum 34 B16F32 + 0b0 NI + 0b1 IMP +EndEnum +Res0 33 +Enum 32 F32F32 + 0b0 NI + 0b1 IMP +EndEnum +Res0 31:0 +EndSysreg + Sysreg ID_AA64ISAR0_EL1 3 0 0 6 0 Enum 63:60 RNDR 0b0000 NI @@ -114,6 +197,122 @@ EndEnum Res0 3:0 EndSysreg +Sysreg ID_AA64ISAR1_EL1 3 0 0 6 1 +Enum 63:60 LS64 + 0b0000 NI + 0b0001 LS64 + 0b0010 LS64_V + 0b0011 LS64_ACCDATA +EndEnum +Enum 59:56 XS + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 55:52 I8MM + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 51:48 DGH + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 47:44 BF16 + 0b0000 NI + 0b0001 IMP + 0b0010 EBF16 +EndEnum +Enum 43:40 SPECRES + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 39:36 SB + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 35:32 FRINTTS + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 31:28 GPI + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 27:24 GPA + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 LRCPC + 0b0000 NI + 0b0001 IMP + 0b0010 LRCPC2 +EndEnum +Enum 19:16 FCMA + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 15:12 JSCVT + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 11:8 API + 0b0000 NI + 0b0001 PAuth + 0b0010 EPAC + 0b0011 PAuth2 + 0b0100 FPAC + 0b0101 FPACCOMBINE +EndEnum +Enum 7:4 APA + 0b0000 NI + 0b0001 PAuth + 0b0010 EPAC + 0b0011 PAuth2 + 0b0100 FPAC + 0b0101 FPACCOMBINE +EndEnum +Enum 3:0 DPB + 0b0000 NI + 0b0001 IMP + 0b0010 DPB2 +EndEnum +EndSysreg + +Sysreg ID_AA64ISAR2_EL1 3 0 0 6 2 +Res0 63:28 +Enum 27:24 PAC_frac + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 BC + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 19:16 MOPS + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 15:12 APA3 + 0b0000 NI + 0b0001 PAuth + 0b0010 EPAC + 0b0011 PAuth2 + 0b0100 FPAC + 0b0101 FPACCOMBINE +EndEnum +Enum 11:8 GPA3 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 7:4 RPRES + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 3:0 WFxT + 0b0000 NI + 0b0010 IMP +EndEnum +EndSysreg + Sysreg SCTLR_EL1 3 0 1 0 0 Field 63 TIDCP Field 62 SPINMASK @@ -257,6 +456,11 @@ Field 5:3 Ctype2 Field 2:0 Ctype1 EndSysreg +Sysreg GMID_EL1 3 1 0 0 4 +Res0 63:4 +Field 3:0 BS +EndSysreg + Sysreg SMIDR_EL1 3 1 0 0 6 Res0 63:32 Field 31:24 IMPLEMENTER @@ -273,6 +477,33 @@ Field 3:1 Level Field 0 InD EndSysreg +Sysreg CTR_EL0 3 3 0 0 1 +Res0 63:38 +Field 37:32 TminLine +Res1 31 +Res0 30 +Field 29 DIC +Field 28 IDC +Field 27:24 CWG +Field 23:20 ERG +Field 19:16 DminLine +Enum 15:14 L1Ip + 0b00 VPIPT + # This is named as AIVIVT in the ARM but documented as reserved + 0b01 RESERVED + 0b10 VIPT + 0b11 PIPT +EndEnum +Res0 13:4 +Field 3:0 IminLine +EndSysreg + +Sysreg DCZID_EL0 3 3 0 0 7 +Res0 63:5 +Field 4 DZP +Field 3:0 BS +EndSysreg + Sysreg SVCR 3 3 4 2 2 Res0 63:2 Field 1 ZA @@ -367,3 +598,36 @@ EndSysreg Sysreg TTBR1_EL1 3 0 2 0 1 Fields TTBRx_EL1 EndSysreg + +Sysreg LORSA_EL1 3 0 10 4 0 +Res0 63:52 +Field 51:16 SA +Res0 15:1 +Field 0 Valid +EndSysreg + +Sysreg LOREA_EL1 3 0 10 4 1 +Res0 63:52 +Field 51:48 EA_51_48 +Field 47:16 EA_47_16 +Res0 15:0 +EndSysreg + +Sysreg LORN_EL1 3 0 10 4 2 +Res0 63:8 +Field 7:0 Num +EndSysreg + +Sysreg LORC_EL1 3 0 10 4 3 +Res0 63:10 +Field 9:2 DS +Res0 1 +Field 0 EN +EndSysreg + +Sysreg LORID_EL1 3 0 10 4 7 +Res0 63:24 +Field 23:16 LD +Res0 15:8 +Field 7:0 LR +EndSysreg |