diff options
Diffstat (limited to 'arch/arm64')
61 files changed, 1219 insertions, 378 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1b1a0e95c751..c03cd0d765d3 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -75,6 +75,7 @@ config ARM64 select CLONE_BACKWARDS select COMMON_CLK select CPU_PM if (SUSPEND || CPU_IDLE) + select CRC32 select DCACHE_WORD_ACCESS select DMA_DIRECT_OPS select EDAC_SUPPORT @@ -104,6 +105,7 @@ config ARM64 select HAVE_ARCH_BITREVERSE select HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48) select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS @@ -142,6 +144,7 @@ config ARM64 select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE + select HAVE_RCU_TABLE_INVALIDATE select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS @@ -479,6 +482,19 @@ config ARM64_ERRATUM_1024718 If unsure, say Y. +config ARM64_ERRATUM_1188873 + bool "Cortex-A76: MRC read following MRRC read of specific Generic Timer in AArch32 might give incorrect result" + default y + select ARM_ARCH_TIMER_OOL_WORKAROUND + help + This option adds work arounds for ARM Cortex-A76 erratum 1188873 + + Affected Cortex-A76 cores (r0p0, r1p0, r2p0) could cause + register corruption when accessing the timer registers from + AArch32 userspace. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -769,9 +785,6 @@ source kernel/Kconfig.hz config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y -config ARCH_HAS_HOLES_MEMORYMODEL - def_bool y if SPARSEMEM - config ARCH_SPARSEMEM_ENABLE def_bool y select SPARSEMEM_VMEMMAP_ENABLE @@ -786,7 +799,7 @@ config ARCH_FLATMEM_ENABLE def_bool !NUMA config HAVE_ARCH_PFN_VALID - def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM + def_bool y config HW_PERF_EVENTS def_bool y @@ -1132,6 +1145,20 @@ config ARM64_RAS_EXTN and access the new registers if the system supports the extension. Platform RAS features may additionally depend on firmware support. +config ARM64_CNP + bool "Enable support for Common Not Private (CNP) translations" + default y + depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN + help + Common Not Private (CNP) allows translation table entries to + be shared between different PEs in the same inner shareable + domain, so the hardware can use this fact to optimise the + caching of such entries in the TLB. + + Selecting this option allows the CNP feature to be detected + at runtime, and does not affect PEs that do not implement + this feature. + endmenu config ARM64_SVE diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 0bcc98dbba56..6142402c2eb4 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -286,12 +286,11 @@ alternative_endif ldr \rd, [\rn, #MM_CONTEXT_ID] .endm /* - * read_ctr - read CTR_EL0. If the system has mismatched - * cache line sizes, provide the system wide safe value - * from arm64_ftr_reg_ctrel0.sys_val + * read_ctr - read CTR_EL0. If the system has mismatched register fields, + * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val */ .macro read_ctr, reg -alternative_if_not ARM64_MISMATCHED_CACHE_LINE_SIZE +alternative_if_not ARM64_MISMATCHED_CACHE_TYPE mrs \reg, ctr_el0 // read CTR nop alternative_else diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 5ee5bca8c24b..13dd42c3ad4e 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -40,6 +40,15 @@ #define L1_CACHE_SHIFT (6) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#define CLIDR_LOUU_SHIFT 27 +#define CLIDR_LOC_SHIFT 24 +#define CLIDR_LOUIS_SHIFT 21 + +#define CLIDR_LOUU(clidr) (((clidr) >> CLIDR_LOUU_SHIFT) & 0x7) +#define CLIDR_LOC(clidr) (((clidr) >> CLIDR_LOC_SHIFT) & 0x7) +#define CLIDR_LOUIS(clidr) (((clidr) >> CLIDR_LOUIS_SHIFT) & 0x7) + /* * Memory returned by kmalloc() may be used for DMA, so we must make * sure that all such allocations are cache aligned. Otherwise, @@ -84,6 +93,37 @@ static inline int cache_line_size(void) return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; } +/* + * Read the effective value of CTR_EL0. + * + * According to ARM ARM for ARMv8-A (ARM DDI 0487C.a), + * section D10.2.33 "CTR_EL0, Cache Type Register" : + * + * CTR_EL0.IDC reports the data cache clean requirements for + * instruction to data coherence. + * + * 0 - dcache clean to PoU is required unless : + * (CLIDR_EL1.LoC == 0) || (CLIDR_EL1.LoUIS == 0 && CLIDR_EL1.LoUU == 0) + * 1 - dcache clean to PoU is not required for i-to-d coherence. + * + * This routine provides the CTR_EL0 with the IDC field updated to the + * effective state. + */ +static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void) +{ + u32 ctr = read_cpuid_cachetype(); + + if (!(ctr & BIT(CTR_IDC_SHIFT))) { + u64 clidr = read_sysreg(clidr_el1); + + if (CLIDR_LOC(clidr) == 0 || + (CLIDR_LOUIS(clidr) == 0 && CLIDR_LOUU(clidr) == 0)) + ctr |= BIT(CTR_IDC_SHIFT); + } + + return ctr; +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 1a037b94eba1..cee28a05ee98 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -159,6 +159,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) } #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) +#define COMPAT_MINSIGSTKSZ 2048 static inline void __user *arch_compat_alloc_user_space(long len) { diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h deleted file mode 100644 index ee35fd0f2236..000000000000 --- a/arch/arm64/include/asm/compiler.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Based on arch/arm/include/asm/compiler.h - * - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __ASM_COMPILER_H -#define __ASM_COMPILER_H - -/* - * This is used to ensure the compiler did actually allocate the register we - * asked it for some inline assembly sequences. Apparently we can't trust the - * compiler from one version to another so a bit of paranoia won't hurt. This - * string is meant to be concatenated with the inline asm string and will - * cause compilation to stop on mismatch. (for details, see gcc PR 15089) - */ -#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" - -#endif /* __ASM_COMPILER_H */ diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index ae1f70450fb2..6e2d254c09eb 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -33,7 +33,7 @@ #define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 #define ARM64_HARDEN_EL2_VECTORS 14 -#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 +#define ARM64_HAS_CNP 15 #define ARM64_HAS_NO_FPSIMD 16 #define ARM64_WORKAROUND_REPEAT_TLBI 17 #define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 @@ -51,7 +51,10 @@ #define ARM64_SSBD 30 #define ARM64_MISMATCHED_CACHE_TYPE 31 #define ARM64_HAS_STAGE2_FWB 32 +#define ARM64_HAS_CRC32 33 +#define ARM64_SSBS 34 +#define ARM64_WORKAROUND_1188873 35 -#define ARM64_NCAPS 33 +#define ARM64_NCAPS 36 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 1717ba1db35d..6db48d90ad63 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -262,7 +262,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; /* * CPU feature detected at boot time based on system-wide value of a * feature. It is safe for a late CPU to have this feature even though - * the system hasn't enabled it, although the featuer will not be used + * the system hasn't enabled it, although the feature will not be used * by Linux in this case. If the system has enabled this feature already, * then every late CPU must have it. */ @@ -508,6 +508,12 @@ static inline bool system_supports_sve(void) cpus_have_const_cap(ARM64_SVE); } +static inline bool system_supports_cnp(void) +{ + return IS_ENABLED(CONFIG_ARM64_CNP) && + cpus_have_const_cap(ARM64_HAS_CNP); +} + #define ARM64_SSBD_UNKNOWN -1 #define ARM64_SSBD_FORCE_DISABLE 0 #define ARM64_SSBD_KERNEL 1 @@ -530,6 +536,7 @@ void arm64_set_ssbd_mitigation(bool state); static inline void arm64_set_ssbd_mitigation(bool state) {} #endif +extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ea690b3562af..12f93e4d2452 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,7 @@ #define ARM_CPU_PART_CORTEX_A75 0xD0A #define ARM_CPU_PART_CORTEX_A35 0xD04 #define ARM_CPU_PART_CORTEX_A55 0xD05 +#define ARM_CPU_PART_CORTEX_A76 0xD0B #define APM_CPU_PART_POTENZA 0x000 @@ -110,6 +111,7 @@ #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) #define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35) #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) +#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 22e4c83de5a5..8d91f2233135 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -36,11 +36,8 @@ static inline unsigned long local_daif_save(void) { unsigned long flags; - asm volatile( - "mrs %0, daif // local_daif_save\n" - : "=r" (flags) - : - : "memory"); + flags = arch_local_save_flags(); + local_daif_mask(); return flags; @@ -60,11 +57,9 @@ static inline void local_daif_restore(unsigned long flags) { if (!arch_irqs_disabled_flags(flags)) trace_hardirqs_on(); - asm volatile( - "msr daif, %0 // local_daif_restore" - : - : "r" (flags) - : "memory"); + + arch_local_irq_restore(flags); + if (arch_irqs_disabled_flags(flags)) trace_hardirqs_off(); } diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index ce70c3ffb993..676de2ec1762 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -137,6 +137,8 @@ #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) +#define ESR_ELx_WFx_ISS_TI (UL(1) << 0) +#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0) #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) #define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1) @@ -148,6 +150,9 @@ #define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC) /* ESR value templates for specific events */ +#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | ESR_ELx_WFx_ISS_TI) +#define ESR_ELx_WFx_WFI_VAL ((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) | \ + ESR_ELx_WFx_ISS_WFI) /* BRK instruction trap from AArch64 state */ #define ESR_ELx_VAL_BRK64(imm) \ @@ -187,6 +192,8 @@ #define ESR_ELx_SYS64_ISS_SYS_OP_MASK (ESR_ELx_SYS64_ISS_SYS_MASK | \ ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_RT(esr) \ + (((esr) & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT) /* * User space cache operations have the following sysreg encoding * in System instructions. @@ -206,6 +213,18 @@ #define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \ (ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \ ESR_ELx_SYS64_ISS_DIR_WRITE) +/* + * User space MRS operations which are supported for emulation + * have the following sysreg encoding in System instructions. + * op0 = 3, op1= 0, crn = 0, {crm = 0, 4-7}, READ (L = 1) + */ +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL \ + (ESR_ELx_SYS64_ISS_SYS_VAL(3, 0, 0, 0, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) #define ESR_ELx_SYS64_ISS_SYS_CTR ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0) #define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \ @@ -249,6 +268,64 @@ #define ESR_ELx_FP_EXC_TFV (UL(1) << 23) +/* + * ISS field definitions for CP15 accesses + */ +#define ESR_ELx_CP15_32_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_32_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_32_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_32_ISS_RT_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRM_SHIFT 1 +#define ESR_ELx_CP15_32_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRM_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRN_SHIFT 10 +#define ESR_ELx_CP15_32_ISS_CRN_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP1_SHIFT 14 +#define ESR_ELx_CP15_32_ISS_OP1_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP2_SHIFT 17 +#define ESR_ELx_CP15_32_ISS_OP2_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) + +#define ESR_ELx_CP15_32_ISS_SYS_MASK (ESR_ELx_CP15_32_ISS_OP1_MASK | \ + ESR_ELx_CP15_32_ISS_OP2_MASK | \ + ESR_ELx_CP15_32_ISS_CRN_MASK | \ + ESR_ELx_CP15_32_ISS_CRM_MASK | \ + ESR_ELx_CP15_32_ISS_DIR_MASK) +#define ESR_ELx_CP15_32_ISS_SYS_VAL(op1, op2, crn, crm) \ + (((op1) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) | \ + ((op2) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) | \ + ((crn) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) | \ + ((crm) << ESR_ELx_CP15_32_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_64_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT_SHIFT) + +#define ESR_ELx_CP15_64_ISS_RT2_SHIFT 10 +#define ESR_ELx_CP15_64_ISS_RT2_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT2_SHIFT) + +#define ESR_ELx_CP15_64_ISS_OP1_SHIFT 16 +#define ESR_ELx_CP15_64_ISS_OP1_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_64_ISS_CRM_SHIFT 1 +#define ESR_ELx_CP15_64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_CRM_SHIFT) + +#define ESR_ELx_CP15_64_ISS_SYS_VAL(op1, crm) \ + (((op1) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) | \ + ((crm) << ESR_ELx_CP15_64_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_SYS_MASK (ESR_ELx_CP15_64_ISS_OP1_MASK | \ + ESR_ELx_CP15_64_ISS_CRM_MASK | \ + ESR_ELx_CP15_64_ISS_DIR_MASK) + +#define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \ + ESR_ELx_CP15_64_ISS_DIR_READ) + +#define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\ + ESR_ELx_CP15_32_ISS_DIR_READ) + #ifndef __ASSEMBLY__ #include <asm/types.h> diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 35b2e50f17fb..9f8b915af3a7 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -31,8 +31,6 @@ #include <asm/alternative.h> #include <asm/cpufeature.h> -#include <xen/xen.h> - /* * Generic IO read/write. These perform native-endian accesses. */ @@ -205,12 +203,5 @@ extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); extern int devmem_is_allowed(unsigned long pfn); -struct bio_vec; -extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, - const struct bio_vec *vec2); -#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ - (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) - #endif /* __KERNEL__ */ #endif /* __ASM_IO_H */ diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 7e2b3e360086..472023498d71 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -26,13 +26,16 @@ #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE -static __always_inline bool arch_static_branch(struct static_key *key, bool branch) +static __always_inline bool arch_static_branch(struct static_key *key, + bool branch) { - asm_volatile_goto("1: nop\n\t" - ".pushsection __jump_table, \"aw\"\n\t" - ".align 3\n\t" - ".quad 1b, %l[l_yes], %c0\n\t" - ".popsection\n\t" + asm_volatile_goto( + "1: nop \n\t" + " .pushsection __jump_table, \"aw\" \n\t" + " .align 3 \n\t" + " .long 1b - ., %l[l_yes] - . \n\t" + " .quad %c0 - . \n\t" + " .popsection \n\t" : : "i"(&((char *)key)[branch]) : : l_yes); return false; @@ -40,13 +43,16 @@ l_yes: return true; } -static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +static __always_inline bool arch_static_branch_jump(struct static_key *key, + bool branch) { - asm_volatile_goto("1: b %l[l_yes]\n\t" - ".pushsection __jump_table, \"aw\"\n\t" - ".align 3\n\t" - ".quad 1b, %l[l_yes], %c0\n\t" - ".popsection\n\t" + asm_volatile_goto( + "1: b %l[l_yes] \n\t" + " .pushsection __jump_table, \"aw\" \n\t" + " .align 3 \n\t" + " .long 1b - ., %l[l_yes] - . \n\t" + " .quad %c0 - . \n\t" + " .popsection \n\t" : : "i"(&((char *)key)[branch]) : : l_yes); return false; @@ -54,13 +60,5 @@ l_yes: return true; } -typedef u64 jump_label_t; - -struct jump_entry { - jump_label_t code; - jump_label_t target; - jump_label_t key; -}; - #endif /* __ASSEMBLY__ */ #endif /* __ASM_JUMP_LABEL_H */ diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index a780f6714b44..850e2122d53f 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -97,7 +97,7 @@ + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \ + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ -#define SWAPPER_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) +#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) #ifdef CONFIG_ARM64_SW_TTBR0_PAN diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index aa45df752a16..b476bc46f0ab 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -175,6 +175,7 @@ #define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS) #define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA) +#define VTTBR_CNP_BIT (UL(1)) #define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT (UL(48)) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 6106a85ae0be..21247870def7 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -335,7 +335,7 @@ static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) { u32 esr = kvm_vcpu_get_hsr(vcpu); - return (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + return ESR_ELx_SYS64_ISS_RT(esr); } static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3d6d7336f871..2842bf149029 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -387,6 +387,8 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); +void __kvm_enable_ssbs(void); + static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) @@ -407,6 +409,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, */ BUG_ON(!static_branch_likely(&arm64_const_caps_ready)); __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2); + + /* + * Disabling SSBD on a non-VHE system requires us to enable SSBS + * at EL2. + */ + if (!has_vhe() && this_cpu_has_cap(ARM64_SSBS) && + arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + kvm_call_hyp(__kvm_enable_ssbs); + } } static inline bool kvm_arch_check_sve_has_vhe(void) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index d6fff7de5539..64337afbf124 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -517,5 +517,10 @@ static inline int hyp_map_aux_data(void) #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) +static inline bool kvm_cpu_has_cnp(void) +{ + return system_supports_cnp(); +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index dd320df0d026..7689c7aa1d77 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -95,5 +95,8 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, extern void *fixmap_remap_fdt(phys_addr_t dt_phys); extern void mark_linear_text_alias_ro(void); +#define INIT_MM_CONTEXT(name) \ + .pgd = init_pg_dir, + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 39ec0b8a689e..1e58bf58c22b 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -147,12 +147,25 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp) extern ttbr_replace_func idmap_cpu_replace_ttbr1; ttbr_replace_func *replace_phys; - phys_addr_t pgd_phys = virt_to_phys(pgdp); + /* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */ + phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp)); + + if (system_supports_cnp() && !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) { + /* + * cpu_replace_ttbr1() is used when there's a boot CPU + * up (i.e. cpufeature framework is not up yet) and + * latter only when we enable CNP via cpufeature's + * enable() callback. + * Also we rely on the cpu_hwcap bit being set before + * calling the enable() function. + */ + ttbr1 |= TTBR_CNP_BIT; + } replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); cpu_install_idmap(); - replace_phys(pgd_phys); + replace_phys(ttbr1); cpu_uninstall_idmap(); } diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 60d02c81a3a2..c88a3cb117a1 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -37,9 +37,7 @@ extern void clear_page(void *to); typedef struct page *pgtable_t; -#ifdef CONFIG_HAVE_ARCH_PFN_VALID extern int pfn_valid(unsigned long); -#endif #include <asm/memory.h> diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index fd208eac9f2a..1d7d8da2ef9b 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -211,6 +211,8 @@ #define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS) #define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1) +#define TTBR_CNP_BIT (UL(1) << 0) + /* * TCR flags. */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 1bdeca8918a6..50b1ef8584c0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -360,6 +360,7 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_present(pmd) pte_present(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) @@ -428,10 +429,33 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, PUD_TYPE_TABLE) #endif +extern pgd_t init_pg_dir[PTRS_PER_PGD]; +extern pgd_t init_pg_end[]; +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; +extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; + +extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); + +static inline bool in_swapper_pgdir(void *addr) +{ + return ((unsigned long)addr & PAGE_MASK) == + ((unsigned long)swapper_pg_dir & PAGE_MASK); +} + static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { +#ifdef __PAGETABLE_PMD_FOLDED + if (in_swapper_pgdir(pmdp)) { + set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd))); + return; + } +#endif /* __PAGETABLE_PMD_FOLDED */ + WRITE_ONCE(*pmdp, pmd); - dsb(ishst); + + if (pmd_valid(pmd)) + dsb(ishst); } static inline void pmd_clear(pmd_t *pmdp) @@ -477,11 +501,21 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) #define pud_present(pud) pte_present(pud_pte(pud)) +#define pud_valid(pud) pte_valid(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { +#ifdef __PAGETABLE_PUD_FOLDED + if (in_swapper_pgdir(pudp)) { + set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud))); + return; + } +#endif /* __PAGETABLE_PUD_FOLDED */ + WRITE_ONCE(*pudp, pud); - dsb(ishst); + + if (pud_valid(pud)) + dsb(ishst); } static inline void pud_clear(pud_t *pudp) @@ -532,6 +566,11 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) { + if (in_swapper_pgdir(pgdp)) { + set_swapper_pgd(pgdp, pgd); + return; + } + WRITE_ONCE(*pgdp, pgd); dsb(ishst); } @@ -712,11 +751,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, } #endif -extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -extern pgd_t swapper_pg_end[]; -extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; -extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; - /* * Encode and decode a swap entry: * bits 0-1: present (must be zero) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 79657ad91397..2bf6691371c2 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -174,6 +174,10 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc, { start_thread_common(regs, pc); regs->pstate = PSR_MODE_EL0t; + + if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) + regs->pstate |= PSR_SSBS_BIT; + regs->sp = sp; } @@ -190,6 +194,9 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, regs->pstate |= PSR_AA32_E_BIT; #endif + if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) + regs->pstate |= PSR_AA32_SSBS_BIT; + regs->compat_sp = sp; } #endif @@ -244,10 +251,6 @@ static inline void spin_lock_prefetch(const void *ptr) #endif -void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused); -void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused); -void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused); - extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */ extern void __init minsigstksz_setup(void); diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 177b851ca6d9..6bc43889d11e 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -50,6 +50,7 @@ #define PSR_AA32_I_BIT 0x00000080 #define PSR_AA32_A_BIT 0x00000100 #define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_SSBS_BIT 0x00800000 #define PSR_AA32_DIT_BIT 0x01000000 #define PSR_AA32_Q_BIT 0x08000000 #define PSR_AA32_V_BIT 0x10000000 diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c1470931b897..0c909c4a932f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,7 +20,6 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H -#include <asm/compiler.h> #include <linux/stringify.h> /* @@ -84,13 +83,26 @@ #endif /* CONFIG_BROKEN_GAS_INST */ -#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) -#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) +/* + * Instructions for modifying PSTATE fields. + * As per Arm ARM for v8-A, Section "C.5.1.3 op0 == 0b00, architectural hints, + * barriers and CLREX, and PSTATE access", ARM DDI 0487 C.a, system instructions + * for accessing PSTATE fields have the following encoding: + * Op0 = 0, CRn = 4 + * Op1, Op2 encodes the PSTATE field modified and defines the constraints. + * CRm = Imm4 for the instruction. + * Rt = 0x1f + */ +#define pstate_field(op1, op2) ((op1) << Op1_shift | (op2) << Op2_shift) +#define PSTATE_Imm_shift CRm_shift + +#define PSTATE_PAN pstate_field(0, 4) +#define PSTATE_UAO pstate_field(0, 3) +#define PSTATE_SSBS pstate_field(3, 1) -#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \ - (!!x)<<8 | 0x1f) -#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \ - (!!x)<<8 | 0x1f) +#define SET_PSTATE_PAN(x) __emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift)) #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) @@ -419,6 +431,7 @@ #define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7) /* Common SCTLR_ELx flags. */ +#define SCTLR_ELx_DSSBS (1UL << 44) #define SCTLR_ELx_EE (1 << 25) #define SCTLR_ELx_IESB (1 << 21) #define SCTLR_ELx_WXN (1 << 19) @@ -439,7 +452,7 @@ (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \ (1 << 17) | (1 << 20) | (1 << 24) | (1 << 26) | \ (1 << 27) | (1 << 30) | (1 << 31) | \ - (0xffffffffUL << 32)) + (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL2 SCTLR_ELx_EE @@ -453,7 +466,7 @@ #define SCTLR_EL2_SET (SCTLR_ELx_IESB | ENDIAN_SET_EL2 | SCTLR_EL2_RES1) #define SCTLR_EL2_CLEAR (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \ - ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) + SCTLR_ELx_DSSBS | ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) #if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffff #error "Inconsistent SCTLR_EL2 set/clear bits" @@ -477,7 +490,7 @@ (1 << 29)) #define SCTLR_EL1_RES0 ((1 << 6) | (1 << 10) | (1 << 13) | (1 << 17) | \ (1 << 27) | (1 << 30) | (1 << 31) | \ - (0xffffffffUL << 32)) + (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) @@ -489,12 +502,12 @@ #define SCTLR_EL1_SET (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA |\ SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\ - SCTLR_EL1_DZE | SCTLR_EL1_UCT | SCTLR_EL1_NTWI |\ + SCTLR_EL1_DZE | SCTLR_EL1_UCT |\ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1) #define SCTLR_EL1_CLEAR (SCTLR_ELx_A | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD |\ SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\ - SCTLR_EL1_RES0) + SCTLR_ELx_DSSBS | SCTLR_EL1_NTWI | SCTLR_EL1_RES0) #if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff #error "Inconsistent SCTLR_EL1 set/clear bits" @@ -544,6 +557,13 @@ #define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 #define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 +/* id_aa64pfr1 */ +#define ID_AA64PFR1_SSBS_SHIFT 4 + +#define ID_AA64PFR1_SSBS_PSTATE_NI 0 +#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 +#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 + /* id_aa64mmfr0 */ #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN64_SHIFT 24 diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index a3233167be60..106fdc951b6e 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -22,16 +22,10 @@ #include <linux/pagemap.h> #include <linux/swap.h> -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - -#define tlb_remove_entry(tlb, entry) tlb_remove_table(tlb, entry) static inline void __tlb_remove_table(void *_table) { free_page_and_swap_cache((struct page *)_table); } -#else -#define tlb_remove_entry(tlb, entry) tlb_remove_page(tlb, entry) -#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ static void tlb_flush(struct mmu_gather *tlb); @@ -40,36 +34,35 @@ static void tlb_flush(struct mmu_gather *tlb); static inline void tlb_flush(struct mmu_gather *tlb) { struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0); + bool last_level = !tlb->freed_tables; + unsigned long stride = tlb_get_unmap_size(tlb); /* - * The ASID allocator will either invalidate the ASID or mark - * it as used. + * If we're tearing down the address space then we only care about + * invalidating the walk-cache, since the ASID allocator won't + * reallocate our ASID without invalidating the entire TLB. */ - if (tlb->fullmm) + if (tlb->fullmm) { + if (!last_level) + flush_tlb_mm(tlb->mm); return; + } - /* - * The intermediate page table levels are already handled by - * the __(pte|pmd|pud)_free_tlb() functions, so last level - * TLBI is sufficient here. - */ - __flush_tlb_range(&vma, tlb->start, tlb->end, true); + __flush_tlb_range(&vma, tlb->start, tlb->end, stride, last_level); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); pgtable_page_dtor(pte); - tlb_remove_entry(tlb, pte); + tlb_remove_table(tlb, pte); } #if CONFIG_PGTABLE_LEVELS > 2 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); - tlb_remove_entry(tlb, virt_to_page(pmdp)); + tlb_remove_table(tlb, virt_to_page(pmdp)); } #endif @@ -77,8 +70,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); - tlb_remove_entry(tlb, virt_to_page(pudp)); + tlb_remove_table(tlb, virt_to_page(pudp)); } #endif diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index a4a1901140ee..c3c0387aee18 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -70,43 +70,73 @@ }) /* - * TLB Management - * ============== + * TLB Invalidation + * ================ * - * The TLB specific code is expected to perform whatever tests it needs - * to determine if it should invalidate the TLB for each call. Start - * addresses are inclusive and end addresses are exclusive; it is safe to - * round these addresses down. + * This header file implements the low-level TLB invalidation routines + * (sometimes referred to as "flushing" in the kernel) for arm64. * - * flush_tlb_all() + * Every invalidation operation uses the following template: + * + * DSB ISHST // Ensure prior page-table updates have completed + * TLBI ... // Invalidate the TLB + * DSB ISH // Ensure the TLB invalidation has completed + * if (invalidated kernel mappings) + * ISB // Discard any instructions fetched from the old mapping + * + * + * The following functions form part of the "core" TLB invalidation API, + * as documented in Documentation/core-api/cachetlb.rst: * - * Invalidate the entire TLB. + * flush_tlb_all() + * Invalidate the entire TLB (kernel + user) on all CPUs * * flush_tlb_mm(mm) + * Invalidate an entire user address space on all CPUs. + * The 'mm' argument identifies the ASID to invalidate. + * + * flush_tlb_range(vma, start, end) + * Invalidate the virtual-address range '[start, end)' on all + * CPUs for the user address space corresponding to 'vma->mm'. + * Note that this operation also invalidates any walk-cache + * entries associated with translations for the specified address + * range. + * + * flush_tlb_kernel_range(start, end) + * Same as flush_tlb_range(..., start, end), but applies to + * kernel mappings rather than a particular user address space. + * Whilst not explicitly documented, this function is used when + * unmapping pages from vmalloc/io space. + * + * flush_tlb_page(vma, addr) + * Invalidate a single user mapping for address 'addr' in the + * address space corresponding to 'vma->mm'. Note that this + * operation only invalidates a single, last-level page-table + * entry and therefore does not affect any walk-caches. * - * Invalidate all TLB entries in a particular address space. - * - mm - mm_struct describing address space * - * flush_tlb_range(mm,start,end) + * Next, we have some undocumented invalidation routines that you probably + * don't want to call unless you know what you're doing: * - * Invalidate a range of TLB entries in the specified address - * space. - * - mm - mm_struct describing address space - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) + * local_flush_tlb_all() + * Same as flush_tlb_all(), but only applies to the calling CPU. * - * flush_tlb_page(vaddr,vma) + * __flush_tlb_kernel_pgtable(addr) + * Invalidate a single kernel mapping for address 'addr' on all + * CPUs, ensuring that any walk-cache entries associated with the + * translation are also invalidated. * - * Invalidate the specified page in the specified address range. - * - vaddr - virtual address (may not be aligned) - * - vma - vma_struct describing address range + * __flush_tlb_range(vma, start, end, stride, last_level) + * Invalidate the virtual-address range '[start, end)' on all + * CPUs for the user address space corresponding to 'vma->mm'. + * The invalidation operations are issued at a granularity + * determined by 'stride' and only affect any walk-cache entries + * if 'last_level' is equal to false. * - * flush_kern_tlb_page(kaddr) * - * Invalidate the TLB entry for the specified page. The address - * will be in the kernels virtual memory space. Current uses - * only require the D-TLB to be invalidated. - * - kaddr - Kernel virtual memory address + * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented + * on top of these routines, since that is our interface to the mmu_gather + * API as used by munmap() and friends. */ static inline void local_flush_tlb_all(void) { @@ -149,25 +179,28 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, * This is meant to avoid soft lock-ups on large TLB flushing ranges and not * necessarily a performance improvement. */ -#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) +#define MAX_TLBI_OPS 1024UL static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, - bool last_level) + unsigned long stride, bool last_level) { unsigned long asid = ASID(vma->vm_mm); unsigned long addr; - if ((end - start) > MAX_TLB_RANGE) { + if ((end - start) > (MAX_TLBI_OPS * stride)) { flush_tlb_mm(vma->vm_mm); return; } + /* Convert the stride into units of 4k */ + stride >>= 12; + start = __TLBI_VADDR(start, asid); end = __TLBI_VADDR(end, asid); dsb(ishst); - for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { + for (addr = start; addr < end; addr += stride) { if (last_level) { __tlbi(vale1is, addr); __tlbi_user(vale1is, addr); @@ -182,14 +215,18 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __flush_tlb_range(vma, start, end, false); + /* + * We cannot use leaf-only invalidation here, since we may be invalidating + * table entries as part of collapsing hugepages or moving page tables. + */ + __flush_tlb_range(vma, start, end, PAGE_SIZE, false); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long addr; - if ((end - start) > MAX_TLB_RANGE) { + if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) { flush_tlb_all(); return; } @@ -199,7 +236,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - __tlbi(vaae1is, addr); + __tlbi(vaale1is, addr); dsb(ish); isb(); } @@ -208,20 +245,11 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end * Used to invalidate the TLB (walk caches) corresponding to intermediate page * table levels (pgd/pud/pmd). */ -static inline void __flush_tlb_pgtable(struct mm_struct *mm, - unsigned long uaddr) -{ - unsigned long addr = __TLBI_VADDR(uaddr, ASID(mm)); - - __tlbi(vae1is, addr); - __tlbi_user(vae1is, addr); - dsb(ish); -} - static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) { unsigned long addr = __TLBI_VADDR(kaddr, 0); + dsb(ishst); __tlbi(vaae1is, addr); dsb(ish); } diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index e66b0fca99c2..07c34087bd5e 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -32,7 +32,6 @@ #include <asm/cpufeature.h> #include <asm/ptrace.h> #include <asm/memory.h> -#include <asm/compiler.h> #include <asm/extable.h> #define get_ds() (KERNEL_DS) diff --git a/arch/arm64/include/asm/xen/events.h b/arch/arm64/include/asm/xen/events.h index 4e22b7a8c038..2788e95d0ff0 100644 --- a/arch/arm64/include/asm/xen/events.h +++ b/arch/arm64/include/asm/xen/events.h @@ -14,7 +14,7 @@ enum ipi_vector { static inline int xen_irqs_disabled(struct pt_regs *regs) { - return raw_irqs_disabled_flags((unsigned long) regs->pstate); + return !interrupts_enabled(regs); } #define xchg_xen_ulong(ptr, val) xchg((ptr), (val)) diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 17c65c8f33cb..2bcd6e4f3474 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -48,5 +48,6 @@ #define HWCAP_USCAT (1 << 25) #define HWCAP_ILRCPC (1 << 26) #define HWCAP_FLAGM (1 << 27) +#define HWCAP_SSBS (1 << 28) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 98c4ce55d9c3..a36227fdb084 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -46,6 +46,7 @@ #define PSR_I_BIT 0x00000080 #define PSR_A_BIT 0x00000100 #define PSR_D_BIT 0x00000200 +#define PSR_SSBS_BIT 0x00001000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 #define PSR_V_BIT 0x10000000 diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index dec10898d688..a509e35132d2 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -68,21 +68,43 @@ static bool has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, int scope) { - u64 mask = CTR_CACHE_MINLINE_MASK; - - /* Skip matching the min line sizes for cache type check */ - if (entry->capability == ARM64_MISMATCHED_CACHE_TYPE) - mask ^= arm64_ftr_reg_ctrel0.strict_mask; + u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + u64 sys = arm64_ftr_reg_ctrel0.sys_val & mask; + u64 ctr_raw, ctr_real; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - return (read_cpuid_cachetype() & mask) != - (arm64_ftr_reg_ctrel0.sys_val & mask); + + /* + * We want to make sure that all the CPUs in the system expose + * a consistent CTR_EL0 to make sure that applications behaves + * correctly with migration. + * + * If a CPU has CTR_EL0.IDC but does not advertise it via CTR_EL0 : + * + * 1) It is safe if the system doesn't support IDC, as CPU anyway + * reports IDC = 0, consistent with the rest. + * + * 2) If the system has IDC, it is still safe as we trap CTR_EL0 + * access on this CPU via the ARM64_HAS_CACHE_IDC capability. + * + * So, we need to make sure either the raw CTR_EL0 or the effective + * CTR_EL0 matches the system's copy to allow a secondary CPU to boot. + */ + ctr_raw = read_cpuid_cachetype() & mask; + ctr_real = read_cpuid_effective_cachetype() & mask; + + return (ctr_real != sys) && (ctr_raw != sys); } static void cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) { - sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); + u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + + /* Trap CTR_EL0 access on this CPU, only if it has a mismatch */ + if ((read_cpuid_cachetype() & mask) != + (arm64_ftr_reg_ctrel0.sys_val & mask)) + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); @@ -116,6 +138,15 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, static DEFINE_SPINLOCK(bp_lock); int cpu, slot = -1; + /* + * enable_smccc_arch_workaround_1() passes NULL for the hyp_vecs + * start/end if we're a guest. Skip the hyp-vectors work. + */ + if (!hyp_vecs_start) { + __this_cpu_write(bp_hardening_data.fn, fn); + return; + } + spin_lock(&bp_lock); for_each_possible_cpu(cpu) { if (per_cpu(bp_hardening_data.fn, cpu) == fn) { @@ -312,6 +343,14 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt, void arm64_set_ssbd_mitigation(bool state) { + if (this_cpu_has_cap(ARM64_SSBS)) { + if (state) + asm volatile(SET_PSTATE_SSBS(0)); + else + asm volatile(SET_PSTATE_SSBS(1)); + return; + } + switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); @@ -336,6 +375,11 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + if (this_cpu_has_cap(ARM64_SSBS)) { + required = false; + goto out_printmsg; + } + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { ssbd_state = ARM64_SSBD_UNKNOWN; return false; @@ -384,7 +428,6 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, switch (ssbd_state) { case ARM64_SSBD_FORCE_DISABLE: - pr_info_once("%s disabled from command-line\n", entry->desc); arm64_set_ssbd_mitigation(false); required = false; break; @@ -397,7 +440,6 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, break; case ARM64_SSBD_FORCE_ENABLE: - pr_info_once("%s forced from command-line\n", entry->desc); arm64_set_ssbd_mitigation(true); required = true; break; @@ -407,10 +449,27 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, break; } +out_printmsg: + switch (ssbd_state) { + case ARM64_SSBD_FORCE_DISABLE: + pr_info_once("%s disabled from command-line\n", entry->desc); + break; + + case ARM64_SSBD_FORCE_ENABLE: + pr_info_once("%s forced from command-line\n", entry->desc); + break; + } + return required; } #endif /* CONFIG_ARM64_SSBD */ +static void __maybe_unused +cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) +{ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); +} + #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ .matches = is_affected_midr_range, \ .midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max) @@ -616,14 +675,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #endif { - .desc = "Mismatched cache line size", - .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, - .matches = has_mismatched_cache_type, - .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, - .cpu_enable = cpu_enable_trap_ctr_access, - }, - { - .desc = "Mismatched cache type", + .desc = "Mismatched cache type (CTR_EL0)", .capability = ARM64_MISMATCHED_CACHE_TYPE, .matches = has_mismatched_cache_type, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, @@ -680,6 +732,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .matches = has_ssbd_mitigation, }, #endif +#ifdef CONFIG_ARM64_ERRATUM_1188873 + { + /* Cortex-A76 r0p0 to r2p0 */ + .desc = "ARM erratum 1188873", + .capability = ARM64_WORKAROUND_1188873, + ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e238b7932096..af50064dea51 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -20,6 +20,7 @@ #include <linux/bsearch.h> #include <linux/cpumask.h> +#include <linux/crash_dump.h> #include <linux/sort.h> #include <linux/stop_machine.h> #include <linux/types.h> @@ -117,6 +118,7 @@ EXPORT_SYMBOL(cpu_hwcap_keys); static bool __maybe_unused cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused); +static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap); /* * NOTE: Any changes to the visibility of features should be kept in @@ -164,6 +166,11 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), @@ -371,7 +378,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), - ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_raz), + ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1), ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_raz), /* Op1 = 0, CRn = 0, CRm = 5 */ @@ -657,7 +664,6 @@ void update_cpu_features(int cpu, /* * EL3 is not our concern. - * ID_AA64PFR1 is currently RES0. */ taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); @@ -848,15 +854,55 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus } static bool has_cache_idc(const struct arm64_cpu_capabilities *entry, - int __unused) + int scope) { - return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_IDC_SHIFT); + u64 ctr; + + if (scope == SCOPE_SYSTEM) + ctr = arm64_ftr_reg_ctrel0.sys_val; + else + ctr = read_cpuid_effective_cachetype(); + + return ctr & BIT(CTR_IDC_SHIFT); +} + +static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unused) +{ + /* + * If the CPU exposes raw CTR_EL0.IDC = 0, while effectively + * CTR_EL0.IDC = 1 (from CLIDR values), we need to trap accesses + * to the CTR_EL0 on this CPU and emulate it with the real/safe + * value. + */ + if (!(read_cpuid_cachetype() & BIT(CTR_IDC_SHIFT))) + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, - int __unused) + int scope) { - return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_DIC_SHIFT); + u64 ctr; + + if (scope == SCOPE_SYSTEM) + ctr = arm64_ftr_reg_ctrel0.sys_val; + else + ctr = read_cpuid_cachetype(); + + return ctr & BIT(CTR_DIC_SHIFT); +} + +static bool __maybe_unused +has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope) +{ + /* + * Kdump isn't guaranteed to power-off all secondary CPUs, CNP + * may share TLB entries with a CPU stuck in the crashed + * kernel. + */ + if (is_kdump_kernel()) + return false; + + return has_cpuid_feature(entry, scope); } #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 @@ -1035,6 +1081,70 @@ static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) WARN_ON(val & (7 << 27 | 7 << 21)); } +#ifdef CONFIG_ARM64_SSBD +static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) +{ + if (user_mode(regs)) + return 1; + + if (instr & BIT(PSTATE_Imm_shift)) + regs->pstate |= PSR_SSBS_BIT; + else + regs->pstate &= ~PSR_SSBS_BIT; + + arm64_skip_faulting_instruction(regs, 4); + return 0; +} + +static struct undef_hook ssbs_emulation_hook = { + .instr_mask = ~(1U << PSTATE_Imm_shift), + .instr_val = 0xd500401f | PSTATE_SSBS, + .fn = ssbs_emulation_handler, +}; + +static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) +{ + static bool undef_hook_registered = false; + static DEFINE_SPINLOCK(hook_lock); + + spin_lock(&hook_lock); + if (!undef_hook_registered) { + register_undef_hook(&ssbs_emulation_hook); + undef_hook_registered = true; + } + spin_unlock(&hook_lock); + + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); + arm64_set_ssbd_mitigation(false); + } else { + arm64_set_ssbd_mitigation(true); + } +} +#endif /* CONFIG_ARM64_SSBD */ + +#ifdef CONFIG_ARM64_PAN +static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) +{ + /* + * We modify PSTATE. This won't work from irq context as the PSTATE + * is discarded once we return from the exception. + */ + WARN_ON_ONCE(in_interrupt()); + + sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); + asm(SET_PSTATE_PAN(1)); +} +#endif /* CONFIG_ARM64_PAN */ + +#ifdef CONFIG_ARM64_RAS_EXTN +static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) +{ + /* Firmware may have left a deferred SError in this register. */ + write_sysreg_s(0, SYS_DISR_EL1); +} +#endif /* CONFIG_ARM64_RAS_EXTN */ + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -1184,6 +1294,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_CACHE_IDC, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cache_idc, + .cpu_enable = cpu_emulate_effective_ctr, }, { .desc = "Instruction cache invalidation not required for I/D coherence", @@ -1222,6 +1333,41 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_hw_dbm, }, #endif +#ifdef CONFIG_ARM64_SSBD + { + .desc = "CRC32 instructions", + .capability = ARM64_HAS_CRC32, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR0_EL1, + .field_pos = ID_AA64ISAR0_CRC32_SHIFT, + .min_field_value = 1, + }, + { + .desc = "Speculative Store Bypassing Safe (SSBS)", + .capability = ARM64_SSBS, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_SSBS_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, + .cpu_enable = cpu_enable_ssbs, + }, +#endif +#ifdef CONFIG_ARM64_CNP + { + .desc = "Common not Private translations", + .capability = ARM64_HAS_CNP, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_useable_cnp, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR2_CNP_SHIFT, + .min_field_value = 1, + .cpu_enable = cpu_enable_cnp, + }, +#endif {}, }; @@ -1267,6 +1413,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #ifdef CONFIG_ARM64_SVE HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE), #endif + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, HWCAP_SSBS), {}, }; @@ -1658,6 +1805,11 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) return (cpus_have_const_cap(ARM64_HAS_PAN) && !cpus_have_const_cap(ARM64_HAS_UAO)); } +static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap) +{ + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); +} + /* * We emulate only the following system register space. * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 4 - 7] @@ -1719,27 +1871,32 @@ static int emulate_sys_reg(u32 id, u64 *valp) return 0; } -static int emulate_mrs(struct pt_regs *regs, u32 insn) +int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt) { int rc; - u32 sys_reg, dst; u64 val; - /* - * sys_reg values are defined as used in mrs/msr instruction. - * shift the imm value to get the encoding. - */ - sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5; rc = emulate_sys_reg(sys_reg, &val); if (!rc) { - dst = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); - pt_regs_write_reg(regs, dst, val); + pt_regs_write_reg(regs, rt, val); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } - return rc; } +static int emulate_mrs(struct pt_regs *regs, u32 insn) +{ + u32 sys_reg, rt; + + /* + * sys_reg values are defined as used in mrs/msr instruction. + * shift the imm value to get the encoding. + */ + sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5; + rt = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); + return do_emulate_mrs(regs, sys_reg, rt); +} + static struct undef_hook mrs_hook = { .instr_mask = 0xfff00000, .instr_val = 0xd5300000, @@ -1755,9 +1912,3 @@ static int __init enable_mrs_emulation(void) } core_initcall(enable_mrs_emulation); - -void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) -{ - /* Firmware may have left a deferred SError in this register. */ - write_sysreg_s(0, SYS_DISR_EL1); -} diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index e9ab7b3ed317..bcc2831399cb 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -81,6 +81,7 @@ static const char *const hwcap_str[] = { "uscat", "ilrcpc", "flagm", + "ssbs", NULL }; @@ -324,7 +325,15 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) { info->reg_cntfrq = arch_timer_get_cntfrq(); - info->reg_ctr = read_cpuid_cachetype(); + /* + * Use the effective value of the CTR_EL0 than the raw value + * exposed by the CPU. CTR_E0.IDC field value must be interpreted + * with the CLIDR_EL1 fields to avoid triggering false warnings + * when there is a mismatch across the CPUs. Keep track of the + * effective value of the CTR_EL0 in our internal records for + * acurate sanity check and feature enablement. + */ + info->reg_ctr = read_cpuid_effective_cachetype(); info->reg_dczid = read_cpuid(DCZID_EL0); info->reg_midr = read_cpuid_id(); info->reg_revidr = read_cpuid(REVIDR_EL1); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 09dbea221a27..039144ecbcb2 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -589,7 +589,7 @@ el1_undef: inherit_daif pstate=x23, tmp=x2 mov x0, sp bl do_undefinstr - ASM_BUG() + kernel_exit 1 el1_dbg: /* * Debug exception handling @@ -665,6 +665,7 @@ el0_sync: cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception b.eq el0_fpsimd_exc cmp x24, #ESR_ELx_EC_SYS64 // configurable trap + ccmp x24, #ESR_ELx_EC_WFx, #4, ne b.eq el0_sys cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception b.eq el0_sp_pc @@ -697,9 +698,9 @@ el0_sync_compat: cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap - b.eq el0_undef + b.eq el0_cp15 cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap - b.eq el0_undef + b.eq el0_cp15 cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap b.eq el0_undef cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap @@ -722,6 +723,17 @@ el0_irq_compat: el0_error_compat: kernel_entry 0, 32 b el0_error_naked + +el0_cp15: + /* + * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions + */ + enable_daif + ct_user_exit + mov x0, x25 + mov x1, sp + bl do_cp15instr + b ret_to_user #endif el0_da: diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index b0853069702f..4471f570a295 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -287,19 +287,21 @@ __create_page_tables: mov x28, lr /* - * Invalidate the idmap and swapper page tables to avoid potential - * dirty cache lines being evicted. + * Invalidate the init page tables to avoid potential dirty cache lines + * being evicted. Other page tables are allocated in rodata as part of + * the kernel image, and thus are clean to the PoC per the boot + * protocol. */ - adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x0, init_pg_dir + adrp x1, init_pg_end sub x1, x1, x0 bl __inval_dcache_area /* - * Clear the idmap and swapper page tables. + * Clear the init page tables. */ - adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x0, init_pg_dir + adrp x1, init_pg_end sub x1, x1, x0 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 @@ -373,7 +375,7 @@ __create_page_tables: /* * Map the kernel image (starting with PHYS_OFFSET). */ - adrp x0, swapper_pg_dir + adrp x0, init_pg_dir mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) add x5, x5, x23 // add KASLR displacement mov x4, PTRS_PER_PGD @@ -390,7 +392,7 @@ __create_page_tables: * tables again to remove any speculatively loaded cache lines. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x1, init_pg_end sub x1, x1, x0 dmb sy bl __inval_dcache_area @@ -706,6 +708,7 @@ secondary_startup: * Common entry point for secondary CPUs. */ bl __cpu_setup // initialise processor + adrp x1, swapper_pg_dir bl __enable_mmu ldr x8, =__secondary_switched br x8 @@ -748,6 +751,7 @@ ENDPROC(__secondary_switched) * Enable the MMU. * * x0 = SCTLR_EL1 value for turning on the MMU. + * x1 = TTBR1_EL1 value * * Returns to the caller via x30/lr. This requires the caller to be covered * by the .idmap.text section. @@ -756,17 +760,16 @@ ENDPROC(__secondary_switched) * If it isn't, park the CPU */ ENTRY(__enable_mmu) - mrs x1, ID_AA64MMFR0_EL1 - ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 + mrs x2, ID_AA64MMFR0_EL1 + ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED b.ne __no_granule_support - update_early_cpu_boot_status 0, x1, x2 - adrp x1, idmap_pg_dir - adrp x2, swapper_pg_dir - phys_to_ttbr x3, x1 - phys_to_ttbr x4, x2 - msr ttbr0_el1, x3 // load TTBR0 - msr ttbr1_el1, x4 // load TTBR1 + update_early_cpu_boot_status 0, x2, x3 + adrp x2, idmap_pg_dir + phys_to_ttbr x1, x1 + phys_to_ttbr x2, x2 + msr ttbr0_el1, x2 // load TTBR0 + msr ttbr1_el1, x1 // load TTBR1 isb msr sctlr_el1, x0 isb @@ -823,6 +826,7 @@ __primary_switch: mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value #endif + adrp x1, init_pg_dir bl __enable_mmu #ifdef CONFIG_RELOCATABLE bl __relocate_kernel diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index e0756416e567..646b9562ee64 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -25,12 +25,12 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - void *addr = (void *)entry->code; + void *addr = (void *)jump_entry_code(entry); u32 insn; if (type == JUMP_LABEL_JMP) { - insn = aarch64_insn_gen_branch_imm(entry->code, - entry->target, + insn = aarch64_insn_gen_branch_imm(jump_entry_code(entry), + jump_entry_target(entry), AARCH64_INSN_BRANCH_NOLINK); } else { insn = aarch64_insn_gen_nop(); diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 8e38d5267f22..e213f8e867f6 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -966,6 +966,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, return 0; } +static int armv8pmu_filter_match(struct perf_event *event) +{ + unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT; + return evtype != ARMV8_PMUV3_PERFCTR_CHAIN; +} + static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; @@ -1114,6 +1120,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = armv8pmu_stop, cpu_pmu->reset = armv8pmu_reset, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + cpu_pmu->filter_match = armv8pmu_filter_match; return 0; } diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index e78c3ef04d95..9b65132e789a 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -107,7 +107,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if (!p->ainsn.api.insn) return -ENOMEM; break; - }; + } /* prepare the instruction */ if (p->ainsn.api.insn) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 7f1628effe6d..ce99c58cd1f1 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -358,6 +358,10 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, if (IS_ENABLED(CONFIG_ARM64_UAO) && cpus_have_const_cap(ARM64_HAS_UAO)) childregs->pstate |= PSR_UAO_BIT; + + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) + childregs->pstate |= PSR_SSBS_BIT; + p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; } diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index e8edbf13302a..8cdaf25e99cd 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -24,7 +24,6 @@ #include <uapi/linux/psci.h> -#include <asm/compiler.h> #include <asm/cpu_ops.h> #include <asm/errno.h> #include <asm/smp_plat.h> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 5b4fac434c84..d0f62dd24c90 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -64,6 +64,9 @@ #include <asm/xen/hypervisor.h> #include <asm/mmu_context.h> +static int num_standard_resources; +static struct resource *standard_resources; + phys_addr_t __fdt_pointer __initdata; /* @@ -206,14 +209,19 @@ static void __init request_standard_resources(void) { struct memblock_region *region; struct resource *res; + unsigned long i = 0; kernel_code.start = __pa_symbol(_text); kernel_code.end = __pa_symbol(__init_begin - 1); kernel_data.start = __pa_symbol(_sdata); kernel_data.end = __pa_symbol(_end - 1); + num_standard_resources = memblock.memory.cnt; + standard_resources = alloc_bootmem_low(num_standard_resources * + sizeof(*standard_resources)); + for_each_memblock(memory, region) { - res = alloc_bootmem_low(sizeof(*res)); + res = &standard_resources[i++]; if (memblock_is_nomap(region)) { res->name = "reserved"; res->flags = IORESOURCE_MEM; @@ -243,36 +251,26 @@ static void __init request_standard_resources(void) static int __init reserve_memblock_reserved_regions(void) { - phys_addr_t start, end, roundup_end = 0; - struct resource *mem, *res; - u64 i; - - for_each_reserved_mem_region(i, &start, &end) { - if (end <= roundup_end) - continue; /* done already */ - - start = __pfn_to_phys(PFN_DOWN(start)); - end = __pfn_to_phys(PFN_UP(end)) - 1; - roundup_end = end; - - res = kzalloc(sizeof(*res), GFP_ATOMIC); - if (WARN_ON(!res)) - return -ENOMEM; - res->start = start; - res->end = end; - res->name = "reserved"; - res->flags = IORESOURCE_MEM; - - mem = request_resource_conflict(&iomem_resource, res); - /* - * We expected memblock_reserve() regions to conflict with - * memory created by request_standard_resources(). - */ - if (WARN_ON_ONCE(!mem)) + u64 i, j; + + for (i = 0; i < num_standard_resources; ++i) { + struct resource *mem = &standard_resources[i]; + phys_addr_t r_start, r_end, mem_size = resource_size(mem); + + if (!memblock_is_region_reserved(mem->start, mem_size)) continue; - kfree(res); - reserve_region_with_split(mem, start, end, "reserved"); + for_each_reserved_mem_region(j, &r_start, &r_end) { + resource_size_t start, end; + + start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); + end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end); + + if (start > mem->end || end < mem->start) + continue; + + reserve_region_with_split(mem, start, end, "reserved"); + } } return 0; @@ -351,12 +349,8 @@ void __init setup_arch(char **cmdline_p) #endif #ifdef CONFIG_VT -#if defined(CONFIG_VGA_CONSOLE) - conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) conswitchp = &dummy_con; #endif -#endif if (boot_args[1] || boot_args[2] || boot_args[3]) { pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n" "\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n" diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index bebec8ef9372..3e53ffa07994 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -101,6 +101,7 @@ ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ + adrp x1, swapper_pg_dir bl __enable_mmu ldr x8, =_cpu_resume br x8 diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c index 3432e5ef9f41..885f13e58708 100644 --- a/arch/arm64/kernel/ssbd.c +++ b/arch/arm64/kernel/ssbd.c @@ -3,17 +3,33 @@ * Copyright (C) 2018 ARM Ltd, All Rights Reserved. */ +#include <linux/compat.h> #include <linux/errno.h> #include <linux/sched.h> +#include <linux/sched/task_stack.h> #include <linux/thread_info.h> #include <asm/cpufeature.h> +static void ssbd_ssbs_enable(struct task_struct *task) +{ + u64 val = is_compat_thread(task_thread_info(task)) ? + PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + task_pt_regs(task)->pstate |= val; +} + +static void ssbd_ssbs_disable(struct task_struct *task) +{ + u64 val = is_compat_thread(task_thread_info(task)) ? + PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + task_pt_regs(task)->pstate &= ~val; +} + /* * prctl interface for SSBD - * FIXME: Drop the below ifdefery once merged in 4.18. */ -#ifdef PR_SPEC_STORE_BYPASS static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) { int state = arm64_get_ssbd_state(); @@ -46,12 +62,14 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) return -EPERM; task_clear_spec_ssb_disable(task); clear_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_enable(task); break; case PR_SPEC_DISABLE: if (state == ARM64_SSBD_FORCE_DISABLE) return -EPERM; task_set_spec_ssb_disable(task); set_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_disable(task); break; case PR_SPEC_FORCE_DISABLE: if (state == ARM64_SSBD_FORCE_DISABLE) @@ -59,6 +77,7 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); set_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_disable(task); break; default: return -ERANGE; @@ -107,4 +126,3 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) return -ENODEV; } } -#endif /* PR_SPEC_STORE_BYPASS */ diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 70c283368b64..9405d1b7f4b0 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -48,6 +48,10 @@ void notrace __cpu_suspend_exit(void) */ cpu_uninstall_idmap(); + /* Restore CnP bit in TTBR1_EL1 */ + if (system_supports_cnp()) + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + /* * PSTATE was not saved over suspend/resume, re-enable any detected * features that might not have been set correctly. diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 039e9ff379cc..4066da7f1e5e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -310,10 +310,12 @@ static int call_undef_hook(struct pt_regs *regs) int (*fn)(struct pt_regs *regs, u32 instr) = NULL; void __user *pc = (void __user *)instruction_pointer(regs); - if (!user_mode(regs)) - return 1; - - if (compat_thumb_mode(regs)) { + if (!user_mode(regs)) { + __le32 instr_le; + if (probe_kernel_address((__force __le32 *)pc, instr_le)) + goto exit; + instr = le32_to_cpu(instr_le); + } else if (compat_thumb_mode(regs)) { /* 16-bit Thumb instruction */ __le16 instr_le; if (get_user(instr_le, (__le16 __user *)pc)) @@ -352,6 +354,9 @@ void force_signal_inject(int signal, int code, unsigned long address) const char *desc; struct pt_regs *regs = current_pt_regs(); + if (WARN_ON(!user_mode(regs))) + return; + clear_siginfo(&info); switch (signal) { @@ -406,14 +411,10 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (call_undef_hook(regs) == 0) return; + BUG_ON(!user_mode(regs)); force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); } -void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) -{ - sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); -} - #define __user_cache_maint(insn, address, res) \ if (address >= user_addr_max()) { \ res = -EFAULT; \ @@ -437,7 +438,7 @@ void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) { unsigned long address; - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; int ret = 0; @@ -472,7 +473,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); pt_regs_write_reg(regs, rt, val); @@ -482,7 +483,7 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); pt_regs_write_reg(regs, rt, arch_counter_get_cntvct()); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); @@ -490,12 +491,28 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); pt_regs_write_reg(regs, rt, arch_timer_get_rate()); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } +static void mrs_handler(unsigned int esr, struct pt_regs *regs) +{ + u32 sysreg, rt; + + rt = ESR_ELx_SYS64_ISS_RT(esr); + sysreg = esr_sys64_to_sysreg(esr); + + if (do_emulate_mrs(regs, sysreg, rt) != 0) + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); +} + +static void wfi_handler(unsigned int esr, struct pt_regs *regs) +{ + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); +} + struct sys64_hook { unsigned int esr_mask; unsigned int esr_val; @@ -526,9 +543,176 @@ static struct sys64_hook sys64_hooks[] = { .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ, .handler = cntfrq_read_handler, }, + { + /* Trap read access to CPUID registers */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL, + .handler = mrs_handler, + }, + { + /* Trap WFI instructions executed in userspace */ + .esr_mask = ESR_ELx_WFx_MASK, + .esr_val = ESR_ELx_WFx_WFI_VAL, + .handler = wfi_handler, + }, {}, }; + +#ifdef CONFIG_COMPAT +#define PSTATE_IT_1_0_SHIFT 25 +#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT) +#define PSTATE_IT_7_2_SHIFT 10 +#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT) + +static u32 compat_get_it_state(struct pt_regs *regs) +{ + u32 it, pstate = regs->pstate; + + it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT; + it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2; + + return it; +} + +static void compat_set_it_state(struct pt_regs *regs, u32 it) +{ + u32 pstate_it; + + pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK; + pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK; + + regs->pstate &= ~PSR_AA32_IT_MASK; + regs->pstate |= pstate_it; +} + +static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs) +{ + int cond; + + /* Only a T32 instruction can trap without CV being set */ + if (!(esr & ESR_ELx_CV)) { + u32 it; + + it = compat_get_it_state(regs); + if (!it) + return true; + + cond = it >> 4; + } else { + cond = (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; + } + + return aarch32_opcode_cond_checks[cond](regs->pstate); +} + +static void advance_itstate(struct pt_regs *regs) +{ + u32 it; + + /* ARM mode */ + if (!(regs->pstate & PSR_AA32_T_BIT) || + !(regs->pstate & PSR_AA32_IT_MASK)) + return; + + it = compat_get_it_state(regs); + + /* + * If this is the last instruction of the block, wipe the IT + * state. Otherwise advance it. + */ + if (!(it & 7)) + it = 0; + else + it = (it & 0xe0) | ((it << 1) & 0x1f); + + compat_set_it_state(regs, it); +} + +static void arm64_compat_skip_faulting_instruction(struct pt_regs *regs, + unsigned int sz) +{ + advance_itstate(regs); + arm64_skip_faulting_instruction(regs, sz); +} + +static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT; + + pt_regs_write_reg(regs, reg, arch_timer_get_rate()); + arm64_compat_skip_faulting_instruction(regs, 4); +} + +static struct sys64_hook cp15_32_hooks[] = { + { + .esr_mask = ESR_ELx_CP15_32_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_32_ISS_SYS_CNTFRQ, + .handler = compat_cntfrq_read_handler, + }, + {}, +}; + +static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT; + int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT; + u64 val = arch_counter_get_cntvct(); + + pt_regs_write_reg(regs, rt, lower_32_bits(val)); + pt_regs_write_reg(regs, rt2, upper_32_bits(val)); + arm64_compat_skip_faulting_instruction(regs, 4); +} + +static struct sys64_hook cp15_64_hooks[] = { + { + .esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCT, + .handler = compat_cntvct_read_handler, + }, + {}, +}; + +asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) +{ + struct sys64_hook *hook, *hook_base; + + if (!cp15_cond_valid(esr, regs)) { + /* + * There is no T16 variant of a CP access, so we + * always advance PC by 4 bytes. + */ + arm64_compat_skip_faulting_instruction(regs, 4); + return; + } + + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_CP15_32: + hook_base = cp15_32_hooks; + break; + case ESR_ELx_EC_CP15_64: + hook_base = cp15_64_hooks; + break; + default: + do_undefinstr(regs); + return; + } + + for (hook = hook_base; hook->handler; hook++) + if ((hook->esr_mask & esr) == hook->esr_val) { + hook->handler(esr, regs); + return; + } + + /* + * New cp15 instructions may previously have been undefined at + * EL0. Fall back to our usual undefined instruction handler + * so that we handle these consistently. + */ + do_undefinstr(regs); +} +#endif + asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) { struct sys64_hook *hook; @@ -605,7 +789,6 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) handler[reason], smp_processor_id(), esr, esr_get_class_string(esr)); - die("Oops - bad mode", regs, 0); local_daif_mask(); panic("bad mode"); } diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 605d1b60469c..ab29c06a7d4b 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -138,6 +138,23 @@ SECTIONS EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */ NOTES + . = ALIGN(PAGE_SIZE); + idmap_pg_dir = .; + . += IDMAP_DIR_SIZE; + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + tramp_pg_dir = .; + . += PAGE_SIZE; +#endif + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + reserved_ttbr0 = .; + . += RESERVED_TTBR0_SIZE; +#endif + swapper_pg_dir = .; + . += PAGE_SIZE; + swapper_pg_end = .; + . = ALIGN(SEGMENT_ALIGN); __init_begin = .; __inittext_begin = .; @@ -216,21 +233,9 @@ SECTIONS BSS_SECTION(0, 0, 0) . = ALIGN(PAGE_SIZE); - idmap_pg_dir = .; - . += IDMAP_DIR_SIZE; - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_pg_dir = .; - . += PAGE_SIZE; -#endif - -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - reserved_ttbr0 = .; - . += RESERVED_TTBR0_SIZE; -#endif - swapper_pg_dir = .; - . += SWAPPER_DIR_SIZE; - swapper_pg_end = .; + init_pg_dir = .; + . += INIT_DIR_SIZE; + init_pg_end = .; __pecoff_data_size = ABSOLUTE(. - __initdata_begin); _end = .; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 07256b08226c..a6c9fbaeaefc 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -57,6 +57,45 @@ static u64 core_reg_offset_from_id(u64 id) return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); } +static int validate_core_offset(const struct kvm_one_reg *reg) +{ + u64 off = core_reg_offset_from_id(reg->id); + int size; + + switch (off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + case KVM_REG_ARM_CORE_REG(regs.sp): + case KVM_REG_ARM_CORE_REG(regs.pc): + case KVM_REG_ARM_CORE_REG(regs.pstate): + case KVM_REG_ARM_CORE_REG(sp_el1): + case KVM_REG_ARM_CORE_REG(elr_el1): + case KVM_REG_ARM_CORE_REG(spsr[0]) ... + KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): + size = sizeof(__u64); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + size = sizeof(__uint128_t); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + size = sizeof(__u32); + break; + + default: + return -EINVAL; + } + + if (KVM_REG_SIZE(reg->id) == size && + IS_ALIGNED(off, size / sizeof(__u32))) + return 0; + + return -EINVAL; +} + static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { /* @@ -76,6 +115,9 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) return -EFAULT; @@ -98,6 +140,9 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) return -EINVAL; @@ -107,17 +152,25 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { - u32 mode = (*(u32 *)valp) & PSR_AA32_MODE_MASK; + u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK; switch (mode) { case PSR_AA32_MODE_USR: + if (!system_supports_32bit_el0()) + return -EINVAL; + break; case PSR_AA32_MODE_FIQ: case PSR_AA32_MODE_IRQ: case PSR_AA32_MODE_SVC: case PSR_AA32_MODE_ABT: case PSR_AA32_MODE_UND: + if (!vcpu_el1_is_32bit(vcpu)) + return -EINVAL; + break; case PSR_MODE_EL0t: case PSR_MODE_EL1t: case PSR_MODE_EL1h: + if (vcpu_el1_is_32bit(vcpu)) + return -EINVAL; break; default: err = -EINVAL; diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index ea9225160786..4576b86a5579 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -65,6 +65,9 @@ __do_hyp_init: b.lo __kvm_handle_stub_hvc phys_to_ttbr x4, x0 +alternative_if ARM64_HAS_CNP + orr x4, x4, #TTBR_CNP_BIT +alternative_else_nop_endif msr ttbr0_el2, x4 mrs x4, tcr_el1 diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 9ce223944983..76d016b446b2 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -288,3 +288,14 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) vcpu->arch.sysregs_loaded_on_cpu = false; } + +void __hyp_text __kvm_enable_ssbs(void) +{ + u64 tmp; + + asm volatile( + "mrs %0, sctlr_el2\n" + "orr %0, %0, %1\n" + "msr sctlr_el2, %0" + : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); +} diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 68755fd70dcf..69ff9887f724 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -12,7 +12,7 @@ lib-y := clear_user.o delay.o copy_from_user.o \ # when supported by the CPU. Result and argument registers are handled # correctly, based on the function prototype. lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o -CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \ +CFLAGS_atomic_ll_sc.o := -ffixed-x1 -ffixed-x2 \ -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \ -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \ -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ @@ -25,3 +25,5 @@ KCOV_INSTRUMENT_atomic_ll_sc.o := n UBSAN_SANITIZE_atomic_ll_sc.o := n lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o + +obj-$(CONFIG_CRC32) += crc32.o diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S new file mode 100644 index 000000000000..5bc1e85b4e1c --- /dev/null +++ b/arch/arm64/lib/crc32.S @@ -0,0 +1,60 @@ +/* + * Accelerated CRC32(C) using AArch64 CRC instructions + * + * Copyright (C) 2016 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/alternative.h> +#include <asm/assembler.h> + + .cpu generic+crc + + .macro __crc32, c +0: subs x2, x2, #16 + b.mi 8f + ldp x3, x4, [x1], #16 +CPU_BE( rev x3, x3 ) +CPU_BE( rev x4, x4 ) + crc32\c\()x w0, w0, x3 + crc32\c\()x w0, w0, x4 + b.ne 0b + ret + +8: tbz x2, #3, 4f + ldr x3, [x1], #8 +CPU_BE( rev x3, x3 ) + crc32\c\()x w0, w0, x3 +4: tbz x2, #2, 2f + ldr w3, [x1], #4 +CPU_BE( rev w3, w3 ) + crc32\c\()w w0, w0, w3 +2: tbz x2, #1, 1f + ldrh w3, [x1], #2 +CPU_BE( rev16 w3, w3 ) + crc32\c\()h w0, w0, w3 +1: tbz x2, #0, 0f + ldrb w3, [x1] + crc32\c\()b w0, w0, w3 +0: ret + .endm + + .align 5 +ENTRY(crc32_le) +alternative_if_not ARM64_HAS_CRC32 + b crc32_le_base +alternative_else_nop_endif + __crc32 +ENDPROC(crc32_le) + + .align 5 +ENTRY(__crc32c_le) +alternative_if_not ARM64_HAS_CRC32 + b __crc32c_le_base +alternative_else_nop_endif + __crc32 c +ENDPROC(__crc32c_le) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index c127f94da8e2..1f0ea2facf24 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -88,7 +88,7 @@ void verify_cpu_asid_bits(void) } } -static void flush_context(unsigned int cpu) +static void flush_context(void) { int i; u64 asid; @@ -142,7 +142,7 @@ static bool check_update_reserved_asid(u64 asid, u64 newasid) return hit; } -static u64 new_context(struct mm_struct *mm, unsigned int cpu) +static u64 new_context(struct mm_struct *mm) { static u32 cur_idx = 1; u64 asid = atomic64_read(&mm->context.id); @@ -180,7 +180,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) /* We're out of ASIDs, so increment the global generation count */ generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION, &asid_generation); - flush_context(cpu); + flush_context(); /* We have more ASIDs than CPUs, so this will always succeed */ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); @@ -196,6 +196,9 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) unsigned long flags; u64 asid, old_active_asid; + if (system_supports_cnp()) + cpu_set_reserved_ttbr0(); + asid = atomic64_read(&mm->context.id); /* @@ -223,7 +226,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) /* Check that our ASID belongs to the current generation. */ asid = atomic64_read(&mm->context.id); if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) { - asid = new_context(mm, cpu); + asid = new_context(mm); atomic64_set(&mm->context.id, asid); } diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 65dfc8571bf8..fcb1f2a6d7c6 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -36,8 +36,8 @@ static const struct addr_marker address_markers[] = { #endif { MODULES_VADDR, "Modules start" }, { MODULES_END, "Modules end" }, - { VMALLOC_START, "vmalloc() Area" }, - { VMALLOC_END, "vmalloc() End" }, + { VMALLOC_START, "vmalloc() area" }, + { VMALLOC_END, "vmalloc() end" }, { FIXADDR_START, "Fixmap start" }, { FIXADDR_TOP, "Fixmap end" }, { PCI_IO_START, "PCI I/O start" }, @@ -46,7 +46,7 @@ static const struct addr_marker address_markers[] = { { VMEMMAP_START, "vmemmap start" }, { VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" }, #endif - { PAGE_OFFSET, "Linear Mapping" }, + { PAGE_OFFSET, "Linear mapping" }, { -1, NULL }, }; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 50b30ff30de4..d0e638ef3af6 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -37,6 +37,7 @@ #include <asm/cmpxchg.h> #include <asm/cpufeature.h> #include <asm/exception.h> +#include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/esr.h> #include <asm/sysreg.h> @@ -56,10 +57,16 @@ struct fault_info { }; static const struct fault_info fault_info[]; +static struct fault_info debug_fault_info[]; static inline const struct fault_info *esr_to_fault_info(unsigned int esr) { - return fault_info + (esr & 63); + return fault_info + (esr & ESR_ELx_FSC); +} + +static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr) +{ + return debug_fault_info + DBG_ESR_EVT(esr); } #ifdef CONFIG_KPROBES @@ -235,9 +242,8 @@ static bool is_el1_instruction_abort(unsigned int esr) return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; } -static inline bool is_el1_permission_fault(unsigned int esr, - struct pt_regs *regs, - unsigned long addr) +static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { unsigned int ec = ESR_ELx_EC(esr); unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; @@ -283,7 +289,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) return; - if (is_el1_permission_fault(esr, regs, addr)) { + if (is_el1_permission_fault(addr, esr, regs)) { if (esr & ESR_ELx_WNR) msg = "write to read-only memory"; else @@ -454,7 +460,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } - if (addr < TASK_SIZE && is_el1_permission_fault(esr, regs, addr)) { + if (addr < TASK_SIZE && is_el1_permission_fault(addr, esr, regs)) { /* regs->orig_addr_limit may be 0 if we entered from EL0 */ if (regs->orig_addr_limit == KERNEL_DS) die_kernel_fault("access to user memory with fs=KERNEL_DS", @@ -771,7 +777,7 @@ asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, if (addr > TASK_SIZE) arm64_apply_bp_hardening(); - local_irq_enable(); + local_daif_restore(DAIF_PROCCTX); do_mem_abort(addr, esr, regs); } @@ -785,7 +791,7 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr, if (user_mode(regs)) { if (instruction_pointer(regs) > TASK_SIZE) arm64_apply_bp_hardening(); - local_irq_enable(); + local_daif_restore(DAIF_PROCCTX); } clear_siginfo(&info); @@ -831,7 +837,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr); + const struct fault_info *inf = esr_to_debug_fault_info(esr); int rv; /* @@ -864,17 +870,3 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, return rv; } NOKPROBE_SYMBOL(do_debug_exception); - -#ifdef CONFIG_ARM64_PAN -void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) -{ - /* - * We modify PSTATE. This won't work from irq context as the PSTATE - * is discarded once we return from the exception. - */ - WARN_ON_ONCE(in_interrupt()); - - sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); - asm(SET_PSTATE_PAN(1)); -} -#endif /* CONFIG_ARM64_PAN */ diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 192b3ba07075..f58ea503ad01 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -117,11 +117,14 @@ static pte_t get_clear_flush(struct mm_struct *mm, /* * If HW_AFDBM is enabled, then the HW could turn on - * the dirty bit for any page in the set, so check - * them all. All hugetlb entries are already young. + * the dirty or accessed bit for any page in the set, + * so check them all. */ if (pte_dirty(pte)) orig_pte = pte_mkdirty(orig_pte); + + if (pte_young(pte)) + orig_pte = pte_mkyoung(orig_pte); } if (valid) { @@ -320,11 +323,40 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, return get_clear_flush(mm, addr, ptep, pgsize, ncontig); } +/* + * huge_ptep_set_access_flags will update access flags (dirty, accesssed) + * and write permission. + * + * For a contiguous huge pte range we need to check whether or not write + * permission has to change only on the first pte in the set. Then for + * all the contiguous ptes we need to check whether or not there is a + * discrepancy between dirty or young. + */ +static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig) +{ + int i; + + if (pte_write(pte) != pte_write(huge_ptep_get(ptep))) + return 1; + + for (i = 0; i < ncontig; i++) { + pte_t orig_pte = huge_ptep_get(ptep + i); + + if (pte_dirty(pte) != pte_dirty(orig_pte)) + return 1; + + if (pte_young(pte) != pte_young(orig_pte)) + return 1; + } + + return 0; +} + int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty) { - int ncontig, i, changed = 0; + int ncontig, i; size_t pgsize = 0; unsigned long pfn = pte_pfn(pte), dpfn; pgprot_t hugeprot; @@ -336,19 +368,23 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); dpfn = pgsize >> PAGE_SHIFT; + if (!__cont_access_flags_changed(ptep, pte, ncontig)) + return 0; + orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); - if (!pte_same(orig_pte, pte)) - changed = 1; - /* Make sure we don't lose the dirty state */ + /* Make sure we don't lose the dirty or young state */ if (pte_dirty(orig_pte)) pte = pte_mkdirty(pte); + if (pte_young(orig_pte)) + pte = pte_mkyoung(pte); + hugeprot = pte_pgprot(pte); for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot)); - return changed; + return 1; } void huge_ptep_set_wrprotect(struct mm_struct *mm, diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 787e27964ab9..3cf87341859f 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -284,7 +284,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) #endif /* CONFIG_NUMA */ -#ifdef CONFIG_HAVE_ARCH_PFN_VALID int pfn_valid(unsigned long pfn) { phys_addr_t addr = pfn << PAGE_SHIFT; @@ -294,7 +293,6 @@ int pfn_valid(unsigned long pfn) return memblock_is_map_memory(addr); } EXPORT_SYMBOL(pfn_valid); -#endif #ifndef CONFIG_SPARSEMEM static void __init arm64_memory_present(void) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 12145874c02b..fccb1a6f8c6f 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -192,7 +192,7 @@ void __init kasan_init(void) /* * We are going to perform proper setup of shadow memory. - * At first we should unmap early shadow (clear_pgds() call bellow). + * At first we should unmap early shadow (clear_pgds() call below). * However, instrumented code couldn't execute without shadow memory. * tmp_pg_dir used to keep early shadow mapped until full shadow * setup will be finished. diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 8080c9f489c3..9498c15b847b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -67,6 +67,24 @@ static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; +static DEFINE_SPINLOCK(swapper_pgdir_lock); + +void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) +{ + pgd_t *fixmap_pgdp; + + spin_lock(&swapper_pgdir_lock); + fixmap_pgdp = pgd_set_fixmap(__pa_symbol(pgdp)); + WRITE_ONCE(*fixmap_pgdp, pgd); + /* + * We need dsb(ishst) here to ensure the page-table-walker sees + * our new entry before set_p?d() returns. The fixmap's + * flush_tlb_kernel_range() via clear_fixmap() does this for us. + */ + pgd_clear_fixmap(); + spin_unlock(&swapper_pgdir_lock); +} + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -629,34 +647,18 @@ static void __init map_kernel(pgd_t *pgdp) */ void __init paging_init(void) { - phys_addr_t pgd_phys = early_pgtable_alloc(); - pgd_t *pgdp = pgd_set_fixmap(pgd_phys); + pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir)); map_kernel(pgdp); map_mem(pgdp); - /* - * We want to reuse the original swapper_pg_dir so we don't have to - * communicate the new address to non-coherent secondaries in - * secondary_entry, and so cpu_switch_mm can generate the address with - * adrp+add rather than a load from some global variable. - * - * To do this we need to go via a temporary pgd. - */ - cpu_replace_ttbr1(__va(pgd_phys)); - memcpy(swapper_pg_dir, pgdp, PGD_SIZE); - cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); - pgd_clear_fixmap(); - memblock_free(pgd_phys, PAGE_SIZE); - /* - * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd - * allocated with it. - */ - memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE, - __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir) - - PAGE_SIZE); + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + init_mm.pgd = swapper_pg_dir; + + memblock_free(__pa_symbol(init_pg_dir), + __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); } /* diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 146c04ceaa51..d7b66fc5e1c5 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -391,7 +391,6 @@ static int __init numa_init(int (*init_func)(void)) nodes_clear(numa_nodes_parsed); nodes_clear(node_possible_map); nodes_clear(node_online_map); - numa_free_distance(); ret = numa_alloc_distance(); if (ret < 0) @@ -399,20 +398,24 @@ static int __init numa_init(int (*init_func)(void)) ret = init_func(); if (ret < 0) - return ret; + goto out_free_distance; if (nodes_empty(numa_nodes_parsed)) { pr_info("No NUMA configuration found\n"); - return -EINVAL; + ret = -EINVAL; + goto out_free_distance; } ret = numa_register_nodes(); if (ret < 0) - return ret; + goto out_free_distance; setup_node_to_cpumask_map(); return 0; +out_free_distance: + numa_free_distance(); + return ret; } /** @@ -432,7 +435,7 @@ static int __init dummy_numa_init(void) if (numa_off) pr_info("NUMA disabled\n"); /* Forced off on command line. */ pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", - 0LLU, PFN_PHYS(max_pfn) - 1); + memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1); for_each_memblock(memory, mblk) { ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 03646e6a2ef4..2c75b0b903ae 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -160,6 +160,12 @@ ENTRY(cpu_do_switch_mm) mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id phys_to_ttbr x3, x0 + +alternative_if ARM64_HAS_CNP + cbz x1, 1f // skip CNP for reserved ASID + orr x3, x3, #TTBR_CNP_BIT +1: +alternative_else_nop_endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN bfi x3, x1, #48, #16 // set the ASID field in TTBR0 #endif @@ -184,7 +190,7 @@ ENDPROC(cpu_do_switch_mm) .endm /* - * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd) + * void idmap_cpu_replace_ttbr1(phys_addr_t ttbr1) * * This is the low-level counterpart to cpu_replace_ttbr1, and should not be * called by anything else. It can only be executed from a TTBR0 mapping. @@ -194,8 +200,7 @@ ENTRY(idmap_cpu_replace_ttbr1) __idmap_cpu_set_reserved_ttbr1 x1, x3 - phys_to_ttbr x3, x0 - msr ttbr1_el1, x3 + msr ttbr1_el1, x0 isb restore_daif x2 |