From 34f66c4c4d5518c11bfb7d10defff8f814c9f28a Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Mon, 16 Oct 2023 11:24:36 +0100
Subject: arm64: Use a positive cpucap for FP/SIMD

Currently we have a negative cpucap which describes the *absence* of
FP/SIMD rather than the *presence* of FP/SIMD. This largely works, but
is somewhat awkward relative to other cpucaps that describe the presence
of a feature, and it would be nicer to have a cpucap which describes the
presence of FP/SIMD:

* This will allow the cpucap to be treated as a standard
  ARM64_CPUCAP_SYSTEM_FEATURE, which can be detected with the standard
  has_cpuid_feature() function and ARM64_CPUID_FIELDS() description.

* This ensures that the cpucap will only transition from not-present to
  present, reducing the risk of unintentional and/or unsafe usage of
  FP/SIMD before cpucaps are finalized.

* This will allow using arm64_cpu_capabilities::cpu_enable() to enable
  the use of FP/SIMD later, with FP/SIMD being disabled at boot time
  otherwise. This will ensure that any unintentional and/or unsafe usage
  of FP/SIMD prior to this is trapped, and will ensure that FP/SIMD is
  never unintentionally enabled for userspace in mismatched big.LITTLE
  systems.

This patch replaces the negative ARM64_HAS_NO_FPSIMD cpucap with a
positive ARM64_HAS_FPSIMD cpucap, making changes as described above.

Note that as FP/SIMD will now be trapped when not supported system-wide,
do_fpsimd_acc() must handle these traps in the same way as for SVE and
SME. The commentary in fpsimd_restore_current_state() is updated to
describe the new scheme.

No users of system_supports_fpsimd() need to know that FP/SIMD is
available prior to alternatives being patched, so this is updated to use
alternative_has_cap_likely() to check for the ARM64_HAS_FPSIMD cpucap,
without generating code to test the system_cpucaps bitmap.

Signed-off-by: Mark Rutland
Reviewed-by: Mark Brown
Cc: Suzuki K Poulose
Cc: Will Deacon
Signed-off-by: Catalin Marinas
---
 arch/arm64/mm/proc.S | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 14fdf645edc8..f66c37a1610e 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -405,8 +405,7 @@ SYM_FUNC_START(__cpu_setup)
 	tlbi	vmalle1				// Invalidate local TLB
 	dsb	nsh
 
-	mov	x1, #3 << 20
-	msr	cpacr_el1, x1			// Enable FP/ASIMD
+	msr	cpacr_el1, xzr			// Reset cpacr_el1
 	mov	x1, #1 << 12			// Reset mdscr_el1 and disable
 	msr	mdscr_el1, x1			// access to the DCC from EL0
 	isb					// Unmask debug exceptions now,
--
cgit v1.2.3
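
For context, since the log above is limited to arch/arm64/mm, here is a
sketch of the shape of the helpers this commit describes. This is a
reconstruction assuming the names used in the commit message and the
CPACR_EL1_FPEN field definition (FPEN, bits [21:20]) from the generated
sysreg headers, not hunks from the patch itself:

	/*
	 * Sketch: system_supports_fpsimd() checks the positive cpucap
	 * via a patched alternative branch, generating no test of the
	 * system_cpucaps bitmap.
	 */
	static __always_inline bool system_supports_fpsimd(void)
	{
		return alternative_has_cap_likely(ARM64_HAS_FPSIMD);
	}

	/*
	 * Sketch: a cpu_enable() callback that re-enables FP/SIMD once
	 * the cpucap is detected, undoing the trap-by-default
	 * cpacr_el1 reset in __cpu_setup() above.
	 */
	static void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__always_unused p)
	{
		sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_FPEN);
		isb();
	}

Until the cpu_enable() callback runs, any FP/SIMD use traps, which is
what lets do_fpsimd_acc() catch unintentional early usage.
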
From 4e00f1d9b7ff6b681fbd048fe5389089ae65bc11 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Mon, 16 Oct 2023 11:24:45 +0100
Subject: arm64: Avoid cpus_have_const_cap() for ARM64_HAS_EPAN

We use cpus_have_const_cap() to check for ARM64_HAS_EPAN but this is
not necessary and alternative_has_cap() or cpus_have_cap() would be
preferable.

For historical reasons, cpus_have_const_cap() is more complicated than
it needs to be. Before cpucaps are finalized, it will perform a bitmap
test of the system_cpucaps bitmap, and once cpucaps are finalized it
will use an alternative branch. This used to be necessary to handle
some race conditions in the window between cpucap detection and the
subsequent patching of alternatives and static branches, where
different branches could be out-of-sync with one another (or w.r.t.
alternative sequences). Now that we use alternative branches instead of
static branches, these are all patched atomically w.r.t. one another,
and there are only a handful of cases that need special care in the
window between cpucap detection and alternative patching.

Due to the above, it would be nice to remove cpus_have_const_cap(), and
migrate callers over to alternative_has_cap_*(), cpus_have_final_cap(),
or cpus_have_cap() depending on their requirements. This will remove
redundant instructions and improve code generation, and will make it
easier to determine how each callsite will behave before, during, and
after alternative patching.

The ARM64_HAS_EPAN cpucap is used to affect two things:

1) The permission bits used for userspace executable mappings, which
   are chosen by adjust_protection_map(), which is an arch_initcall.
   This is called after the ARM64_HAS_EPAN cpucap has been detected and
   alternatives have been patched, and before any userspace translation
   tables exist.

2) The handling of faults taken from (user or kernel) accesses to
   userspace executable mappings in do_page_fault(). Userspace
   translation tables are created after adjust_protection_map() is
   called, and hence after the ARM64_HAS_EPAN cpucap has been detected
   and alternatives have been patched.

Neither of these runs until after the ARM64_HAS_EPAN cpucap has been
detected and alternatives have been patched, and hence there's no need
to use cpus_have_const_cap(). Since adjust_protection_map() is only
executed once at boot time it would be best for it to use
cpus_have_cap(), and since do_page_fault() is executed frequently it
would be best for it to use alternative_has_cap_unlikely().

This patch replaces the uses of cpus_have_const_cap() with
cpus_have_cap() and alternative_has_cap_unlikely(), which will avoid
generating redundant code, and should be better for all subsequent
calls at runtime. The ARM64_HAS_EPAN cpucap is added to
cpucap_is_possible() so that code can be elided entirely when the
cpucap is not possible.

Signed-off-by: Mark Rutland
Cc: James Morse
Cc: Vladimir Murzin
Cc: Suzuki K Poulose
Cc: Will Deacon
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/cpucaps.h | 2 ++
 arch/arm64/mm/fault.c            | 2 +-
 arch/arm64/mm/mmap.c             | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 07c9271b534d..af9550147dd0 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -21,6 +21,8 @@ cpucap_is_possible(const unsigned int cap)
 	switch (cap) {
 	case ARM64_HAS_PAN:
 		return IS_ENABLED(CONFIG_ARM64_PAN);
+	case ARM64_HAS_EPAN:
+		return IS_ENABLED(CONFIG_ARM64_EPAN);
 	case ARM64_SVE:
 		return IS_ENABLED(CONFIG_ARM64_SVE);
 	case ARM64_SME:
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 2e5d1e238af9..460d799e1296 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -571,7 +571,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 		/* Write implies read */
 		vm_flags |= VM_WRITE;
 		/* If EPAN is absent then exec implies read */
-		if (!cpus_have_const_cap(ARM64_HAS_EPAN))
+		if (!alternative_has_cap_unlikely(ARM64_HAS_EPAN))
 			vm_flags |= VM_EXEC;
 	}
 
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 8f5b7ce857ed..645fe60d000f 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -68,7 +68,7 @@ static int __init adjust_protection_map(void)
 	 * With Enhanced PAN we can honour the execute-only permissions as
 	 * there is no PAN override with such mappings.
 	 */
-	if (cpus_have_const_cap(ARM64_HAS_EPAN)) {
+	if (cpus_have_cap(ARM64_HAS_EPAN)) {
 		protection_map[VM_EXEC] = PAGE_EXECONLY;
 		protection_map[VM_EXEC | VM_SHARED] = PAGE_EXECONLY;
 	}
--
cgit v1.2.3
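
As a sketch of the general recipe described above (ARM64_HAS_FOO and
do_foo() are hypothetical placeholders, not code from this series):

	/*
	 * Before: cpus_have_const_cap() tests the system_cpucaps bitmap
	 * until cpucaps are finalized, then uses an alternative branch.
	 */
	if (cpus_have_const_cap(ARM64_HAS_FOO))
		do_foo();

	/*
	 * After, for a one-shot boot-time path that runs after cpucap
	 * detection: a plain bitmap test, correct regardless of whether
	 * alternatives have been patched yet.
	 */
	if (cpus_have_cap(ARM64_HAS_FOO))
		do_foo();

	/*
	 * After, for a hot path that cannot run before alternatives are
	 * patched: a NOP/branch patched in place, with no bitmap load.
	 * Note that this reads as false until patching completes.
	 */
	if (alternative_has_cap_unlikely(ARM64_HAS_FOO))
		do_foo();
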
From 412cb3801dfac4703032f438fa538e850cc14b6d Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Mon, 16 Oct 2023 11:24:57 +0100
Subject: arm64: Avoid cpus_have_const_cap() for ARM64_WORKAROUND_2645198

We use cpus_have_const_cap() to check for ARM64_WORKAROUND_2645198 but
this is not necessary and alternative_has_cap() would be preferable.

For historical reasons, cpus_have_const_cap() is more complicated than
it needs to be. Before cpucaps are finalized, it will perform a bitmap
test of the system_cpucaps bitmap, and once cpucaps are finalized it
will use an alternative branch. This used to be necessary to handle
some race conditions in the window between cpucap detection and the
subsequent patching of alternatives and static branches, where
different branches could be out-of-sync with one another (or w.r.t.
alternative sequences). Now that we use alternative branches instead of
static branches, these are all patched atomically w.r.t. one another,
and there are only a handful of cases that need special care in the
window between cpucap detection and alternative patching.

Due to the above, it would be nice to remove cpus_have_const_cap(), and
migrate callers over to alternative_has_cap_*(), cpus_have_final_cap(),
or cpus_have_cap() depending on their requirements. This will remove
redundant instructions and improve code generation, and will make it
easier to determine how each callsite will behave before, during, and
after alternative patching.

The ARM64_WORKAROUND_2645198 cpucap is detected and patched before any
userspace translation tables exist, and the workaround is only
necessary when manipulating userspace translation tables which are in
use. Thus it is not necessary to use cpus_have_const_cap(), and
alternative_has_cap() is equivalent.

This patch replaces the use of cpus_have_const_cap() with
alternative_has_cap_unlikely(), which will avoid generating code to
test the system_cpucaps bitmap and should be better for all subsequent
calls at runtime. The ARM64_WORKAROUND_2645198 cpucap is added to
cpucap_is_possible() so that code can be elided entirely when the
cpucap is not possible, and the now-redundant IS_ENABLED() checks are
removed.

Signed-off-by: Mark Rutland
Cc: Suzuki K Poulose
Cc: Will Deacon
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/cpucaps.h | 2 ++
 arch/arm64/mm/hugetlbpage.c      | 3 +--
 arch/arm64/mm/mmu.c              | 3 +--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 5613d9e813b2..c5b67a64613e 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -48,6 +48,8 @@ cpucap_is_possible(const unsigned int cap)
 		return IS_ENABLED(CONFIG_ARM64_ERRATUM_843419);
 	case ARM64_WORKAROUND_1742098:
 		return IS_ENABLED(CONFIG_ARM64_ERRATUM_1742098);
+	case ARM64_WORKAROUND_2645198:
+		return IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198);
 	case ARM64_WORKAROUND_2658417:
 		return IS_ENABLED(CONFIG_ARM64_ERRATUM_2658417);
 	}
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 9c52718ea750..e9fc56e4f98c 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -555,8 +555,7 @@ bool __init arch_hugetlb_valid_size(unsigned long size)
 
 pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
 {
-	if (IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198) &&
-	    cpus_have_const_cap(ARM64_WORKAROUND_2645198)) {
+	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) {
 		/*
 		 * Break-before-make (BBM) is required for all user space mappings
 		 * when the permission changes from executable to non-executable
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 47781bec6171..15f6347d23b6 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1469,8 +1469,7 @@ early_initcall(prevent_bootmem_remove_init);
 
 pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
 {
-	if (IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198) &&
-	    cpus_have_const_cap(ARM64_WORKAROUND_2645198)) {
+	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) {
 		/*
 		 * Break-before-make (BBM) is required for all user space mappings
 		 * when the permission changes from executable to non-executable
--
cgit v1.2.3
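
For completeness, a simplified sketch of why the open-coded
IS_ENABLED() checks become redundant (illustrative only; the real
helpers live in <asm/cpucaps.h> and <asm/alternative-macros.h>, and
sketch_has_cap() is a hypothetical stand-in):

	/*
	 * Sketch: the capability helpers first ask cpucap_is_possible(),
	 * folding the Kconfig test into the helper rather than
	 * open-coding it at each callsite.
	 */
	static __always_inline bool sketch_has_cap(const unsigned int cap)
	{
		/*
		 * With CONFIG_ARM64_ERRATUM_2645198=n this is
		 * compile-time false for ARM64_WORKAROUND_2645198, so
		 * callers such as ptep_modify_prot_start() have the BBM
		 * fallback elided entirely, just as the old
		 * IS_ENABLED(...) && cpus_have_const_cap(...) pairing
		 * allowed.
		 */
		if (!cpucap_is_possible(cap))
			return false;

		/*
		 * Otherwise a real check: a bitmap test stands in here
		 * for the patched alternative branch.
		 */
		return test_bit(cap, system_cpucaps);
	}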