Diffstat (limited to 'arch/arm64')
39 files changed, 1101 insertions, 872 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a182295e6f08..de9290d52fca 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -42,6 +42,7 @@ config ARM64
 	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
 	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
 	select ARCH_HAS_NONLEAF_PMD_YOUNG if ARM64_HAFT
+	select ARCH_HAS_PREEMPT_LAZY
 	select ARCH_HAS_PTDUMP
 	select ARCH_HAS_PTE_DEVMAP
 	select ARCH_HAS_PTE_SPECIAL
@@ -134,7 +135,6 @@ config ARM64
 	select COMMON_CLK
 	select CPU_PM if (SUSPEND || CPU_IDLE)
 	select CPUMASK_OFFSTACK if NR_CPUS > 256
-	select CRC32
 	select DCACHE_WORD_ACCESS
 	select DYNAMIC_FTRACE if FUNCTION_TRACER
 	select DMA_BOUNCE_UNALIGNED_KMALLOC
@@ -333,9 +333,9 @@ config ARCH_MMAP_RND_BITS_MAX
 	default 24 if ARM64_VA_BITS=39
 	default 27 if ARM64_VA_BITS=42
 	default 30 if ARM64_VA_BITS=47
-	default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES
-	default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES
-	default 33 if ARM64_VA_BITS=48
+	default 29 if (ARM64_VA_BITS=48 || ARM64_VA_BITS=52) && ARM64_64K_PAGES
+	default 31 if (ARM64_VA_BITS=48 || ARM64_VA_BITS=52) && ARM64_16K_PAGES
+	default 33 if (ARM64_VA_BITS=48 || ARM64_VA_BITS=52)
 	default 14 if ARM64_64K_PAGES
 	default 16 if ARM64_16K_PAGES
 	default 18
@@ -2285,7 +2285,6 @@ config ARM64_SME
 	bool "ARM Scalable Matrix Extension support"
 	default y
 	depends on ARM64_SVE
-	depends on BROKEN
 	help
 	  The Scalable Matrix Extension (SME) is an extension to the AArch64
 	  execution state which utilises a substantial subset of the SVE
diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi
index 4936fa5b98ff..8eddf0c96098 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi
+++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi
@@ -3752,60 +3752,83 @@
 		};

 		gpu_opp_table: opp-table {
-			compatible = "operating-points-v2";
+			compatible = "operating-points-v2-adreno", "operating-points-v2";
+
+			opp-1250000000 {
+				opp-hz = /bits/ 64 <1250000000>;
+				opp-level = <RPMH_REGULATOR_LEVEL_TURBO_L3>;
+				opp-peak-kBps = <16500000>;
+				qcom,opp-acd-level = <0xa82a5ffd>;
+			};
+
+			opp-1175000000 {
+				opp-hz = /bits/ 64 <1175000000>;
+				opp-level = <RPMH_REGULATOR_LEVEL_TURBO_L2>;
+				opp-peak-kBps = <14398438>;
+				qcom,opp-acd-level = <0xa82a5ffd>;
+			};

 			opp-1100000000 {
 				opp-hz = /bits/ 64 <1100000000>;
 				opp-level = <RPMH_REGULATOR_LEVEL_TURBO_L1>;
-				opp-peak-kBps = <16500000>;
+				opp-peak-kBps = <14398438>;
+				qcom,opp-acd-level = <0xa82a5ffd>;
 			};

 			opp-1000000000 {
 				opp-hz = /bits/ 64 <1000000000>;
 				opp-level = <RPMH_REGULATOR_LEVEL_TURBO>;
 				opp-peak-kBps = <14398438>;
+				qcom,opp-acd-level = <0xa82b5ffd>;
 			};

 			opp-925000000 {
 				opp-hz = /bits/ 64 <925000000>;
 				opp-level = <RPMH_REGULATOR_LEVEL_NOM_L1>;
 				opp-peak-kBps = <14398438>;
+				qcom,opp-acd-level = <0xa82b5ffd>;
 			};

 			opp-800000000 {
 				opp-hz = /bits/ 64 <800000000>;
 				opp-level = <RPMH_REGULATOR_LEVEL_NOM>;
 				opp-peak-kBps = <12449219>;
+				qcom,opp-acd-level = <0xa82c5ffd>;
 			};

 			opp-744000000 {
 				opp-hz = /bits/ 64 <744000000>;
 				opp-level = <RPMH_REGULATOR_LEVEL_SVS_L2>;
 				opp-peak-kBps = <10687500>;
+				qcom,opp-acd-level = <0x882e5ffd>;
 			};

 			opp-687000000 {
 				opp-hz = /bits/ 64 <687000000>;
 				opp-level = <RPMH_REGULATOR_LEVEL_SVS_L1>;
 				opp-peak-kBps = <8171875>;
+				qcom,opp-acd-level = <0x882e5ffd>;
 			};

 			opp-550000000 {
 				opp-hz = /bits/ 64 <550000000>;
 				opp-level = <RPMH_REGULATOR_LEVEL_SVS>;
 				opp-peak-kBps = <6074219>;
+				qcom,opp-acd-level = <0xc0285ffd>;
 			};

 			opp-390000000 {
 				opp-hz = /bits/ 64 <390000000>;
 				opp-level = <RPMH_REGULATOR_LEVEL_LOW_SVS>;
 				opp-peak-kBps = <3000000>;
+				qcom,opp-acd-level = <0xc0285ffd>;
 			};

 			opp-300000000 {
 				opp-hz = /bits/ 64 <300000000>;
 				opp-level = <RPMH_REGULATOR_LEVEL_LOW_SVS_D1>;
 				opp-peak-kBps = <2136719>;
+				qcom,opp-acd-level = <0xc02b5ffd>;
 			};
 		};
 	};
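Note: the ARCH_MMAP_RND_BITS_MAX hunk above extends the 48-bit defaults to 52-bit VA configurations, so a 52-bit-VA/4K kernel keeps 33 bits of mmap randomization instead of falling through to the 18-bit default. The size of the resulting randomization window is simple arithmetic (standalone sketch; mmap_rnd_window() is an illustrative name, not a kernel function):

    #include <stdio.h>

    /* Bytes of mmap randomization: the kernel's arch_mmap_rnd() picks
     * rnd_bits of entropy and shifts them by PAGE_SHIFT. */
    static unsigned long long mmap_rnd_window(unsigned int rnd_bits,
                                              unsigned int page_shift)
    {
            return 1ULL << (rnd_bits + page_shift);
    }

    int main(void)
    {
            /* 4K pages (PAGE_SHIFT = 12), 48/52-bit VA: 33 bits => 32 TiB. */
            printf("%llu TiB\n", mmap_rnd_window(33, 12) >> 40);
            return 0;
    }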
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 370ad70b4be8..a61154545c89 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1010,6 +1010,7 @@ CONFIG_SND_SOC_ROCKCHIP_RT5645=m
 CONFIG_SND_SOC_RK3399_GRU_SOUND=m
 CONFIG_SND_SOC_SAMSUNG=y
 CONFIG_SND_SOC_RCAR=m
+CONFIG_SND_SOC_MSIOF=m
 CONFIG_SND_SOC_RZ=m
 CONFIG_SND_SOC_SOF_TOPLEVEL=y
 CONFIG_SND_SOC_SOF_OF=y
@@ -1474,29 +1475,6 @@ CONFIG_QCOM_WCNSS_CTRL=m
 CONFIG_QCOM_APR=m
 CONFIG_QCOM_ICC_BWMON=m
 CONFIG_QCOM_PBS=m
-CONFIG_ARCH_R8A77995=y
-CONFIG_ARCH_R8A77990=y
-CONFIG_ARCH_R8A77951=y
-CONFIG_ARCH_R8A77965=y
-CONFIG_ARCH_R8A77960=y
-CONFIG_ARCH_R8A77961=y
-CONFIG_ARCH_R8A779F0=y
-CONFIG_ARCH_R8A77980=y
-CONFIG_ARCH_R8A77970=y
-CONFIG_ARCH_R8A779A0=y
-CONFIG_ARCH_R8A779G0=y
-CONFIG_ARCH_R8A779H0=y
-CONFIG_ARCH_R8A774C0=y
-CONFIG_ARCH_R8A774E1=y
-CONFIG_ARCH_R8A774A1=y
-CONFIG_ARCH_R8A774B1=y
-CONFIG_ARCH_R9A07G043=y
-CONFIG_ARCH_R9A07G044=y
-CONFIG_ARCH_R9A07G054=y
-CONFIG_ARCH_R9A08G045=y
-CONFIG_ARCH_R9A09G011=y
-CONFIG_ARCH_R9A09G047=y
-CONFIG_ARCH_R9A09G057=y
 CONFIG_ROCKCHIP_IODOMAIN=y
 CONFIG_ARCH_TEGRA_132_SOC=y
 CONFIG_ARCH_TEGRA_210_SOC=y
@@ -1550,10 +1528,11 @@ CONFIG_PWM_IMX27=m
 CONFIG_PWM_MESON=m
 CONFIG_PWM_MTK_DISP=m
 CONFIG_PWM_MEDIATEK=m
-CONFIG_PWM_RCAR=m
+CONFIG_PWM_RENESAS_RCAR=m
+CONFIG_PWM_RENESAS_RZG2L_GPT=m
+CONFIG_PWM_RENESAS_RZ_MTU3=m
 CONFIG_PWM_RENESAS_TPU=m
 CONFIG_PWM_ROCKCHIP=y
-CONFIG_PWM_RZ_MTU3=m
 CONFIG_PWM_SAMSUNG=y
 CONFIG_PWM_SL28CPLD=m
 CONFIG_PWM_SUN4I=m
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 81e4157f92b7..71493b760b83 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -44,6 +44,7 @@ struct cpuinfo_arm64 {
 	u64		reg_dczid;
 	u64		reg_midr;
 	u64		reg_revidr;
+	u64		reg_aidr;
 	u64		reg_gmid;
 	u64		reg_smidr;
 	u64		reg_mpamidr;
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index dffff6763812..661735616787 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -134,6 +134,7 @@
 #define HISI_CPU_PART_TSV110		0xD01
 #define HISI_CPU_PART_HIP09		0xD02
+#define HISI_CPU_PART_HIP12		0xD06

 #define APPLE_CPU_PART_M1_ICESTORM	0x022
 #define APPLE_CPU_PART_M1_FIRESTORM	0x023
@@ -222,6 +223,7 @@
 #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
 #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
 #define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09)
+#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
 #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
 #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
 #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index d40e427ddad9..f6d72ca03133 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -204,19 +204,21 @@
 	orr	x0, x0, #(1 << 62)

 .Lskip_spe_fgt_\@:
+
+.Lset_debug_fgt_\@:
 	msr_s	SYS_HDFGRTR_EL2, x0
 	msr_s	SYS_HDFGWTR_EL2, x0

 	mov	x0, xzr
 	mrs	x1, id_aa64pfr1_el1
 	ubfx	x1, x1, #ID_AA64PFR1_EL1_SME_SHIFT, #4
-	cbz	x1, .Lskip_debug_fgt_\@
+	cbz	x1, .Lskip_sme_fgt_\@

 	/* Disable nVHE traps of TPIDR2 and SMPRI */
 	orr	x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK
 	orr	x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK

-.Lskip_debug_fgt_\@:
+.Lskip_sme_fgt_\@:
 	mrs_s	x1, SYS_ID_AA64MMFR3_EL1
 	ubfx	x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4
 	cbz	x1, .Lskip_pie_fgt_\@
@@ -237,12 +239,14 @@
 	/* GCS depends on PIE so we don't check it if PIE is absent */
 	mrs_s	x1, SYS_ID_AA64PFR1_EL1
 	ubfx	x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
-	cbz	x1, .Lset_fgt_\@
+	cbz	x1, .Lskip_gce_fgt_\@

 	/* Disable traps of access to GCS registers at EL0 and EL1 */
 	orr	x0, x0, #HFGxTR_EL2_nGCS_EL1_MASK
 	orr	x0, x0, #HFGxTR_EL2_nGCS_EL0_MASK

+.Lskip_gce_fgt_\@:
+
 .Lset_fgt_\@:
 	msr_s	SYS_HFGRTR_EL2, x0
 	msr_s	SYS_HFGWTR_EL2, x0
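Note: with HISI_CPU_PART_HIP12 and MIDR_HISI_HIP12 defined above, the new part can be matched like any other entry in cputype.h. A minimal sketch using the existing helpers (the function name is illustrative, not from the patch):

    #include <asm/cputype.h>

    /* Illustrative: does the current CPU identify as HiSilicon HIP12? */
    static bool cpu_is_hip12(void)
    {
            u32 midr = read_cpuid_id();

            return MIDR_IMPLEMENTOR(midr) == ARM_CPU_IMP_HISI &&
                   MIDR_PARTNUM(midr) == HISI_CPU_PART_HIP12;
    }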
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index e4f77757937e..71f0cbf7b288 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -378,12 +378,14 @@
 /*
  * ISS values for SME traps
  */
-
-#define ESR_ELx_SME_ISS_SME_DISABLED	0
-#define ESR_ELx_SME_ISS_ILL		1
-#define ESR_ELx_SME_ISS_SM_DISABLED	2
-#define ESR_ELx_SME_ISS_ZA_DISABLED	3
-#define ESR_ELx_SME_ISS_ZT_DISABLED	4
+#define ESR_ELx_SME_ISS_SMTC_MASK		GENMASK(2, 0)
+#define ESR_ELx_SME_ISS_SMTC(esr)		((esr) & ESR_ELx_SME_ISS_SMTC_MASK)
+
+#define ESR_ELx_SME_ISS_SMTC_SME_DISABLED	0
+#define ESR_ELx_SME_ISS_SMTC_ILL		1
+#define ESR_ELx_SME_ISS_SMTC_SM_DISABLED	2
+#define ESR_ELx_SME_ISS_SMTC_ZA_DISABLED	3
+#define ESR_ELx_SME_ISS_SMTC_ZT_DISABLED	4

 /* ISS field definitions for MOPS exceptions */
 #define ESR_ELx_MOPS_ISS_MEM_INST	(UL(1) << 24)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 564bc09b3e06..b8cf0ea43cc0 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -6,6 +6,7 @@
 #define __ASM_FP_H

 #include <asm/errno.h>
+#include <asm/percpu.h>
 #include <asm/ptrace.h>
 #include <asm/processor.h>
 #include <asm/sigcontext.h>
@@ -76,7 +77,6 @@ extern void fpsimd_load_state(struct user_fpsimd_state *state);
 extern void fpsimd_thread_switch(struct task_struct *next);
 extern void fpsimd_flush_thread(void);

-extern void fpsimd_signal_preserve_current_state(void);
 extern void fpsimd_preserve_current_state(void);
 extern void fpsimd_restore_current_state(void);
 extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
@@ -93,9 +93,12 @@ struct cpu_fp_state {
 	enum fp_type to_save;
 };

+DECLARE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
+
 extern void fpsimd_bind_state_to_cpu(struct cpu_fp_state *fp_state);

 extern void fpsimd_flush_task_state(struct task_struct *target);
+extern void fpsimd_save_and_flush_current_state(void);
 extern void fpsimd_save_and_flush_cpu_state(void);

 static inline bool thread_sm_enabled(struct thread_struct *thread)
@@ -108,6 +111,8 @@ static inline bool thread_za_enabled(struct thread_struct *thread)
 	return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK);
 }

+extern void task_smstop_sm(struct task_struct *task);
+
 /* Maximum VL that SVE/SME VL-agnostic software can transparently support */
 #define VL_ARCH_MAX 0x100

@@ -195,10 +200,8 @@ struct vl_info {
 extern void sve_alloc(struct task_struct *task, bool flush);
 extern void fpsimd_release_task(struct task_struct *task);
-extern void fpsimd_sync_to_sve(struct task_struct *task);
-extern void fpsimd_force_sync_to_sve(struct task_struct *task);
-extern void sve_sync_to_fpsimd(struct task_struct *task);
-extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);
+extern void fpsimd_sync_from_effective_state(struct task_struct *task);
+extern void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task);

 extern int vec_set_vector_length(struct task_struct *task, enum vec_type type,
 				 unsigned long vl, unsigned long flags);
@@ -292,14 +295,29 @@ static inline bool sve_vq_available(unsigned int vq)
 	return vq_available(ARM64_VEC_SVE, vq);
 }

-size_t sve_state_size(struct task_struct const *task);
+static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl)
+{
+	unsigned int vl = max(sve_vl, sme_vl);
+	return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+}
+
+/*
+ * Return how many bytes of memory are required to store the full SVE
+ * state for task, given task's currently configured vector length.
+ */
+static inline size_t sve_state_size(struct task_struct const *task)
+{
+	unsigned int sve_vl = task_get_sve_vl(task);
+	unsigned int sme_vl = task_get_sme_vl(task);
+	return __sve_state_size(sve_vl, sme_vl);
+}

 #else /* ! CONFIG_ARM64_SVE */

 static inline void sve_alloc(struct task_struct *task, bool flush) { }
 static inline void fpsimd_release_task(struct task_struct *task) { }
-static inline void sve_sync_to_fpsimd(struct task_struct *task) { }
-static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { }
+static inline void fpsimd_sync_from_effective_state(struct task_struct *task) { }
+static inline void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task) { }

 static inline int sve_max_virtualisable_vl(void)
 {
@@ -333,6 +351,11 @@ static inline void vec_update_vq_map(enum vec_type t) { }
 static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
 static inline void sve_setup(void) { }

+static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl)
+{
+	return 0;
+}
+
 static inline size_t sve_state_size(struct task_struct const *task)
 {
 	return 0;
@@ -385,6 +408,16 @@ extern int sme_set_current_vl(unsigned long arg);
 extern int sme_get_current_vl(void);
 extern void sme_suspend_exit(void);

+static inline size_t __sme_state_size(unsigned int sme_vl)
+{
+	size_t size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(sme_vl));
+
+	if (system_supports_sme2())
+		size += ZT_SIG_REG_SIZE;
+
+	return size;
+}
+
 /*
  * Return how many bytes of memory are required to store the full SME
  * specific state for task, given task's currently configured vector
@@ -392,15 +425,7 @@ extern void sme_suspend_exit(void);
  */
 static inline size_t sme_state_size(struct task_struct const *task)
 {
-	unsigned int vl = task_get_sme_vl(task);
-	size_t size;
-
-	size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
-
-	if (system_supports_sme2())
-		size += ZT_SIG_REG_SIZE;
-
-	return size;
+	return __sme_state_size(task_get_sme_vl(task));
 }

 #else
@@ -421,6 +446,11 @@ static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
 static inline int sme_get_current_vl(void) { return -EINVAL; }
 static inline void sme_suspend_exit(void) { }

+static inline size_t __sme_state_size(unsigned int sme_vl)
+{
+	return 0;
+}
+
 static inline size_t sme_state_size(struct task_struct const *task)
 {
 	return 0;
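Note: the esr.h hunk above replaces the raw SME trap ISS values with a named SMTC field plus an extraction macro, so handlers mask the ESR instead of comparing the whole ISS; the do_sme_acc() change further down uses it exactly this way. A condensed sketch of the decode (the function name is illustrative):

    #include <asm/esr.h>

    /* Illustrative: classify an SME trap from its ESR. */
    static const char *sme_trap_reason(unsigned long esr)
    {
            switch (ESR_ELx_SME_ISS_SMTC(esr)) {
            case ESR_ELx_SME_ISS_SMTC_SME_DISABLED: return "SME disabled";
            case ESR_ELx_SME_ISS_SMTC_ILL:          return "illegal use";
            case ESR_ELx_SME_ISS_SMTC_SM_DISABLED:  return "PSTATE.SM disabled";
            case ESR_ELx_SME_ISS_SMTC_ZA_DISABLED:  return "PSTATE.ZA disabled";
            case ESR_ELx_SME_ISS_SMTC_ZT_DISABLED:  return "ZT0 disabled";
            default:                                return "unknown";
            }
    }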
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 07fbf5bf85a7..2a8155c4a882 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -69,29 +69,38 @@ extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,

 #include <asm-generic/hugetlb.h>

-#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
-static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
-					   unsigned long start,
-					   unsigned long end)
+static inline void __flush_hugetlb_tlb_range(struct vm_area_struct *vma,
+					     unsigned long start,
+					     unsigned long end,
+					     unsigned long stride,
+					     bool last_level)
 {
-	unsigned long stride = huge_page_size(hstate_vma(vma));
-
 	switch (stride) {
 #ifndef __PAGETABLE_PMD_FOLDED
 	case PUD_SIZE:
-		__flush_tlb_range(vma, start, end, PUD_SIZE, false, 1);
+		__flush_tlb_range(vma, start, end, PUD_SIZE, last_level, 1);
 		break;
 #endif
 	case CONT_PMD_SIZE:
 	case PMD_SIZE:
-		__flush_tlb_range(vma, start, end, PMD_SIZE, false, 2);
+		__flush_tlb_range(vma, start, end, PMD_SIZE, last_level, 2);
 		break;
 	case CONT_PTE_SIZE:
-		__flush_tlb_range(vma, start, end, PAGE_SIZE, false, 3);
+		__flush_tlb_range(vma, start, end, PAGE_SIZE, last_level, 3);
 		break;
 	default:
-		__flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN);
+		__flush_tlb_range(vma, start, end, PAGE_SIZE, last_level, TLBI_TTL_UNKNOWN);
 	}
 }

+#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
+static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
+					   unsigned long start,
+					   unsigned long end)
+{
+	unsigned long stride = huge_page_size(hstate_vma(vma));
+
+	__flush_hugetlb_tlb_range(vma, start, end, stride, false);
+}
+
 #endif /* __ASM_HUGETLB_H */
diff --git a/arch/arm64/include/asm/mem_encrypt.h b/arch/arm64/include/asm/mem_encrypt.h
index a2a1eeb36d4b..314b2b52025f 100644
--- a/arch/arm64/include/asm/mem_encrypt.h
+++ b/arch/arm64/include/asm/mem_encrypt.h
@@ -4,6 +4,8 @@

 #include <asm/rsi.h>

+struct device;
+
 struct arm64_mem_crypt_ops {
 	int (*encrypt)(unsigned long addr, int numpages);
 	int (*decrypt)(unsigned long addr, int numpages);
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index d3b538be1500..5285757ee0c1 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -40,6 +40,85 @@
 #include <linux/sched.h>
 #include <linux/page_table_check.h>

+static inline void emit_pte_barriers(void)
+{
+	/*
+	 * These barriers are emitted under certain conditions after a pte entry
+	 * was modified (see e.g. __set_pte_complete()). The dsb makes the store
+	 * visible to the table walker. The isb ensures that any previous
+	 * speculative "invalid translation" marker that is in the CPU's
+	 * pipeline gets cleared, so that any access to that address after
+	 * setting the pte to valid won't cause a spurious fault. If the thread
+	 * gets preempted after storing to the pgtable but before emitting these
+	 * barriers, __switch_to() emits a dsb which ensures the walker gets to
+	 * see the store. There is no guarantee of an isb being issued though.
+	 * This is safe because it will still get issued (albeit on a
+	 * potentially different CPU) when the thread starts running again,
+	 * before any access to the address.
+	 */
+	dsb(ishst);
+	isb();
+}
+
+static inline void queue_pte_barriers(void)
+{
+	unsigned long flags;
+
+	if (in_interrupt()) {
+		emit_pte_barriers();
+		return;
+	}
+
+	flags = read_thread_flags();
+
+	if (flags & BIT(TIF_LAZY_MMU)) {
+		/* Avoid the atomic op if already set. */
+		if (!(flags & BIT(TIF_LAZY_MMU_PENDING)))
+			set_thread_flag(TIF_LAZY_MMU_PENDING);
+	} else {
+		emit_pte_barriers();
+	}
+}
+
+#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+static inline void arch_enter_lazy_mmu_mode(void)
+{
+	/*
+	 * lazy_mmu_mode is not supposed to permit nesting. But in practice this
+	 * does happen with CONFIG_DEBUG_PAGEALLOC, where a page allocation
+	 * inside a lazy_mmu_mode section (such as zap_pte_range()) will change
+	 * permissions on the linear map with apply_to_page_range(), which
+	 * re-enters lazy_mmu_mode. So we tolerate nesting in our
+	 * implementation. The first call to arch_leave_lazy_mmu_mode() will
+	 * flush and clear the flag such that the remainder of the work in the
+	 * outer nest behaves as if outside of lazy mmu mode. This is safe and
+	 * keeps tracking simple.
+	 */
+
+	if (in_interrupt())
+		return;
+
+	set_thread_flag(TIF_LAZY_MMU);
+}
+
+static inline void arch_flush_lazy_mmu_mode(void)
+{
+	if (in_interrupt())
+		return;
+
+	if (test_and_clear_thread_flag(TIF_LAZY_MMU_PENDING))
+		emit_pte_barriers();
+}
+
+static inline void arch_leave_lazy_mmu_mode(void)
+{
+	if (in_interrupt())
+		return;
+
+	arch_flush_lazy_mmu_mode();
+	clear_thread_flag(TIF_LAZY_MMU);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
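Note: the point of queue_pte_barriers() is that, inside a lazy-MMU section, a batch of kernel pgtable updates pays for a single dsb(ishst)/isb pair rather than one per entry. The shape of a caller (illustrative function; real callers are generic mm paths such as vmap and apply_to_page_range()):

    /* Illustrative: batch-set nr kernel ptes with one trailing barrier. */
    static void set_kernel_ptes(pte_t *ptep, pte_t pte, unsigned int nr)
    {
            unsigned int i;

            arch_enter_lazy_mmu_mode();             /* sets TIF_LAZY_MMU */

            for (i = 0; i < nr; i++) {
                    /* Each store only sets TIF_LAZY_MMU_PENDING. */
                    __set_pte(ptep + i, pte);
                    pte = pte_advance_pfn(pte, 1);
            }

            /* One dsb(ishst)+isb, via arch_flush_lazy_mmu_mode(). */
            arch_leave_lazy_mmu_mode();
    }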
@@ -320,18 +399,20 @@ static inline void __set_pte_nosync(pte_t *ptep, pte_t pte)
 	WRITE_ONCE(*ptep, pte);
 }

-static inline void __set_pte(pte_t *ptep, pte_t pte)
+static inline void __set_pte_complete(pte_t pte)
 {
-	__set_pte_nosync(ptep, pte);
-
 	/*
 	 * Only if the new pte is valid and kernel, otherwise TLB maintenance
-	 * or update_mmu_cache() have the necessary barriers.
+	 * has the necessary barriers.
 	 */
-	if (pte_valid_not_user(pte)) {
-		dsb(ishst);
-		isb();
-	}
+	if (pte_valid_not_user(pte))
+		queue_pte_barriers();
+}
+
+static inline void __set_pte(pte_t *ptep, pte_t pte)
+{
+	__set_pte_nosync(ptep, pte);
+	__set_pte_complete(pte);
 }

 static inline pte_t __ptep_get(pte_t *ptep)
@@ -423,23 +504,6 @@ static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
 	return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte));
 }

-static inline void __set_ptes(struct mm_struct *mm,
-			      unsigned long __always_unused addr,
-			      pte_t *ptep, pte_t pte, unsigned int nr)
-{
-	page_table_check_ptes_set(mm, ptep, pte, nr);
-	__sync_cache_and_tags(pte, nr);
-
-	for (;;) {
-		__check_safe_pte_update(mm, ptep, pte);
-		__set_pte(ptep, pte);
-		if (--nr == 0)
-			break;
-		ptep++;
-		pte = pte_advance_pfn(pte, 1);
-	}
-}
-
 /*
  * Hugetlb definitions.
  */
@@ -649,30 +713,64 @@ static inline pgprot_t pud_pgprot(pud_t pud)
 	return __pgprot(pud_val(pfn_pud(pfn, __pgprot(0))) ^ pud_val(pud));
 }

-static inline void __set_pte_at(struct mm_struct *mm,
-				unsigned long __always_unused addr,
-				pte_t *ptep, pte_t pte, unsigned int nr)
+static inline void __set_ptes_anysz(struct mm_struct *mm, pte_t *ptep,
+				    pte_t pte, unsigned int nr,
+				    unsigned long pgsize)
 {
-	__sync_cache_and_tags(pte, nr);
-	__check_safe_pte_update(mm, ptep, pte);
-	__set_pte(ptep, pte);
+	unsigned long stride = pgsize >> PAGE_SHIFT;
+
+	switch (pgsize) {
+	case PAGE_SIZE:
+		page_table_check_ptes_set(mm, ptep, pte, nr);
+		break;
+	case PMD_SIZE:
+		page_table_check_pmds_set(mm, (pmd_t *)ptep, pte_pmd(pte), nr);
+		break;
+#ifndef __PAGETABLE_PMD_FOLDED
+	case PUD_SIZE:
+		page_table_check_puds_set(mm, (pud_t *)ptep, pte_pud(pte), nr);
+		break;
+#endif
+	default:
+		VM_WARN_ON(1);
+	}
+
+	__sync_cache_and_tags(pte, nr * stride);
+
+	for (;;) {
+		__check_safe_pte_update(mm, ptep, pte);
+		__set_pte_nosync(ptep, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		pte = pte_advance_pfn(pte, stride);
+	}
+
+	__set_pte_complete(pte);
 }

-static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
-			      pmd_t *pmdp, pmd_t pmd)
+static inline void __set_ptes(struct mm_struct *mm,
+			      unsigned long __always_unused addr,
+			      pte_t *ptep, pte_t pte, unsigned int nr)
 {
-	page_table_check_pmd_set(mm, pmdp, pmd);
-	return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd),
-						PMD_SIZE >> PAGE_SHIFT);
+	__set_ptes_anysz(mm, ptep, pte, nr, PAGE_SIZE);
 }

-static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
-			      pud_t *pudp, pud_t pud)
+static inline void __set_pmds(struct mm_struct *mm,
+			      unsigned long __always_unused addr,
+			      pmd_t *pmdp, pmd_t pmd, unsigned int nr)
+{
+	__set_ptes_anysz(mm, (pte_t *)pmdp, pmd_pte(pmd), nr, PMD_SIZE);
+}
+#define set_pmd_at(mm, addr, pmdp, pmd) __set_pmds(mm, addr, pmdp, pmd, 1)
+
+static inline void __set_puds(struct mm_struct *mm,
+			      unsigned long __always_unused addr,
+			      pud_t *pudp, pud_t pud, unsigned int nr)
 {
-	page_table_check_pud_set(mm, pudp, pud);
-	return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud),
-						PUD_SIZE >> PAGE_SHIFT);
+	__set_ptes_anysz(mm, (pte_t *)pudp, pud_pte(pud), nr, PUD_SIZE);
 }
+#define set_pud_at(mm, addr, pudp, pud) __set_puds(mm, addr, pudp, pud, 1)

 #define __p4d_to_phys(p4d)	__pte_to_phys(p4d_pte(p4d))
 #define __phys_to_p4d_val(phys)	__phys_to_pte_val(phys)
@@ -739,8 +837,7 @@ static inline int pmd_trans_huge(pmd_t pmd)
 	 * If pmd is present-invalid, pmd_table() won't detect it
 	 * as a table, so force the valid bit for the comparison.
 	 */
-	return pmd_val(pmd) && pmd_present(pmd) &&
-	       !pmd_table(__pmd(pmd_val(pmd) | PTE_VALID));
+	return pmd_present(pmd) && !pmd_table(__pmd(pmd_val(pmd) | PTE_VALID));
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
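Note: for nr > 1, __set_ptes_anysz() advances the pte by stride = pgsize >> PAGE_SHIFT pfns per entry, and only the final store goes through __set_pte_complete(), so one barrier (or one pending flag) covers the whole batch. A worked example of the stride arithmetic for a two-entry PMD batch on 4K pages (standalone, numbers only):

    #include <stdio.h>

    int main(void)
    {
            unsigned long page_shift = 12;                  /* 4K pages */
            unsigned long pmd_size = 1UL << 21;             /* 2 MiB */
            unsigned long stride = pmd_size >> page_shift;  /* 512 pfns */
            unsigned long pfn = 0x80000;
            int i;

            for (i = 0; i < 2; i++, pfn += stride)
                    printf("entry %d starts at pfn 0x%lx\n", i, pfn);
            return 0;
    }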
@@ -754,8 +851,6 @@ static inline bool pud_table(pud_t pud) { return true; }
 						 PUD_TYPE_TABLE)
 #endif

-extern pgd_t init_pg_dir[];
-extern pgd_t init_pg_end[];
 extern pgd_t swapper_pg_dir[];
 extern pgd_t idmap_pg_dir[];
 extern pgd_t tramp_pg_dir[];
@@ -780,10 +875,8 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)

 	WRITE_ONCE(*pmdp, pmd);

-	if (pmd_valid(pmd)) {
-		dsb(ishst);
-		isb();
-	}
+	if (pmd_valid(pmd))
+		queue_pte_barriers();
 }

 static inline void pmd_clear(pmd_t *pmdp)
@@ -848,10 +941,8 @@ static inline void set_pud(pud_t *pudp, pud_t pud)

 	WRITE_ONCE(*pudp, pud);

-	if (pud_valid(pud)) {
-		dsb(ishst);
-		isb();
-	}
+	if (pud_valid(pud))
+		queue_pte_barriers();
 }

 static inline void pud_clear(pud_t *pudp)
@@ -930,8 +1021,7 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
 	}

 	WRITE_ONCE(*p4dp, p4d);
-	dsb(ishst);
-	isb();
+	queue_pte_barriers();
 }

 static inline void p4d_clear(p4d_t *p4dp)
@@ -1059,8 +1149,7 @@ static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
 	}

 	WRITE_ONCE(*pgdp, pgd);
-	dsb(ishst);
-	isb();
+	queue_pte_barriers();
 }

 static inline void pgd_clear(pgd_t *pgdp)
@@ -1301,16 +1390,37 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */

-static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
-					 unsigned long address, pte_t *ptep)
+static inline pte_t __ptep_get_and_clear_anysz(struct mm_struct *mm,
+					       pte_t *ptep,
+					       unsigned long pgsize)
 {
 	pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));

-	page_table_check_pte_clear(mm, pte);
+	switch (pgsize) {
+	case PAGE_SIZE:
+		page_table_check_pte_clear(mm, pte);
+		break;
+	case PMD_SIZE:
+		page_table_check_pmd_clear(mm, pte_pmd(pte));
+		break;
+#ifndef __PAGETABLE_PMD_FOLDED
+	case PUD_SIZE:
+		page_table_check_pud_clear(mm, pte_pud(pte));
+		break;
+#endif
+	default:
+		VM_WARN_ON(1);
+	}

 	return pte;
 }

+static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
+					 unsigned long address, pte_t *ptep)
+{
+	return __ptep_get_and_clear_anysz(mm, ptep, PAGE_SIZE);
+}
+
 static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr,
 				pte_t *ptep, unsigned int nr, int full)
 {
@@ -1347,11 +1457,7 @@ static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm,
 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 					    unsigned long address, pmd_t *pmdp)
 {
-	pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0));
-
-	page_table_check_pmd_clear(mm, pmd);
-
-	return pmd;
+	return pte_pmd(__ptep_get_and_clear_anysz(mm, (pte_t *)pmdp, PMD_SIZE));
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/arm64/include/asm/rsi_cmds.h b/arch/arm64/include/asm/rsi_cmds.h
index e6a211001bd3..2c8763876dfb 100644
--- a/arch/arm64/include/asm/rsi_cmds.h
+++ b/arch/arm64/include/asm/rsi_cmds.h
@@ -7,6 +7,8 @@
 #define __ASM_RSI_CMDS_H

 #include <linux/arm-smccc.h>
+#include <linux/string.h>
+#include <asm/memory.h>

 #include <asm/rsi_smc.h>
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 1114c1c3300a..1269c2487574 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -59,11 +59,12 @@ void arch_setup_new_exec(void);

 #define TIF_SIGPENDING		0	/* signal pending */
 #define TIF_NEED_RESCHED	1	/* rescheduling necessary */
-#define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
-#define TIF_FOREIGN_FPSTATE	3	/* CPU's FP state is not current's */
-#define TIF_UPROBE		4	/* uprobe breakpoint or singlestep */
-#define TIF_MTE_ASYNC_FAULT	5	/* MTE Asynchronous Tag Check Fault */
-#define TIF_NOTIFY_SIGNAL	6	/* signal notifications exist */
+#define TIF_NEED_RESCHED_LAZY	2	/* Lazy rescheduling needed */
+#define TIF_NOTIFY_RESUME	3	/* callback before returning to user */
+#define TIF_FOREIGN_FPSTATE	4	/* CPU's FP state is not current's */
+#define TIF_UPROBE		5	/* uprobe breakpoint or singlestep */
+#define TIF_MTE_ASYNC_FAULT	6	/* MTE Asynchronous Tag Check Fault */
+#define TIF_NOTIFY_SIGNAL	7	/* signal notifications exist */
 #define TIF_SYSCALL_TRACE	8	/* syscall trace active */
 #define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
 #define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
@@ -82,9 +83,12 @@ void arch_setup_new_exec(void);
 #define TIF_SME_VL_INHERIT	28	/* Inherit SME vl_onexec across exec */
 #define TIF_KERNEL_FPSTATE	29	/* Task is in a kernel mode FPSIMD section */
 #define TIF_TSC_SIGSEGV		30	/* SIGSEGV on counter-timer access */
+#define TIF_LAZY_MMU		31	/* Task in lazy mmu mode */
+#define TIF_LAZY_MMU_PENDING	32	/* Ops pending for lazy mmu mode exit */

 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
+#define _TIF_NEED_RESCHED_LAZY	(1 << TIF_NEED_RESCHED_LAZY)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_FOREIGN_FPSTATE	(1 << TIF_FOREIGN_FPSTATE)
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
@@ -100,10 +104,10 @@ void arch_setup_new_exec(void);
 #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
 #define _TIF_TSC_SIGSEGV	(1 << TIF_TSC_SIGSEGV)

-#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
 				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
 				 _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
-				 _TIF_NOTIFY_SIGNAL)
+				 _TIF_NOTIFY_SIGNAL | _TIF_SIGPENDING)

 #define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
 				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
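Note: TIF_LAZY_MMU_PENDING is bit 32, past the reach of the (1 << TIF_*) idiom used for the _TIF_* masks above (shifting a 32-bit int by 32 is undefined behaviour). That is why the pgtable.h code tests these flags with BIT(), which expands to 1UL << n. A two-line illustration (assumes an LP64 target such as arm64):

    #include <assert.h>

    int main(void)
    {
            unsigned long pending = 1UL << 32;      /* fine on LP64 */
            assert(pending == 0x100000000UL);       /* (1 << 32) would be UB */
            return 0;
    }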
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
index 3322c7047d84..da1ab8759592 100644
--- a/arch/arm64/include/asm/vdso/gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -8,6 +8,7 @@
 #ifndef __ASSEMBLY__

 #include <asm/alternative.h>
+#include <asm/arch_timer.h>
 #include <asm/barrier.h>
 #include <asm/unistd.h>
 #include <asm/sysreg.h>
@@ -69,8 +70,6 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
 static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
 						 const struct vdso_time_data *vd)
 {
-	u64 res;
-
 	/*
 	 * Core checks for mode already, so this raced against a concurrent
 	 * update. Return something. Core will do another round and then
@@ -79,24 +78,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
 	if (clock_mode == VDSO_CLOCKMODE_NONE)
 		return 0;

-	/*
-	 * If FEAT_ECV is available, use the self-synchronizing counter.
-	 * Otherwise the isb is required to prevent that the counter value
-	 * is speculated.
-	 */
-	asm volatile(
-	ALTERNATIVE("isb\n"
-		    "mrs %0, cntvct_el0",
-		    "nop\n"
-		    __mrs_s("%0", SYS_CNTVCTSS_EL0),
-		    ARM64_HAS_ECV)
-	: "=r" (res)
-	:
-	: "memory");
-
-	arch_counter_enforce_ordering(res);
-
-	return res;
+	return __arch_counter_get_cntvct();
 }

 #if IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB)
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index ebf4a9f943ed..aa280f356b96 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -67,7 +67,8 @@
 * __boot_cpu_mode records what mode CPUs were booted in.
 * A correctly-implemented bootloader must start all CPUs in the same mode:
 * In this case, both 32bit halves of __boot_cpu_mode will contain the
- * same value (either 0 if booted in EL1, BOOT_CPU_MODE_EL2 if booted in EL2).
+ * same value (either BOOT_CPU_MODE_EL1 if booted in EL1, BOOT_CPU_MODE_EL2 if
+ * booted in EL2).
 *
 * Should the bootloader fail to do this, the two values will be different.
 * This allows the kernel to flag an error when the secondaries have come up.
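Note: the vDSO now calls the shared __arch_counter_get_cntvct() helper from asm/arch_timer.h instead of open-coding the alternative shown in the removed lines. The logic being centralised is roughly this (sketch only; the real helper uses the kernel's alternatives framework rather than a runtime flag, and the s3_3_c14_c0_6 encoding is CNTVCTSS_EL0):

    /* Sketch: read the virtual counter without letting the read be
     * speculated. With FEAT_ECV, CNTVCTSS_EL0 self-synchronises and
     * no isb is needed. */
    static inline unsigned long read_cntvct(int have_ecv)
    {
            unsigned long res;

            if (have_ecv)
                    asm volatile("mrs %0, s3_3_c14_c0_6" : "=r" (res));
            else
                    asm volatile("isb\n\tmrs %0, cntvct_el0"
                                 : "=r" (res) :: "memory");
            return res;
    }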
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
index 38fafffe699f..12f534e8f3ed 100644
--- a/arch/arm64/include/asm/vmalloc.h
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -23,6 +23,51 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot)
 	return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
 }

+#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size
+static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr,
+						unsigned long end, u64 pfn,
+						unsigned int max_page_shift)
+{
+	/*
+	 * If the block is at least CONT_PTE_SIZE in size, and is naturally
+	 * aligned in both virtual and physical space, then we can pte-map the
+	 * block using the PTE_CONT bit for more efficient use of the TLB.
+	 */
+	if (max_page_shift < CONT_PTE_SHIFT)
+		return PAGE_SIZE;
+
+	if (end - addr < CONT_PTE_SIZE)
+		return PAGE_SIZE;
+
+	if (!IS_ALIGNED(addr, CONT_PTE_SIZE))
+		return PAGE_SIZE;
+
+	if (!IS_ALIGNED(PFN_PHYS(pfn), CONT_PTE_SIZE))
+		return PAGE_SIZE;
+
+	return CONT_PTE_SIZE;
+}
+
+#define arch_vmap_pte_range_unmap_size arch_vmap_pte_range_unmap_size
+static inline unsigned long arch_vmap_pte_range_unmap_size(unsigned long addr,
+							   pte_t *ptep)
+{
+	/*
+	 * The caller handles alignment so it's sufficient just to check
+	 * PTE_CONT.
+	 */
+	return pte_valid_cont(__ptep_get(ptep)) ? CONT_PTE_SIZE : PAGE_SIZE;
+}
+
+#define arch_vmap_pte_supported_shift arch_vmap_pte_supported_shift
+static inline int arch_vmap_pte_supported_shift(unsigned long size)
+{
+	if (size >= CONT_PTE_SIZE)
+		return CONT_PTE_SHIFT;
+
+	return PAGE_SHIFT;
+}
+
 #endif

 #define arch_vmap_pgprot_tagged arch_vmap_pgprot_tagged
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index eb1a840e4110..30d4bbe68661 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -182,5 +182,7 @@ int main(void)
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
   DEFINE(FTRACE_OPS_DIRECT_CALL,	offsetof(struct ftrace_ops, direct_call));
 #endif
+  DEFINE(PIE_E0_ASM, PIE_E0);
+  DEFINE(PIE_E1_ASM, PIE_E1);
   return 0;
 }
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 4c46d80aa64b..379c82d22c75 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -765,17 +765,17 @@ static const struct arm64_ftr_bits ftr_raz[] = {
 #define ARM64_FTR_REG(id, table)		\
 	__ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override)

-struct arm64_ftr_override id_aa64mmfr0_override;
-struct arm64_ftr_override id_aa64mmfr1_override;
-struct arm64_ftr_override id_aa64mmfr2_override;
-struct arm64_ftr_override id_aa64pfr0_override;
-struct arm64_ftr_override id_aa64pfr1_override;
-struct arm64_ftr_override id_aa64zfr0_override;
-struct arm64_ftr_override id_aa64smfr0_override;
-struct arm64_ftr_override id_aa64isar1_override;
-struct arm64_ftr_override id_aa64isar2_override;
-
-struct arm64_ftr_override arm64_sw_feature_override;
+struct arm64_ftr_override __read_mostly id_aa64mmfr0_override;
+struct arm64_ftr_override __read_mostly id_aa64mmfr1_override;
+struct arm64_ftr_override __read_mostly id_aa64mmfr2_override;
+struct arm64_ftr_override __read_mostly id_aa64pfr0_override;
+struct arm64_ftr_override __read_mostly id_aa64pfr1_override;
+struct arm64_ftr_override __read_mostly id_aa64zfr0_override;
+struct arm64_ftr_override __read_mostly id_aa64smfr0_override;
+struct arm64_ftr_override __read_mostly id_aa64isar1_override;
+struct arm64_ftr_override __read_mostly id_aa64isar2_override;
+
+struct arm64_ftr_override __read_mostly arm64_sw_feature_override;

 static const struct __ftr_reg_entry {
 	u32			sys_id;
@@ -1410,6 +1410,8 @@ void update_cpu_features(int cpu,
 				      info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2);
 	taint |= check_update_ftr_reg(SYS_ID_AA64MMFR3_EL1, cpu,
 				      info->reg_id_aa64mmfr3, boot->reg_id_aa64mmfr3);
+	taint |= check_update_ftr_reg(SYS_ID_AA64MMFR4_EL1, cpu,
+				      info->reg_id_aa64mmfr4, boot->reg_id_aa64mmfr4);

 	taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu,
 				      info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0);
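Note: arch_vmap_pte_range_map_size() only reports CONT_PTE_SIZE when a full, naturally aligned block is available in both VA and PA; anything else degrades to PAGE_SIZE. With 4K pages, CONT_PTE_SIZE is 16 pages = 64K, and the decision reduces to alignment arithmetic (standalone worked example; map_size() mirrors the kernel checks but is not the kernel function):

    #include <stdio.h>

    #define PAGE_SIZE     0x1000UL
    #define CONT_PTE_SIZE (16 * PAGE_SIZE)   /* 64K with 4K pages */

    /* Mirrors the checks in arch_vmap_pte_range_map_size(). */
    static unsigned long map_size(unsigned long addr, unsigned long end,
                                  unsigned long pa)
    {
            if (end - addr < CONT_PTE_SIZE ||
                addr % CONT_PTE_SIZE || pa % CONT_PTE_SIZE)
                    return PAGE_SIZE;
            return CONT_PTE_SIZE;
    }

    int main(void)
    {
            /* 128K region, both addresses 64K-aligned: contiguous ok. */
            printf("0x%lx\n", map_size(0xffff800010000000UL,
                                       0xffff800010020000UL, 0x80000000UL));
            return 0;
    }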
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 285d7d538342..94525abd1c22 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -209,80 +209,79 @@ static const char *const compat_hwcap2_str[] = {

 static int c_show(struct seq_file *m, void *v)
 {
-	int i, j;
+	int j;
+	int cpu = m->index;
 	bool compat = personality(current->personality) == PER_LINUX32;
+	struct cpuinfo_arm64 *cpuinfo = v;
+	u32 midr = cpuinfo->reg_midr;

-	for_each_online_cpu(i) {
-		struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
-		u32 midr = cpuinfo->reg_midr;
-
-		/*
-		 * glibc reads /proc/cpuinfo to determine the number of
-		 * online processors, looking for lines beginning with
-		 * "processor".  Give glibc what it expects.
-		 */
-		seq_printf(m, "processor\t: %d\n", i);
-		if (compat)
-			seq_printf(m, "model name\t: ARMv8 Processor rev %d (%s)\n",
-				   MIDR_REVISION(midr), COMPAT_ELF_PLATFORM);
+	/*
+	 * glibc reads /proc/cpuinfo to determine the number of
+	 * online processors, looking for lines beginning with
+	 * "processor".  Give glibc what it expects.
+	 */
+	seq_printf(m, "processor\t: %d\n", cpu);
+	if (compat)
+		seq_printf(m, "model name\t: ARMv8 Processor rev %d (%s)\n",
+			   MIDR_REVISION(midr), COMPAT_ELF_PLATFORM);

-		seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
-			   loops_per_jiffy / (500000UL/HZ),
-			   loops_per_jiffy / (5000UL/HZ) % 100);
+	seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
+		   loops_per_jiffy / (500000UL/HZ),
+		   loops_per_jiffy / (5000UL/HZ) % 100);

-		/*
-		 * Dump out the common processor features in a single line.
-		 * Userspace should read the hwcaps with getauxval(AT_HWCAP)
-		 * rather than attempting to parse this, but there's a body of
-		 * software which does already (at least for 32-bit).
-		 */
-		seq_puts(m, "Features\t:");
-		if (compat) {
+	/*
+	 * Dump out the common processor features in a single line.
+	 * Userspace should read the hwcaps with getauxval(AT_HWCAP)
+	 * rather than attempting to parse this, but there's a body of
+	 * software which does already (at least for 32-bit).
+	 */
+	seq_puts(m, "Features\t:");
+	if (compat) {
 #ifdef CONFIG_COMPAT
-		for (j = 0; j < ARRAY_SIZE(compat_hwcap_str); j++) {
-			if (compat_elf_hwcap & (1 << j)) {
-				/*
-				 * Warn once if any feature should not
-				 * have been present on arm64 platform.
-				 */
-				if (WARN_ON_ONCE(!compat_hwcap_str[j]))
-					continue;
-
-				seq_printf(m, " %s", compat_hwcap_str[j]);
-			}
+		for (j = 0; j < ARRAY_SIZE(compat_hwcap_str); j++) {
+			if (compat_elf_hwcap & (1 << j)) {
+				/*
+				 * Warn once if any feature should not
+				 * have been present on arm64 platform.
+				 */
+				if (WARN_ON_ONCE(!compat_hwcap_str[j]))
+					continue;
+
+				seq_printf(m, " %s", compat_hwcap_str[j]);
 			}
+		}

-			for (j = 0; j < ARRAY_SIZE(compat_hwcap2_str); j++)
-				if (compat_elf_hwcap2 & (1 << j))
-					seq_printf(m, " %s", compat_hwcap2_str[j]);
+		for (j = 0; j < ARRAY_SIZE(compat_hwcap2_str); j++)
+			if (compat_elf_hwcap2 & (1 << j))
+				seq_printf(m, " %s", compat_hwcap2_str[j]);
 #endif /* CONFIG_COMPAT */
-		} else {
-			for (j = 0; j < ARRAY_SIZE(hwcap_str); j++)
-				if (cpu_have_feature(j))
-					seq_printf(m, " %s", hwcap_str[j]);
-		}
-		seq_puts(m, "\n");
-
-		seq_printf(m, "CPU implementer\t: 0x%02x\n",
-			   MIDR_IMPLEMENTOR(midr));
-		seq_printf(m, "CPU architecture: 8\n");
-		seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
-		seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
-		seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
+	} else {
+		for (j = 0; j < ARRAY_SIZE(hwcap_str); j++)
+			if (cpu_have_feature(j))
+				seq_printf(m, " %s", hwcap_str[j]);
 	}
+	seq_puts(m, "\n");
+
+	seq_printf(m, "CPU implementer\t: 0x%02x\n",
+		   MIDR_IMPLEMENTOR(midr));
+	seq_puts(m, "CPU architecture: 8\n");
+	seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
+	seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
+	seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));

 	return 0;
 }

 static void *c_start(struct seq_file *m, loff_t *pos)
 {
-	return *pos < 1 ? (void *)1 : NULL;
+	*pos = cpumask_next(*pos - 1, cpu_online_mask);
+	return *pos < nr_cpu_ids ? &per_cpu(cpu_data, *pos) : NULL;
 }

 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	++*pos;
-	return NULL;
+	return c_start(m, pos);
 }

 static void c_stop(struct seq_file *m, void *v)
@@ -328,11 +327,13 @@ static const struct kobj_type cpuregs_kobj_type = {

 CPUREGS_ATTR_RO(midr_el1, midr);
 CPUREGS_ATTR_RO(revidr_el1, revidr);
+CPUREGS_ATTR_RO(aidr_el1, aidr);
 CPUREGS_ATTR_RO(smidr_el1, smidr);

 static struct attribute *cpuregs_id_attrs[] = {
 	&cpuregs_attr_midr_el1.attr,
 	&cpuregs_attr_revidr_el1.attr,
+	&cpuregs_attr_aidr_el1.attr,
 	NULL
 };

@@ -469,6 +470,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 	info->reg_dczid = read_cpuid(DCZID_EL0);
 	info->reg_midr = read_cpuid_id();
 	info->reg_revidr = read_cpuid(REVIDR_EL1);
+	info->reg_aidr = read_cpuid(AIDR_EL1);

 	info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1);
 	info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1);
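Note: c_show() used to print every online CPU per invocation; the iteration now lives in c_start()/c_next(), so the seq_file core calls c_show() once per CPU and partial reads resume correctly. The subtle bit is the *pos - 1 seed: cpumask_next(n, mask) returns the first set CPU strictly greater than n, so passing *pos - 1 yields the first online CPU at or above *pos (and for *pos == 0 the scan starts from CPU 0). A sketch of that contract (illustrative helper name):

    /* Sketch: first online cpu at or above 'want'; returns >= nr_cpu_ids
     * when none is left. */
    static unsigned int first_online_at_or_above(unsigned int want)
    {
            return cpumask_next((int)want - 1, cpu_online_mask);
    }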
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 1d25d8899dbf..250e9d7c08a7 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -169,14 +169,14 @@ static DEFINE_RAW_SPINLOCK(efi_rt_lock);
 void arch_efi_call_virt_setup(void)
 {
 	efi_virtmap_load();
-	__efi_fpsimd_begin();
 	raw_spin_lock(&efi_rt_lock);
+	__efi_fpsimd_begin();
 }

 void arch_efi_call_virt_teardown(void)
 {
-	raw_spin_unlock(&efi_rt_lock);
 	__efi_fpsimd_end();
+	raw_spin_unlock(&efi_rt_lock);
 	efi_virtmap_unload();
 }
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index b260ddc4d3e9..7c1970b341b8 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -132,7 +132,7 @@ static void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
 	do {
 		local_irq_enable();

-		if (thread_flags & _TIF_NEED_RESCHED)
+		if (thread_flags & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
 			schedule();

 		if (thread_flags & _TIF_UPROBE)
@@ -393,20 +393,16 @@ static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
 * As per the ABI exit SME streaming mode and clear the SVE state not
 * shared with FPSIMD on syscall entry.
 */
-static inline void fp_user_discard(void)
+static inline void fpsimd_syscall_enter(void)
 {
-	/*
-	 * If SME is active then exit streaming mode.  If ZA is active
-	 * then flush the SVE registers but leave userspace access to
-	 * both SVE and SME enabled, otherwise disable SME for the
-	 * task and fall through to disabling SVE too.  This means
-	 * that after a syscall we never have any streaming mode
-	 * register state to track, if this changes the KVM code will
-	 * need updating.
-	 */
+	/* Ensure PSTATE.SM is clear, but leave PSTATE.ZA as-is. */
 	if (system_supports_sme())
 		sme_smstop_sm();

+	/*
+	 * The CPU is not in streaming mode. If non-streaming SVE is not
+	 * supported, there is no SVE state that needs to be discarded.
+	 */
 	if (!system_supports_sve())
 		return;

@@ -416,6 +412,33 @@ static inline void fpsimd_syscall_enter(void)
 		sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1;
 		sve_flush_live(true, sve_vq_minus_one);
 	}
+
+	/*
+	 * Any live non-FPSIMD SVE state has been zeroed. Allow
+	 * fpsimd_save_user_state() to lazily discard SVE state until either
+	 * the live state is unbound or fpsimd_syscall_exit() is called.
+	 */
+	__this_cpu_write(fpsimd_last_state.to_save, FP_STATE_FPSIMD);
+}
+
+static __always_inline void fpsimd_syscall_exit(void)
+{
+	if (!system_supports_sve())
+		return;
+
+	/*
+	 * The current task's user FPSIMD/SVE/SME state is now bound to this
+	 * CPU. The fpsimd_last_state.to_save value is either:
+	 *
+	 * - FP_STATE_FPSIMD, if the state has not been reloaded on this CPU
+	 *   since fpsimd_syscall_enter().
+	 *
+	 * - FP_STATE_CURRENT, if the state has been reloaded on this CPU at
+	 *   any point.
+	 *
+	 * Reset this to FP_STATE_CURRENT to stop lazy discarding.
+	 */
+	__this_cpu_write(fpsimd_last_state.to_save, FP_STATE_CURRENT);
 }

 UNHANDLED(el1t, 64, sync)
@@ -739,10 +762,11 @@ static void noinstr el0_svc(struct pt_regs *regs)
 {
 	enter_from_user_mode(regs);
 	cortex_a76_erratum_1463225_svc_handler();
-	fp_user_discard();
+	fpsimd_syscall_enter();
 	local_daif_restore(DAIF_PROCCTX);
 	do_el0_svc(regs);
 	exit_to_user_mode(regs);
+	fpsimd_syscall_exit();
 }

 static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
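Note: fpsimd_syscall_enter()/fpsimd_syscall_exit() bracket the whole syscall, turning the old unconditional fp_user_discard() into a lazy scheme keyed off fpsimd_last_state.to_save. A condensed, illustrative summary of the path (the real sequence is in el0_svc() above; this is not new code in the patch):

    /* Illustrative summary of the el0_svc() FPSIMD handling. */
    static void el0_svc_fpsimd_shape(struct pt_regs *regs)
    {
            fpsimd_syscall_enter();  /* SM exited, live SVE zeroed,
                                      * to_save = FP_STATE_FPSIMD   */
            do_el0_svc(regs);        /* a context switch in here saves
                                      * only FPSIMD state            */
            fpsimd_syscall_exit();   /* to_save = FP_STATE_CURRENT   */
    }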
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 8370d55f0353..c37f02d7194e 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -119,7 +119,7 @@
 * whatever is in the FPSIMD registers is not saved to memory, but discarded.
 */

-static DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
+DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);

 __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
 #ifdef CONFIG_ARM64_SVE
@@ -180,12 +180,12 @@ static inline void set_sve_default_vl(int val)
 	set_default_vl(ARM64_VEC_SVE, val);
 }

-static void __percpu *efi_sve_state;
+static u8 *efi_sve_state;

 #else /* ! CONFIG_ARM64_SVE */

 /* Dummy declaration for code that will be optimised out: */
-extern void __percpu *efi_sve_state;
+extern u8 *efi_sve_state;

 #endif /* ! CONFIG_ARM64_SVE */

@@ -359,20 +359,15 @@ static void task_fpsimd_load(void)
 	WARN_ON(preemptible());
 	WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));

-	if (system_supports_fpmr())
-		write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);
-
 	if (system_supports_sve() || system_supports_sme()) {
 		switch (current->thread.fp_type) {
 		case FP_STATE_FPSIMD:
 			/* Stop tracking SVE for this task until next use. */
-			if (test_and_clear_thread_flag(TIF_SVE))
-				sve_user_disable();
+			clear_thread_flag(TIF_SVE);
 			break;
 		case FP_STATE_SVE:
-			if (!thread_sm_enabled(&current->thread) &&
-			    !WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE)))
-				sve_user_enable();
+			if (!thread_sm_enabled(&current->thread))
+				WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE));

 			if (test_thread_flag(TIF_SVE))
 				sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
@@ -413,6 +408,9 @@ static void task_fpsimd_load(void)
 		restore_ffr = system_supports_fa64();
 	}

+	if (system_supports_fpmr())
+		write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);
+
 	if (restore_sve_regs) {
 		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
 		sve_load_state(sve_pffr(&current->thread),
@@ -453,12 +451,15 @@ static void fpsimd_save_user_state(void)
 		*(last->fpmr) = read_sysreg_s(SYS_FPMR);

 	/*
-	 * If a task is in a syscall the ABI allows us to only
-	 * preserve the state shared with FPSIMD so don't bother
-	 * saving the full SVE state in that case.
+	 * Save SVE state if it is live.
+	 *
+	 * The syscall ABI discards live SVE state at syscall entry. When
+	 * entering a syscall, fpsimd_syscall_enter() sets to_save to
+	 * FP_STATE_FPSIMD to allow the SVE state to be lazily discarded until
+	 * either new SVE state is loaded+bound or fpsimd_syscall_exit() is
+	 * called prior to a return to userspace.
 	 */
-	if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE) &&
-	     !in_syscall(current_pt_regs())) ||
+	if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE)) ||
 	    last->to_save == FP_STATE_SVE) {
 		save_sve_regs = true;
 		save_ffr = true;
@@ -651,7 +652,7 @@ static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst,
 * task->thread.uw.fpsimd_state must be up to date before calling this
 * function.
 */
-static void fpsimd_to_sve(struct task_struct *task)
+static inline void fpsimd_to_sve(struct task_struct *task)
 {
 	unsigned int vq;
 	void *sst = task->thread.sve_state;
@@ -675,7 +676,7 @@ static void fpsimd_to_sve(struct task_struct *task)
 * bytes of allocated kernel memory.
 * task->thread.sve_state must be up to date before calling this function.
 */
-static void sve_to_fpsimd(struct task_struct *task)
+static inline void sve_to_fpsimd(struct task_struct *task)
 {
 	unsigned int vq, vl;
 	void const *sst = task->thread.sve_state;
@@ -694,44 +695,39 @@ static void sve_to_fpsimd(struct task_struct *task)
 	}
 }

-void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
+static inline void __fpsimd_zero_vregs(struct user_fpsimd_state *fpsimd)
 {
-	write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK,
-		       SYS_SCTLR_EL1);
+	memset(&fpsimd->vregs, 0, sizeof(fpsimd->vregs));
 }

-#ifdef CONFIG_ARM64_SVE
 /*
- * Call __sve_free() directly only if you know task can't be scheduled
- * or preempted.
+ * Simulate the effects of an SMSTOP SM instruction.
 */
-static void __sve_free(struct task_struct *task)
+void task_smstop_sm(struct task_struct *task)
 {
-	kfree(task->thread.sve_state);
-	task->thread.sve_state = NULL;
-}
+	if (!thread_sm_enabled(&task->thread))
+		return;

-static void sve_free(struct task_struct *task)
-{
-	WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
+	__fpsimd_zero_vregs(&task->thread.uw.fpsimd_state);
+	task->thread.uw.fpsimd_state.fpsr = 0x0800009f;
+	if (system_supports_fpmr())
+		task->thread.uw.fpmr = 0;

-	__sve_free(task);
+	task->thread.svcr &= ~SVCR_SM_MASK;
+	task->thread.fp_type = FP_STATE_FPSIMD;
 }

-/*
- * Return how many bytes of memory are required to store the full SVE
- * state for task, given task's currently configured vector length.
- */
-size_t sve_state_size(struct task_struct const *task)
+void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
 {
-	unsigned int vl = 0;
-
-	if (system_supports_sve())
-		vl = task_get_sve_vl(task);
-	if (system_supports_sme())
-		vl = max(vl, task_get_sme_vl(task));
+	write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK,
+		       SYS_SCTLR_EL1);
+}

-	return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+#ifdef CONFIG_ARM64_SVE
+static void sve_free(struct task_struct *task)
+{
+	kfree(task->thread.sve_state);
+	task->thread.sve_state = NULL;
 }

 /*
@@ -758,69 +754,34 @@ void sve_alloc(struct task_struct *task, bool flush)
 			kzalloc(sve_state_size(task), GFP_KERNEL);
 }

-
-/*
- * Force the FPSIMD state shared with SVE to be updated in the SVE state
- * even if the SVE state is the current active state.
- *
- * This should only be called by ptrace.  task must be non-runnable.
- * task->thread.sve_state must point to at least sve_state_size(task)
- * bytes of allocated kernel memory.
- */
-void fpsimd_force_sync_to_sve(struct task_struct *task)
-{
-	fpsimd_to_sve(task);
-}
-
-/*
- * Ensure that task->thread.sve_state is up to date with respect to
- * the user task, irrespective of when SVE is in use or not.
- *
- * This should only be called by ptrace.  task must be non-runnable.
- * task->thread.sve_state must point to at least sve_state_size(task)
- * bytes of allocated kernel memory.
- */
-void fpsimd_sync_to_sve(struct task_struct *task)
-{
-	if (!test_tsk_thread_flag(task, TIF_SVE) &&
-	    !thread_sm_enabled(&task->thread))
-		fpsimd_to_sve(task);
-}
-
 /*
- * Ensure that task->thread.uw.fpsimd_state is up to date with respect to
- * the user task, irrespective of whether SVE is in use or not.
+ * Ensure that task->thread.uw.fpsimd_state is up to date with respect to the
+ * task's currently effective FPSIMD/SVE state.
 *
- * This should only be called by ptrace.  task must be non-runnable.
- * task->thread.sve_state must point to at least sve_state_size(task)
- * bytes of allocated kernel memory.
+ * The task's FPSIMD/SVE/SME state must not be subject to concurrent
+ * manipulation.
 */
-void sve_sync_to_fpsimd(struct task_struct *task)
+void fpsimd_sync_from_effective_state(struct task_struct *task)
 {
 	if (task->thread.fp_type == FP_STATE_SVE)
 		sve_to_fpsimd(task);
 }

 /*
- * Ensure that task->thread.sve_state is up to date with respect to
- * the task->thread.uw.fpsimd_state.
+ * Ensure that the task's currently effective FPSIMD/SVE state is up to date
+ * with respect to task->thread.uw.fpsimd_state, zeroing any effective
+ * non-FPSIMD (S)SVE state.
 *
- * This should only be called by ptrace to merge new FPSIMD register
- * values into a task for which SVE is currently active.
- * task must be non-runnable.
- * task->thread.sve_state must point to at least sve_state_size(task)
- * bytes of allocated kernel memory.
- * task->thread.uw.fpsimd_state must already have been initialised with
- * the new FPSIMD register values to be merged in.
+ * The task's FPSIMD/SVE/SME state must not be subject to concurrent
+ * manipulation.
 */
-void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
+void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task)
 {
 	unsigned int vq;
 	void *sst = task->thread.sve_state;
 	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;

-	if (!test_tsk_thread_flag(task, TIF_SVE) &&
-	    !thread_sm_enabled(&task->thread))
+	if (task->thread.fp_type != FP_STATE_SVE)
 		return;

 	vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
@@ -829,10 +790,73 @@ void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task)
 	__fpsimd_to_sve(sst, fst, vq);
 }

+static int change_live_vector_length(struct task_struct *task,
+				     enum vec_type type,
+				     unsigned long vl)
+{
+	unsigned int sve_vl = task_get_sve_vl(task);
+	unsigned int sme_vl = task_get_sme_vl(task);
+	void *sve_state = NULL, *sme_state = NULL;
+
+	if (type == ARM64_VEC_SME)
+		sme_vl = vl;
+	else
+		sve_vl = vl;
+
+	/*
+	 * Allocate the new sve_state and sme_state before freeing the old
+	 * copies so that allocation failure can be handled without needing to
+	 * mutate the task's state in any way.
+	 *
+	 * Changes to the SVE vector length must not discard live ZA state or
+	 * clear PSTATE.ZA, as userspace code which is unaware of the AAPCS64
+	 * ZA lazy saving scheme may attempt to change the SVE vector length
+	 * while unsaved/dormant ZA state exists.
+	 */
+	sve_state = kzalloc(__sve_state_size(sve_vl, sme_vl), GFP_KERNEL);
+	if (!sve_state)
+		goto out_mem;
+
+	if (type == ARM64_VEC_SME) {
+		sme_state = kzalloc(__sme_state_size(sme_vl), GFP_KERNEL);
+		if (!sme_state)
+			goto out_mem;
+	}
+
+	if (task == current)
+		fpsimd_save_and_flush_current_state();
+	else
+		fpsimd_flush_task_state(task);
+
+	/*
+	 * Always preserve PSTATE.SM and the effective FPSIMD state, zeroing
+	 * other SVE state.
+	 */
+	fpsimd_sync_from_effective_state(task);
+	task_set_vl(task, type, vl);
+	kfree(task->thread.sve_state);
+	task->thread.sve_state = sve_state;
+	fpsimd_sync_to_effective_state_zeropad(task);
+
+	if (type == ARM64_VEC_SME) {
+		task->thread.svcr &= ~SVCR_ZA_MASK;
+		kfree(task->thread.sme_state);
+		task->thread.sme_state = sme_state;
+	}
+
+	return 0;
+
+out_mem:
+	kfree(sve_state);
+	kfree(sme_state);
+	return -ENOMEM;
+}
+
 int vec_set_vector_length(struct task_struct *task, enum vec_type type,
 			  unsigned long vl, unsigned long flags)
 {
-	bool free_sme = false;
+	bool onexec = flags & PR_SVE_SET_VL_ONEXEC;
+	bool inherit = flags & PR_SVE_VL_INHERIT;

 	if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
 				     PR_SVE_SET_VL_ONEXEC))
@@ -852,71 +876,17 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,

 	vl = find_supported_vector_length(type, vl);

-	if (flags & (PR_SVE_VL_INHERIT |
-		     PR_SVE_SET_VL_ONEXEC))
+	if (!onexec && vl != task_get_vl(task, type)) {
+		if (change_live_vector_length(task, type, vl))
+			return -ENOMEM;
+	}
+
+	if (onexec || inherit)
 		task_set_vl_onexec(task, type, vl);
 	else
 		/* Reset VL to system default on next exec: */
 		task_set_vl_onexec(task, type, 0);

-	/* Only actually set the VL if not deferred: */
-	if (flags & PR_SVE_SET_VL_ONEXEC)
-		goto out;
-
-	if (vl == task_get_vl(task, type))
-		goto out;
-
-	/*
-	 * To ensure the FPSIMD bits of the SVE vector registers are preserved,
-	 * write any live register state back to task_struct, and convert to a
-	 * regular FPSIMD thread.
-	 */
-	if (task == current) {
-		get_cpu_fpsimd_context();
-
-		fpsimd_save_user_state();
-	}
-
-	fpsimd_flush_task_state(task);
-	if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
-	    thread_sm_enabled(&task->thread)) {
-		sve_to_fpsimd(task);
-		task->thread.fp_type = FP_STATE_FPSIMD;
-	}
-
-	if (system_supports_sme()) {
-		if (type == ARM64_VEC_SME ||
-		    !(task->thread.svcr & (SVCR_SM_MASK | SVCR_ZA_MASK))) {
-			/*
-			 * We are changing the SME VL or weren't using
-			 * SME anyway, discard the state and force a
-			 * reallocation.
-			 */
-			task->thread.svcr &= ~(SVCR_SM_MASK |
-					       SVCR_ZA_MASK);
-			clear_tsk_thread_flag(task, TIF_SME);
-			free_sme = true;
-		}
-	}
-
-	if (task == current)
-		put_cpu_fpsimd_context();
-
-	task_set_vl(task, type, vl);
-
-	/*
-	 * Free the changed states if they are not in use, SME will be
-	 * reallocated to the correct size on next use and we just
-	 * allocate SVE now in case it is needed for use in streaming
-	 * mode.
-	 */
-	sve_free(task);
-	sve_alloc(task, true);
-
-	if (free_sme)
-		sme_free(task);
-
-out:
 	update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
 			       flags & PR_SVE_VL_INHERIT);
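Note: vec_set_vector_length() is reached from the PR_SVE_SET_VL/PR_SME_SET_VL prctls; after this rewrite a live change funnels through change_live_vector_length(), which preserves the effective FPSIMD state and never discards dormant ZA state. From userspace the call is unchanged (standard prctl usage, nothing new in the patch; the kernel rounds the request down to the nearest supported VL):

    #include <stdio.h>
    #include <sys/prctl.h>

    int main(void)
    {
            /* Ask for a 256-bit (32-byte) SVE VL, inherited across exec. */
            int ret = prctl(PR_SVE_SET_VL, 32 | PR_SVE_VL_INHERIT);

            if (ret < 0)
                    perror("PR_SVE_SET_VL");
            else
                    printf("VL is now %d bytes\n", ret & PR_SVE_VL_LEN_MASK);
            return 0;
    }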
- */ void fpsimd_update_current_state(struct user_fpsimd_state const *state) { if (WARN_ON(!system_supports_fpsimd())) return; - get_cpu_fpsimd_context(); - current->thread.uw.fpsimd_state = *state; - if (test_thread_flag(TIF_SVE)) + if (current->thread.fp_type == FP_STATE_SVE) fpsimd_to_sve(current); - - task_fpsimd_load(); - fpsimd_bind_task_to_cpu(); - - clear_thread_flag(TIF_FOREIGN_FPSTATE); - - put_cpu_fpsimd_context(); } /* @@ -1839,6 +1786,17 @@ void fpsimd_flush_task_state(struct task_struct *t) barrier(); } +void fpsimd_save_and_flush_current_state(void) +{ + if (!system_supports_fpsimd()) + return; + + get_cpu_fpsimd_context(); + fpsimd_save_user_state(); + fpsimd_flush_task_state(current); + put_cpu_fpsimd_context(); +} + /* * Save the FPSIMD state to memory and invalidate cpu view. * This function must be called with preemption disabled. @@ -1948,10 +1906,10 @@ EXPORT_SYMBOL_GPL(kernel_neon_end); #ifdef CONFIG_EFI -static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state); -static DEFINE_PER_CPU(bool, efi_fpsimd_state_used); -static DEFINE_PER_CPU(bool, efi_sve_state_used); -static DEFINE_PER_CPU(bool, efi_sm_state); +static struct user_fpsimd_state efi_fpsimd_state; +static bool efi_fpsimd_state_used; +static bool efi_sve_state_used; +static bool efi_sm_state; /* * EFI runtime services support functions @@ -1984,18 +1942,16 @@ void __efi_fpsimd_begin(void) * If !efi_sve_state, SVE can't be in use yet and doesn't need * preserving: */ - if (system_supports_sve() && likely(efi_sve_state)) { - char *sve_state = this_cpu_ptr(efi_sve_state); + if (system_supports_sve() && efi_sve_state != NULL) { bool ffr = true; u64 svcr; - __this_cpu_write(efi_sve_state_used, true); + efi_sve_state_used = true; if (system_supports_sme()) { svcr = read_sysreg_s(SYS_SVCR); - __this_cpu_write(efi_sm_state, - svcr & SVCR_SM_MASK); + efi_sm_state = svcr & SVCR_SM_MASK; /* * Unless we have FA64 FFR does not @@ -2005,19 +1961,18 @@ void __efi_fpsimd_begin(void) ffr = !(svcr & SVCR_SM_MASK); } - sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()), - &this_cpu_ptr(&efi_fpsimd_state)->fpsr, - ffr); + sve_save_state(efi_sve_state + sve_ffr_offset(sve_max_vl()), + &efi_fpsimd_state.fpsr, ffr); if (system_supports_sme()) sysreg_clear_set_s(SYS_SVCR, SVCR_SM_MASK, 0); } else { - fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state)); + fpsimd_save_state(&efi_fpsimd_state); } - __this_cpu_write(efi_fpsimd_state_used, true); + efi_fpsimd_state_used = true; } } @@ -2029,12 +1984,10 @@ void __efi_fpsimd_end(void) if (!system_supports_fpsimd()) return; - if (!__this_cpu_xchg(efi_fpsimd_state_used, false)) { + if (!efi_fpsimd_state_used) { kernel_neon_end(); } else { - if (system_supports_sve() && - likely(__this_cpu_read(efi_sve_state_used))) { - char const *sve_state = this_cpu_ptr(efi_sve_state); + if (system_supports_sve() && efi_sve_state_used) { bool ffr = true; /* @@ -2043,7 +1996,7 @@ void __efi_fpsimd_end(void) * streaming mode. 
*/ if (system_supports_sme()) { - if (__this_cpu_read(efi_sm_state)) { + if (efi_sm_state) { sysreg_clear_set_s(SYS_SVCR, 0, SVCR_SM_MASK); @@ -2057,14 +2010,15 @@ void __efi_fpsimd_end(void) } } - sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()), - &this_cpu_ptr(&efi_fpsimd_state)->fpsr, - ffr); + sve_load_state(efi_sve_state + sve_ffr_offset(sve_max_vl()), + &efi_fpsimd_state.fpsr, ffr); - __this_cpu_write(efi_sve_state_used, false); + efi_sve_state_used = false; } else { - fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state)); + fpsimd_load_state(&efi_fpsimd_state); } + + efi_fpsimd_state_used = false; } } diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2ce73525de2c..ca04b338cb0d 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -89,7 +89,7 @@ SYM_CODE_START(primary_entry) adrp x1, early_init_stack mov sp, x1 mov x29, xzr - adrp x0, init_idmap_pg_dir + adrp x0, __pi_init_idmap_pg_dir mov x1, xzr bl __pi_create_init_idmap @@ -101,7 +101,7 @@ SYM_CODE_START(primary_entry) cbnz x19, 0f dmb sy mov x1, x0 // end of used region - adrp x0, init_idmap_pg_dir + adrp x0, __pi_init_idmap_pg_dir adr_l x2, dcache_inval_poc blr x2 b 1f @@ -507,7 +507,7 @@ SYM_FUNC_END(__no_granule_support) SYM_FUNC_START_LOCAL(__primary_switch) adrp x1, reserved_pg_dir - adrp x2, init_idmap_pg_dir + adrp x2, __pi_init_idmap_pg_dir bl __enable_mmu adrp x1, early_init_stack diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 2004b4f41ade..2bc390d94331 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -10,6 +10,12 @@ #error This file should only be included in vmlinux.lds.S #endif +#define PI_EXPORT_SYM(sym) \ + __PI_EXPORT_SYM(sym, __pi_ ## sym, Cannot export BSS symbol sym to startup code) +#define __PI_EXPORT_SYM(sym, pisym, msg)\ + PROVIDE(pisym = sym); \ + ASSERT((sym - KIMAGE_VADDR) < (__bss_start - KIMAGE_VADDR), #msg) + PROVIDE(__efistub_primary_entry = primary_entry); /* @@ -36,37 +42,30 @@ PROVIDE(__pi___memcpy = __pi_memcpy); PROVIDE(__pi___memmove = __pi_memmove); PROVIDE(__pi___memset = __pi_memset); -PROVIDE(__pi_id_aa64isar1_override = id_aa64isar1_override); -PROVIDE(__pi_id_aa64isar2_override = id_aa64isar2_override); -PROVIDE(__pi_id_aa64mmfr0_override = id_aa64mmfr0_override); -PROVIDE(__pi_id_aa64mmfr1_override = id_aa64mmfr1_override); -PROVIDE(__pi_id_aa64mmfr2_override = id_aa64mmfr2_override); -PROVIDE(__pi_id_aa64pfr0_override = id_aa64pfr0_override); -PROVIDE(__pi_id_aa64pfr1_override = id_aa64pfr1_override); -PROVIDE(__pi_id_aa64smfr0_override = id_aa64smfr0_override); -PROVIDE(__pi_id_aa64zfr0_override = id_aa64zfr0_override); -PROVIDE(__pi_arm64_sw_feature_override = arm64_sw_feature_override); -PROVIDE(__pi_arm64_use_ng_mappings = arm64_use_ng_mappings); -PROVIDE(__pi__ctype = _ctype); -PROVIDE(__pi_memstart_offset_seed = memstart_offset_seed); - -PROVIDE(__pi_init_idmap_pg_dir = init_idmap_pg_dir); -PROVIDE(__pi_init_idmap_pg_end = init_idmap_pg_end); -PROVIDE(__pi_init_pg_dir = init_pg_dir); -PROVIDE(__pi_init_pg_end = init_pg_end); -PROVIDE(__pi_swapper_pg_dir = swapper_pg_dir); - -PROVIDE(__pi__text = _text); -PROVIDE(__pi__stext = _stext); -PROVIDE(__pi__etext = _etext); -PROVIDE(__pi___start_rodata = __start_rodata); -PROVIDE(__pi___inittext_begin = __inittext_begin); -PROVIDE(__pi___inittext_end = __inittext_end); -PROVIDE(__pi___initdata_begin = __initdata_begin); -PROVIDE(__pi___initdata_end = __initdata_end); -PROVIDE(__pi__data = _data); -PROVIDE(__pi___bss_start = 
__bss_start); -PROVIDE(__pi__end = _end); +PI_EXPORT_SYM(id_aa64isar1_override); +PI_EXPORT_SYM(id_aa64isar2_override); +PI_EXPORT_SYM(id_aa64mmfr0_override); +PI_EXPORT_SYM(id_aa64mmfr1_override); +PI_EXPORT_SYM(id_aa64mmfr2_override); +PI_EXPORT_SYM(id_aa64pfr0_override); +PI_EXPORT_SYM(id_aa64pfr1_override); +PI_EXPORT_SYM(id_aa64smfr0_override); +PI_EXPORT_SYM(id_aa64zfr0_override); +PI_EXPORT_SYM(arm64_sw_feature_override); +PI_EXPORT_SYM(arm64_use_ng_mappings); +PI_EXPORT_SYM(_ctype); + +PI_EXPORT_SYM(swapper_pg_dir); + +PI_EXPORT_SYM(_text); +PI_EXPORT_SYM(_stext); +PI_EXPORT_SYM(_etext); +PI_EXPORT_SYM(__start_rodata); +PI_EXPORT_SYM(__inittext_begin); +PI_EXPORT_SYM(__inittext_end); +PI_EXPORT_SYM(__initdata_begin); +PI_EXPORT_SYM(__initdata_end); +PI_EXPORT_SYM(_data); #ifdef CONFIG_KVM diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 1da3e25f9d9e..c9503ed45a6c 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -10,8 +10,6 @@ #include <asm/cpufeature.h> #include <asm/memory.h> -u16 __initdata memstart_offset_seed; - bool __ro_after_init __kaslr_is_enabled = false; void __init kaslr_init(void) diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c index 0257b43819db..e0e018046a46 100644 --- a/arch/arm64/kernel/pi/kaslr_early.c +++ b/arch/arm64/kernel/pi/kaslr_early.c @@ -18,8 +18,6 @@ #include "pi.h" -extern u16 memstart_offset_seed; - static u64 __init get_kaslr_seed(void *fdt, int node) { static char const seed_str[] __initconst = "kaslr-seed"; @@ -53,8 +51,6 @@ u64 __init kaslr_early_init(void *fdt, int chosen) return 0; } - memstart_offset_seed = seed & U16_MAX; - /* * OK, so we are proceeding with KASLR enabled. Calculate a suitable * kernel image offset from the seed. Let's place the kernel in the diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h index c91e5e965cd3..1f4731a4e17e 100644 --- a/arch/arm64/kernel/pi/pi.h +++ b/arch/arm64/kernel/pi/pi.h @@ -22,6 +22,7 @@ static inline void *prel64_to_pointer(const prel64_t *offset) extern bool dynamic_scs_is_enabled; extern pgd_t init_idmap_pg_dir[], init_idmap_pg_end[]; +extern pgd_t init_pg_dir[], init_pg_end[]; void init_feature_override(u64 boot_status, const void *fdt, int chosen); u64 kaslr_early_init(void *fdt, int chosen); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 42faebb7b712..a5ca15daeb8a 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -344,50 +344,34 @@ void arch_release_task_struct(struct task_struct *tsk) int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - if (current->mm) - fpsimd_preserve_current_state(); + /* + * The current/src task's FPSIMD state may or may not be live, and may + * have been altered by ptrace after entry to the kernel. Save the + * effective FPSIMD state so that this will be copied into dst. + */ + fpsimd_save_and_flush_current_state(); + fpsimd_sync_from_effective_state(src); + *dst = *src; /* - * Detach src's sve_state (if any) from dst so that it does not - * get erroneously used or freed prematurely. dst's copies - * will be allocated on demand later on if dst uses SVE. - * For consistency, also clear TIF_SVE here: this could be done - * later in copy_process(), but to avoid tripping up future - * maintainers it is best not to leave TIF flags and buffers in - * an inconsistent state, even temporarily. + * Drop stale reference to src's sve_state and convert dst to + * non-streaming FPSIMD mode. 
*/ + dst->thread.fp_type = FP_STATE_FPSIMD; dst->thread.sve_state = NULL; clear_tsk_thread_flag(dst, TIF_SVE); + task_smstop_sm(dst); /* - * In the unlikely event that we create a new thread with ZA - * enabled we should retain the ZA and ZT state so duplicate - * it here. This may be shortly freed if we exec() or if - * CLONE_SETTLS but it's simpler to do it here. To avoid - * confusing the rest of the code ensure that we have a - * sve_state allocated whenever sme_state is allocated. + * Drop stale reference to src's sme_state and ensure dst has ZA + * disabled. + * + * When necessary, ZA will be inherited later in copy_thread_za(). */ - if (thread_za_enabled(&src->thread)) { - dst->thread.sve_state = kzalloc(sve_state_size(src), - GFP_KERNEL); - if (!dst->thread.sve_state) - return -ENOMEM; - - dst->thread.sme_state = kmemdup(src->thread.sme_state, - sme_state_size(src), - GFP_KERNEL); - if (!dst->thread.sme_state) { - kfree(dst->thread.sve_state); - dst->thread.sve_state = NULL; - return -ENOMEM; - } - } else { - dst->thread.sme_state = NULL; - clear_tsk_thread_flag(dst, TIF_SME); - } - - dst->thread.fp_type = FP_STATE_FPSIMD; + dst->thread.sme_state = NULL; + clear_tsk_thread_flag(dst, TIF_SME); + dst->thread.svcr &= ~SVCR_ZA_MASK; /* clear any pending asynchronous tag fault raised by the parent */ clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT); @@ -395,6 +379,31 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } +static int copy_thread_za(struct task_struct *dst, struct task_struct *src) +{ + if (!thread_za_enabled(&src->thread)) + return 0; + + dst->thread.sve_state = kzalloc(sve_state_size(src), + GFP_KERNEL); + if (!dst->thread.sve_state) + return -ENOMEM; + + dst->thread.sme_state = kmemdup(src->thread.sme_state, + sme_state_size(src), + GFP_KERNEL); + if (!dst->thread.sme_state) { + kfree(dst->thread.sve_state); + dst->thread.sve_state = NULL; + return -ENOMEM; + } + + set_tsk_thread_flag(dst, TIF_SME); + dst->thread.svcr |= SVCR_ZA_MASK; + + return 0; +} + asmlinkage void ret_from_fork(void) asm("ret_from_fork"); int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { @@ -427,8 +436,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) * out-of-sync with the saved value. */ *task_user_tls(p) = read_sysreg(tpidr_el0); - if (system_supports_tpidr2()) - p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0); if (system_supports_poe()) p->thread.por_el0 = read_sysreg_s(SYS_POR_EL0); @@ -441,13 +448,39 @@ } /* + * Due to the AAPCS64 "ZA lazy saving scheme", PSTATE.ZA and + * TPIDR2 need to be manipulated as a pair, and either both + * need to be inherited or both need to be reset. + * + * Within a process, child threads must not inherit their + * parent's TPIDR2 value or they may clobber their parent's + * stack at some later point. + * + * When a process is fork()'d, the child must inherit ZA and + * TPIDR2 from its parent in case there was dormant ZA state. + * + * Use CLONE_VM to determine when the child will share the + * address space with the parent and therefore cannot safely + * inherit the state.
+ */ + if (system_supports_sme()) { + if (!(clone_flags & CLONE_VM)) { + p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0); + ret = copy_thread_za(p, current); + if (ret) + return ret; + } else { + p->thread.tpidr2_el0 = 0; + WARN_ON_ONCE(p->thread.svcr & SVCR_ZA_MASK); + } + } + + /* * If a TLS pointer was passed to clone, use it for the new - * thread.  We also reset TPIDR2 if it's in use. + * thread. */ - if (clone_flags & CLONE_SETTLS) { + if (clone_flags & CLONE_SETTLS) p->thread.uw.tp_value = tls; - p->thread.tpidr2_el0 = 0; - } ret = copy_thread_gcs(p, args); if (ret != 0) @@ -680,10 +713,11 @@ struct task_struct *__switch_to(struct task_struct *prev, gcs_thread_switch(next); /* - * Complete any pending TLB or cache maintenance on this CPU in case - * the thread migrates to a different CPU. - * This full barrier is also required by the membarrier system - * call. + * Complete any pending TLB or cache maintenance on this CPU in case the + * thread migrates to a different CPU. This full barrier is also + * required by the membarrier system call. Additionally it makes any + * in-progress pgtable writes visible to the table walker; see + * emit_pte_barriers(). */ dsb(ish);
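(Illustrative aside, not part of the patch.) The CLONE_VM split above is observable from userspace: after fork() the child sees its parent's TPIDR2 value, while a new thread in the same address space starts with TPIDR2 reset to zero. A minimal sketch, assuming an SME-capable system (HWCAP2_SME set in AT_HWCAP2) on which the kernel permits EL0 access to TPIDR2_EL0; the expected outputs follow the inheritance rules introduced by this change:

/* Userspace sketch: TPIDR2 across fork() vs. pthread_create().
 * Build with: gcc -o tpidr2-demo tpidr2-demo.c -lpthread
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/auxv.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef HWCAP2_SME
#define HWCAP2_SME	(1 << 23)
#endif

static uint64_t read_tpidr2(void)
{
	uint64_t v;

	/* TPIDR2_EL0 is EL0-accessible when SME is implemented */
	asm volatile("mrs %0, S3_3_C13_C0_5" : "=r"(v));
	return v;
}

static void write_tpidr2(uint64_t v)
{
	asm volatile("msr S3_3_C13_C0_5, %0" :: "r"(v));
}

static void *thread_fn(void *arg)
{
	/* Expected 0: CLONE_VM children must not inherit TPIDR2 */
	printf("thread TPIDR2 = %#lx\n", (unsigned long)read_tpidr2());
	return NULL;
}

int main(void)
{
	pthread_t t;
	pid_t pid;

	if (!(getauxval(AT_HWCAP2) & HWCAP2_SME)) {
		puts("SME not supported on this system");
		return 0;
	}

	write_tpidr2(0x1234);

	pid = fork();
	if (pid == 0) {
		/* Expected 0x1234: fork() inherits TPIDR2 (and any ZA state) */
		printf("child TPIDR2 = %#lx\n", (unsigned long)read_tpidr2());
		_exit(0);
	}
	waitpid(pid, NULL, 0);

	pthread_create(&t, NULL, thread_fn, NULL);
	pthread_join(t, NULL);
	return 0;
}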
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index f79b0d5f71ac..a360e52db02f 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -594,7 +594,7 @@ static int __fpr_get(struct task_struct *target, { struct user_fpsimd_state *uregs; - sve_sync_to_fpsimd(target); + fpsimd_sync_from_effective_state(target); uregs = &target->thread.uw.fpsimd_state; @@ -626,7 +626,7 @@ static int __fpr_set(struct task_struct *target, * Ensure target->thread.uw.fpsimd_state is up to date, so that a * short copyin can't resurrect stale data. */ - sve_sync_to_fpsimd(target); + fpsimd_sync_from_effective_state(target); newstate = target->thread.uw.fpsimd_state; @@ -653,7 +653,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, if (ret) return ret; - sve_sync_from_fpsimd_zeropad(target); + fpsimd_sync_to_effective_state_zeropad(target); fpsimd_flush_task_state(target); return ret; @@ -775,6 +775,11 @@ static void sve_init_header_from_task(struct user_sve_header *header, task_type = ARM64_VEC_SVE; active = (task_type == type); + if (active && target->thread.fp_type == FP_STATE_SVE) + header->flags = SVE_PT_REGS_SVE; + else + header->flags = SVE_PT_REGS_FPSIMD; + switch (type) { case ARM64_VEC_SVE: if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) @@ -789,19 +794,14 @@ static void sve_init_header_from_task(struct user_sve_header *header, return; } - if (active) { - if (target->thread.fp_type == FP_STATE_FPSIMD) { - header->flags |= SVE_PT_REGS_FPSIMD; - } else { - header->flags |= SVE_PT_REGS_SVE; - } - } - header->vl = task_get_vl(target, type); vq = sve_vq_from_vl(header->vl); header->max_vl = vec_max_vl(type); - header->size = SVE_PT_SIZE(vq, header->flags); + if (active) + header->size = SVE_PT_SIZE(vq, header->flags); + else + header->size = sizeof(*header); header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl), SVE_PT_REGS_SVE); } @@ -820,18 +820,25 @@ static int sve_get_common(struct task_struct *target, unsigned int vq; unsigned long start, end; + if (target == current) + fpsimd_preserve_current_state(); + /* Header */ sve_init_header_from_task(&header, target, type); vq = sve_vq_from_vl(header.vl); membuf_write(&to, &header, sizeof(header)); - if (target == current) - fpsimd_preserve_current_state(); - BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); + /* + * When the requested vector type is not active, do not present data + * from the other mode to userspace. + */ + if (header.size == sizeof(header)) + return 0; + switch ((header.flags & SVE_PT_REGS_MASK)) { case SVE_PT_REGS_FPSIMD: return __fpr_get(target, regset, to); @@ -859,7 +866,7 @@ static int sve_get_common(struct task_struct *target, return membuf_zero(&to, end - start); default: - return 0; + BUILD_BUG(); } } @@ -883,6 +890,9 @@ static int sve_set_common(struct task_struct *target, struct user_sve_header header; unsigned int vq; unsigned long start, end; + bool fpsimd; + + fpsimd_flush_task_state(target); /* Header */ if (count < sizeof(header)) @@ -890,7 +900,16 @@ static int sve_set_common(struct task_struct *target, ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header, 0, sizeof(header)); if (ret) - goto out; + return ret; + + /* + * Streaming SVE data is always stored and presented in SVE format. + * Require the user to provide SVE formatted data for consistency, and + * to avoid the risk that we configure the task into an invalid state. + */ + fpsimd = (header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD; + if (fpsimd && type == ARM64_VEC_SME) + return -EINVAL; /* * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by @@ -899,7 +918,21 @@ static int sve_set_common(struct task_struct *target, ret = vec_set_vector_length(target, type, header.vl, ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16); if (ret) - goto out; + return ret; + + /* Allocate SME storage if necessary, preserving any existing ZA/ZT state */ + if (type == ARM64_VEC_SME) { + sme_alloc(target, false); + if (!target->thread.sme_state) + return -ENOMEM; + } + + /* Allocate SVE storage if necessary, zeroing any existing SVE state */ + if (!fpsimd) { + sve_alloc(target, true); + if (!target->thread.sve_state) + return -ENOMEM; + } /* * Actual VL set may be different from what the user asked @@ -910,81 +943,47 @@ static int sve_set_common(struct task_struct *target, /* Enter/exit streaming mode */ if (system_supports_sme()) { - u64 old_svcr = target->thread.svcr; - switch (type) { case ARM64_VEC_SVE: target->thread.svcr &= ~SVCR_SM_MASK; + set_tsk_thread_flag(target, TIF_SVE); break; case ARM64_VEC_SME: target->thread.svcr |= SVCR_SM_MASK; - - /* - * Disable traps and ensure there is SME storage but - * preserve any currently set values in ZA/ZT. - */ - sme_alloc(target, false); set_tsk_thread_flag(target, TIF_SME); break; default: WARN_ON_ONCE(1); - ret = -EINVAL; - goto out; + return -EINVAL; } - - /* - * If we switched then invalidate any existing SVE - * state and ensure there's storage. - */ - if (target->thread.svcr != old_svcr) - sve_alloc(target, true); } + /* Always zero V regs, FPSR, and FPCR */ + memset(&target->thread.uw.fpsimd_state, 0, + sizeof(target->thread.uw.fpsimd_state)); + /* Registers: FPSIMD-only case */ BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); - if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) { - ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, - SVE_PT_FPSIMD_OFFSET); + if (fpsimd) { clear_tsk_thread_flag(target, TIF_SVE); target->thread.fp_type = FP_STATE_FPSIMD; - goto out; + ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, + SVE_PT_FPSIMD_OFFSET); + return ret; } - /* - * Otherwise: no registers or full SVE case. For backwards - * compatibility reasons we treat empty flags as SVE registers. - */ + /* Otherwise: no registers or full SVE case. */ + + target->thread.fp_type = FP_STATE_SVE; /* * If setting a different VL from the requested VL and there is * register data, the data layout will be wrong: don't even * try to set the registers in this case. */ - if (count && vq != sve_vq_from_vl(header.vl)) { - ret = -EIO; - goto out; - } - - sve_alloc(target, true); - if (!target->thread.sve_state) { - ret = -ENOMEM; - clear_tsk_thread_flag(target, TIF_SVE); - target->thread.fp_type = FP_STATE_FPSIMD; - goto out; - } - - /* - * Ensure target->thread.sve_state is up to date with target's - * FPSIMD regs, so that a short copyin leaves trailing - * registers unmodified. Only enable SVE if we are - * configuring normal SVE, a system with streaming SVE may not - * have normal SVE. - */ - fpsimd_sync_to_sve(target); - if (type == ARM64_VEC_SVE) - set_tsk_thread_flag(target, TIF_SVE); - target->thread.fp_type = FP_STATE_SVE; + if (count && vq != sve_vq_from_vl(header.vl)) + return -EIO; BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); start = SVE_PT_SVE_OFFSET; @@ -993,7 +992,7 @@ static int sve_set_common(struct task_struct *target, target->thread.sve_state, start, end); if (ret) - goto out; + return ret; start = end; end = SVE_PT_SVE_FPSR_OFFSET(vq); @@ -1009,8 +1008,6 @@ &target->thread.uw.fpsimd_state.fpsr, start, end); -out: - fpsimd_flush_task_state(target); return ret; }
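(Illustrative aside, not part of the patch.) The header-only behaviour added to sve_init_header_from_task() and sve_get_common() is visible through PTRACE_GETREGSET. A minimal tracer sketch, assuming a kernel with this change applied and a tracee pid given on the command line; NT_ARM_SVE, struct user_sve_header, and the SVE_PT_* flags come from the standard uapi headers:

/* Tracer sketch: dump the NT_ARM_SVE header of a target process. */
#include <elf.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <asm/ptrace.h>		/* struct user_sve_header, SVE_PT_* */

int main(int argc, char **argv)
{
	struct user_sve_header hdr;
	struct iovec iov = { .iov_base = &hdr, .iov_len = sizeof(hdr) };
	pid_t pid;

	if (argc != 2)
		return 1;
	pid = atoi(argv[1]);

	if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) != 0)
		return 1;
	waitpid(pid, NULL, 0);

	if (ptrace(PTRACE_GETREGSET, pid, (void *)NT_ARM_SVE, &iov) == 0) {
		printf("vl=%u max_vl=%u flags=%#x size=%u\n",
		       hdr.vl, hdr.max_vl, hdr.flags, hdr.size);

		/* Per the change above, size == sizeof(hdr) means SVE is not
		 * the active mode and no register payload follows the header.
		 */
		if (hdr.size == sizeof(hdr))
			puts("inactive mode: header only, no register payload");
		else if ((hdr.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE)
			puts("active: SVE-format payload follows");
		else
			puts("active: FPSIMD-format payload follows");
	}

	ptrace(PTRACE_DETACH, pid, NULL, NULL);
	return 0;
}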
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 85104587f849..77c7926a4df6 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -169,7 +169,7 @@ static void __init smp_build_mpidr_hash(void) static void __init setup_machine_fdt(phys_addr_t dt_phys) { - int size; + int size = 0; void *dt_virt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); const char *name; @@ -182,10 +182,10 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) */ if (!early_init_dt_scan(dt_virt, dt_phys)) { pr_crit("\n" - "Error: invalid device tree blob at physical address %pa (virtual address 0x%px)\n" - "The dtb must be 8-byte aligned and must not exceed 2 MB in size\n" - "\nPlease check your bootloader.", - &dt_phys, dt_virt); + "Error: invalid device tree blob: PA=%pa, VA=%px, size=%d bytes\n" + "The dtb must be 8-byte aligned and must not exceed 2 MB in size.\n" + "\nPlease check your bootloader.\n", + &dt_phys, dt_virt, size); /* * Note that in this _really_ early stage we cannot even BUG() diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index a7c37afb4ebe..417140cd399b 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -250,6 +250,8 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) &current->thread.uw.fpsimd_state; int err; + fpsimd_sync_from_effective_state(current); + /* copy the FP and status/control registers */ err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs)); __put_user_error(fpsimd->fpsr, &ctx->fpsr, err); @@ -262,37 +264,46 @@ return err ? 
-EFAULT : 0; } -static int restore_fpsimd_context(struct user_ctxs *user) +static int read_fpsimd_context(struct user_fpsimd_state *fpsimd, + struct user_ctxs *user) { - struct user_fpsimd_state fpsimd; - int err = 0; + int err; /* check the size information */ if (user->fpsimd_size != sizeof(struct fpsimd_context)) return -EINVAL; /* copy the FP and status/control registers */ - err = __copy_from_user(fpsimd.vregs, &(user->fpsimd->vregs), - sizeof(fpsimd.vregs)); - __get_user_error(fpsimd.fpsr, &(user->fpsimd->fpsr), err); - __get_user_error(fpsimd.fpcr, &(user->fpsimd->fpcr), err); + err = __copy_from_user(fpsimd->vregs, &(user->fpsimd->vregs), + sizeof(fpsimd->vregs)); + __get_user_error(fpsimd->fpsr, &(user->fpsimd->fpsr), err); + __get_user_error(fpsimd->fpcr, &(user->fpsimd->fpcr), err); + + return err ? -EFAULT : 0; +} + +static int restore_fpsimd_context(struct user_ctxs *user) +{ + struct user_fpsimd_state fpsimd; + int err; + + err = read_fpsimd_context(&fpsimd, user); + if (err) + return err; clear_thread_flag(TIF_SVE); + current->thread.svcr &= ~SVCR_SM_MASK; current->thread.fp_type = FP_STATE_FPSIMD; /* load the hardware registers from the fpsimd_state structure */ - if (!err) - fpsimd_update_current_state(&fpsimd); - - return err ? -EFAULT : 0; + fpsimd_update_current_state(&fpsimd); + return 0; } static int preserve_fpmr_context(struct fpmr_context __user *ctx) { int err = 0; - current->thread.uw.fpmr = read_sysreg_s(SYS_FPMR); - __put_user_error(FPMR_MAGIC, &ctx->head.magic, err); __put_user_error(sizeof(*ctx), &ctx->head.size, err); __put_user_error(current->thread.uw.fpmr, &ctx->fpmr, err); @@ -310,7 +321,7 @@ static int restore_fpmr_context(struct user_ctxs *user) __get_user_error(fpmr, &user->fpmr->fpmr, err); if (!err) - write_sysreg_s(fpmr, SYS_FPMR); + current->thread.uw.fpmr = fpmr; return err; } @@ -372,11 +383,6 @@ static int preserve_sve_context(struct sve_context __user *ctx) err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved)); if (vq) { - /* - * This assumes that the SVE state has already been saved to - * the task struct by calling the function - * fpsimd_signal_preserve_current_state(). - */ err |= __copy_to_user((char __user *)ctx + SVE_SIG_REGS_OFFSET, current->thread.sve_state, SVE_SIG_REGS_SIZE(vq)); @@ -391,6 +397,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) unsigned int vl, vq; struct user_fpsimd_state fpsimd; u16 user_vl, flags; + bool sm; if (user->sve_size < sizeof(*user->sve)) return -EINVAL; @@ -400,7 +407,8 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) if (err) return err; - if (flags & SVE_SIG_FLAG_SM) { + sm = flags & SVE_SIG_FLAG_SM; + if (sm) { if (!system_supports_sme()) return -EINVAL; @@ -420,28 +428,23 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) if (user_vl != vl) return -EINVAL; - if (user->sve_size == sizeof(*user->sve)) { - clear_thread_flag(TIF_SVE); - current->thread.svcr &= ~SVCR_SM_MASK; - current->thread.fp_type = FP_STATE_FPSIMD; - goto fpsimd_only; - } + /* + * Non-streaming SVE state may be preserved without an SVE payload, in + * which case the SVE context only has a header with VL==0, and all + * state can be restored from the FPSIMD context. + * + * Streaming SVE state is always preserved with an SVE payload. For + * consistency and robustness, reject restoring streaming SVE state + * without an SVE payload. 
+ */ + if (!sm && user->sve_size == sizeof(*user->sve)) + return restore_fpsimd_context(user); vq = sve_vq_from_vl(vl); if (user->sve_size < SVE_SIG_CONTEXT_SIZE(vq)) return -EINVAL; - /* - * Careful: we are about __copy_from_user() directly into - * thread.sve_state with preemption enabled, so protection is - * needed to prevent a racing context switch from writing stale - * registers back over the new data. - */ - - fpsimd_flush_task_state(current); - /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ - sve_alloc(current, true); if (!current->thread.sve_state) { clear_thread_flag(TIF_SVE); @@ -461,19 +464,14 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) set_thread_flag(TIF_SVE); current->thread.fp_type = FP_STATE_SVE; -fpsimd_only: - /* copy the FP and status/control registers */ - /* restore_sigframe() already checked that user->fpsimd != NULL. */ - err = __copy_from_user(fpsimd.vregs, user->fpsimd->vregs, - sizeof(fpsimd.vregs)); - __get_user_error(fpsimd.fpsr, &user->fpsimd->fpsr, err); - __get_user_error(fpsimd.fpcr, &user->fpsimd->fpcr, err); + err = read_fpsimd_context(&fpsimd, user); + if (err) + return err; - /* load the hardware registers from the fpsimd_state structure */ - if (!err) - fpsimd_update_current_state(&fpsimd); + /* Merge the FPSIMD registers into the SVE state */ + fpsimd_update_current_state(&fpsimd); - return err ? -EFAULT : 0; + return 0; } #else /* ! CONFIG_ARM64_SVE */ @@ -493,13 +491,12 @@ extern int preserve_sve_context(void __user *ctx); static int preserve_tpidr2_context(struct tpidr2_context __user *ctx) { + u64 tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0); int err = 0; - current->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0); - __put_user_error(TPIDR2_MAGIC, &ctx->head.magic, err); __put_user_error(sizeof(*ctx), &ctx->head.size, err); - __put_user_error(current->thread.tpidr2_el0, &ctx->tpidr2, err); + __put_user_error(tpidr2_el0, &ctx->tpidr2, err); return err; } @@ -541,11 +538,6 @@ static int preserve_za_context(struct za_context __user *ctx) err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved)); if (vq) { - /* - * This assumes that the ZA state has already been saved to - * the task struct by calling the function - * fpsimd_signal_preserve_current_state(). - */ err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET, current->thread.sme_state, ZA_SIG_REGS_SIZE(vq)); @@ -580,16 +572,6 @@ static int restore_za_context(struct user_ctxs *user) if (user->za_size < ZA_SIG_CONTEXT_SIZE(vq)) return -EINVAL; - /* - * Careful: we are about __copy_from_user() directly into - * thread.sme_state with preemption enabled, so protection is - * needed to prevent a racing context switch from writing stale - * registers back over the new data. - */ - - fpsimd_flush_task_state(current); - /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ - sme_alloc(current, true); if (!current->thread.sme_state) { current->thread.svcr &= ~SVCR_ZA_MASK; @@ -627,11 +609,6 @@ static int preserve_zt_context(struct zt_context __user *ctx) BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved)); err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved)); - /* - * This assumes that the ZT state has already been saved to - * the task struct by calling the function - * fpsimd_signal_preserve_current_state(). 
- */ err |= __copy_to_user((char __user *)ctx + ZT_SIG_REGS_OFFSET, thread_zt_state(¤t->thread), ZT_SIG_REGS_SIZE(1)); @@ -657,16 +634,6 @@ static int restore_zt_context(struct user_ctxs *user) if (nregs != 1) return -EINVAL; - /* - * Careful: we are about __copy_from_user() directly into - * thread.zt_state with preemption enabled, so protection is - * needed to prevent a racing context switch from writing stale - * registers back over the new data. - */ - - fpsimd_flush_task_state(current); - /* From now, fpsimd_thread_switch() won't touch ZT in thread state */ - err = __copy_from_user(thread_zt_state(¤t->thread), (char __user const *)user->zt + ZT_SIG_REGS_OFFSET, @@ -1017,6 +984,8 @@ static int restore_sigframe(struct pt_regs *regs, */ forget_syscall(regs); + fpsimd_save_and_flush_current_state(); + err |= !valid_user_regs(®s->user_regs, current); if (err == 0) err = parse_user_sigframe(&user, sf); @@ -1507,21 +1476,9 @@ static int setup_return(struct pt_regs *regs, struct ksignal *ksig, /* Signal handlers are invoked with ZA and streaming mode disabled */ if (system_supports_sme()) { - /* - * If we were in streaming mode the saved register - * state was SVE but we will exit SM and use the - * FPSIMD register state - flush the saved FPSIMD - * register state in case it gets loaded. - */ - if (current->thread.svcr & SVCR_SM_MASK) { - memset(¤t->thread.uw.fpsimd_state, 0, - sizeof(current->thread.uw.fpsimd_state)); - current->thread.fp_type = FP_STATE_FPSIMD; - } - - current->thread.svcr &= ~(SVCR_ZA_MASK | - SVCR_SM_MASK); - sme_smstop(); + task_smstop_sm(current); + current->thread.svcr &= ~SVCR_ZA_MASK; + write_sysreg_s(0, SYS_TPIDR2_EL0); } return 0; @@ -1535,7 +1492,7 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, struct user_access_state ua_state; int err = 0; - fpsimd_signal_preserve_current_state(); + fpsimd_save_and_flush_current_state(); if (get_sigframe(&user, ksig, regs)) return 1; diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 81e798b6dada..bb3b526ff43f 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -103,7 +103,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) * Note that this also saves V16-31, which aren't visible * in AArch32. */ - fpsimd_signal_preserve_current_state(); + fpsimd_save_and_flush_current_state(); /* Place structure header on the stack */ __put_user_error(magic, &frame->magic, err); @@ -169,14 +169,17 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) fpsimd.fpsr = fpscr & VFP_FPSCR_STAT_MASK; fpsimd.fpcr = fpscr & VFP_FPSCR_CTRL_MASK; + if (err) + return -EFAULT; + /* * We don't need to touch the exception register, so * reload the hardware state. */ - if (!err) - fpsimd_update_current_state(&fpsimd); + fpsimd_save_and_flush_current_state(); + current->thread.uw.fpsimd_state = fpsimd; - return err ? -EFAULT : 0; + return 0; } static int compat_restore_sigframe(struct pt_regs *regs, diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index e73326bd3ff7..e4a525a865c1 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -249,9 +249,9 @@ SECTIONS __inittext_end = .; __initdata_begin = .; - init_idmap_pg_dir = .; + __pi_init_idmap_pg_dir = .; . 
+= INIT_IDMAP_DIR_SIZE; - init_idmap_pg_end = .; + __pi_init_idmap_pg_end = .; .init.data : { INIT_DATA @@ -319,11 +319,12 @@ SECTIONS /* start of zero-init region */ BSS_SECTION(SBSS_ALIGN, 0, 0) + __pi___bss_start = __bss_start; . = ALIGN(PAGE_SIZE); - init_pg_dir = .; + __pi_init_pg_dir = .; . += INIT_DIR_SIZE; - init_pg_end = .; + __pi_init_pg_end = .; /* end of zero-init region */ . += SZ_4K; /* stack for the early C runtime */ @@ -332,6 +333,7 @@ SECTIONS . = ALIGN(SEGMENT_ALIGN); __pecoff_data_size = ABSOLUTE(. - __initdata_begin); _end = .; + __pi__end = .; STABS_DEBUG DWARF_DEBUG diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index cfe8cb8ba1cc..0c8737f4f2ce 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -129,7 +129,7 @@ pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep) if (!pte_present(orig_pte) || !pte_cont(orig_pte)) return orig_pte; - ncontig = num_contig_ptes(page_size(pte_page(orig_pte)), &pgsize); + ncontig = find_num_contig(mm, addr, ptep, &pgsize); for (i = 0; i < ncontig; i++, ptep++) { pte_t pte = __ptep_get(ptep); @@ -159,12 +159,11 @@ static pte_t get_clear_contig(struct mm_struct *mm, pte_t pte, tmp_pte; bool present; - pte = __ptep_get_and_clear(mm, addr, ptep); + pte = __ptep_get_and_clear_anysz(mm, ptep, pgsize); present = pte_present(pte); while (--ncontig) { ptep++; - addr += pgsize; - tmp_pte = __ptep_get_and_clear(mm, addr, ptep); + tmp_pte = __ptep_get_and_clear_anysz(mm, ptep, pgsize); if (present) { if (pte_dirty(tmp_pte)) pte = pte_mkdirty(pte); @@ -183,8 +182,9 @@ static pte_t get_clear_contig_flush(struct mm_struct *mm, { pte_t orig_pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig); struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); + unsigned long end = addr + (pgsize * ncontig); - flush_tlb_range(&vma, addr, addr + (pgsize * ncontig)); + __flush_hugetlb_tlb_range(&vma, addr, end, pgsize, true); return orig_pte; } @@ -207,9 +207,12 @@ static void clear_flush(struct mm_struct *mm, unsigned long i, saddr = addr; for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) - __ptep_get_and_clear(mm, addr, ptep); + __ptep_get_and_clear_anysz(mm, ptep, pgsize); - flush_tlb_range(&vma, saddr, addr); + if (mm == &init_mm) + flush_tlb_kernel_range(saddr, addr); + else + __flush_hugetlb_tlb_range(&vma, saddr, addr, pgsize, true); } void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, @@ -218,30 +221,20 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, size_t pgsize; int i; int ncontig; - unsigned long pfn, dpfn; - pgprot_t hugeprot; ncontig = num_contig_ptes(sz, &pgsize); if (!pte_present(pte)) { for (i = 0; i < ncontig; i++, ptep++, addr += pgsize) - __set_ptes(mm, addr, ptep, pte, 1); - return; - } - - if (!pte_cont(pte)) { - __set_ptes(mm, addr, ptep, pte, 1); + __set_ptes_anysz(mm, ptep, pte, 1, pgsize); return; } - pfn = pte_pfn(pte); - dpfn = pgsize >> PAGE_SHIFT; - hugeprot = pte_pgprot(pte); - - clear_flush(mm, addr, ptep, pgsize, ncontig); + /* Only need to "break" if transitioning valid -> valid. 
*/ + if (pte_cont(pte) && pte_valid(__ptep_get(ptep))) + clear_flush(mm, addr, ptep, pgsize, ncontig); - for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) - __set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1); + __set_ptes_anysz(mm, ptep, pte, ncontig, pgsize); } pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, @@ -431,23 +424,23 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty) { - int ncontig, i; + int ncontig; size_t pgsize = 0; - unsigned long pfn = pte_pfn(pte), dpfn; struct mm_struct *mm = vma->vm_mm; - pgprot_t hugeprot; pte_t orig_pte; + VM_WARN_ON(!pte_present(pte)); + if (!pte_cont(pte)) return __ptep_set_access_flags(vma, addr, ptep, pte, dirty); - ncontig = find_num_contig(mm, addr, ptep, &pgsize); - dpfn = pgsize >> PAGE_SHIFT; + ncontig = num_contig_ptes(huge_page_size(hstate_vma(vma)), &pgsize); if (!__cont_access_flags_changed(ptep, pte, ncontig)) return 0; orig_pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig); + VM_WARN_ON(!pte_present(orig_pte)); /* Make sure we don't lose the dirty or young state */ if (pte_dirty(orig_pte)) @@ -456,38 +449,31 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, if (pte_young(orig_pte)) pte = pte_mkyoung(pte); - hugeprot = pte_pgprot(pte); - for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) - __set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1); - + __set_ptes_anysz(mm, ptep, pte, ncontig, pgsize); return 1; } void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - unsigned long pfn, dpfn; - pgprot_t hugeprot; - int ncontig, i; + int ncontig; size_t pgsize; pte_t pte; - if (!pte_cont(__ptep_get(ptep))) { + pte = __ptep_get(ptep); + VM_WARN_ON(!pte_present(pte)); + + if (!pte_cont(pte)) { __ptep_set_wrprotect(mm, addr, ptep); return; } ncontig = find_num_contig(mm, addr, ptep, &pgsize); - dpfn = pgsize >> PAGE_SHIFT; pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig); pte = pte_wrprotect(pte); - hugeprot = pte_pgprot(pte); - pfn = pte_pfn(pte); - - for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) - __set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1); + __set_ptes_anysz(mm, ptep, pte, ncontig, pgsize); } pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, @@ -497,10 +483,7 @@ pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, size_t pgsize; int ncontig; - if (!pte_cont(__ptep_get(ptep))) - return ptep_clear_flush(vma, addr, ptep); - - ncontig = find_num_contig(mm, addr, ptep, &pgsize); + ncontig = num_contig_ptes(huge_page_size(hstate_vma(vma)), &pgsize); return get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig); } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index b99bf3980fc6..0c8c35dd645e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -275,26 +275,6 @@ void __init arm64_memblock_init(void) } } - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { - extern u16 memstart_offset_seed; - u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); - int parange = cpuid_feature_extract_unsigned_field( - mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT); - s64 range = linear_region_size - - BIT(id_aa64mmfr0_parange_to_phys_shift(parange)); - - /* - * If the size of the linear region exceeds, by a sufficient - * margin, the size of the region that the physical memory can - * span, randomize the linear region as well. 
- */ - if (memstart_offset_seed > 0 && range >= (s64)ARM64_MEMSTART_ALIGN) { - range /= ARM64_MEMSTART_ALIGN; - memstart_addr -= ARM64_MEMSTART_ALIGN * - ((range * memstart_offset_seed) >> 16); - } - } - /* * Register the kernel text, kernel data, initrd, and initial * pagetables with memblock. diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 39fd1f7ff02a..04d4a8f676db 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -96,8 +96,8 @@ static int change_memory_common(unsigned long addr, int numpages, * we are operating on does not result in such splitting. * * Let's restrict ourselves to mappings created by vmalloc (or vmap). - * Those are guaranteed to consist entirely of page mappings, and - * splitting is never needed. + * Disallow VM_ALLOW_HUGE_VMAP mappings to guarantee that only page + * mappings are updated and splitting is never needed. * * So check whether the [addr, addr + size) interval is entirely * covered by precisely one VM area that has the VM_ALLOC flag set. @@ -105,7 +105,7 @@ static int change_memory_common(unsigned long addr, int numpages, area = find_vm_area((void *)addr); if (!area || end > (unsigned long)kasan_reset_tag(area->addr) + area->size || - !(area->flags & VM_ALLOC)) + ((area->flags & (VM_ALLOC | VM_ALLOW_HUGE_VMAP)) != VM_ALLOC)) return -EINVAL; if (!numpages) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index fb30c8804f87..80d470aa469d 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -512,26 +512,11 @@ alternative_else_nop_endif ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4 cbz x1, .Lskip_indirection - /* - * The PROT_* macros describing the various memory types may resolve to - * C expressions if they include the PTE_MAYBE_* macros, and so they - * can only be used from C code. The PIE_E* constants below are also - * defined in terms of those macros, but will mask out those - * PTE_MAYBE_* constants, whether they are set or not. So #define them - * as 0x0 here so we can evaluate the PIE_E* constants in asm context. 
- */ - -#define PTE_MAYBE_NG 0 -#define PTE_MAYBE_SHARED 0 - - mov_q x0, PIE_E0 + mov_q x0, PIE_E0_ASM msr REG_PIRE0_EL1, x0 - mov_q x0, PIE_E1 + mov_q x0, PIE_E1_ASM msr REG_PIR_EL1, x0 -#undef PTE_MAYBE_NG -#undef PTE_MAYBE_SHARED - orr tcr2, tcr2, TCR2_EL1_PIE msr REG_TCR2_EL1, x0 diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 634d78422adb..da8b89dd2910 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -2113,7 +2113,7 @@ bool bpf_jit_supports_subprog_tailcalls(void) } static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, - int args_off, int retval_off, int run_ctx_off, + int bargs_off, int retval_off, int run_ctx_off, bool save_ret) { __le32 *branch; @@ -2155,7 +2155,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, branch = ctx->image + ctx->idx; emit(A64_NOP, ctx); - emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx); + emit(A64_ADD_I(1, A64_R(0), A64_SP, bargs_off), ctx); if (!p->jited) emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx); @@ -2180,7 +2180,7 @@ } static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, - int args_off, int retval_off, int run_ctx_off, + int bargs_off, int retval_off, int run_ctx_off, __le32 **branches) { int i; @@ -2190,7 +2190,7 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, */ emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx); for (i = 0; i < tl->nr_links; i++) { - invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, + invoke_bpf_prog(ctx, tl->links[i], bargs_off, retval_off, run_ctx_off, true); /* if (*(u64 *)(sp + retval_off) != 0) * goto do_fexit; @@ -2204,23 +2204,125 @@ } } -static void save_args(struct jit_ctx *ctx, int args_off, int nregs) +struct arg_aux { + /* how many args are passed in registers; the rest of the args are + * passed on the stack + */ + int args_in_regs; + /* how many registers are used to pass arguments */ + int regs_for_args; + /* how much stack is used for additional args passed to bpf program + * that did not fit in original function registers + */ + int bstack_for_args; + /* how much stack is used for additional args passed to the + * original function when called from trampoline (this one needs + * arguments to be properly aligned) + */ + int ostack_for_args; +}; + +static int calc_arg_aux(const struct btf_func_model *m, + struct arg_aux *a) { - int i; + int stack_slots, nregs, slots, i; + + /* verifier ensures m->nr_args <= MAX_BPF_FUNC_ARGS */ + for (i = 0, nregs = 0; i < m->nr_args; i++) { + slots = (m->arg_size[i] + 7) / 8; + if (nregs + slots <= 8) /* passed through register? */ + nregs += slots; + else + break; + } + + a->args_in_regs = i; + a->regs_for_args = nregs; + a->ostack_for_args = 0; + a->bstack_for_args = 0; + + /* the remaining arguments are passed on the stack */ + for (; i < m->nr_args; i++) { + /* We cannot know the exact alignment requirements for a + * struct passed on the stack, so reject those + */ + if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) + return -ENOTSUPP; + stack_slots = (m->arg_size[i] + 7) / 8; + a->bstack_for_args += stack_slots * 8; + a->ostack_for_args = a->ostack_for_args + stack_slots * 8; + } + + return 0; +}
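(Illustrative aside, not part of the patch.) calc_arg_aux() above is plain AAPCS64 slot counting: each argument occupies ceil(size/8) 8-byte slots, the first eight slots travel in x0-x7, and the first argument that overflows, plus everything after it, goes to the stack. A standalone sketch of the same arithmetic with hypothetical argument sizes:

/* Standalone illustration of calc_arg_aux()'s slot arithmetic
 * (not kernel code; the argument sizes are made up).
 */
#include <stdio.h>

int main(void)
{
	/* byte sizes of a traced function's arguments */
	int arg_size[] = { 8, 16, 8, 8, 8, 8, 8, 8, 4 };
	int nr_args = sizeof(arg_size) / sizeof(arg_size[0]);
	int i, slots, nregs = 0, stack_bytes = 0;

	for (i = 0; i < nr_args; i++) {
		slots = (arg_size[i] + 7) / 8;	/* 8-byte slots, rounded up */
		if (nregs + slots <= 8)		/* room left in x0..x7? */
			nregs += slots;
		else
			break;			/* first stack-passed argument */
	}

	for (; i < nr_args; i++)
		stack_bytes += ((arg_size[i] + 7) / 8) * 8;

	/* prints: 8 register slots, 16 bytes of stack args */
	printf("%d register slots, %d bytes of stack args\n",
	       nregs, stack_bytes);
	return 0;
}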
- for (i = 0; i < nregs; i++) { - emit(A64_STR64I(i, A64_SP, args_off), ctx); - args_off += 8; +static void clear_garbage(struct jit_ctx *ctx, int reg, int effective_bytes) +{ + if (effective_bytes) { + int garbage_bits = 64 - 8 * effective_bytes; +#ifdef CONFIG_CPU_BIG_ENDIAN + /* garbage bits are at the right end */ + emit(A64_LSR(1, reg, reg, garbage_bits), ctx); + emit(A64_LSL(1, reg, reg, garbage_bits), ctx); +#else + /* garbage bits are at the left end */ + emit(A64_LSL(1, reg, reg, garbage_bits), ctx); + emit(A64_LSR(1, reg, reg, garbage_bits), ctx); +#endif } } -static void restore_args(struct jit_ctx *ctx, int args_off, int nregs) +static void save_args(struct jit_ctx *ctx, int bargs_off, int oargs_off, + const struct btf_func_model *m, + const struct arg_aux *a, + bool for_call_origin) { int i; + int reg; + int doff; + int soff; + int slots; + u8 tmp = bpf2a64[TMP_REG_1]; + + /* store arguments to the stack for the bpf program, or restore + * arguments from the stack for the original function + */ + for (reg = 0; reg < a->regs_for_args; reg++) { + emit(for_call_origin ? + A64_LDR64I(reg, A64_SP, bargs_off) : + A64_STR64I(reg, A64_SP, bargs_off), + ctx); + bargs_off += 8; + } + + soff = 32; /* on-stack arguments start at FP + 32 */ + doff = (for_call_origin ? oargs_off : bargs_off); + + /* save on-stack arguments */ + for (i = a->args_in_regs; i < m->nr_args; i++) { + slots = (m->arg_size[i] + 7) / 8; + /* verifier ensures arg_size <= 16, so slots equals 1 or 2 */ + while (slots-- > 0) { + emit(A64_LDR64I(tmp, A64_FP, soff), ctx); + /* if there is unused space in the last slot, clear + * the garbage contained in the space. 
+ */ + if (slots == 0 && !for_call_origin) + clear_garbage(ctx, tmp, m->arg_size[i] % 8); + emit(A64_STR64I(tmp, A64_SP, doff), ctx); + soff += 8; + doff += 8; + } + } +} - for (i = 0; i < nregs; i++) { - emit(A64_LDR64I(i, A64_SP, args_off), ctx); - args_off += 8; +static void restore_args(struct jit_ctx *ctx, int bargs_off, int nregs) +{ + int reg; + + for (reg = 0; reg < nregs; reg++) { + emit(A64_LDR64I(reg, A64_SP, bargs_off), ctx); + bargs_off += 8; } } @@ -2243,17 +2345,21 @@ static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links) */ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, struct bpf_tramp_links *tlinks, void *func_addr, - int nregs, u32 flags) + const struct btf_func_model *m, + const struct arg_aux *a, + u32 flags) { int i; int stack_size; int retaddr_off; int regs_off; int retval_off; - int args_off; - int nregs_off; + int bargs_off; + int nfuncargs_off; int ip_off; int run_ctx_off; + int oargs_off; + int nfuncargs; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; @@ -2262,31 +2368,38 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, bool is_struct_ops = is_struct_ops_tramp(fentry); /* trampoline stack layout: - * [ parent ip ] - * [ FP ] - * SP + retaddr_off [ self ip ] - * [ FP ] + * [ parent ip ] + * [ FP ] + * SP + retaddr_off [ self ip ] + * [ FP ] * - * [ padding ] align SP to multiples of 16 + * [ padding ] align SP to multiples of 16 * - * [ x20 ] callee saved reg x20 - * SP + regs_off [ x19 ] callee saved reg x19 + * [ x20 ] callee saved reg x20 + * SP + regs_off [ x19 ] callee saved reg x19 * - * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or - * BPF_TRAMP_F_RET_FENTRY_RET + * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or + * BPF_TRAMP_F_RET_FENTRY_RET + * [ arg reg N ] + * [ ... ] + * SP + bargs_off [ arg reg 1 ] for bpf * - * [ arg reg N ] - * [ ... ] - * SP + args_off [ arg reg 1 ] + * SP + nfuncargs_off [ arg regs count ] * - * SP + nregs_off [ arg regs count ] + * SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag * - * SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag + * SP + run_ctx_off [ bpf_tramp_run_ctx ] * - * SP + run_ctx_off [ bpf_tramp_run_ctx ] + * [ stack arg N ] + * [ ... 
] + * SP + oargs_off [ stack arg 1 ] for original func */ stack_size = 0; + oargs_off = stack_size; + if (flags & BPF_TRAMP_F_CALL_ORIG) + stack_size += a->ostack_for_args; + run_ctx_off = stack_size; /* room for bpf_tramp_run_ctx */ stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8); @@ -2296,13 +2409,14 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, if (flags & BPF_TRAMP_F_IP_ARG) stack_size += 8; - nregs_off = stack_size; + nfuncargs_off = stack_size; /* room for args count */ stack_size += 8; - args_off = stack_size; + bargs_off = stack_size; /* room for args */ - stack_size += nregs * 8; + nfuncargs = a->regs_for_args + a->bstack_for_args / 8; + stack_size += 8 * nfuncargs; /* room for return value */ retval_off = stack_size; @@ -2349,11 +2463,11 @@ } /* save arg regs count */ - emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx); - emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx); + emit(A64_MOVZ(1, A64_R(10), nfuncargs, 0), ctx); + emit(A64_STR64I(A64_R(10), A64_SP, nfuncargs_off), ctx); - /* save arg regs */ - save_args(ctx, args_off, nregs); + /* save args for bpf */ + save_args(ctx, bargs_off, oargs_off, m, a, false); /* save callee saved registers */ emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx); @@ -2369,7 +2483,7 @@ } for (i = 0; i < fentry->nr_links; i++) - invoke_bpf_prog(ctx, fentry->links[i], args_off, + invoke_bpf_prog(ctx, fentry->links[i], bargs_off, retval_off, run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET); @@ -2379,12 +2493,13 @@ if (!branches) return -ENOMEM; - invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off, + invoke_bpf_mod_ret(ctx, fmod_ret, bargs_off, retval_off, run_ctx_off, branches); } if (flags & BPF_TRAMP_F_CALL_ORIG) { - restore_args(ctx, args_off, nregs); + /* save args for original func */ + save_args(ctx, bargs_off, oargs_off, m, a, true); /* call original func */ emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx); emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx); @@ -2403,7 +2518,7 @@ } for (i = 0; i < fexit->nr_links; i++) - invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off, + invoke_bpf_prog(ctx, fexit->links[i], bargs_off, retval_off, run_ctx_off, false); if (flags & BPF_TRAMP_F_CALL_ORIG) { @@ -2417,7 +2532,7 @@ } if (flags & BPF_TRAMP_F_RESTORE_REGS) - restore_args(ctx, args_off, nregs); + restore_args(ctx, bargs_off, a->regs_for_args); /* restore callee saved register x19 and x20 */ emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx); @@ -2454,21 +2569,6 @@ return ctx->idx; } -static int btf_func_model_nregs(const struct btf_func_model *m) -{ - int nregs = m->nr_args; - int i; - - /* extra registers needed for struct argument */ - for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) { - /* The arg_size is at most 16 bytes, enforced by the verifier. 
*/ - if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) - nregs += (m->arg_size[i] + 7) / 8 - 1; - } - - return nregs; -} - int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, void *func_addr) { @@ -2477,14 +2577,14 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, .idx = 0, }; struct bpf_tramp_image im; - int nregs, ret; + struct arg_aux aaux; + int ret; - nregs = btf_func_model_nregs(m); - /* the first 8 registers are used for arguments */ - if (nregs > 8) - return -ENOTSUPP; + ret = calc_arg_aux(m, &aaux); + if (ret < 0) + return ret; - ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags); + ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, m, &aaux, flags); if (ret < 0) return ret; @@ -2511,9 +2611,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, u32 flags, struct bpf_tramp_links *tlinks, void *func_addr) { - int ret, nregs; - void *image, *tmp; u32 size = ro_image_end - ro_image; + struct arg_aux aaux; + void *image, *tmp; + int ret; /* image doesn't need to be in module memory range, so we can * use kvmalloc. @@ -2529,13 +2630,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, .write = true, }; - nregs = btf_func_model_nregs(m); - /* the first 8 registers are used for arguments */ - if (nregs > 8) - return -ENOTSUPP; jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image)); - ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags); + ret = calc_arg_aux(m, &aaux); + if (ret) + goto out; + ret = prepare_trampoline(&ctx, im, tlinks, func_addr, m, &aaux, flags); if (ret > 0 && validate_code(&ctx) < 0) { ret = -EINVAL; diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 9d01361696a1..ae551b857137 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -83,7 +83,26 @@ HYPERCALL3(vcpu_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); HYPERCALL2(vm_assist); -HYPERCALL3(dm_op); + +SYM_FUNC_START(HYPERVISOR_dm_op) + mov x16, #__HYPERVISOR_dm_op + /* + * dm_op hypercalls are issued by userspace. The kernel needs to + * enable access to TTBR0_EL1 as the hypervisor would issue stage 1 + * translations to user memory via AT instructions. Since AT + * instructions are not affected by the PAN bit (ARMv8.1), we only + * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation + * is enabled (it implies that hardware UAO and PAN are disabled). + */ + uaccess_ttbr0_enable x6, x7, x8 + hvc XEN_IMM + + /* + * Disable userspace access from kernel once the hypercall has + * completed. + */ + uaccess_ttbr0_disable x6, x7 + ret +SYM_FUNC_END(HYPERVISOR_dm_op); SYM_FUNC_START(privcmd_call) mov x16, x0
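(Illustrative aside, not part of the patch.) The clear_garbage() helper the JIT emits above is just a shift pair that masks off the unused bytes of a partially filled 8-byte stack slot, with the shift direction depending on endianness. The same arithmetic on a host value, as a standalone sketch; clear_slot_garbage() is a hypothetical name:

/* Host-side illustration of clear_garbage()'s masking (not kernel code):
 * keep only 'effective_bytes' of a partially used 8-byte slot.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t clear_slot_garbage(uint64_t slot, int effective_bytes)
{
	int garbage_bits = 64 - 8 * effective_bytes;

	if (!effective_bytes || effective_bytes >= 8)
		return slot;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* big-endian: the valid bytes sit at the high end of the register */
	return (slot >> garbage_bits) << garbage_bits;
#else
	/* little-endian: the valid bytes sit at the low end of the register */
	return (slot << garbage_bits) >> garbage_bits;
#endif
}

int main(void)
{
	/* a 3-byte struct loaded from the stack drags in 5 junk bytes */
	uint64_t slot = 0xdeadbeefcafe0123ull;

	/* little-endian output: 0xdeadbeefcafe0123 -> 0xfe0123 */
	printf("%#llx -> %#llx\n",
	       (unsigned long long)slot,
	       (unsigned long long)clear_slot_garbage(slot, 3));
	return 0;
}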