diff options
123 files changed, 3971 insertions, 1627 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index bcc974d276dc..df79e129d097 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -493,12 +493,13 @@ What: /sys/devices/system/cpu/cpuX/regs/ /sys/devices/system/cpu/cpuX/regs/identification/ /sys/devices/system/cpu/cpuX/regs/identification/midr_el1 /sys/devices/system/cpu/cpuX/regs/identification/revidr_el1 + /sys/devices/system/cpu/cpuX/regs/identification/smidr_el1 Date: June 2016 Contact: Linux ARM Kernel Mailing list <linux-arm-kernel@lists.infradead.org> Description: AArch64 CPU registers 'identification' directory exposes the CPU ID registers for - identifying model and revision of the CPU. + identifying model and revision of the CPU and SMCU. What: /sys/devices/system/cpu/aarch32_el0 Date: May 2021 diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index cc3ea8febc62..5e9147fe8968 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -400,6 +400,12 @@ arm64.nomte [ARM64] Unconditionally disable Memory Tagging Extension support + arm64.nosve [ARM64] Unconditionally disable Scalable Vector + Extension support + + arm64.nosme [ARM64] Unconditionally disable Scalable Matrix + Extension support + ataflop= [HW,M68k] atarimouse= [HW,MOUSE] Atari Mouse @@ -3161,7 +3167,7 @@ improves system performance, but it may also expose users to several CPU vulnerabilities. Equivalent to: nopti [X86,PPC] - kpti=0 [ARM64] + if nokaslr then kpti=0 [ARM64] nospectre_v1 [X86,PPC] nobp=0 [S390] nospectre_v2 [X86,PPC,S390,ARM64] diff --git a/Documentation/admin-guide/perf/hns3-pmu.rst b/Documentation/admin-guide/perf/hns3-pmu.rst new file mode 100644 index 000000000000..578407e487d6 --- /dev/null +++ b/Documentation/admin-guide/perf/hns3-pmu.rst @@ -0,0 +1,136 @@ +====================================== +HNS3 Performance Monitoring Unit (PMU) +====================================== + +HNS3(HiSilicon network system 3) Performance Monitoring Unit (PMU) is an +End Point device to collect performance statistics of HiSilicon SoC NIC. +On Hip09, each SICL(Super I/O cluster) has one PMU device. + +HNS3 PMU supports collection of performance statistics such as bandwidth, +latency, packet rate and interrupt rate. + +Each HNS3 PMU supports 8 hardware events. + +HNS3 PMU driver +=============== + +The HNS3 PMU driver registers a perf PMU with the name of its sicl id.:: + + /sys/devices/hns3_pmu_sicl_<sicl_id> + +PMU driver provides description of available events, filter modes, format, +identifier and cpumask in sysfs. + +The "events" directory describes the event code of all supported events +shown in perf list. + +The "filtermode" directory describes the supported filter modes of each +event. + +The "format" directory describes all formats of the config (events) and +config1 (filter options) fields of the perf_event_attr structure. + +The "identifier" file shows version of PMU hardware device. + +The "bdf_min" and "bdf_max" files show the supported bdf range of each +pmu device. + +The "hw_clk_freq" file shows the hardware clock frequency of each pmu +device. + +Example usage of checking event code and subevent code:: + + $# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_time + config=0x00204 + $# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_packet_num + config=0x10204 + +Each performance statistic has a pair of events to get two values to +calculate real performance data in userspace. + +The bits 0~15 of config (here 0x0204) are the true hardware event code. If +two events have same value of bits 0~15 of config, that means they are +event pair. And the bit 16 of config indicates getting counter 0 or +counter 1 of hardware event. + +After getting two values of event pair in usersapce, the formula of +computation to calculate real performance data is::: + + counter 0 / counter 1 + +Example usage of checking supported filter mode:: + + $# cat /sys/devices/hns3_pmu_sicl_0/filtermode/bw_ssu_rpu_byte_num + filter mode supported: global/port/port-tc/func/func-queue/ + +Example usage of perf:: + + $# perf list + hns3_pmu_sicl_0/bw_ssu_rpu_byte_num/ [kernel PMU event] + hns3_pmu_sicl_0/bw_ssu_rpu_time/ [kernel PMU event] + ------------------------------------------ + + $# perf stat -g -e hns3_pmu_sicl_0/bw_ssu_rpu_byte_num,global=1/ -e hns3_pmu_sicl_0/bw_ssu_rpu_time,global=1/ -I 1000 + or + $# perf stat -g -e hns3_pmu_sicl_0/config=0x00002,global=1/ -e hns3_pmu_sicl_0/config=0x10002,global=1/ -I 1000 + + +Filter modes +-------------- + +1. global mode +PMU collect performance statistics for all HNS3 PCIe functions of IO DIE. +Set the "global" filter option to 1 will enable this mode. +Example usage of perf:: + + $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,global=1/ -I 1000 + +2. port mode +PMU collect performance statistic of one whole physical port. The port id +is same as mac id. The "tc" filter option must be set to 0xF in this mode, +here tc stands for traffic class. + +Example usage of perf:: + + $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,port=0,tc=0xF/ -I 1000 + +3. port-tc mode +PMU collect performance statistic of one tc of physical port. The port id +is same as mac id. The "tc" filter option must be set to 0 ~ 7 in this +mode. +Example usage of perf:: + + $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,port=0,tc=0/ -I 1000 + +4. func mode +PMU collect performance statistic of one PF/VF. The function id is BDF of +PF/VF, its conversion formula:: + + func = (bus << 8) + (device << 3) + (function) + +for example: + BDF func + 35:00.0 0x3500 + 35:00.1 0x3501 + 35:01.0 0x3508 + +In this mode, the "queue" filter option must be set to 0xFFFF. +Example usage of perf:: + + $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,bdf=0x3500,queue=0xFFFF/ -I 1000 + +5. func-queue mode +PMU collect performance statistic of one queue of PF/VF. The function id +is BDF of PF/VF, the "queue" filter option must be set to the exact queue +id of function. +Example usage of perf:: + + $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,bdf=0x3500,queue=0/ -I 1000 + +6. func-intr mode +PMU collect performance statistic of one interrupt of PF/VF. The function +id is BDF of PF/VF, the "intr" filter option must be set to the exact +interrupt id of function. +Example usage of perf:: + + $# perf stat -a -e hns3_pmu_sicl_0/config=0x00301,bdf=0x3500,intr=0/ -I 1000 diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst index 69b23f087c05..9c9ece88ce53 100644 --- a/Documentation/admin-guide/perf/index.rst +++ b/Documentation/admin-guide/perf/index.rst @@ -9,6 +9,7 @@ Performance monitor support hisi-pmu hisi-pcie-pmu + hns3-pmu imx-ddr qcom_l2_pmu qcom_l3_pmu diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index 3d116fb536c5..31fc10b833dd 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -301,6 +301,10 @@ HWCAP2_WFXT Functionality implied by ID_AA64ISAR2_EL1.WFXT == 0b0010. +HWCAP2_EBF16 + + Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0010. + 4. Unused AT_HWCAP bits ----------------------- diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst index 901cd094f4ec..2a641ba7be3b 100644 --- a/Documentation/arm64/memory.rst +++ b/Documentation/arm64/memory.rst @@ -33,9 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit):: 0000000000000000 0000ffffffffffff 256TB user ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map [ffff600000000000 ffff7fffffffffff] 32TB [kasan shadow region] - ffff800000000000 ffff800007ffffff 128MB bpf jit region - ffff800008000000 ffff80000fffffff 128MB modules - ffff800010000000 fffffbffefffffff 124TB vmalloc + ffff800000000000 ffff800007ffffff 128MB modules + ffff800008000000 fffffbffefffffff 124TB vmalloc fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down) fffffbfffe000000 fffffbfffe7fffff 8MB [guard region] fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space @@ -51,9 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support): 0000000000000000 000fffffffffffff 4PB user fff0000000000000 ffff7fffffffffff ~4PB kernel logical memory map [fffd800000000000 ffff7fffffffffff] 512TB [kasan shadow region] - ffff800000000000 ffff800007ffffff 128MB bpf jit region - ffff800008000000 ffff80000fffffff 128MB modules - ffff800010000000 fffffbffefffffff 124TB vmalloc + ffff800000000000 ffff800007ffffff 128MB modules + ffff800008000000 fffffbffefffffff 124TB vmalloc fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down) fffffbfffe000000 fffffbfffe7fffff 8MB [guard region] fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index d27db84d585e..33b04db8408f 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -82,10 +82,14 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A57 | #1319537 | ARM64_ERRATUM_1319367 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A57 | #1742098 | ARM64_ERRATUM_1742098 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A72 | #853709 | N/A | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A72 | #1319367 | ARM64_ERRATUM_1319367 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A72 | #1655431 | ARM64_ERRATUM_1742098 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A76 | #1188873,1418040| ARM64_ERRATUM_1418040 | @@ -102,6 +106,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2077057 | ARM64_ERRATUM_2077057 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2441009 | ARM64_ERRATUM_2441009 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | diff --git a/Documentation/features/vm/ioremap_prot/arch-support.txt b/Documentation/features/vm/ioremap_prot/arch-support.txt index b01bf7bca3e6..6bd78eb4dc6e 100644 --- a/Documentation/features/vm/ioremap_prot/arch-support.txt +++ b/Documentation/features/vm/ioremap_prot/arch-support.txt @@ -9,7 +9,7 @@ | alpha: | TODO | | arc: | ok | | arm: | TODO | - | arm64: | TODO | + | arm64: | ok | | csky: | TODO | | hexagon: | TODO | | ia64: | TODO | diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index b12df9137e1c..832b5d36e279 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1894,6 +1894,7 @@ There are some more advanced barrier functions: (*) dma_wmb(); (*) dma_rmb(); + (*) dma_mb(); These are for use with consistent memory to guarantee the ordering of writes or reads of shared memory accessible to both the CPU and a @@ -1925,11 +1926,11 @@ There are some more advanced barrier functions: The dma_rmb() allows us guarantee the device has released ownership before we read the data from the descriptor, and the dma_wmb() allows us to guarantee the data is written to the descriptor before the device - can see it now has ownership. Note that, when using writel(), a prior - wmb() is not needed to guarantee that the cache coherent memory writes - have completed before writing to the MMIO region. The cheaper - writel_relaxed() does not provide this guarantee and must not be used - here. + can see it now has ownership. The dma_mb() implies both a dma_rmb() and + a dma_wmb(). Note that, when using writel(), a prior wmb() is not needed + to guarantee that the cache coherent memory writes have completed before + writing to the MMIO region. The cheaper writel_relaxed() does not provide + this guarantee and must not be used here. See the subsection "Kernel I/O barrier effects" for more information on relaxed I/O accessors and the Documentation/core-api/dma-api.rst file for diff --git a/Documentation/virt/kvm/arm/hyp-abi.rst b/Documentation/virt/kvm/arm/hyp-abi.rst index 4d43fbc25195..412b276449d3 100644 --- a/Documentation/virt/kvm/arm/hyp-abi.rst +++ b/Documentation/virt/kvm/arm/hyp-abi.rst @@ -60,12 +60,13 @@ these functions (see arch/arm{,64}/include/asm/virt.h): * :: - x0 = HVC_VHE_RESTART (arm64 only) + x0 = HVC_FINALISE_EL2 (arm64 only) - Attempt to upgrade the kernel's exception level from EL1 to EL2 by enabling - the VHE mode. This is conditioned by the CPU supporting VHE, the EL2 MMU - being off, and VHE not being disabled by any other means (command line - option, for example). + Finish configuring EL2 depending on the command-line options, + including an attempt to upgrade the kernel's exception level from + EL1 to EL2 by enabling the VHE mode. This is conditioned by the CPU + supporting VHE, the EL2 MMU being off, and VHE not being disabled by + any other means (command line option, for example). Any other value of r0/x0 triggers a hypervisor-specific handling, which is not documented here. diff --git a/MAINTAINERS b/MAINTAINERS index e10b1835e5d7..04ec80ee7352 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9038,6 +9038,12 @@ F: Documentation/admin-guide/perf/hisi-pcie-pmu.rst F: Documentation/admin-guide/perf/hisi-pmu.rst F: drivers/perf/hisilicon +HISILICON HNS3 PMU DRIVER +M: Guangbin Huang <huangguangbin2@huawei.com> +S: Supported +F: Documentation/admin-guide/perf/hns3-pmu.rst +F: drivers/perf/hisilicon/hns3_pmu.c + HISILICON QM AND ZIP Controller DRIVER M: Zhou Wang <wangzhou1@hisilicon.com> L: linux-crypto@vger.kernel.org diff --git a/arch/Kconfig b/arch/Kconfig index 71b9272acb28..5ea3e3838c21 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -223,6 +223,9 @@ config HAVE_FUNCTION_DESCRIPTORS config TRACE_IRQFLAGS_SUPPORT bool +config TRACE_IRQFLAGS_NMI_SUPPORT + bool + # # An arch should select this if it provides all these things: # diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index eba7cbc93b86..7fcdc785366c 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -139,11 +139,9 @@ extern void __iomem *__arm_ioremap_caller(phys_addr_t, size_t, unsigned int, extern void __iomem *__arm_ioremap_pfn(unsigned long, unsigned long, size_t, unsigned int); extern void __iomem *__arm_ioremap_exec(phys_addr_t, size_t, bool cached); void __arm_iomem_set_ro(void __iomem *ptr, size_t size); -extern void __iounmap(volatile void __iomem *addr); extern void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); -extern void (*arch_iounmap)(volatile void __iomem *); /* * Bad read/write accesses... @@ -380,7 +378,7 @@ void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size); #define ioremap_wc ioremap_wc #define ioremap_wt ioremap_wc -void iounmap(volatile void __iomem *iomem_cookie); +void iounmap(volatile void __iomem *io_addr); #define iounmap iounmap void *arch_memremap_wb(phys_addr_t phys_addr, size_t size); diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 576c0e6c92fc..2129070065c3 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -418,7 +418,7 @@ void *arch_memremap_wb(phys_addr_t phys_addr, size_t size) __builtin_return_address(0)); } -void __iounmap(volatile void __iomem *io_addr) +void iounmap(volatile void __iomem *io_addr) { void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr); struct static_vm *svm; @@ -446,13 +446,6 @@ void __iounmap(volatile void __iomem *io_addr) vunmap(addr); } - -void (*arch_iounmap)(volatile void __iomem *) = __iounmap; - -void iounmap(volatile void __iomem *cookie) -{ - arch_iounmap(cookie); -} EXPORT_SYMBOL(iounmap); #if defined(CONFIG_PCI) || IS_ENABLED(CONFIG_PCMCIA) diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 2658f52903da..c42debaded95 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -230,14 +230,7 @@ void *arch_memremap_wb(phys_addr_t phys_addr, size_t size) return (void *)phys_addr; } -void __iounmap(volatile void __iomem *addr) -{ -} -EXPORT_SYMBOL(__iounmap); - -void (*arch_iounmap)(volatile void __iomem *); - -void iounmap(volatile void __iomem *addr) +void iounmap(volatile void __iomem *io_addr) { } EXPORT_SYMBOL(iounmap); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1652a9800ebe..340e61199057 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -101,6 +101,7 @@ config ARM64 select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANTS_NO_INSTR + select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES select ARCH_HAS_UBSAN_SANITIZE_ALL select ARM_AMBA select ARM_ARCH_TIMER @@ -126,6 +127,7 @@ config ARM64 select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_IDLE_POLL_SETUP + select GENERIC_IOREMAP select GENERIC_IRQ_IPI select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW @@ -188,6 +190,7 @@ config ARM64 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS + select HAVE_IOREMAP_PROT select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KVM select HAVE_NMI @@ -226,6 +229,7 @@ config ARM64 select THREAD_INFO_IN_TASK select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD select TRACE_IRQFLAGS_SUPPORT + select TRACE_IRQFLAGS_NMI_SUPPORT help ARM 64-bit (AArch64) Linux support. @@ -503,6 +507,22 @@ config ARM64_ERRATUM_834220 If unsure, say Y. +config ARM64_ERRATUM_1742098 + bool "Cortex-A57/A72: 1742098: ELR recorded incorrectly on interrupt taken between cryptographic instructions in a sequence" + depends on COMPAT + default y + help + This option removes the AES hwcap for aarch32 user-space to + workaround erratum 1742098 on Cortex-A57 and Cortex-A72. + + Affected parts may corrupt the AES state if an interrupt is + taken between a pair of AES instructions. These instructions + are only present if the cryptography extensions are present. + All software should have a fallback implementation for CPUs + that don't implement the cryptography extensions. + + If unsure, say Y. + config ARM64_ERRATUM_845719 bool "Cortex-A53: 845719: a load might read incorrect data" depends on COMPAT @@ -821,6 +841,23 @@ config ARM64_ERRATUM_2224489 If unsure, say Y. +config ARM64_ERRATUM_2441009 + bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI" + default y + select ARM64_WORKAROUND_REPEAT_TLBI + help + This option adds a workaround for ARM Cortex-A510 erratum #2441009. + + Under very rare circumstances, affected Cortex-A510 CPUs + may not handle a race between a break-before-make sequence on one + CPU, and another CPU accessing the same page. This could allow a + store to a page that has been unmapped. + + Work around this by adding the affected CPUs to the list that needs + TLB sequences to be done twice. + + If unsure, say Y. + config ARM64_ERRATUM_2064142 bool "Cortex-A510: 2064142: workaround TRBE register writes while disabled" depends on CORESIGHT_TRBE diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index ebe80faab883..a0e3dedd2883 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -16,7 +16,7 @@ OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S -targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo +targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo Image.zst $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) @@ -35,3 +35,6 @@ $(obj)/Image.lzma: $(obj)/Image FORCE $(obj)/Image.lzo: $(obj)/Image FORCE $(call if_changed,lzo) + +$(obj)/Image.zst: $(obj)/Image FORCE + $(call if_changed,zstd) diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h index c39f2437e08e..980d1dd8e1a3 100644 --- a/arch/arm64/include/asm/asm-extable.h +++ b/arch/arm64/include/asm/asm-extable.h @@ -2,12 +2,27 @@ #ifndef __ASM_ASM_EXTABLE_H #define __ASM_ASM_EXTABLE_H +#include <linux/bits.h> +#include <asm/gpr-num.h> + #define EX_TYPE_NONE 0 -#define EX_TYPE_FIXUP 1 -#define EX_TYPE_BPF 2 -#define EX_TYPE_UACCESS_ERR_ZERO 3 +#define EX_TYPE_BPF 1 +#define EX_TYPE_UACCESS_ERR_ZERO 2 +#define EX_TYPE_KACCESS_ERR_ZERO 3 #define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4 +/* Data fields for EX_TYPE_UACCESS_ERR_ZERO */ +#define EX_DATA_REG_ERR_SHIFT 0 +#define EX_DATA_REG_ERR GENMASK(4, 0) +#define EX_DATA_REG_ZERO_SHIFT 5 +#define EX_DATA_REG_ZERO GENMASK(9, 5) + +/* Data fields for EX_TYPE_LOAD_UNALIGNED_ZEROPAD */ +#define EX_DATA_REG_DATA_SHIFT 0 +#define EX_DATA_REG_DATA GENMASK(4, 0) +#define EX_DATA_REG_ADDR_SHIFT 5 +#define EX_DATA_REG_ADDR GENMASK(9, 5) + #ifdef __ASSEMBLY__ #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ @@ -19,31 +34,45 @@ .short (data); \ .popsection; +#define EX_DATA_REG(reg, gpr) \ + (.L__gpr_num_##gpr << EX_DATA_REG_##reg##_SHIFT) + +#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \ + __ASM_EXTABLE_RAW(insn, fixup, \ + EX_TYPE_UACCESS_ERR_ZERO, \ + ( \ + EX_DATA_REG(ERR, err) | \ + EX_DATA_REG(ZERO, zero) \ + )) + +#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr) + +#define _ASM_EXTABLE_UACCESS(insn, fixup) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, wzr, wzr) + /* - * Create an exception table entry for `insn`, which will branch to `fixup` + * Create an exception table entry for uaccess `insn`, which will branch to `fixup` * when an unhandled fault is taken. */ - .macro _asm_extable, insn, fixup - __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0) + .macro _asm_extable_uaccess, insn, fixup + _ASM_EXTABLE_UACCESS(\insn, \fixup) .endm /* * Create an exception table entry for `insn` if `fixup` is provided. Otherwise * do nothing. */ - .macro _cond_extable, insn, fixup - .ifnc \fixup, - _asm_extable \insn, \fixup + .macro _cond_uaccess_extable, insn, fixup + .ifnc \fixup, + _asm_extable_uaccess \insn, \fixup .endif .endm #else /* __ASSEMBLY__ */ -#include <linux/bits.h> #include <linux/stringify.h> -#include <asm/gpr-num.h> - #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ ".pushsection __ex_table, \"a\"\n" \ ".align 2\n" \ @@ -53,14 +82,6 @@ ".short (" data ")\n" \ ".popsection\n" -#define _ASM_EXTABLE(insn, fixup) \ - __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0") - -#define EX_DATA_REG_ERR_SHIFT 0 -#define EX_DATA_REG_ERR GENMASK(4, 0) -#define EX_DATA_REG_ZERO_SHIFT 5 -#define EX_DATA_REG_ZERO GENMASK(9, 5) - #define EX_DATA_REG(reg, gpr) \ "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")" @@ -73,13 +94,23 @@ EX_DATA_REG(ZERO, zero) \ ")") +#define _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, zero) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_KACCESS_ERR_ZERO), \ + "(" \ + EX_DATA_REG(ERR, err) " | " \ + EX_DATA_REG(ZERO, zero) \ + ")") + #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr) -#define EX_DATA_REG_DATA_SHIFT 0 -#define EX_DATA_REG_DATA GENMASK(4, 0) -#define EX_DATA_REG_ADDR_SHIFT 5 -#define EX_DATA_REG_ADDR GENMASK(9, 5) +#define _ASM_EXTABLE_UACCESS(insn, fixup) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, wzr, wzr) + +#define _ASM_EXTABLE_KACCESS_ERR(insn, fixup, err) \ + _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, wzr) #define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \ __DEFINE_ASM_GPR_NUMS \ diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index 0557af834e03..75b211c98dea 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -61,7 +61,7 @@ alternative_else_nop_endif #define USER(l, x...) \ 9999: x; \ - _asm_extable 9999b, l + _asm_extable_uaccess 9999b, l /* * Generate the assembly for LDTR/STTR with exception table entries. @@ -73,8 +73,8 @@ alternative_else_nop_endif 8889: ldtr \reg2, [\addr, #8]; add \addr, \addr, \post_inc; - _asm_extable 8888b,\l; - _asm_extable 8889b,\l; + _asm_extable_uaccess 8888b, \l; + _asm_extable_uaccess 8889b, \l; .endm .macro user_stp l, reg1, reg2, addr, post_inc @@ -82,14 +82,14 @@ alternative_else_nop_endif 8889: sttr \reg2, [\addr, #8]; add \addr, \addr, \post_inc; - _asm_extable 8888b,\l; - _asm_extable 8889b,\l; + _asm_extable_uaccess 8888b,\l; + _asm_extable_uaccess 8889b,\l; .endm .macro user_ldst l, inst, reg, addr, post_inc 8888: \inst \reg, [\addr]; add \addr, \addr, \post_inc; - _asm_extable 8888b,\l; + _asm_extable_uaccess 8888b, \l; .endm #endif diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h index ead62f7dd269..13ecc79854ee 100644 --- a/arch/arm64/include/asm/asm_pointer_auth.h +++ b/arch/arm64/include/asm/asm_pointer_auth.h @@ -59,9 +59,9 @@ alternative_else_nop_endif .macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3 mrs \tmp1, id_aa64isar1_el1 - ubfx \tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8 + ubfx \tmp1, \tmp1, #ID_AA64ISAR1_EL1_APA_SHIFT, #8 mrs_s \tmp2, SYS_ID_AA64ISAR2_EL1 - ubfx \tmp2, \tmp2, #ID_AA64ISAR2_APA3_SHIFT, #4 + ubfx \tmp2, \tmp2, #ID_AA64ISAR2_EL1_APA3_SHIFT, #4 orr \tmp1, \tmp1, \tmp2 cbz \tmp1, .Lno_addr_auth\@ mov_q \tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 8c5a61aeaf8e..5846145be523 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -360,6 +360,20 @@ alternative_cb_end .endm /* + * idmap_get_t0sz - get the T0SZ value needed to cover the ID map + * + * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the + * entire ID map region can be mapped. As T0SZ == (64 - #bits used), + * this number conveniently equals the number of leading zeroes in + * the physical address of _end. + */ + .macro idmap_get_t0sz, reg + adrp \reg, _end + orr \reg, \reg, #(1 << VA_BITS_MIN) - 1 + clz \reg, \reg + .endm + +/* * tcr_compute_pa_size - set TCR.(I)PS to the highest supported * ID_AA64MMFR0_EL1.PARange value * @@ -423,7 +437,7 @@ alternative_endif b.lo .Ldcache_op\@ dsb \domain - _cond_extable .Ldcache_op\@, \fixup + _cond_uaccess_extable .Ldcache_op\@, \fixup .endm /* @@ -462,7 +476,19 @@ alternative_endif dsb ish isb - _cond_extable .Licache_op\@, \fixup + _cond_uaccess_extable .Licache_op\@, \fixup + .endm + +/* + * load_ttbr1 - install @pgtbl as a TTBR1 page table + * pgtbl preserved + * tmp1/tmp2 clobbered, either may overlap with pgtbl + */ + .macro load_ttbr1, pgtbl, tmp1, tmp2 + phys_to_ttbr \tmp1, \pgtbl + offset_ttbr1 \tmp1, \tmp2 + msr ttbr1_el1, \tmp1 + isb .endm /* @@ -478,10 +504,7 @@ alternative_endif isb tlbi vmalle1 dsb nsh - phys_to_ttbr \tmp, \page_table - offset_ttbr1 \tmp, \tmp2 - msr ttbr1_el1, \tmp - isb + load_ttbr1 \page_table, \tmp, \tmp2 .endm /* diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 9f3e2c3d2ca0..2cfc4245d2e2 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -50,13 +50,13 @@ #define pmr_sync() do {} while (0) #endif -#define mb() dsb(sy) -#define rmb() dsb(ld) -#define wmb() dsb(st) +#define __mb() dsb(sy) +#define __rmb() dsb(ld) +#define __wmb() dsb(st) -#define dma_mb() dmb(osh) -#define dma_rmb() dmb(oshld) -#define dma_wmb() dmb(oshst) +#define __dma_mb() dmb(osh) +#define __dma_rmb() dmb(oshld) +#define __dma_wmb() dmb(oshst) #define io_stop_wc() dgh() diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 7c2181c72116..ca9b487112cc 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -5,34 +5,9 @@ #ifndef __ASM_CACHE_H #define __ASM_CACHE_H -#include <asm/cputype.h> -#include <asm/mte-def.h> - -#define CTR_L1IP_SHIFT 14 -#define CTR_L1IP_MASK 3 -#define CTR_DMINLINE_SHIFT 16 -#define CTR_IMINLINE_SHIFT 0 -#define CTR_IMINLINE_MASK 0xf -#define CTR_ERG_SHIFT 20 -#define CTR_CWG_SHIFT 24 -#define CTR_CWG_MASK 15 -#define CTR_IDC_SHIFT 28 -#define CTR_DIC_SHIFT 29 - -#define CTR_CACHE_MINLINE_MASK \ - (0xf << CTR_DMINLINE_SHIFT | CTR_IMINLINE_MASK << CTR_IMINLINE_SHIFT) - -#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) - -#define ICACHE_POLICY_VPIPT 0 -#define ICACHE_POLICY_RESERVED 1 -#define ICACHE_POLICY_VIPT 2 -#define ICACHE_POLICY_PIPT 3 - #define L1_CACHE_SHIFT (6) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) - #define CLIDR_LOUU_SHIFT 27 #define CLIDR_LOC_SHIFT 24 #define CLIDR_LOUIS_SHIFT 21 @@ -55,6 +30,10 @@ #include <linux/bitops.h> #include <linux/kasan-enabled.h> +#include <asm/cputype.h> +#include <asm/mte-def.h> +#include <asm/sysreg.h> + #ifdef CONFIG_KASAN_SW_TAGS #define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT) #elif defined(CONFIG_KASAN_HW_TAGS) @@ -66,6 +45,12 @@ static inline unsigned int arch_slab_minalign(void) #define arch_slab_minalign() arch_slab_minalign() #endif +#define CTR_CACHE_MINLINE_MASK \ + (0xf << CTR_EL0_DMINLINE_SHIFT | \ + CTR_EL0_IMINLINE_MASK << CTR_EL0_IMINLINE_SHIFT) + +#define CTR_L1IP(ctr) SYS_FIELD_GET(CTR_EL0, L1Ip, ctr) + #define ICACHEF_ALIASING 0 #define ICACHEF_VPIPT 1 extern unsigned long __icache_flags; @@ -86,7 +71,7 @@ static __always_inline int icache_is_vpipt(void) static inline u32 cache_type_cwg(void) { - return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK; + return (read_cpuid_cachetype() >> CTR_EL0_CWG_SHIFT) & CTR_EL0_CWG_MASK; } #define __read_mostly __section(".data..read_mostly") @@ -120,12 +105,12 @@ static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void) { u32 ctr = read_cpuid_cachetype(); - if (!(ctr & BIT(CTR_IDC_SHIFT))) { + if (!(ctr & BIT(CTR_EL0_IDC_SHIFT))) { u64 clidr = read_sysreg(clidr_el1); if (CLIDR_LOC(clidr) == 0 || (CLIDR_LOUIS(clidr) == 0 && CLIDR_LOUU(clidr) == 0)) - ctr |= BIT(CTR_IDC_SHIFT); + ctr |= BIT(CTR_EL0_IDC_SHIFT); } return ctr; diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 5a228e203ef9..37185e978aeb 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -105,13 +105,6 @@ static inline void flush_icache_range(unsigned long start, unsigned long end) #define flush_icache_range flush_icache_range /* - * Cache maintenance functions used by the DMA API. No to be used directly. - */ -extern void __dma_map_area(const void *, size_t, int); -extern void __dma_unmap_area(const void *, size_t, int); -extern void __dma_flush_area(const void *, size_t); - -/* * Copy user data from/to a page which is mapped into a different * processes address space. Really, we want to allow our "user * space" model to handle this. diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 115cdec1ae87..fd7a92219eea 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -46,6 +46,7 @@ struct cpuinfo_arm64 { u64 reg_midr; u64 reg_revidr; u64 reg_gmid; + u64 reg_smidr; u64 reg_id_aa64dfr0; u64 reg_id_aa64dfr1; diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index e95c4df83911..a444c8915e88 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -31,11 +31,6 @@ * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the * cpu being killed. * @cpu_kill: Ensures a cpu has left the kernel. Called from another cpu. - * @cpu_init_idle: Reads any data necessary to initialize CPU idle states for - * a proposed logical id. - * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing - * to wrong parameters or error conditions. Called from the - * CPU being suspended. Must be called with IRQs disabled. */ struct cpu_operations { const char *name; @@ -49,10 +44,6 @@ struct cpu_operations { void (*cpu_die)(unsigned int cpu); int (*cpu_kill)(unsigned int cpu); #endif -#ifdef CONFIG_CPU_IDLE - int (*cpu_init_idle)(unsigned int); - int (*cpu_suspend)(unsigned long); -#endif }; int __init init_cpu_ops(int cpu); diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 14a8f3d93add..fd7d75a275f6 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -11,7 +11,7 @@ #include <asm/hwcap.h> #include <asm/sysreg.h> -#define MAX_CPU_FEATURES 64 +#define MAX_CPU_FEATURES 128 #define cpu_feature(x) KERNEL_HWCAP_ ## x #ifndef __ASSEMBLY__ @@ -673,7 +673,7 @@ static inline bool supports_clearbhb(int scope) isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); return cpuid_feature_extract_unsigned_field(isar2, - ID_AA64ISAR2_CLEARBHB_SHIFT); + ID_AA64ISAR2_EL1_BC_SHIFT); } const struct cpumask *system_32bit_el0_cpumask(void); @@ -908,7 +908,10 @@ static inline unsigned int get_vmid_bits(u64 mmfr1) } extern struct arm64_ftr_override id_aa64mmfr1_override; +extern struct arm64_ftr_override id_aa64pfr0_override; extern struct arm64_ftr_override id_aa64pfr1_override; +extern struct arm64_ftr_override id_aa64zfr0_override; +extern struct arm64_ftr_override id_aa64smfr0_override; extern struct arm64_ftr_override id_aa64isar1_override; extern struct arm64_ftr_override id_aa64isar2_override; diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h index 14a19d1141bd..2047713e097d 100644 --- a/arch/arm64/include/asm/cpuidle.h +++ b/arch/arm64/include/asm/cpuidle.h @@ -4,21 +4,6 @@ #include <asm/proc-fns.h> -#ifdef CONFIG_CPU_IDLE -extern int arm_cpuidle_init(unsigned int cpu); -extern int arm_cpuidle_suspend(int index); -#else -static inline int arm_cpuidle_init(unsigned int cpu) -{ - return -EOPNOTSUPP; -} - -static inline int arm_cpuidle_suspend(int index) -{ - return -EOPNOTSUPP; -} -#endif - #ifdef CONFIG_ARM64_PSEUDO_NMI #include <asm/arch_gicv3.h> diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 34ceff08cac4..2630faa5bc08 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -129,64 +129,6 @@ msr cptr_el2, x0 // Disable copro. traps to EL2 .endm -/* SVE register access */ -.macro __init_el2_nvhe_sve - mrs x1, id_aa64pfr0_el1 - ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4 - cbz x1, .Lskip_sve_\@ - - bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps - msr cptr_el2, x0 // Disable copro. traps to EL2 - isb - mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector - msr_s SYS_ZCR_EL2, x1 // length for EL1. -.Lskip_sve_\@: -.endm - -/* SME register access and priority mapping */ -.macro __init_el2_nvhe_sme - mrs x1, id_aa64pfr1_el1 - ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4 - cbz x1, .Lskip_sme_\@ - - bic x0, x0, #CPTR_EL2_TSM // Also disable SME traps - msr cptr_el2, x0 // Disable copro. traps to EL2 - isb - - mrs x1, sctlr_el2 - orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps - msr sctlr_el2, x1 - isb - - mov x1, #0 // SMCR controls - - mrs_s x2, SYS_ID_AA64SMFR0_EL1 - ubfx x2, x2, #ID_AA64SMFR0_FA64_SHIFT, #1 // Full FP in SM? - cbz x2, .Lskip_sme_fa64_\@ - - orr x1, x1, SMCR_ELx_FA64_MASK -.Lskip_sme_fa64_\@: - - orr x1, x1, #SMCR_ELx_LEN_MASK // Enable full SME vector - msr_s SYS_SMCR_EL2, x1 // length for EL1. - - mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported? - ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1 - cbz x1, .Lskip_sme_\@ - - msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal - - mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present? - ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4 - cbz x1, .Lskip_sme_\@ - - mrs_s x1, SYS_HCRX_EL2 - orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping - msr_s SYS_HCRX_EL2, x1 - -.Lskip_sme_\@: -.endm - /* Disable any fine grained traps */ .macro __init_el2_fgt mrs x1, id_aa64mmfr0_el1 @@ -250,8 +192,6 @@ __init_el2_hstr __init_el2_nvhe_idregs __init_el2_nvhe_cptr - __init_el2_nvhe_sve - __init_el2_nvhe_sme __init_el2_fgt __init_el2_nvhe_prepare_eret .endm diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index daff882883f9..71ed5fdf718b 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -62,10 +62,12 @@ enum fixed_addresses { #endif /* CONFIG_ACPI_APEI_GHES */ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#ifdef CONFIG_RELOCATABLE + FIX_ENTRY_TRAMP_TEXT4, /* one extra slot for the data page */ +#endif FIX_ENTRY_TRAMP_TEXT3, FIX_ENTRY_TRAMP_TEXT2, FIX_ENTRY_TRAMP_TEXT1, - FIX_ENTRY_TRAMP_DATA, #define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1)) #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ __end_of_permanent_fixed_addresses, diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index aa443d8f8cfb..cef4ae7a3d8b 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -85,7 +85,7 @@ #define KERNEL_HWCAP_PACA __khwcap_feature(PACA) #define KERNEL_HWCAP_PACG __khwcap_feature(PACG) -#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 32) +#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 64) #define KERNEL_HWCAP_DCPODP __khwcap2_feature(DCPODP) #define KERNEL_HWCAP_SVE2 __khwcap2_feature(SVE2) #define KERNEL_HWCAP_SVEAES __khwcap2_feature(SVEAES) @@ -118,6 +118,7 @@ #define KERNEL_HWCAP_SME_F32F32 __khwcap2_feature(SME_F32F32) #define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64) #define KERNEL_HWCAP_WFXT __khwcap2_feature(WFXT) +#define KERNEL_HWCAP_EBF16 __khwcap2_feature(EBF16) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 3995652daf81..87dd42d74afe 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -163,13 +163,16 @@ extern void __memset_io(volatile void __iomem *, int, size_t); /* * I/O memory mapping functions. */ -extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot); -extern void iounmap(volatile void __iomem *addr); -extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); -#define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) -#define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC)) -#define ioremap_np(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRnE)) +bool ioremap_allowed(phys_addr_t phys_addr, size_t size, unsigned long prot); +#define ioremap_allowed ioremap_allowed + +#define _PAGE_IOREMAP PROT_DEVICE_nGnRE + +#define ioremap_wc(addr, size) \ + ioremap_prot((addr), (size), PROT_NORMAL_NC) +#define ioremap_np(addr, size) \ + ioremap_prot((addr), (size), PROT_DEVICE_nGnRnE) /* * io{read,write}{16,32,64}be() macros @@ -184,6 +187,15 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); #include <asm-generic/io.h> +#define ioremap_cache ioremap_cache +static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size) +{ + if (pfn_is_map_memory(__phys_to_pfn(addr))) + return (void __iomem *)__phys_to_virt(addr); + + return ioremap_prot(addr, size, PROT_NORMAL); +} + /* * More restrictive address range checking than the default implementation * (PHYS_OFFSET and PHYS_MASK taken into account). diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 96dc0f7da258..02e59fa8f293 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -8,6 +8,7 @@ #ifndef __ASM_KERNEL_PGTABLE_H #define __ASM_KERNEL_PGTABLE_H +#include <asm/boot.h> #include <asm/pgtable-hwdef.h> #include <asm/sparsemem.h> @@ -35,10 +36,8 @@ */ #if ARM64_KERNEL_USES_PMD_MAPS #define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) -#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1) #else #define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) -#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT)) #endif @@ -87,7 +86,14 @@ + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ #define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end)) -#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) + +/* the initial ID map may need two extra pages if it needs to be extended */ +#if VA_BITS < 48 +#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE) +#else +#define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * PAGE_SIZE) +#endif +#define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE) /* Initial memory map size */ #if ARM64_KERNEL_USES_PMD_MAPS @@ -107,9 +113,11 @@ #define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) #if ARM64_KERNEL_USES_PMD_MAPS -#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) +#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) +#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY) #else -#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) +#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) +#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY) #endif /* diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 0af70d9abede..227d256cd4b9 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -174,7 +174,11 @@ #include <linux/types.h> #include <asm/bug.h> +#if VA_BITS > 48 extern u64 vabits_actual; +#else +#define vabits_actual ((u64)VA_BITS) +#endif extern s64 memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ @@ -351,6 +355,11 @@ static inline void *phys_to_virt(phys_addr_t x) }) void dump_mem_limit(void); + +static inline bool defer_reserve_crashkernel(void) +{ + return IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32); +} #endif /* !ASSEMBLY */ /* diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 6770667b34a3..c7ccd82db1d2 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -60,8 +60,7 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in * physical memory, in which case it will be smaller. */ -extern u64 idmap_t0sz; -extern u64 idmap_ptrs_per_pgd; +extern int idmap_t0sz; /* * Ensure TCR.T0SZ is set to the provided value. @@ -106,13 +105,18 @@ static inline void cpu_uninstall_idmap(void) cpu_switch_mm(mm->pgd, mm); } -static inline void cpu_install_idmap(void) +static inline void __cpu_install_idmap(pgd_t *idmap) { cpu_set_reserved_ttbr0(); local_flush_tlb_all(); cpu_set_idmap_tcr_t0sz(); - cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm); + cpu_switch_mm(lm_alias(idmap), &init_mm); +} + +static inline void cpu_install_idmap(void) +{ + __cpu_install_idmap(idmap_pg_dir); } /* @@ -143,7 +147,7 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz) * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, * avoiding the possibility of conflicting TLB entries being allocated. */ -static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp) +static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) { typedef void (ttbr_replace_func)(phys_addr_t); extern ttbr_replace_func idmap_cpu_replace_ttbr1; @@ -166,7 +170,7 @@ static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp) replace_phys = (void *)__pa_symbol(function_nocfi(idmap_cpu_replace_ttbr1)); - cpu_install_idmap(); + __cpu_install_idmap(idmap); replace_phys(ttbr1); cpu_uninstall_idmap(); } diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index dd3d12bce07b..5ab8d163198f 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -281,10 +281,9 @@ */ #ifdef CONFIG_ARM64_PA_BITS_52 /* - * This should be GENMASK_ULL(47, 2). * TTBR_ELx[1] is RES0 in this configuration. */ -#define TTBR_BADDR_MASK_52 (((UL(1) << 46) - 1) << 2) +#define TTBR_BADDR_MASK_52 GENMASK_ULL(47, 2) #endif #ifdef CONFIG_ARM64_VA_BITS_52 diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0b6632f18364..b5df82aa99e6 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -45,6 +45,12 @@ __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline bool arch_thp_swp_supported(void) +{ + return !system_supports_mte(); +} +#define arch_thp_swp_supported arch_thp_swp_supported + /* * Outside of a few very special situations (e.g. hibernation), we always * use broadcast TLB invalidation instructions, therefore a spurious page @@ -427,6 +433,16 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); } +/* + * Select all bits except the pfn + */ +static inline pgprot_t pte_pgprot(pte_t pte) +{ + unsigned long pfn = pte_pfn(pte); + + return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); +} + #ifdef CONFIG_NUMA_BALANCING /* * See the comment in include/linux/pgtable.h diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 9e58749db21d..86eb0bfe3b38 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -272,8 +272,9 @@ void tls_preserve_current_state(void); static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) { + s32 previous_syscall = regs->syscallno; memset(regs, 0, sizeof(*regs)); - forget_syscall(regs); + regs->syscallno = previous_syscall; regs->pc = pc; if (system_uses_irq_prio_masking()) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 42ff95dba6da..7c71358d44c4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -192,8 +192,6 @@ #define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0) #define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1) -#define SYS_ID_AA64ZFR0_EL1 sys_reg(3, 0, 0, 4, 4) -#define SYS_ID_AA64SMFR0_EL1 sys_reg(3, 0, 0, 4, 5) #define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) #define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) @@ -201,9 +199,6 @@ #define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4) #define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5) -#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1) -#define SYS_ID_AA64ISAR2_EL1 sys_reg(3, 0, 0, 6, 2) - #define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) #define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) #define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) @@ -410,12 +405,6 @@ #define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0) #define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0) -#define SYS_LORSA_EL1 sys_reg(3, 0, 10, 4, 0) -#define SYS_LOREA_EL1 sys_reg(3, 0, 10, 4, 1) -#define SYS_LORN_EL1 sys_reg(3, 0, 10, 4, 2) -#define SYS_LORC_EL1 sys_reg(3, 0, 10, 4, 3) -#define SYS_LORID_EL1 sys_reg(3, 0, 10, 4, 7) - #define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) #define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1) @@ -454,16 +443,12 @@ #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) -#define SYS_GMID_EL1 sys_reg(3, 1, 0, 0, 4) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) #define SMIDR_EL1_IMPLEMENTER_SHIFT 24 #define SMIDR_EL1_SMPS_SHIFT 15 #define SMIDR_EL1_AFFINITY_SHIFT 0 -#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) -#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) - #define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0) #define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1) @@ -704,66 +689,6 @@ /* Position the attr at the correct index */ #define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8)) -/* id_aa64isar1 */ -#define ID_AA64ISAR1_I8MM_SHIFT 52 -#define ID_AA64ISAR1_DGH_SHIFT 48 -#define ID_AA64ISAR1_BF16_SHIFT 44 -#define ID_AA64ISAR1_SPECRES_SHIFT 40 -#define ID_AA64ISAR1_SB_SHIFT 36 -#define ID_AA64ISAR1_FRINTTS_SHIFT 32 -#define ID_AA64ISAR1_GPI_SHIFT 28 -#define ID_AA64ISAR1_GPA_SHIFT 24 -#define ID_AA64ISAR1_LRCPC_SHIFT 20 -#define ID_AA64ISAR1_FCMA_SHIFT 16 -#define ID_AA64ISAR1_JSCVT_SHIFT 12 -#define ID_AA64ISAR1_API_SHIFT 8 -#define ID_AA64ISAR1_APA_SHIFT 4 -#define ID_AA64ISAR1_DPB_SHIFT 0 - -#define ID_AA64ISAR1_APA_NI 0x0 -#define ID_AA64ISAR1_APA_ARCHITECTED 0x1 -#define ID_AA64ISAR1_APA_ARCH_EPAC 0x2 -#define ID_AA64ISAR1_APA_ARCH_EPAC2 0x3 -#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC 0x4 -#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC_CMB 0x5 -#define ID_AA64ISAR1_API_NI 0x0 -#define ID_AA64ISAR1_API_IMP_DEF 0x1 -#define ID_AA64ISAR1_API_IMP_DEF_EPAC 0x2 -#define ID_AA64ISAR1_API_IMP_DEF_EPAC2 0x3 -#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC 0x4 -#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC_CMB 0x5 -#define ID_AA64ISAR1_GPA_NI 0x0 -#define ID_AA64ISAR1_GPA_ARCHITECTED 0x1 -#define ID_AA64ISAR1_GPI_NI 0x0 -#define ID_AA64ISAR1_GPI_IMP_DEF 0x1 - -/* id_aa64isar2 */ -#define ID_AA64ISAR2_CLEARBHB_SHIFT 28 -#define ID_AA64ISAR2_APA3_SHIFT 12 -#define ID_AA64ISAR2_GPA3_SHIFT 8 -#define ID_AA64ISAR2_RPRES_SHIFT 4 -#define ID_AA64ISAR2_WFXT_SHIFT 0 - -#define ID_AA64ISAR2_RPRES_8BIT 0x0 -#define ID_AA64ISAR2_RPRES_12BIT 0x1 -/* - * Value 0x1 has been removed from the architecture, and is - * reserved, but has not yet been removed from the ARM ARM - * as of ARM DDI 0487G.b. - */ -#define ID_AA64ISAR2_WFXT_NI 0x0 -#define ID_AA64ISAR2_WFXT_SUPPORTED 0x2 - -#define ID_AA64ISAR2_APA3_NI 0x0 -#define ID_AA64ISAR2_APA3_ARCHITECTED 0x1 -#define ID_AA64ISAR2_APA3_ARCH_EPAC 0x2 -#define ID_AA64ISAR2_APA3_ARCH_EPAC2 0x3 -#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC 0x4 -#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC_CMB 0x5 - -#define ID_AA64ISAR2_GPA3_NI 0x0 -#define ID_AA64ISAR2_GPA3_ARCHITECTED 0x1 - /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV2_SHIFT 56 @@ -811,45 +736,6 @@ #define ID_AA64PFR1_MTE 0x2 #define ID_AA64PFR1_MTE_ASYMM 0x3 -/* id_aa64zfr0 */ -#define ID_AA64ZFR0_F64MM_SHIFT 56 -#define ID_AA64ZFR0_F32MM_SHIFT 52 -#define ID_AA64ZFR0_I8MM_SHIFT 44 -#define ID_AA64ZFR0_SM4_SHIFT 40 -#define ID_AA64ZFR0_SHA3_SHIFT 32 -#define ID_AA64ZFR0_BF16_SHIFT 20 -#define ID_AA64ZFR0_BITPERM_SHIFT 16 -#define ID_AA64ZFR0_AES_SHIFT 4 -#define ID_AA64ZFR0_SVEVER_SHIFT 0 - -#define ID_AA64ZFR0_F64MM 0x1 -#define ID_AA64ZFR0_F32MM 0x1 -#define ID_AA64ZFR0_I8MM 0x1 -#define ID_AA64ZFR0_BF16 0x1 -#define ID_AA64ZFR0_SM4 0x1 -#define ID_AA64ZFR0_SHA3 0x1 -#define ID_AA64ZFR0_BITPERM 0x1 -#define ID_AA64ZFR0_AES 0x1 -#define ID_AA64ZFR0_AES_PMULL 0x2 -#define ID_AA64ZFR0_SVEVER_SVE2 0x1 - -/* id_aa64smfr0 */ -#define ID_AA64SMFR0_FA64_SHIFT 63 -#define ID_AA64SMFR0_I16I64_SHIFT 52 -#define ID_AA64SMFR0_F64F64_SHIFT 48 -#define ID_AA64SMFR0_I8I32_SHIFT 36 -#define ID_AA64SMFR0_F16F32_SHIFT 35 -#define ID_AA64SMFR0_B16F32_SHIFT 34 -#define ID_AA64SMFR0_F32F32_SHIFT 32 - -#define ID_AA64SMFR0_FA64 0x1 -#define ID_AA64SMFR0_I16I64 0xf -#define ID_AA64SMFR0_F64F64 0x1 -#define ID_AA64SMFR0_I8I32 0xf -#define ID_AA64SMFR0_F16F32 0x1 -#define ID_AA64SMFR0_B16F32 0x1 -#define ID_AA64SMFR0_F32F32 0x1 - /* id_aa64mmfr0 */ #define ID_AA64MMFR0_ECV_SHIFT 60 #define ID_AA64MMFR0_FGT_SHIFT 56 @@ -902,6 +788,7 @@ /* id_aa64mmfr1 */ #define ID_AA64MMFR1_ECBHB_SHIFT 60 +#define ID_AA64MMFR1_TIDCP1_SHIFT 52 #define ID_AA64MMFR1_HCX_SHIFT 40 #define ID_AA64MMFR1_AFP_SHIFT 44 #define ID_AA64MMFR1_ETS_SHIFT 36 @@ -918,6 +805,9 @@ #define ID_AA64MMFR1_VMIDBITS_8 0 #define ID_AA64MMFR1_VMIDBITS_16 2 +#define ID_AA64MMFR1_TIDCP1_NI 0 +#define ID_AA64MMFR1_TIDCP1_IMP 1 + /* id_aa64mmfr2 */ #define ID_AA64MMFR2_E0PD_SHIFT 60 #define ID_AA64MMFR2_EVT_SHIFT 56 @@ -1084,9 +974,6 @@ #define MVFR2_FPMISC_SHIFT 4 #define MVFR2_SIMDMISC_SHIFT 0 -#define DCZID_DZP_SHIFT 4 -#define DCZID_BS_SHIFT 0 - #define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ #define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ @@ -1121,8 +1008,8 @@ #define SYS_RGSR_EL1_SEED_MASK 0xffffUL /* GMID_EL1 field definitions */ -#define SYS_GMID_EL1_BS_SHIFT 0 -#define SYS_GMID_EL1_BS_SIZE 4 +#define GMID_EL1_BS_SHIFT 0 +#define GMID_EL1_BS_SIZE 4 /* TFSR{,E0}_EL1 bit definitions */ #define SYS_TFSR_EL1_TF0_SHIFT 0 @@ -1324,6 +1211,9 @@ #endif +#define SYS_FIELD_GET(reg, field, val) \ + FIELD_GET(reg##_##field##_MASK, val) + #define SYS_FIELD_PREP(reg, field, val) \ FIELD_PREP(reg##_##field##_MASK, val) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 63f9c828f1a7..2fc9f0861769 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -232,34 +232,34 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) * The "__xxx_error" versions set the third argument to -EFAULT if an error * occurs, and leave it unchanged on success. */ -#define __get_mem_asm(load, reg, x, addr, err) \ +#define __get_mem_asm(load, reg, x, addr, err, type) \ asm volatile( \ "1: " load " " reg "1, [%2]\n" \ "2:\n" \ - _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \ + _ASM_EXTABLE_##type##ACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \ : "+r" (err), "=&r" (x) \ : "r" (addr)) -#define __raw_get_mem(ldr, x, ptr, err) \ -do { \ - unsigned long __gu_val; \ - switch (sizeof(*(ptr))) { \ - case 1: \ - __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err)); \ - break; \ - case 2: \ - __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err)); \ - break; \ - case 4: \ - __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err)); \ - break; \ - case 8: \ - __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err)); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ +#define __raw_get_mem(ldr, x, ptr, err, type) \ +do { \ + unsigned long __gu_val; \ + switch (sizeof(*(ptr))) { \ + case 1: \ + __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err), type); \ + break; \ + case 2: \ + __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err), type); \ + break; \ + case 4: \ + __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err), type); \ + break; \ + case 8: \ + __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err), type); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) /* @@ -274,7 +274,7 @@ do { \ __chk_user_ptr(ptr); \ \ uaccess_ttbr0_enable(); \ - __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err); \ + __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err, U); \ uaccess_ttbr0_disable(); \ \ (x) = __rgu_val; \ @@ -314,40 +314,40 @@ do { \ \ __uaccess_enable_tco_async(); \ __raw_get_mem("ldr", *((type *)(__gkn_dst)), \ - (__force type *)(__gkn_src), __gkn_err); \ + (__force type *)(__gkn_src), __gkn_err, K); \ __uaccess_disable_tco_async(); \ \ if (unlikely(__gkn_err)) \ goto err_label; \ } while (0) -#define __put_mem_asm(store, reg, x, addr, err) \ +#define __put_mem_asm(store, reg, x, addr, err, type) \ asm volatile( \ "1: " store " " reg "1, [%2]\n" \ "2:\n" \ - _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) \ + _ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0) \ : "+r" (err) \ : "r" (x), "r" (addr)) -#define __raw_put_mem(str, x, ptr, err) \ -do { \ - __typeof__(*(ptr)) __pu_val = (x); \ - switch (sizeof(*(ptr))) { \ - case 1: \ - __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err)); \ - break; \ - case 2: \ - __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err)); \ - break; \ - case 4: \ - __put_mem_asm(str, "%w", __pu_val, (ptr), (err)); \ - break; \ - case 8: \ - __put_mem_asm(str, "%x", __pu_val, (ptr), (err)); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ +#define __raw_put_mem(str, x, ptr, err, type) \ +do { \ + __typeof__(*(ptr)) __pu_val = (x); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err), type); \ + break; \ + case 2: \ + __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err), type); \ + break; \ + case 4: \ + __put_mem_asm(str, "%w", __pu_val, (ptr), (err), type); \ + break; \ + case 8: \ + __put_mem_asm(str, "%x", __pu_val, (ptr), (err), type); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ } while (0) /* @@ -362,7 +362,7 @@ do { \ __chk_user_ptr(__rpu_ptr); \ \ uaccess_ttbr0_enable(); \ - __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err); \ + __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err, U); \ uaccess_ttbr0_disable(); \ } while (0) @@ -400,7 +400,7 @@ do { \ \ __uaccess_enable_tco_async(); \ __raw_put_mem("str", *((type *)(__pkn_src)), \ - (__force type *)(__pkn_dst), __pkn_err); \ + (__force type *)(__pkn_dst), __pkn_err, K); \ __uaccess_disable_tco_async(); \ \ if (unlikely(__pkn_err)) \ diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 0e80db4327b6..4eb601e7de50 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -36,9 +36,9 @@ #define HVC_RESET_VECTORS 2 /* - * HVC_VHE_RESTART - Upgrade the CPU from EL1 to EL2, if possible + * HVC_FINALISE_EL2 - Upgrade the CPU from EL1 to EL2, if possible */ -#define HVC_VHE_RESTART 3 +#define HVC_FINALISE_EL2 3 /* Max number of HYP stub hypercalls */ #define HVC_STUB_HCALL_NR 4 @@ -49,6 +49,13 @@ #define BOOT_CPU_MODE_EL1 (0xe11) #define BOOT_CPU_MODE_EL2 (0xe12) +/* + * Flags returned together with the boot mode, but not preserved in + * __boot_cpu_mode. Used by the idreg override code to work out the + * boot state. + */ +#define BOOT_CPU_FLAG_E2H BIT_ULL(32) + #ifndef __ASSEMBLY__ #include <asm/ptrace.h> diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 4bb2cc8ac446..1ad2568a2569 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -19,6 +19,9 @@ /* * HWCAP flags - for AT_HWCAP + * + * Bits 62 and 63 are reserved for use by libc. + * Bits 32-61 are unallocated for potential use by libc. */ #define HWCAP_FP (1 << 0) #define HWCAP_ASIMD (1 << 1) @@ -88,5 +91,6 @@ #define HWCAP2_SME_F32F32 (1 << 29) #define HWCAP2_SME_FA64 (1 << 30) #define HWCAP2_WFXT (1UL << 31) +#define HWCAP2_EBF16 (1UL << 32) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index fa7981d0d917..1add7b01efa7 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -14,6 +14,11 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong CFLAGS_syscall.o += -fno-stack-protector +# When KASAN is enabled, a stack trace is recorded for every alloc/free, which +# can significantly impact performance. Avoid instrumenting the stack trace +# collection code to minimize this impact. +KASAN_SANITIZE_stacktrace.o := n + # It's not safe to invoke KCOV when portions of the kernel environment aren't # available or are out-of-sync with HW state. Since `noinstr` doesn't always # inhibit KCOV instrumentation, disable it for the entire compilation unit. @@ -59,7 +64,7 @@ obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o obj-$(CONFIG_PARAVIRT) += paravirt.o -obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o +obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o pi/ obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o obj-$(CONFIG_ELF_CORE) += elfcore.o obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \ diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index e4dea8db6924..a5a256e3f9fe 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -351,7 +351,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) prot = __acpi_get_writethrough_mem_attribute(); } } - return __ioremap(phys, size, prot); + return ioremap_prot(phys, size, pgprot_val(prot)); } /* diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index fdfecf0991ce..e51535a5f939 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -109,7 +109,7 @@ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) pxm = pa->proximity_domain; node = acpi_map_pxm_to_node(pxm); - if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) { + if (node == NUMA_NO_NODE) { pr_err("SRAT: Too many proximity domains %d\n", pxm); bad_srat(); return; diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 7bbf5104b7b7..9bcaa5eacf16 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -121,7 +121,7 @@ static void clean_dcache_range_nopatch(u64 start, u64 end) ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0); d_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0, - CTR_DMINLINE_SHIFT); + CTR_EL0_DminLine_SHIFT); cur = start & ~(d_size - 1); do { /* diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 6875a16b09d2..fb0e7c7b2e20 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -59,6 +59,7 @@ struct insn_emulation { static LIST_HEAD(insn_emulation); static int nr_insn_emulated __initdata; static DEFINE_RAW_SPINLOCK(insn_emulation_lock); +static DEFINE_MUTEX(insn_emulation_mutex); static void register_emulation_hooks(struct insn_emulation_ops *ops) { @@ -207,10 +208,10 @@ static int emulation_proc_handler(struct ctl_table *table, int write, loff_t *ppos) { int ret = 0; - struct insn_emulation *insn = (struct insn_emulation *) table->data; + struct insn_emulation *insn = container_of(table->data, struct insn_emulation, current_mode); enum insn_emulation_mode prev_mode = insn->current_mode; - table->data = &insn->current_mode; + mutex_lock(&insn_emulation_mutex); ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write || prev_mode == insn->current_mode) @@ -223,7 +224,7 @@ static int emulation_proc_handler(struct ctl_table *table, int write, update_insn_emulation_mode(insn, INSN_UNDEF); } ret: - table->data = insn; + mutex_unlock(&insn_emulation_mutex); return ret; } @@ -247,7 +248,7 @@ static void __init register_insn_emulation_sysctl(void) sysctl->maxlen = sizeof(int); sysctl->procname = insn->ops->name; - sysctl->data = insn; + sysctl->data = &insn->current_mode; sysctl->extra1 = &insn->min; sysctl->extra2 = &insn->max; sysctl->proc_handler = emulation_proc_handler; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index c05cc3b6162e..7e6289e709fc 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -187,7 +187,7 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry, int scope) { u32 midr = read_cpuid_id(); - bool has_dic = read_cpuid_cachetype() & BIT(CTR_DIC_SHIFT); + bool has_dic = read_cpuid_cachetype() & BIT(CTR_EL0_DIC_SHIFT); const struct midr_range range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1); WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); @@ -212,6 +212,12 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe), }, #endif +#ifdef CONFIG_ARM64_ERRATUM_2441009 + { + /* Cortex-A510 r0p0 -> r1p1. Fixed in r1p2 */ + ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1), + }, +#endif {}, }; #endif @@ -395,6 +401,14 @@ static struct midr_range trbe_write_out_of_range_cpus[] = { }; #endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */ +#ifdef CONFIG_ARM64_ERRATUM_1742098 +static struct midr_range broken_aarch32_aes[] = { + MIDR_RANGE(MIDR_CORTEX_A57, 0, 1, 0xf, 0xf), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */ + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -480,7 +494,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI { - .desc = "Qualcomm erratum 1009, or ARM erratum 1286807", + .desc = "Qualcomm erratum 1009, or ARM erratum 1286807, 2441009", .capability = ARM64_WORKAROUND_REPEAT_TLBI, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = cpucap_multi_entry_cap_matches, @@ -658,6 +672,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 1) }, #endif +#ifdef CONFIG_ARM64_ERRATUM_1742098 + { + .desc = "ARM erratum 1742098", + .capability = ARM64_WORKAROUND_1742098, + CAP_MIDR_RANGE_LIST(broken_aarch32_aes), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 8d88433de81d..ad64cab0a2ba 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -79,6 +79,7 @@ #include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/fpsimd.h> +#include <asm/hwcap.h> #include <asm/insn.h> #include <asm/kvm_host.h> #include <asm/mmu_context.h> @@ -91,7 +92,7 @@ #include <asm/virt.h> /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ -static unsigned long elf_hwcap __read_mostly; +static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; #ifdef CONFIG_COMPAT #define COMPAT_ELF_HWCAP_DEFAULT \ @@ -209,35 +210,35 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_I8MM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DGH_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_BF16_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SPECRES_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FRINTTS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_SPECRES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_SB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_FRINTTS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPI_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_GPI_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_GPA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_FCMA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_JSCVT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_API_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_EL1_API_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_APA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_EL1_APA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_APA3_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_GPA3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_WFXT_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_GPA3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -276,41 +277,41 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F64MM_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F32MM_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_I8MM_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BF16_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_AES_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_FA64_SHIFT, 1, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I16I64_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F64F64_SHIFT, 1, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I8I32_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F16F32_SHIFT, 1, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_B16F32_SHIFT, 1, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F32F32_SHIFT, 1, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0), ARM64_FTR_END, }; @@ -361,6 +362,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TIDCP1_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_AFP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_ETS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TWED_SHIFT, 4, 0), @@ -396,18 +398,18 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DIC_SHIFT, 1, 1), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IDC_SHIFT, 1, 1), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_CWG_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_ERG_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DMINLINE_SHIFT, 4, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DIC_SHIFT, 1, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_IDC_SHIFT, 1, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_EL0_CWG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_EL0_ERG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DminLine_SHIFT, 4, 1), /* * Linux can handle differing I-cache policies. Userspace JITs will * make use of *minLine. * If we have differing I-cache policies, report it as the weakest - VIPT. */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, CTR_L1IP_SHIFT, 2, ICACHE_POLICY_VIPT), /* L1Ip */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, CTR_EL0_L1Ip_SHIFT, 2, CTR_EL0_L1Ip_VIPT), /* L1Ip */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_IminLine_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -453,13 +455,13 @@ static const struct arm64_ftr_bits ftr_mvfr2[] = { }; static const struct arm64_ftr_bits ftr_dczid[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, DCZID_DZP_SHIFT, 1, 1), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, DCZID_BS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, DCZID_EL0_DZP_SHIFT, 1, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, DCZID_EL0_BS_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_gmid[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, SYS_GMID_EL1_BS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, GMID_EL1_BS_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -561,7 +563,7 @@ static const struct arm64_ftr_bits ftr_id_pfr2[] = { static const struct arm64_ftr_bits ftr_id_dfr0[] = { /* [31:28] TraceFilt */ - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_PERFMON_SHIFT, 4, 0xf), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_DFR0_PERFMON_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MPROFDBG_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPTRC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPTRC_SHIFT, 4, 0), @@ -631,7 +633,10 @@ static const struct arm64_ftr_bits ftr_raz[] = { __ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override) struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override; +struct arm64_ftr_override __ro_after_init id_aa64pfr0_override; struct arm64_ftr_override __ro_after_init id_aa64pfr1_override; +struct arm64_ftr_override __ro_after_init id_aa64zfr0_override; +struct arm64_ftr_override __ro_after_init id_aa64smfr0_override; struct arm64_ftr_override __ro_after_init id_aa64isar1_override; struct arm64_ftr_override __ro_after_init id_aa64isar2_override; @@ -668,11 +673,14 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_MMFR5_EL1, ftr_id_mmfr5), /* Op1 = 0, CRn = 0, CRm = 4 */ - ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0, + &id_aa64pfr0_override), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1, &id_aa64pfr1_override), - ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0), - ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0), + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0, + &id_aa64zfr0_override), + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0, + &id_aa64smfr0_override), /* Op1 = 0, CRn = 0, CRm = 5 */ ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0), @@ -993,15 +1001,24 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) init_32bit_cpu_features(&info->aarch32); - if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { + if (IS_ENABLED(CONFIG_ARM64_SVE) && + id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) { + info->reg_zcr = read_zcr_features(); init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr); vec_init_vq_map(ARM64_VEC_SVE); } - if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) { + if (IS_ENABLED(CONFIG_ARM64_SME) && + id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) { + info->reg_smcr = read_smcr_features(); + /* + * We mask out SMPS since even if the hardware + * supports priorities the kernel does not at present + * and we block access to them. + */ + info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS; init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr); - if (IS_ENABLED(CONFIG_ARM64_SME)) - vec_init_vq_map(ARM64_VEC_SME); + vec_init_vq_map(ARM64_VEC_SME); } if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) @@ -1233,23 +1250,31 @@ void update_cpu_features(int cpu, taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu, info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0); - if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { + if (IS_ENABLED(CONFIG_ARM64_SVE) && + id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) { + info->reg_zcr = read_zcr_features(); taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu, info->reg_zcr, boot->reg_zcr); - /* Probe vector lengths, unless we already gave up on SVE */ - if (id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) && - !system_capabilities_finalized()) + /* Probe vector lengths */ + if (!system_capabilities_finalized()) vec_update_vq_map(ARM64_VEC_SVE); } - if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) { + if (IS_ENABLED(CONFIG_ARM64_SME) && + id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) { + info->reg_smcr = read_smcr_features(); + /* + * We mask out SMPS since even if the hardware + * supports priorities the kernel does not at present + * and we block access to them. + */ + info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS; taint |= check_update_ftr_reg(SYS_SMCR_EL1, cpu, info->reg_smcr, boot->reg_smcr); - /* Probe vector lengths, unless we already gave up on SME */ - if (id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1)) && - !system_capabilities_finalized()) + /* Probe vector lengths */ + if (!system_capabilities_finalized()) vec_update_vq_map(ARM64_VEC_SME); } @@ -1480,7 +1505,7 @@ static bool has_cache_idc(const struct arm64_cpu_capabilities *entry, else ctr = read_cpuid_effective_cachetype(); - return ctr & BIT(CTR_IDC_SHIFT); + return ctr & BIT(CTR_EL0_IDC_SHIFT); } static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unused) @@ -1491,7 +1516,7 @@ static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unu * to the CTR_EL0 on this CPU and emulate it with the real/safe * value. */ - if (!(read_cpuid_cachetype() & BIT(CTR_IDC_SHIFT))) + if (!(read_cpuid_cachetype() & BIT(CTR_EL0_IDC_SHIFT))) sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } @@ -1505,7 +1530,7 @@ static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, else ctr = read_cpuid_cachetype(); - return ctr & BIT(CTR_DIC_SHIFT); + return ctr & BIT(CTR_EL0_DIC_SHIFT); } static bool __maybe_unused @@ -1645,14 +1670,34 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, } #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT)) + +extern +void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), int flags); + +static phys_addr_t kpti_ng_temp_alloc; + +static phys_addr_t kpti_ng_pgd_alloc(int shift) +{ + kpti_ng_temp_alloc -= PAGE_SIZE; + return kpti_ng_temp_alloc; +} + static void __nocfi kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) { - typedef void (kpti_remap_fn)(int, int, phys_addr_t); + typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long); extern kpti_remap_fn idmap_kpti_install_ng_mappings; kpti_remap_fn *remap_fn; int cpu = smp_processor_id(); + int levels = CONFIG_PGTABLE_LEVELS; + int order = order_base_2(levels); + u64 kpti_ng_temp_pgd_pa = 0; + pgd_t *kpti_ng_temp_pgd; + u64 alloc = 0; if (__this_cpu_read(this_cpu_vector) == vectors) { const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI); @@ -1670,12 +1715,40 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) remap_fn = (void *)__pa_symbol(function_nocfi(idmap_kpti_install_ng_mappings)); + if (!cpu) { + alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order); + kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE); + kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd); + + // + // Create a minimal page table hierarchy that permits us to map + // the swapper page tables temporarily as we traverse them. + // + // The physical pages are laid out as follows: + // + // +--------+-/-------+-/------ +-\\--------+ + // : PTE[] : | PMD[] : | PUD[] : || PGD[] : + // +--------+-\-------+-\------ +-//--------+ + // ^ + // The first page is mapped into this hierarchy at a PMD_SHIFT + // aligned virtual address, so that we can manipulate the PTE + // level entries while the mapping is active. The first entry + // covers the PTE[] page itself, the remaining entries are free + // to be used as a ad-hoc fixmap. + // + create_kpti_ng_temp_pgd(kpti_ng_temp_pgd, __pa(alloc), + KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL, + kpti_ng_pgd_alloc, 0); + } + cpu_install_idmap(); - remap_fn(cpu, num_online_cpus(), __pa_symbol(swapper_pg_dir)); + remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA); cpu_uninstall_idmap(); - if (!cpu) + if (!cpu) { + free_pages(alloc, order); arm64_use_ng_mappings = true; + } } #else static void @@ -1971,6 +2044,14 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_MTE */ +static void elf_hwcap_fixup(void) +{ +#ifdef CONFIG_ARM64_ERRATUM_1742098 + if (cpus_have_const_cap(ARM64_WORKAROUND_1742098)) + compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES; +#endif /* ARM64_ERRATUM_1742098 */ +} + #ifdef CONFIG_KVM static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused) { @@ -1978,6 +2059,11 @@ static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, in } #endif /* CONFIG_KVM */ +static void cpu_trap_el0_impdef(const struct arm64_cpu_capabilities *__unused) +{ + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_TIDCP); +} + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -2132,7 +2218,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, - .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_DPB_SHIFT, .field_width = 4, .min_field_value = 1, }, @@ -2143,7 +2229,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_DPB_SHIFT, .field_width = 4, .min_field_value = 2, }, @@ -2303,7 +2389,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, - .field_pos = ID_AA64ISAR1_SB_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_SB_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, @@ -2315,9 +2401,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_APA_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_APA_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR1_APA_ARCHITECTED, + .min_field_value = ID_AA64ISAR1_EL1_APA_PAuth, .matches = has_address_auth_cpucap, }, { @@ -2326,9 +2412,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .sys_reg = SYS_ID_AA64ISAR2_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR2_APA3_SHIFT, + .field_pos = ID_AA64ISAR2_EL1_APA3_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR2_APA3_ARCHITECTED, + .min_field_value = ID_AA64ISAR2_EL1_APA3_PAuth, .matches = has_address_auth_cpucap, }, { @@ -2337,9 +2423,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_API_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_API_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR1_API_IMP_DEF, + .min_field_value = ID_AA64ISAR1_EL1_API_PAuth, .matches = has_address_auth_cpucap, }, { @@ -2353,9 +2439,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_GPA_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_GPA_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR1_GPA_ARCHITECTED, + .min_field_value = ID_AA64ISAR1_EL1_GPA_IMP, .matches = has_cpuid_feature, }, { @@ -2364,9 +2450,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR2_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR2_GPA3_SHIFT, + .field_pos = ID_AA64ISAR2_EL1_GPA3_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR2_GPA3_ARCHITECTED, + .min_field_value = ID_AA64ISAR2_EL1_GPA3_IMP, .matches = has_cpuid_feature, }, { @@ -2375,9 +2461,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_GPI_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_GPI_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR1_GPI_IMP_DEF, + .min_field_value = ID_AA64ISAR1_EL1_GPI_IMP, .matches = has_cpuid_feature, }, { @@ -2478,7 +2564,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_LRCPC_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_LRCPC_SHIFT, .field_width = 4, .matches = has_cpuid_feature, .min_field_value = 1, @@ -2503,9 +2589,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_SME_FA64, .sys_reg = SYS_ID_AA64SMFR0_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64SMFR0_FA64_SHIFT, + .field_pos = ID_AA64SMFR0_EL1_FA64_SHIFT, .field_width = 1, - .min_field_value = ID_AA64SMFR0_FA64, + .min_field_value = ID_AA64SMFR0_EL1_FA64_IMP, .matches = has_cpuid_feature, .cpu_enable = fa64_kernel_enable, }, @@ -2516,10 +2602,22 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR2_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR2_WFXT_SHIFT, + .field_pos = ID_AA64ISAR2_EL1_WFxT_SHIFT, .field_width = 4, .matches = has_cpuid_feature, - .min_field_value = ID_AA64ISAR2_WFXT_SUPPORTED, + .min_field_value = ID_AA64ISAR2_EL1_WFxT_IMP, + }, + { + .desc = "Trap EL0 IMPLEMENTATION DEFINED functionality", + .capability = ARM64_HAS_TIDCP1, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .sys_reg = SYS_ID_AA64MMFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR1_TIDCP1_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64MMFR1_TIDCP1_IMP, + .matches = has_cpuid_feature, + .cpu_enable = cpu_trap_el0_impdef, }, {}, }; @@ -2560,33 +2658,33 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #ifdef CONFIG_ARM64_PTR_AUTH static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_APA_SHIFT, + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_APA_SHIFT, 4, FTR_UNSIGNED, - ID_AA64ISAR1_APA_ARCHITECTED) + ID_AA64ISAR1_EL1_APA_PAuth) }, { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_SHIFT, - 4, FTR_UNSIGNED, ID_AA64ISAR2_APA3_ARCHITECTED) + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_APA3_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_APA3_PAuth) }, { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_SHIFT, - 4, FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF) + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_API_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_API_PAuth) }, {}, }; static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = { { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPA_SHIFT, - 4, FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED) + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_GPA_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_GPA_IMP) }, { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_GPA3_SHIFT, - 4, FTR_UNSIGNED, ID_AA64ISAR2_GPA3_ARCHITECTED) + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_GPA3_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_GPA3_IMP) }, { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPI_SHIFT, - 4, FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF) + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_GPI_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_GPI_IMP) }, {}, }; @@ -2614,30 +2712,31 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_EBF16), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SVEver_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BitPerm_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BF16_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SHA3_IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SM4_IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_I8MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F32MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F64MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), #endif HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_BTI @@ -2653,17 +2752,17 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #endif /* CONFIG_ARM64_MTE */ HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), - HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), - HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_WFXT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_WFXT_SUPPORTED, CAP_HWCAP, KERNEL_HWCAP_WFXT), + HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), + HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_WFxT_IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT), #ifdef CONFIG_ARM64_SME HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SME, CAP_HWCAP, KERNEL_HWCAP_SME), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_FA64, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I16I64, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F64F64, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I8I32, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F16F32, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_B16F32, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F32F32, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_FA64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F64F64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I8I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F32F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), #endif /* CONFIG_ARM64_SME */ {}, }; @@ -3098,14 +3197,12 @@ static bool __maybe_unused __system_matches_cap(unsigned int n) void cpu_set_feature(unsigned int num) { - WARN_ON(num >= MAX_CPU_FEATURES); - elf_hwcap |= BIT(num); + set_bit(num, elf_hwcap); } bool cpu_have_feature(unsigned int num) { - WARN_ON(num >= MAX_CPU_FEATURES); - return elf_hwcap & BIT(num); + return test_bit(num, elf_hwcap); } EXPORT_SYMBOL_GPL(cpu_have_feature); @@ -3116,12 +3213,12 @@ unsigned long cpu_get_elf_hwcap(void) * note that for userspace compatibility we guarantee that bits 62 * and 63 will always be returned as 0. */ - return lower_32_bits(elf_hwcap); + return elf_hwcap[0]; } unsigned long cpu_get_elf_hwcap2(void) { - return upper_32_bits(elf_hwcap); + return elf_hwcap[1]; } static void __init setup_system_capabilities(void) @@ -3143,8 +3240,10 @@ void __init setup_cpu_features(void) setup_system_capabilities(); setup_elf_hwcaps(arm64_elf_hwcaps); - if (system_supports_32bit_el0()) + if (system_supports_32bit_el0()) { setup_elf_hwcaps(compat_elf_hwcaps); + elf_hwcap_fixup(); + } if (system_uses_ttbr0_pan()) pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); @@ -3197,6 +3296,7 @@ static int enable_mismatched_32bit_el0(unsigned int cpu) cpu_active_mask); get_cpu_device(lucky_winner)->offline_disabled = true; setup_elf_hwcaps(compat_elf_hwcaps); + elf_hwcap_fixup(); pr_info("Asymmetric 32-bit EL0 support detected on CPU %u; CPU hot-unplug disabled on CPU %u\n", cpu, lucky_winner); return 0; @@ -3218,7 +3318,7 @@ subsys_initcall_sync(init_32bit_el0_mask); static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap) { - cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir); } /* diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 3006f4324808..4150e308e99c 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -13,35 +13,6 @@ #include <linux/of_device.h> #include <linux/psci.h> -#include <asm/cpuidle.h> -#include <asm/cpu_ops.h> - -int arm_cpuidle_init(unsigned int cpu) -{ - const struct cpu_operations *ops = get_cpu_ops(cpu); - int ret = -EOPNOTSUPP; - - if (ops && ops->cpu_suspend && ops->cpu_init_idle) - ret = ops->cpu_init_idle(cpu); - - return ret; -} - -/** - * arm_cpuidle_suspend() - function to enter a low-power idle state - * @index: argument to pass to CPU suspend operations - * - * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU - * operations back-end error code otherwise. - */ -int arm_cpuidle_suspend(int index) -{ - int cpu = smp_processor_id(); - const struct cpu_operations *ops = get_cpu_ops(cpu); - - return ops->cpu_suspend(index); -} - #ifdef CONFIG_ACPI #include <acpi/processor.h> diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 8eff0a34ffd4..d7702f39b4d3 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -33,12 +33,19 @@ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); static struct cpuinfo_arm64 boot_cpu_data; -static const char *icache_policy_str[] = { - [ICACHE_POLICY_VPIPT] = "VPIPT", - [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", - [ICACHE_POLICY_VIPT] = "VIPT", - [ICACHE_POLICY_PIPT] = "PIPT", -}; +static inline const char *icache_policy_str(int l1ip) +{ + switch (l1ip) { + case CTR_EL0_L1Ip_VPIPT: + return "VPIPT"; + case CTR_EL0_L1Ip_VIPT: + return "VIPT"; + case CTR_EL0_L1Ip_PIPT: + return "PIPT"; + default: + return "RESERVED/UNKNOWN"; + } +} unsigned long __icache_flags; @@ -107,6 +114,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_SME_F32F32] = "smef32f32", [KERNEL_HWCAP_SME_FA64] = "smefa64", [KERNEL_HWCAP_WFXT] = "wfxt", + [KERNEL_HWCAP_EBF16] = "ebf16", }; #ifdef CONFIG_COMPAT @@ -267,6 +275,7 @@ static struct kobj_type cpuregs_kobj_type = { CPUREGS_ATTR_RO(midr_el1, midr); CPUREGS_ATTR_RO(revidr_el1, revidr); +CPUREGS_ATTR_RO(smidr_el1, smidr); static struct attribute *cpuregs_id_attrs[] = { &cpuregs_attr_midr_el1.attr, @@ -279,6 +288,16 @@ static const struct attribute_group cpuregs_attr_group = { .name = "identification" }; +static struct attribute *sme_cpuregs_id_attrs[] = { + &cpuregs_attr_smidr_el1.attr, + NULL +}; + +static const struct attribute_group sme_cpuregs_attr_group = { + .attrs = sme_cpuregs_id_attrs, + .name = "identification" +}; + static int cpuid_cpu_online(unsigned int cpu) { int rc; @@ -296,6 +315,8 @@ static int cpuid_cpu_online(unsigned int cpu) rc = sysfs_create_group(&info->kobj, &cpuregs_attr_group); if (rc) kobject_del(&info->kobj); + if (system_supports_sme()) + rc = sysfs_merge_group(&info->kobj, &sme_cpuregs_attr_group); out: return rc; } @@ -342,19 +363,19 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) u32 l1ip = CTR_L1IP(info->reg_ctr); switch (l1ip) { - case ICACHE_POLICY_PIPT: + case CTR_EL0_L1Ip_PIPT: break; - case ICACHE_POLICY_VPIPT: + case CTR_EL0_L1Ip_VPIPT: set_bit(ICACHEF_VPIPT, &__icache_flags); break; - case ICACHE_POLICY_RESERVED: - case ICACHE_POLICY_VIPT: + case CTR_EL0_L1Ip_VIPT: + default: /* Assume aliasing */ set_bit(ICACHEF_ALIASING, &__icache_flags); break; } - pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); + pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str(l1ip), cpu); } static void __cpuinfo_store_cpu_32bit(struct cpuinfo_32bit *info) @@ -418,14 +439,6 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) __cpuinfo_store_cpu_32bit(&info->aarch32); - if (IS_ENABLED(CONFIG_ARM64_SVE) && - id_aa64pfr0_sve(info->reg_id_aa64pfr0)) - info->reg_zcr = read_zcr_features(); - - if (IS_ENABLED(CONFIG_ARM64_SME) && - id_aa64pfr1_sme(info->reg_id_aa64pfr1)) - info->reg_smcr = read_smcr_features(); - cpuinfo_detect_icache_policy(info); } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5b82b9292400..254fe31c03a0 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -636,18 +636,28 @@ alternative_else_nop_endif */ .endm - .macro tramp_data_page dst - adr_l \dst, .entry.tramp.text - sub \dst, \dst, PAGE_SIZE - .endm - - .macro tramp_data_read_var dst, var -#ifdef CONFIG_RANDOMIZE_BASE - tramp_data_page \dst - add \dst, \dst, #:lo12:__entry_tramp_data_\var - ldr \dst, [\dst] + .macro tramp_data_read_var dst, var +#ifdef CONFIG_RELOCATABLE + ldr \dst, .L__tramp_data_\var + .ifndef .L__tramp_data_\var + .pushsection ".entry.tramp.rodata", "a", %progbits + .align 3 +.L__tramp_data_\var: + .quad \var + .popsection + .endif #else - ldr \dst, =\var + /* + * As !RELOCATABLE implies !RANDOMIZE_BASE the address is always a + * compile time constant (and hence not secret and not worth hiding). + * + * As statically allocated kernel code and data always live in the top + * 47 bits of the address space we can sign-extend bit 47 and avoid an + * instruction to load the upper 16 bits (which must be 0xFFFF). + */ + movz \dst, :abs_g2_s:\var + movk \dst, :abs_g1_nc:\var + movk \dst, :abs_g0_nc:\var #endif .endm @@ -695,7 +705,7 @@ alternative_else_nop_endif msr vbar_el1, x30 isb .else - ldr x30, =vectors + adr_l x30, vectors .endif // \kpti == 1 .if \bhb == BHB_MITIGATION_FW @@ -764,24 +774,7 @@ SYM_CODE_END(tramp_exit_native) SYM_CODE_START(tramp_exit_compat) tramp_exit 32 SYM_CODE_END(tramp_exit_compat) - - .ltorg .popsection // .entry.tramp.text -#ifdef CONFIG_RANDOMIZE_BASE - .pushsection ".rodata", "a" - .align PAGE_SHIFT -SYM_DATA_START(__entry_tramp_data_start) -__entry_tramp_data_vectors: - .quad vectors -#ifdef CONFIG_ARM_SDE_INTERFACE -__entry_tramp_data___sdei_asm_handler: - .quad __sdei_asm_handler -#endif /* CONFIG_ARM_SDE_INTERFACE */ -__entry_tramp_data_this_cpu_vector: - .quad this_cpu_vector -SYM_DATA_END(__entry_tramp_data_start) - .popsection // .rodata -#endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ /* @@ -932,7 +925,6 @@ NOKPROBE(call_on_irq_stack) * This clobbers x4, __sdei_handler() will restore this from firmware's * copy. */ -.ltorg .pushsection ".entry.tramp.text", "ax" SYM_CODE_START(__sdei_asm_entry_trampoline) mrs x4, ttbr1_el1 @@ -967,7 +959,6 @@ SYM_CODE_START(__sdei_asm_exit_trampoline) 1: sdei_handler_exit exit_mode=x2 SYM_CODE_END(__sdei_asm_exit_trampoline) NOKPROBE(__sdei_asm_exit_trampoline) - .ltorg .popsection // .entry.tramp.text #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index aecf3071efdd..dd63ffc3a2fa 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -445,7 +445,6 @@ static void fpsimd_save(void) if (system_supports_sme()) { u64 *svcr = last->svcr; - *svcr = read_sysreg_s(SYS_SVCR); *svcr = read_sysreg_s(SYS_SVCR); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 6a98f1a38c29..cefe6a73ee54 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -37,8 +37,6 @@ #include "efi-header.S" -#define __PHYS_OFFSET KERNEL_START - #if (PAGE_OFFSET & 0x1fffff) != 0 #error PAGE_OFFSET must be at least 2MB aligned #endif @@ -51,9 +49,6 @@ * MMU = off, D-cache = off, I-cache = on or off, * x0 = physical address to the FDT blob. * - * This code is mostly position independent so you call this at - * __pa(PAGE_OFFSET). - * * Note that the callee-saved registers are used for storing variables * that are useful before the MMU is enabled. The allocations are described * in the entry routines. @@ -82,25 +77,34 @@ * primary lowlevel boot path: * * Register Scope Purpose + * x20 primary_entry() .. __primary_switch() CPU boot mode * x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0 + * x22 create_idmap() .. start_kernel() ID map VA of the DT blob * x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset - * x28 __create_page_tables() callee preserved temp register - * x19/x20 __primary_switch() callee preserved temp registers - * x24 __primary_switch() .. relocate_kernel() current RELR displacement + * x24 __primary_switch() linear map KASLR seed + * x25 primary_entry() .. start_kernel() supported VA size + * x28 create_idmap() callee preserved temp register */ SYM_CODE_START(primary_entry) bl preserve_boot_args bl init_kernel_el // w0=cpu_boot_mode - adrp x23, __PHYS_OFFSET - and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 - bl set_cpu_boot_mode_flag - bl __create_page_tables + mov x20, x0 + bl create_idmap + /* * The following calls CPU setup code, see arch/arm64/mm/proc.S for * details. * On return, the CPU will be ready for the MMU to be turned on and * the TCR will have been set. */ +#if VA_BITS > 48 + mrs_s x0, SYS_ID_AA64MMFR2_EL1 + tst x0, #0xf << ID_AA64MMFR2_LVA_SHIFT + mov x0, #VA_BITS + mov x25, #VA_BITS_MIN + csel x25, x25, x0, eq + mov x0, x25 +#endif bl __cpu_setup // initialise processor b __primary_switch SYM_CODE_END(primary_entry) @@ -122,28 +126,16 @@ SYM_CODE_START_LOCAL(preserve_boot_args) b dcache_inval_poc // tail call SYM_CODE_END(preserve_boot_args) -/* - * Macro to create a table entry to the next page. - * - * tbl: page table address - * virt: virtual address - * shift: #imm page table shift - * ptrs: #imm pointers per table page - * - * Preserves: virt - * Corrupts: ptrs, tmp1, tmp2 - * Returns: tbl -> next level table page address - */ - .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 - add \tmp1, \tbl, #PAGE_SIZE - phys_to_pte \tmp2, \tmp1 - orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type - lsr \tmp1, \virt, #\shift - sub \ptrs, \ptrs, #1 - and \tmp1, \tmp1, \ptrs // table index - str \tmp2, [\tbl, \tmp1, lsl #3] - add \tbl, \tbl, #PAGE_SIZE // next level table page - .endm +SYM_FUNC_START_LOCAL(clear_page_tables) + /* + * Clear the init page tables. + */ + adrp x0, init_pg_dir + adrp x1, init_pg_end + sub x2, x1, x0 + mov x1, xzr + b __pi_memset // tail call +SYM_FUNC_END(clear_page_tables) /* * Macro to populate page table entries, these entries can be pointers to the next level @@ -179,31 +171,20 @@ SYM_CODE_END(preserve_boot_args) * vstart: virtual address of start of range * vend: virtual address of end of range - we map [vstart, vend] * shift: shift used to transform virtual address into index - * ptrs: number of entries in page table + * order: #imm 2log(number of entries in page table) * istart: index in table corresponding to vstart * iend: index in table corresponding to vend * count: On entry: how many extra entries were required in previous level, scales * our end index. * On exit: returns how many extra entries required for next page table level * - * Preserves: vstart, vend, shift, ptrs + * Preserves: vstart, vend * Returns: istart, iend, count */ - .macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count - lsr \iend, \vend, \shift - mov \istart, \ptrs - sub \istart, \istart, #1 - and \iend, \iend, \istart // iend = (vend >> shift) & (ptrs - 1) - mov \istart, \ptrs - mul \istart, \istart, \count - add \iend, \iend, \istart // iend += count * ptrs - // our entries span multiple tables - - lsr \istart, \vstart, \shift - mov \count, \ptrs - sub \count, \count, #1 - and \istart, \istart, \count - + .macro compute_indices, vstart, vend, shift, order, istart, iend, count + ubfx \istart, \vstart, \shift, \order + ubfx \iend, \vend, \shift, \order + add \iend, \iend, \count, lsl \order sub \count, \iend, \istart .endm @@ -218,119 +199,116 @@ SYM_CODE_END(preserve_boot_args) * vend: virtual address of end of range - we map [vstart, vend - 1] * flags: flags to use to map last level entries * phys: physical address corresponding to vstart - physical memory is contiguous - * pgds: the number of pgd entries + * order: #imm 2log(number of entries in PGD table) + * + * If extra_shift is set, an extra level will be populated if the end address does + * not fit in 'extra_shift' bits. This assumes vend is in the TTBR0 range. * * Temporaries: istart, iend, tmp, count, sv - these need to be different registers * Preserves: vstart, flags * Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv */ - .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv + .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv, extra_shift sub \vend, \vend, #1 add \rtbl, \tbl, #PAGE_SIZE - mov \sv, \rtbl mov \count, #0 - compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count + + .ifnb \extra_shift + tst \vend, #~((1 << (\extra_shift)) - 1) + b.eq .L_\@ + compute_indices \vstart, \vend, #\extra_shift, #(PAGE_SHIFT - 3), \istart, \iend, \count + mov \sv, \rtbl populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp mov \tbl, \sv + .endif +.L_\@: + compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count mov \sv, \rtbl + populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp + mov \tbl, \sv #if SWAPPER_PGTABLE_LEVELS > 3 - compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count + compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count + mov \sv, \rtbl populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp mov \tbl, \sv - mov \sv, \rtbl #endif #if SWAPPER_PGTABLE_LEVELS > 2 - compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count + compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count + mov \sv, \rtbl populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp mov \tbl, \sv #endif - compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count - bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1 - populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp + compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count + bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1 + populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp .endm /* - * Setup the initial page tables. We only setup the barest amount which is - * required to get the kernel running. The following sections are required: - * - identity mapping to enable the MMU (low address, TTBR0) - * - first few MB of the kernel linear mapping to jump to once the MMU has - * been enabled + * Remap a subregion created with the map_memory macro with modified attributes + * or output address. The entire remapped region must have been covered in the + * invocation of map_memory. + * + * x0: last level table address (returned in first argument to map_memory) + * x1: start VA of the existing mapping + * x2: start VA of the region to update + * x3: end VA of the region to update (exclusive) + * x4: start PA associated with the region to update + * x5: attributes to set on the updated region + * x6: order of the last level mappings */ -SYM_FUNC_START_LOCAL(__create_page_tables) - mov x28, lr +SYM_FUNC_START_LOCAL(remap_region) + sub x3, x3, #1 // make end inclusive - /* - * Invalidate the init page tables to avoid potential dirty cache lines - * being evicted. Other page tables are allocated in rodata as part of - * the kernel image, and thus are clean to the PoC per the boot - * protocol. - */ - adrp x0, init_pg_dir - adrp x1, init_pg_end - bl dcache_inval_poc + // Get the index offset for the start of the last level table + lsr x1, x1, x6 + bfi x1, xzr, #0, #PAGE_SHIFT - 3 - /* - * Clear the init page tables. - */ - adrp x0, init_pg_dir - adrp x1, init_pg_end - sub x1, x1, x0 -1: stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - subs x1, x1, #64 - b.ne 1b + // Derive the start and end indexes into the last level table + // associated with the provided region + lsr x2, x2, x6 + lsr x3, x3, x6 + sub x2, x2, x1 + sub x3, x3, x1 - mov x7, SWAPPER_MM_MMUFLAGS + mov x1, #1 + lsl x6, x1, x6 // block size at this level - /* - * Create the identity mapping. - */ - adrp x0, idmap_pg_dir - adrp x3, __idmap_text_start // __pa(__idmap_text_start) - -#ifdef CONFIG_ARM64_VA_BITS_52 - mrs_s x6, SYS_ID_AA64MMFR2_EL1 - and x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT) - mov x5, #52 - cbnz x6, 1f -#endif - mov x5, #VA_BITS_MIN -1: - adr_l x6, vabits_actual - str x5, [x6] - dmb sy - dc ivac, x6 // Invalidate potentially stale cache line + populate_entries x0, x4, x2, x3, x5, x6, x7 + ret +SYM_FUNC_END(remap_region) +SYM_FUNC_START_LOCAL(create_idmap) + mov x28, lr /* - * VA_BITS may be too small to allow for an ID mapping to be created - * that covers system RAM if that is located sufficiently high in the - * physical address space. So for the ID map, use an extended virtual - * range in that case, and configure an additional translation level - * if needed. + * The ID map carries a 1:1 mapping of the physical address range + * covered by the loaded image, which could be anywhere in DRAM. This + * means that the required size of the VA (== PA) space is decided at + * boot time, and could be more than the configured size of the VA + * space for ordinary kernel and user space mappings. + * + * There are three cases to consider here: + * - 39 <= VA_BITS < 48, and the ID map needs up to 48 VA bits to cover + * the placement of the image. In this case, we configure one extra + * level of translation on the fly for the ID map only. (This case + * also covers 42-bit VA/52-bit PA on 64k pages). * - * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the - * entire ID map region can be mapped. As T0SZ == (64 - #bits used), - * this number conveniently equals the number of leading zeroes in - * the physical address of __idmap_text_end. + * - VA_BITS == 48, and the ID map needs more than 48 VA bits. This can + * only happen when using 64k pages, in which case we need to extend + * the root level table rather than add a level. Note that we can + * treat this case as 'always extended' as long as we take care not + * to program an unsupported T0SZ value into the TCR register. + * + * - Combinations that would require two additional levels of + * translation are not supported, e.g., VA_BITS==36 on 16k pages, or + * VA_BITS==39/4k pages with 5-level paging, where the input address + * requires more than 47 or 48 bits, respectively. */ - adrp x5, __idmap_text_end - clz x5, x5 - cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough? - b.ge 1f // .. then skip VA range extension - - adr_l x6, idmap_t0sz - str x5, [x6] - dmb sy - dc ivac, x6 // Invalidate potentially stale cache line - #if (VA_BITS < 48) +#define IDMAP_PGD_ORDER (VA_BITS - PGDIR_SHIFT) #define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) -#define EXTRA_PTRS (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT)) /* * If VA_BITS < 48, we have to configure an additional table level. @@ -342,36 +320,40 @@ SYM_FUNC_START_LOCAL(__create_page_tables) #if VA_BITS != EXTRA_SHIFT #error "Mismatch between VA_BITS and page size/number of translation levels" #endif - - mov x4, EXTRA_PTRS - create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6 #else +#define IDMAP_PGD_ORDER (PHYS_MASK_SHIFT - PGDIR_SHIFT) +#define EXTRA_SHIFT /* * If VA_BITS == 48, we don't have to configure an additional * translation level, but the top-level table has more entries. */ - mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT) - str_l x4, idmap_ptrs_per_pgd, x5 #endif -1: - ldr_l x4, idmap_ptrs_per_pgd - adr_l x6, __idmap_text_end // __pa(__idmap_text_end) - - map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14 - - /* - * Map the kernel image (starting with PHYS_OFFSET). - */ - adrp x0, init_pg_dir - mov_q x5, KIMAGE_VADDR // compile time __va(_text) - add x5, x5, x23 // add KASLR displacement - mov x4, PTRS_PER_PGD - adrp x6, _end // runtime __pa(_end) - adrp x3, _text // runtime __pa(_text) - sub x6, x6, x3 // _end - _text - add x6, x6, x5 // runtime __va(_end) - - map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14 + adrp x0, init_idmap_pg_dir + adrp x3, _text + adrp x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE + mov x7, SWAPPER_RX_MMUFLAGS + + map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT + + /* Remap the kernel page tables r/w in the ID map */ + adrp x1, _text + adrp x2, init_pg_dir + adrp x3, init_pg_end + bic x4, x2, #SWAPPER_BLOCK_SIZE - 1 + mov x5, SWAPPER_RW_MMUFLAGS + mov x6, #SWAPPER_BLOCK_SHIFT + bl remap_region + + /* Remap the FDT after the kernel image */ + adrp x1, _text + adrp x22, _end + SWAPPER_BLOCK_SIZE + bic x2, x22, #SWAPPER_BLOCK_SIZE - 1 + bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address + add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE + bic x4, x21, #SWAPPER_BLOCK_SIZE - 1 + mov x5, SWAPPER_RW_MMUFLAGS + mov x6, #SWAPPER_BLOCK_SHIFT + bl remap_region /* * Since the page tables have been populated with non-cacheable @@ -380,16 +362,27 @@ SYM_FUNC_START_LOCAL(__create_page_tables) */ dmb sy - adrp x0, idmap_pg_dir - adrp x1, idmap_pg_end + adrp x0, init_idmap_pg_dir + adrp x1, init_idmap_pg_end bl dcache_inval_poc + ret x28 +SYM_FUNC_END(create_idmap) +SYM_FUNC_START_LOCAL(create_kernel_mapping) adrp x0, init_pg_dir - adrp x1, init_pg_end - bl dcache_inval_poc + mov_q x5, KIMAGE_VADDR // compile time __va(_text) + add x5, x5, x23 // add KASLR displacement + adrp x6, _end // runtime __pa(_end) + adrp x3, _text // runtime __pa(_text) + sub x6, x6, x3 // _end - _text + add x6, x6, x5 // runtime __va(_end) + mov x7, SWAPPER_RW_MMUFLAGS - ret x28 -SYM_FUNC_END(__create_page_tables) + map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14 + + dsb ishst // sync with page table walker + ret +SYM_FUNC_END(create_kernel_mapping) /* * Initialize CPU registers with task-specific and cpu-specific context. @@ -420,7 +413,7 @@ SYM_FUNC_END(__create_page_tables) /* * The following fragment of code is executed with the MMU enabled. * - * x0 = __PHYS_OFFSET + * x0 = __pa(KERNEL_START) */ SYM_FUNC_START_LOCAL(__primary_switched) adr_l x4, init_task @@ -439,6 +432,9 @@ SYM_FUNC_START_LOCAL(__primary_switched) sub x4, x4, x0 // the kernel virtual and str_l x4, kimage_voffset, x5 // physical mappings + mov x0, x20 + bl set_cpu_boot_mode_flag + // Clear BSS adr_l x0, __bss_start mov x1, xzr @@ -447,35 +443,30 @@ SYM_FUNC_START_LOCAL(__primary_switched) bl __pi_memset dsb ishst // Make zero page visible to PTW +#if VA_BITS > 48 + adr_l x8, vabits_actual // Set this early so KASAN early init + str x25, [x8] // ... observes the correct value + dc civac, x8 // Make visible to booting secondaries +#endif + +#ifdef CONFIG_RANDOMIZE_BASE + adrp x5, memstart_offset_seed // Save KASLR linear map seed + strh w24, [x5, :lo12:memstart_offset_seed] +#endif #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) bl kasan_early_init #endif mov x0, x21 // pass FDT address in x0 bl early_fdt_map // Try mapping the FDT early + mov x0, x20 // pass the full boot status bl init_feature_override // Parse cpu feature overrides -#ifdef CONFIG_RANDOMIZE_BASE - tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized? - b.ne 0f - bl kaslr_early_init // parse FDT for KASLR options - cbz x0, 0f // KASLR disabled? just proceed - orr x23, x23, x0 // record KASLR offset - ldp x29, x30, [sp], #16 // we must enable KASLR, return - ret // to __primary_switch() -0: -#endif - bl switch_to_vhe // Prefer VHE if possible + mov x0, x20 + bl finalise_el2 // Prefer VHE if possible ldp x29, x30, [sp], #16 bl start_kernel ASM_BUG() SYM_FUNC_END(__primary_switched) - .pushsection ".rodata", "a" -SYM_DATA_START(kimage_vaddr) - .quad _text -SYM_DATA_END(kimage_vaddr) -EXPORT_SYMBOL(kimage_vaddr) - .popsection - /* * end early head section, begin head code that is also used for * hotplug and needs to have the same protections as the text region @@ -490,8 +481,9 @@ EXPORT_SYMBOL(kimage_vaddr) * Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if * SCTLR_ELx.EOS is clear), we place an ISB prior to ERET. * - * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if - * booted in EL1 or EL2 respectively. + * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x0 if + * booted in EL1 or EL2 respectively, with the top 32 bits containing + * potential context flags. These flags are *not* stored in __boot_cpu_mode. */ SYM_FUNC_START(init_kernel_el) mrs x0, CurrentEL @@ -520,6 +512,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) msr vbar_el2, x0 isb + mov_q x1, INIT_SCTLR_EL1_MMU_OFF + /* * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, * making it impossible to start in nVHE mode. Is that @@ -529,34 +523,19 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) and x0, x0, #HCR_E2H cbz x0, 1f - /* Switching to VHE requires a sane SCTLR_EL1 as a start */ - mov_q x0, INIT_SCTLR_EL1_MMU_OFF - msr_s SYS_SCTLR_EL12, x0 - - /* - * Force an eret into a helper "function", and let it return - * to our original caller... This makes sure that we have - * initialised the basic PSTATE state. - */ - mov x0, #INIT_PSTATE_EL2 - msr spsr_el1, x0 - adr x0, __cpu_stick_to_vhe - msr elr_el1, x0 - eret + /* Set a sane SCTLR_EL1, the VHE way */ + msr_s SYS_SCTLR_EL12, x1 + mov x2, #BOOT_CPU_FLAG_E2H + b 2f 1: - mov_q x0, INIT_SCTLR_EL1_MMU_OFF - msr sctlr_el1, x0 - + msr sctlr_el1, x1 + mov x2, xzr +2: msr elr_el2, lr mov w0, #BOOT_CPU_MODE_EL2 + orr x0, x0, x2 eret - -__cpu_stick_to_vhe: - mov x0, #HVC_VHE_RESTART - hvc #0 - mov x0, #BOOT_CPU_MODE_EL2 - ret SYM_FUNC_END(init_kernel_el) /* @@ -569,52 +548,21 @@ SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag) b.ne 1f add x1, x1, #4 1: str w0, [x1] // Save CPU boot mode - dmb sy - dc ivac, x1 // Invalidate potentially stale cache line ret SYM_FUNC_END(set_cpu_boot_mode_flag) -/* - * These values are written with the MMU off, but read with the MMU on. - * Writers will invalidate the corresponding address, discarding up to a - * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures - * sufficient alignment that the CWG doesn't overlap another section. - */ - .pushsection ".mmuoff.data.write", "aw" -/* - * We need to find out the CPU boot mode long after boot, so we need to - * store it in a writable variable. - * - * This is not in .bss, because we set it sufficiently early that the boot-time - * zeroing of .bss would clobber it. - */ -SYM_DATA_START(__boot_cpu_mode) - .long BOOT_CPU_MODE_EL2 - .long BOOT_CPU_MODE_EL1 -SYM_DATA_END(__boot_cpu_mode) -/* - * The booting CPU updates the failed status @__early_cpu_boot_status, - * with MMU turned off. - */ -SYM_DATA_START(__early_cpu_boot_status) - .quad 0 -SYM_DATA_END(__early_cpu_boot_status) - - .popsection - /* * This provides a "holding pen" for platforms to hold all secondary * cores are held until we're ready for them to initialise. */ SYM_FUNC_START(secondary_holding_pen) bl init_kernel_el // w0=cpu_boot_mode - bl set_cpu_boot_mode_flag - mrs x0, mpidr_el1 + mrs x2, mpidr_el1 mov_q x1, MPIDR_HWID_BITMASK - and x0, x0, x1 + and x2, x2, x1 adr_l x3, secondary_holding_pen_release pen: ldr x4, [x3] - cmp x4, x0 + cmp x4, x2 b.eq secondary_startup wfe b pen @@ -626,7 +574,6 @@ SYM_FUNC_END(secondary_holding_pen) */ SYM_FUNC_START(secondary_entry) bl init_kernel_el // w0=cpu_boot_mode - bl set_cpu_boot_mode_flag b secondary_startup SYM_FUNC_END(secondary_entry) @@ -634,16 +581,24 @@ SYM_FUNC_START_LOCAL(secondary_startup) /* * Common entry point for secondary CPUs. */ - bl switch_to_vhe + mov x20, x0 // preserve boot mode + bl finalise_el2 bl __cpu_secondary_check52bitva +#if VA_BITS > 48 + ldr_l x0, vabits_actual +#endif bl __cpu_setup // initialise processor adrp x1, swapper_pg_dir + adrp x2, idmap_pg_dir bl __enable_mmu ldr x8, =__secondary_switched br x8 SYM_FUNC_END(secondary_startup) SYM_FUNC_START_LOCAL(__secondary_switched) + mov x0, x20 + bl set_cpu_boot_mode_flag + str_l xzr, __early_cpu_boot_status, x3 adr_l x5, vectors msr vbar_el1, x5 isb @@ -691,6 +646,7 @@ SYM_FUNC_END(__secondary_too_slow) * * x0 = SCTLR_EL1 value for turning on the MMU. * x1 = TTBR1_EL1 value + * x2 = ID map root table address * * Returns to the caller via x30/lr. This requires the caller to be covered * by the .idmap.text section. @@ -699,20 +655,15 @@ SYM_FUNC_END(__secondary_too_slow) * If it isn't, park the CPU */ SYM_FUNC_START(__enable_mmu) - mrs x2, ID_AA64MMFR0_EL1 - ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 - cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN + mrs x3, ID_AA64MMFR0_EL1 + ubfx x3, x3, #ID_AA64MMFR0_TGRAN_SHIFT, 4 + cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN b.lt __no_granule_support - cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX + cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX b.gt __no_granule_support - update_early_cpu_boot_status 0, x2, x3 - adrp x2, idmap_pg_dir - phys_to_ttbr x1, x1 phys_to_ttbr x2, x2 msr ttbr0_el1, x2 // load TTBR0 - offset_ttbr1 x1, x3 - msr ttbr1_el1, x1 // load TTBR1 - isb + load_ttbr1 x1, x1, x3 set_sctlr_el1 x0 @@ -720,7 +671,7 @@ SYM_FUNC_START(__enable_mmu) SYM_FUNC_END(__enable_mmu) SYM_FUNC_START(__cpu_secondary_check52bitva) -#ifdef CONFIG_ARM64_VA_BITS_52 +#if VA_BITS > 48 ldr_l x0, vabits_actual cmp x0, #52 b.ne 2f @@ -755,13 +706,10 @@ SYM_FUNC_START_LOCAL(__relocate_kernel) * Iterate over each entry in the relocation table, and apply the * relocations in place. */ - ldr w9, =__rela_offset // offset to reloc table - ldr w10, =__rela_size // size of reloc table - + adr_l x9, __rela_start + adr_l x10, __rela_end mov_q x11, KIMAGE_VADDR // default virtual offset add x11, x11, x23 // actual virtual offset - add x9, x9, x11 // __va(.rela) - add x10, x9, x10 // __va(.rela) + sizeof(.rela) 0: cmp x9, x10 b.hs 1f @@ -804,21 +752,9 @@ SYM_FUNC_START_LOCAL(__relocate_kernel) * entry in x9, the address being relocated by the current address or * bitmap entry in x13 and the address being relocated by the current * bit in x14. - * - * Because addends are stored in place in the binary, RELR relocations - * cannot be applied idempotently. We use x24 to keep track of the - * currently applied displacement so that we can correctly relocate if - * __relocate_kernel is called twice with non-zero displacements (i.e. - * if there is both a physical misalignment and a KASLR displacement). */ - ldr w9, =__relr_offset // offset to reloc table - ldr w10, =__relr_size // size of reloc table - add x9, x9, x11 // __va(.relr) - add x10, x9, x10 // __va(.relr) + sizeof(.relr) - - sub x15, x23, x24 // delta from previous offset - cbz x15, 7f // nothing to do if unchanged - mov x24, x23 // save new offset + adr_l x9, __relr_start + adr_l x10, __relr_end 2: cmp x9, x10 b.hs 7f @@ -826,7 +762,7 @@ SYM_FUNC_START_LOCAL(__relocate_kernel) tbnz x11, #0, 3f // branch to handle bitmaps add x13, x11, x23 ldr x12, [x13] // relocate address entry - add x12, x12, x15 + add x12, x12, x23 str x12, [x13], #8 // adjust to start of bitmap b 2b @@ -835,7 +771,7 @@ SYM_FUNC_START_LOCAL(__relocate_kernel) cbz x11, 6f tbz x11, #0, 5f // skip bit if not set ldr x12, [x14] // relocate bit - add x12, x12, x15 + add x12, x12, x23 str x12, [x14] 5: add x14, x14, #8 // move to next bit's address @@ -856,43 +792,32 @@ SYM_FUNC_END(__relocate_kernel) #endif SYM_FUNC_START_LOCAL(__primary_switch) + adrp x1, reserved_pg_dir + adrp x2, init_idmap_pg_dir + bl __enable_mmu +#ifdef CONFIG_RELOCATABLE + adrp x23, KERNEL_START + and x23, x23, MIN_KIMG_ALIGN - 1 #ifdef CONFIG_RANDOMIZE_BASE - mov x19, x0 // preserve new SCTLR_EL1 value - mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value + mov x0, x22 + adrp x1, init_pg_end + mov sp, x1 + mov x29, xzr + bl __pi_kaslr_early_init + and x24, x0, #SZ_2M - 1 // capture memstart offset seed + bic x0, x0, #SZ_2M - 1 + orr x23, x23, x0 // record kernel offset +#endif #endif + bl clear_page_tables + bl create_kernel_mapping adrp x1, init_pg_dir - bl __enable_mmu + load_ttbr1 x1, x1, x2 #ifdef CONFIG_RELOCATABLE -#ifdef CONFIG_RELR - mov x24, #0 // no RELR displacement yet -#endif bl __relocate_kernel -#ifdef CONFIG_RANDOMIZE_BASE - ldr x8, =__primary_switched - adrp x0, __PHYS_OFFSET - blr x8 - - /* - * If we return here, we have a KASLR displacement in x23 which we need - * to take into account by discarding the current kernel mapping and - * creating a new one. - */ - pre_disable_mmu_workaround - msr sctlr_el1, x20 // disable the MMU - isb - bl __create_page_tables // recreate kernel mapping - - tlbi vmalle1 // Remove any stale TLB entries - dsb nsh - isb - - set_sctlr_el1 x19 // re-enable the MMU - - bl __relocate_kernel -#endif #endif ldr x8, =__primary_switched - adrp x0, __PHYS_OFFSET + adrp x0, KERNEL_START // __pa(KERNEL_START) br x8 SYM_FUNC_END(__primary_switch) diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 2e248342476e..af5df48ba915 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -300,11 +300,6 @@ static void swsusp_mte_restore_tags(void) unsigned long pfn = xa_state.xa_index; struct page *page = pfn_to_online_page(pfn); - /* - * It is not required to invoke page_kasan_tag_reset(page) - * at this point since the tags stored in page->flags are - * already restored. - */ mte_restore_page_tags(page_address(page), tags); mte_free_tag_storage(tags); diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 43d212618834..12c7fad02ae5 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -16,6 +16,30 @@ #include <asm/ptrace.h> #include <asm/virt.h> +// Warning, hardcoded register allocation +// This will clobber x1 and x2, and expect x1 to contain +// the id register value as read from the HW +.macro __check_override idreg, fld, width, pass, fail + ubfx x1, x1, #\fld, #\width + cbz x1, \fail + + adr_l x1, \idreg\()_override + ldr x2, [x1, FTR_OVR_VAL_OFFSET] + ldr x1, [x1, FTR_OVR_MASK_OFFSET] + ubfx x2, x2, #\fld, #\width + ubfx x1, x1, #\fld, #\width + cmp x1, xzr + and x2, x2, x1 + csinv x2, x2, xzr, ne + cbnz x2, \pass + b \fail +.endm + +.macro check_override idreg, fld, pass, fail + mrs x1, \idreg\()_el1 + __check_override \idreg \fld 4 \pass \fail +.endm + .text .pushsection .hyp.text, "ax" @@ -51,8 +75,8 @@ SYM_CODE_START_LOCAL(elx_sync) msr vbar_el2, x1 b 9f -1: cmp x0, #HVC_VHE_RESTART - b.eq mutate_to_vhe +1: cmp x0, #HVC_FINALISE_EL2 + b.eq __finalise_el2 2: cmp x0, #HVC_SOFT_RESTART b.ne 3f @@ -73,27 +97,67 @@ SYM_CODE_START_LOCAL(elx_sync) eret SYM_CODE_END(elx_sync) -// nVHE? No way! Give me the real thing! -SYM_CODE_START_LOCAL(mutate_to_vhe) +SYM_CODE_START_LOCAL(__finalise_el2) + check_override id_aa64pfr0 ID_AA64PFR0_SVE_SHIFT .Linit_sve .Lskip_sve + +.Linit_sve: /* SVE register access */ + mrs x0, cptr_el2 // Disable SVE traps + bic x0, x0, #CPTR_EL2_TZ + msr cptr_el2, x0 + isb + mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector + msr_s SYS_ZCR_EL2, x1 // length for EL1. + +.Lskip_sve: + check_override id_aa64pfr1 ID_AA64PFR1_SME_SHIFT .Linit_sme .Lskip_sme + +.Linit_sme: /* SME register access and priority mapping */ + mrs x0, cptr_el2 // Disable SME traps + bic x0, x0, #CPTR_EL2_TSM + msr cptr_el2, x0 + isb + + mrs x1, sctlr_el2 + orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps + msr sctlr_el2, x1 + isb + + mov x0, #0 // SMCR controls + + // Full FP in SM? + mrs_s x1, SYS_ID_AA64SMFR0_EL1 + __check_override id_aa64smfr0 ID_AA64SMFR0_EL1_FA64_SHIFT 1 .Linit_sme_fa64 .Lskip_sme_fa64 + +.Linit_sme_fa64: + orr x0, x0, SMCR_ELx_FA64_MASK +.Lskip_sme_fa64: + + orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector + msr_s SYS_SMCR_EL2, x0 // length for EL1. + + mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported? + ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1 + cbz x1, .Lskip_sme + + msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal + + mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present? + ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4 + cbz x1, .Lskip_sme + + mrs_s x1, SYS_HCRX_EL2 + orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping + msr_s SYS_HCRX_EL2, x1 + +.Lskip_sme: + + // nVHE? No way! Give me the real thing! // Sanity check: MMU *must* be off mrs x1, sctlr_el2 tbnz x1, #0, 1f // Needs to be VHE capable, obviously - mrs x1, id_aa64mmfr1_el1 - ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4 - cbz x1, 1f - - // Check whether VHE is disabled from the command line - adr_l x1, id_aa64mmfr1_override - ldr x2, [x1, FTR_OVR_VAL_OFFSET] - ldr x1, [x1, FTR_OVR_MASK_OFFSET] - ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 - ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4 - cmp x1, xzr - and x2, x2, x1 - csinv x2, x2, xzr, ne - cbnz x2, 2f + check_override id_aa64mmfr1 ID_AA64MMFR1_VHE_SHIFT 2f 1f 1: mov_q x0, HVC_STUB_ERR eret @@ -140,10 +204,10 @@ SYM_CODE_START_LOCAL(mutate_to_vhe) msr spsr_el1, x0 b enter_vhe -SYM_CODE_END(mutate_to_vhe) +SYM_CODE_END(__finalise_el2) // At the point where we reach enter_vhe(), we run with - // the MMU off (which is enforced by mutate_to_vhe()). + // the MMU off (which is enforced by __finalise_el2()). // We thus need to be in the idmap, or everything will // explode when enabling the MMU. @@ -222,12 +286,12 @@ SYM_FUNC_START(__hyp_reset_vectors) SYM_FUNC_END(__hyp_reset_vectors) /* - * Entry point to switch to VHE if deemed capable + * Entry point to finalise EL2 and switch to VHE if deemed capable + * + * w0: boot mode, as returned by init_kernel_el() */ -SYM_FUNC_START(switch_to_vhe) +SYM_FUNC_START(finalise_el2) // Need to have booted at EL2 - adr_l x1, __boot_cpu_mode - ldr w0, [x1] cmp w0, #BOOT_CPU_MODE_EL2 b.ne 1f @@ -236,9 +300,8 @@ SYM_FUNC_START(switch_to_vhe) cmp x0, #CurrentEL_EL1 b.ne 1f - // Turn the world upside down - mov x0, #HVC_VHE_RESTART + mov x0, #HVC_FINALISE_EL2 hvc #0 1: ret -SYM_FUNC_END(switch_to_vhe) +SYM_FUNC_END(finalise_el2) diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 8a2ceb591686..1b0542c69738 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -19,16 +19,21 @@ #define FTR_ALIAS_NAME_LEN 30 #define FTR_ALIAS_OPTION_LEN 116 +static u64 __boot_status __initdata; + struct ftr_set_desc { char name[FTR_DESC_NAME_LEN]; struct arm64_ftr_override *override; struct { char name[FTR_DESC_FIELD_LEN]; u8 shift; + u8 width; bool (*filter)(u64 val); } fields[]; }; +#define FIELD(n, s, f) { .name = n, .shift = s, .width = 4, .filter = f } + static bool __init mmfr1_vh_filter(u64 val) { /* @@ -37,24 +42,65 @@ static bool __init mmfr1_vh_filter(u64 val) * the user was trying to force nVHE on us, proceed with * attitude adjustment. */ - return !(is_kernel_in_hyp_mode() && val == 0); + return !(__boot_status == (BOOT_CPU_FLAG_E2H | BOOT_CPU_MODE_EL2) && + val == 0); } static const struct ftr_set_desc mmfr1 __initconst = { .name = "id_aa64mmfr1", .override = &id_aa64mmfr1_override, .fields = { - { "vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter }, + FIELD("vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter), + {} + }, +}; + +static bool __init pfr0_sve_filter(u64 val) +{ + /* + * Disabling SVE also means disabling all the features that + * are associated with it. The easiest way to do it is just to + * override id_aa64zfr0_el1 to be 0. + */ + if (!val) { + id_aa64zfr0_override.val = 0; + id_aa64zfr0_override.mask = GENMASK(63, 0); + } + + return true; +} + +static const struct ftr_set_desc pfr0 __initconst = { + .name = "id_aa64pfr0", + .override = &id_aa64pfr0_override, + .fields = { + FIELD("sve", ID_AA64PFR0_SVE_SHIFT, pfr0_sve_filter), {} }, }; +static bool __init pfr1_sme_filter(u64 val) +{ + /* + * Similarly to SVE, disabling SME also means disabling all + * the features that are associated with it. Just set + * id_aa64smfr0_el1 to 0 and don't look back. + */ + if (!val) { + id_aa64smfr0_override.val = 0; + id_aa64smfr0_override.mask = GENMASK(63, 0); + } + + return true; +} + static const struct ftr_set_desc pfr1 __initconst = { .name = "id_aa64pfr1", .override = &id_aa64pfr1_override, .fields = { - { "bt", ID_AA64PFR1_BT_SHIFT }, - { "mte", ID_AA64PFR1_MTE_SHIFT}, + FIELD("bt", ID_AA64PFR1_BT_SHIFT, NULL ), + FIELD("mte", ID_AA64PFR1_MTE_SHIFT, NULL), + FIELD("sme", ID_AA64PFR1_SME_SHIFT, pfr1_sme_filter), {} }, }; @@ -63,10 +109,10 @@ static const struct ftr_set_desc isar1 __initconst = { .name = "id_aa64isar1", .override = &id_aa64isar1_override, .fields = { - { "gpi", ID_AA64ISAR1_GPI_SHIFT }, - { "gpa", ID_AA64ISAR1_GPA_SHIFT }, - { "api", ID_AA64ISAR1_API_SHIFT }, - { "apa", ID_AA64ISAR1_APA_SHIFT }, + FIELD("gpi", ID_AA64ISAR1_EL1_GPI_SHIFT, NULL), + FIELD("gpa", ID_AA64ISAR1_EL1_GPA_SHIFT, NULL), + FIELD("api", ID_AA64ISAR1_EL1_API_SHIFT, NULL), + FIELD("apa", ID_AA64ISAR1_EL1_APA_SHIFT, NULL), {} }, }; @@ -75,8 +121,18 @@ static const struct ftr_set_desc isar2 __initconst = { .name = "id_aa64isar2", .override = &id_aa64isar2_override, .fields = { - { "gpa3", ID_AA64ISAR2_GPA3_SHIFT }, - { "apa3", ID_AA64ISAR2_APA3_SHIFT }, + FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL), + FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL), + {} + }, +}; + +static const struct ftr_set_desc smfr0 __initconst = { + .name = "id_aa64smfr0", + .override = &id_aa64smfr0_override, + .fields = { + /* FA64 is a one bit field... :-/ */ + { "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, }, {} }, }; @@ -89,16 +145,18 @@ static const struct ftr_set_desc kaslr __initconst = { .override = &kaslr_feature_override, #endif .fields = { - { "disabled", 0 }, + FIELD("disabled", 0, NULL), {} }, }; static const struct ftr_set_desc * const regs[] __initconst = { &mmfr1, + &pfr0, &pfr1, &isar1, &isar2, + &smfr0, &kaslr, }; @@ -108,6 +166,8 @@ static const struct { } aliases[] __initconst = { { "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" }, { "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" }, + { "arm64.nosve", "id_aa64pfr0.sve=0 id_aa64pfr1.sme=0" }, + { "arm64.nosme", "id_aa64pfr1.sme=0" }, { "arm64.nobti", "id_aa64pfr1.bt=0" }, { "arm64.nopauth", "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 " @@ -144,7 +204,8 @@ static void __init match_options(const char *cmdline) for (f = 0; strlen(regs[i]->fields[f].name); f++) { u64 shift = regs[i]->fields[f].shift; - u64 mask = 0xfUL << shift; + u64 width = regs[i]->fields[f].width ?: 4; + u64 mask = GENMASK_ULL(shift + width - 1, shift); u64 v; if (find_field(cmdline, regs[i], f, &v)) @@ -152,7 +213,7 @@ static void __init match_options(const char *cmdline) /* * If an override gets filtered out, advertise - * it by setting the value to 0xf, but + * it by setting the value to the all-ones while * clearing the mask... Yes, this is fragile. */ if (regs[i]->fields[f].filter && @@ -234,9 +295,9 @@ static __init void parse_cmdline(void) } /* Keep checkers quiet */ -void init_feature_override(void); +void init_feature_override(u64 boot_status); -asmlinkage void __init init_feature_override(void) +asmlinkage void __init init_feature_override(u64 boot_status) { int i; @@ -247,6 +308,8 @@ asmlinkage void __init init_feature_override(void) } } + __boot_status = boot_status; + parse_cmdline(); for (i = 0; i < ARRAY_SIZE(regs); i++) { diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 241c86b67d01..afa69e04e75e 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -10,11 +10,8 @@ #error This file should only be included in vmlinux.lds.S #endif -#ifdef CONFIG_EFI - -__efistub_kernel_size = _edata - _text; -__efistub_primary_entry_offset = primary_entry - _text; - +PROVIDE(__efistub_kernel_size = _edata - _text); +PROVIDE(__efistub_primary_entry_offset = primary_entry - _text); /* * The EFI stub has its own symbol namespace prefixed by __efistub_, to @@ -25,31 +22,37 @@ __efistub_primary_entry_offset = primary_entry - _text; * linked at. The routines below are all implemented in assembler in a * position independent manner */ -__efistub_memcmp = __pi_memcmp; -__efistub_memchr = __pi_memchr; -__efistub_memcpy = __pi_memcpy; -__efistub_memmove = __pi_memmove; -__efistub_memset = __pi_memset; -__efistub_strlen = __pi_strlen; -__efistub_strnlen = __pi_strnlen; -__efistub_strcmp = __pi_strcmp; -__efistub_strncmp = __pi_strncmp; -__efistub_strrchr = __pi_strrchr; -__efistub_dcache_clean_poc = __pi_dcache_clean_poc; - -#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) -__efistub___memcpy = __pi_memcpy; -__efistub___memmove = __pi_memmove; -__efistub___memset = __pi_memset; -#endif +PROVIDE(__efistub_memcmp = __pi_memcmp); +PROVIDE(__efistub_memchr = __pi_memchr); +PROVIDE(__efistub_memcpy = __pi_memcpy); +PROVIDE(__efistub_memmove = __pi_memmove); +PROVIDE(__efistub_memset = __pi_memset); +PROVIDE(__efistub_strlen = __pi_strlen); +PROVIDE(__efistub_strnlen = __pi_strnlen); +PROVIDE(__efistub_strcmp = __pi_strcmp); +PROVIDE(__efistub_strncmp = __pi_strncmp); +PROVIDE(__efistub_strrchr = __pi_strrchr); +PROVIDE(__efistub_dcache_clean_poc = __pi_dcache_clean_poc); + +PROVIDE(__efistub__text = _text); +PROVIDE(__efistub__end = _end); +PROVIDE(__efistub__edata = _edata); +PROVIDE(__efistub_screen_info = screen_info); +PROVIDE(__efistub__ctype = _ctype); -__efistub__text = _text; -__efistub__end = _end; -__efistub__edata = _edata; -__efistub_screen_info = screen_info; -__efistub__ctype = _ctype; +/* + * The __ prefixed memcpy/memset/memmove symbols are provided by KASAN, which + * instruments the conventional ones. Therefore, any references from the EFI + * stub or other position independent, low level C code should be redirected to + * the non-instrumented versions as well. + */ +PROVIDE(__efistub___memcpy = __pi_memcpy); +PROVIDE(__efistub___memmove = __pi_memmove); +PROVIDE(__efistub___memset = __pi_memset); -#endif +PROVIDE(__pi___memcpy = __pi_memcpy); +PROVIDE(__pi___memmove = __pi_memmove); +PROVIDE(__pi___memset = __pi_memset); #ifdef CONFIG_KVM diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 418b2bba1521..325455d16dbc 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -13,7 +13,6 @@ #include <linux/pgtable.h> #include <linux/random.h> -#include <asm/cacheflush.h> #include <asm/fixmap.h> #include <asm/kernel-pgtable.h> #include <asm/memory.h> @@ -21,128 +20,45 @@ #include <asm/sections.h> #include <asm/setup.h> -enum kaslr_status { - KASLR_ENABLED, - KASLR_DISABLED_CMDLINE, - KASLR_DISABLED_NO_SEED, - KASLR_DISABLED_FDT_REMAP, -}; - -static enum kaslr_status __initdata kaslr_status; u64 __ro_after_init module_alloc_base; u16 __initdata memstart_offset_seed; -static __init u64 get_kaslr_seed(void *fdt) -{ - int node, len; - fdt64_t *prop; - u64 ret; - - node = fdt_path_offset(fdt, "/chosen"); - if (node < 0) - return 0; - - prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len); - if (!prop || len != sizeof(u64)) - return 0; - - ret = fdt64_to_cpu(*prop); - *prop = 0; - return ret; -} - struct arm64_ftr_override kaslr_feature_override __initdata; -/* - * This routine will be executed with the kernel mapped at its default virtual - * address, and if it returns successfully, the kernel will be remapped, and - * start_kernel() will be executed from a randomized virtual offset. The - * relocation will result in all absolute references (e.g., static variables - * containing function pointers) to be reinitialized, and zero-initialized - * .bss variables will be reset to 0. - */ -u64 __init kaslr_early_init(void) +static int __init kaslr_init(void) { - void *fdt; - u64 seed, offset, mask, module_range; - unsigned long raw; + u64 module_range; + u32 seed; /* * Set a reasonable default for module_alloc_base in case * we end up running with module randomization disabled. */ module_alloc_base = (u64)_etext - MODULES_VSIZE; - dcache_clean_inval_poc((unsigned long)&module_alloc_base, - (unsigned long)&module_alloc_base + - sizeof(module_alloc_base)); - - /* - * Try to map the FDT early. If this fails, we simply bail, - * and proceed with KASLR disabled. We will make another - * attempt at mapping the FDT in setup_machine() - */ - fdt = get_early_fdt_ptr(); - if (!fdt) { - kaslr_status = KASLR_DISABLED_FDT_REMAP; - return 0; - } - /* - * Retrieve (and wipe) the seed from the FDT - */ - seed = get_kaslr_seed(fdt); - - /* - * Check if 'nokaslr' appears on the command line, and - * return 0 if that is the case. - */ if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) { - kaslr_status = KASLR_DISABLED_CMDLINE; + pr_info("KASLR disabled on command line\n"); return 0; } - /* - * Mix in any entropy obtainable architecturally if enabled - * and supported. - */ - - if (arch_get_random_seed_long_early(&raw)) - seed ^= raw; - - if (!seed) { - kaslr_status = KASLR_DISABLED_NO_SEED; + if (!kaslr_offset()) { + pr_warn("KASLR disabled due to lack of seed\n"); return 0; } + pr_info("KASLR enabled\n"); + /* - * OK, so we are proceeding with KASLR enabled. Calculate a suitable - * kernel image offset from the seed. Let's place the kernel in the - * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of - * the lower and upper quarters to avoid colliding with other - * allocations. - * Even if we could randomize at page granularity for 16k and 64k pages, - * let's always round to 2 MB so we don't interfere with the ability to - * map using contiguous PTEs + * KASAN without KASAN_VMALLOC does not expect the module region to + * intersect the vmalloc region, since shadow memory is allocated for + * each module at load time, whereas the vmalloc region will already be + * shadowed by KASAN zero pages. */ - mask = ((1UL << (VA_BITS_MIN - 2)) - 1) & ~(SZ_2M - 1); - offset = BIT(VA_BITS_MIN - 3) + (seed & mask); + BUILD_BUG_ON((IS_ENABLED(CONFIG_KASAN_GENERIC) || + IS_ENABLED(CONFIG_KASAN_SW_TAGS)) && + !IS_ENABLED(CONFIG_KASAN_VMALLOC)); - /* use the top 16 bits to randomize the linear region */ - memstart_offset_seed = seed >> 48; - - if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) && - (IS_ENABLED(CONFIG_KASAN_GENERIC) || - IS_ENABLED(CONFIG_KASAN_SW_TAGS))) - /* - * KASAN without KASAN_VMALLOC does not expect the module region - * to intersect the vmalloc region, since shadow memory is - * allocated for each module at load time, whereas the vmalloc - * region is shadowed by KASAN zero pages. So keep modules - * out of the vmalloc region if KASAN is enabled without - * KASAN_VMALLOC, and put the kernel well within 4 GB of the - * module region. - */ - return offset % SZ_2G; + seed = get_random_u32(); if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { /* @@ -154,8 +70,7 @@ u64 __init kaslr_early_init(void) * resolved normally.) */ module_range = SZ_2G - (u64)(_end - _stext); - module_alloc_base = max((u64)_end + offset - SZ_2G, - (u64)MODULES_VADDR); + module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR); } else { /* * Randomize the module region by setting module_alloc_base to @@ -167,40 +82,12 @@ u64 __init kaslr_early_init(void) * when ARM64_MODULE_PLTS is enabled. */ module_range = MODULES_VSIZE - (u64)(_etext - _stext); - module_alloc_base = (u64)_etext + offset - MODULES_VSIZE; } /* use the lower 21 bits to randomize the base of the module region */ module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; module_alloc_base &= PAGE_MASK; - dcache_clean_inval_poc((unsigned long)&module_alloc_base, - (unsigned long)&module_alloc_base + - sizeof(module_alloc_base)); - dcache_clean_inval_poc((unsigned long)&memstart_offset_seed, - (unsigned long)&memstart_offset_seed + - sizeof(memstart_offset_seed)); - - return offset; -} - -static int __init kaslr_init(void) -{ - switch (kaslr_status) { - case KASLR_ENABLED: - pr_info("KASLR enabled\n"); - break; - case KASLR_DISABLED_CMDLINE: - pr_info("KASLR disabled on command line\n"); - break; - case KASLR_DISABLED_NO_SEED: - pr_warn("KASLR disabled due to lack of seed\n"); - break; - case KASLR_DISABLED_FDT_REMAP: - pr_warn("KASLR disabled due to FDT remapping failure\n"); - break; - } - return 0; } -core_initcall(kaslr_init) +subsys_initcall(kaslr_init) diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 42bd8c0c60e0..692e9d2e31e5 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -15,6 +15,7 @@ #include <asm/unistd.h> + .section .rodata .align 5 .globl __kuser_helper_start __kuser_helper_start: diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index f6b00743c399..b2b730233274 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -48,15 +48,6 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte, if (!pte_is_tagged) return; - page_kasan_tag_reset(page); - /* - * We need smp_wmb() in between setting the flags and clearing the - * tags because if another thread reads page->flags and builds a - * tagged address out of it, there is an actual dependency to the - * memory access, but on the current thread we do not guarantee that - * the new page->flags are visible before the tags were updated. - */ - smp_wmb(); mte_clear_page_tags(page_address(page)); } diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile new file mode 100644 index 000000000000..839291430cb3 --- /dev/null +++ b/arch/arm64/kernel/pi/Makefile @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright 2022 Google LLC + +KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \ + -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \ + $(call cc-option,-mbranch-protection=none) \ + -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \ + -include $(srctree)/include/linux/hidden.h \ + -D__DISABLE_EXPORTS -ffreestanding -D__NO_FORTIFY \ + $(call cc-option,-fno-addrsig) + +# remove SCS flags from all objects in this directory +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) +# disable LTO +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS)) + +GCOV_PROFILE := n +KASAN_SANITIZE := n +KCSAN_SANITIZE := n +UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n + +$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \ + --remove-section=.note.gnu.property \ + --prefix-alloc-sections=.init +$(obj)/%.pi.o: $(obj)/%.o FORCE + $(call if_changed,objcopy) + +$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE + $(call if_changed_rule,cc_o_c) + +obj-y := kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o +extra-y := $(patsubst %.pi.o,%.o,$(obj-y)) diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c new file mode 100644 index 000000000000..6c3855e69395 --- /dev/null +++ b/arch/arm64/kernel/pi/kaslr_early.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright 2022 Google LLC +// Author: Ard Biesheuvel <ardb@google.com> + +// NOTE: code in this file runs *very* early, and is not permitted to use +// global variables or anything that relies on absolute addressing. + +#include <linux/libfdt.h> +#include <linux/init.h> +#include <linux/linkage.h> +#include <linux/types.h> +#include <linux/sizes.h> +#include <linux/string.h> + +#include <asm/archrandom.h> +#include <asm/memory.h> + +/* taken from lib/string.c */ +static char *__strstr(const char *s1, const char *s2) +{ + size_t l1, l2; + + l2 = strlen(s2); + if (!l2) + return (char *)s1; + l1 = strlen(s1); + while (l1 >= l2) { + l1--; + if (!memcmp(s1, s2, l2)) + return (char *)s1; + s1++; + } + return NULL; +} +static bool cmdline_contains_nokaslr(const u8 *cmdline) +{ + const u8 *str; + + str = __strstr(cmdline, "nokaslr"); + return str == cmdline || (str > cmdline && *(str - 1) == ' '); +} + +static bool is_kaslr_disabled_cmdline(void *fdt) +{ + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) { + int node; + const u8 *prop; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + goto out; + + prop = fdt_getprop(fdt, node, "bootargs", NULL); + if (!prop) + goto out; + + if (cmdline_contains_nokaslr(prop)) + return true; + + if (IS_ENABLED(CONFIG_CMDLINE_EXTEND)) + goto out; + + return false; + } +out: + return cmdline_contains_nokaslr(CONFIG_CMDLINE); +} + +static u64 get_kaslr_seed(void *fdt) +{ + int node, len; + fdt64_t *prop; + u64 ret; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + return 0; + + prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len); + if (!prop || len != sizeof(u64)) + return 0; + + ret = fdt64_to_cpu(*prop); + *prop = 0; + return ret; +} + +asmlinkage u64 kaslr_early_init(void *fdt) +{ + u64 seed; + + if (is_kaslr_disabled_cmdline(fdt)) + return 0; + + seed = get_kaslr_seed(fdt); + if (!seed) { +#ifdef CONFIG_ARCH_RANDOM + if (!__early_cpu_has_rndr() || + !__arm64_rndr((unsigned long *)&seed)) +#endif + return 0; + } + + /* + * OK, so we are proceeding with KASLR enabled. Calculate a suitable + * kernel image offset from the seed. Let's place the kernel in the + * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of + * the lower and upper quarters to avoid colliding with other + * allocations. + */ + return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0)); +} diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index b0980fbb6bc7..3e6d0352d7d3 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -280,6 +280,9 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) vl = task_get_sme_vl(current); } else { + if (!system_supports_sve()) + return -EINVAL; + vl = task_get_sve_vl(current); } @@ -342,9 +345,14 @@ fpsimd_only: #else /* ! CONFIG_ARM64_SVE */ -/* Turn any non-optimised out attempts to use these into a link error: */ +static int restore_sve_fpsimd_context(struct user_ctxs *user) +{ + WARN_ON_ONCE(1); + return -EINVAL; +} + +/* Turn any non-optimised out attempts to use this into a link error: */ extern int preserve_sve_context(void __user *ctx); -extern int restore_sve_fpsimd_context(struct user_ctxs *user); #endif /* ! CONFIG_ARM64_SVE */ @@ -649,14 +657,10 @@ static int restore_sigframe(struct pt_regs *regs, if (!user.fpsimd) return -EINVAL; - if (user.sve) { - if (!system_supports_sve()) - return -EINVAL; - + if (user.sve) err = restore_sve_fpsimd_context(&user); - } else { + else err = restore_fpsimd_context(user.fpsimd); - } } if (err == 0 && system_supports_sme() && user.za) diff --git a/arch/arm64/kernel/sigreturn32.S b/arch/arm64/kernel/sigreturn32.S index 475d30d471ac..ccbd4aab4ba4 100644 --- a/arch/arm64/kernel/sigreturn32.S +++ b/arch/arm64/kernel/sigreturn32.S @@ -15,6 +15,7 @@ #include <asm/unistd.h> + .section .rodata .globl __aarch32_sigret_code_start __aarch32_sigret_code_start: diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 4ea9392f86e0..617f78ad43a1 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -100,10 +100,11 @@ SYM_FUNC_END(__cpu_suspend_enter) .pushsection ".idmap.text", "awx" SYM_CODE_START(cpu_resume) bl init_kernel_el - bl switch_to_vhe + bl finalise_el2 bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ adrp x1, swapper_pg_dir + adrp x2, idmap_pg_dir bl __enable_mmu ldr x8, =_cpu_resume br x8 diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 0467cb79f080..fcaa151b81f1 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -38,6 +38,8 @@ * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance * associated with the most recently encountered replacement lr * value. + * + * @task: The task being unwound. */ struct unwind_state { unsigned long fp; @@ -48,13 +50,13 @@ struct unwind_state { #ifdef CONFIG_KRETPROBES struct llist_node *kr_cur; #endif + struct task_struct *task; }; -static notrace void unwind_init(struct unwind_state *state, unsigned long fp, - unsigned long pc) +static void unwind_init_common(struct unwind_state *state, + struct task_struct *task) { - state->fp = fp; - state->pc = pc; + state->task = task; #ifdef CONFIG_KRETPROBES state->kr_cur = NULL; #endif @@ -72,7 +74,57 @@ static notrace void unwind_init(struct unwind_state *state, unsigned long fp, state->prev_fp = 0; state->prev_type = STACK_TYPE_UNKNOWN; } -NOKPROBE_SYMBOL(unwind_init); + +/* + * Start an unwind from a pt_regs. + * + * The unwind will begin at the PC within the regs. + * + * The regs must be on a stack currently owned by the calling task. + */ +static inline void unwind_init_from_regs(struct unwind_state *state, + struct pt_regs *regs) +{ + unwind_init_common(state, current); + + state->fp = regs->regs[29]; + state->pc = regs->pc; +} + +/* + * Start an unwind from a caller. + * + * The unwind will begin at the caller of whichever function this is inlined + * into. + * + * The function which invokes this must be noinline. + */ +static __always_inline void unwind_init_from_caller(struct unwind_state *state) +{ + unwind_init_common(state, current); + + state->fp = (unsigned long)__builtin_frame_address(1); + state->pc = (unsigned long)__builtin_return_address(0); +} + +/* + * Start an unwind from a blocked task. + * + * The unwind will begin at the blocked tasks saved PC (i.e. the caller of + * cpu_switch_to()). + * + * The caller should ensure the task is blocked in cpu_switch_to() for the + * duration of the unwind, or the unwind will be bogus. It is never valid to + * call this for the current task. + */ +static inline void unwind_init_from_task(struct unwind_state *state, + struct task_struct *task) +{ + unwind_init_common(state, task); + + state->fp = thread_saved_fp(task); + state->pc = thread_saved_pc(task); +} /* * Unwind from one frame record (A) to the next frame record (B). @@ -81,9 +133,9 @@ NOKPROBE_SYMBOL(unwind_init); * records (e.g. a cycle), determined based on the location and fp value of A * and the location (but not the fp value) of B. */ -static int notrace unwind_next(struct task_struct *tsk, - struct unwind_state *state) +static int notrace unwind_next(struct unwind_state *state) { + struct task_struct *tsk = state->task; unsigned long fp = state->fp; struct stack_info info; @@ -117,15 +169,15 @@ static int notrace unwind_next(struct task_struct *tsk, if (fp <= state->prev_fp) return -EINVAL; } else { - set_bit(state->prev_type, state->stacks_done); + __set_bit(state->prev_type, state->stacks_done); } /* * Record this frame record's values and location. The prev_fp and * prev_type are only meaningful to the next unwind_next() invocation. */ - state->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); - state->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); + state->fp = READ_ONCE(*(unsigned long *)(fp)); + state->pc = READ_ONCE(*(unsigned long *)(fp + 8)); state->prev_fp = fp; state->prev_type = info.type; @@ -157,8 +209,7 @@ static int notrace unwind_next(struct task_struct *tsk, } NOKPROBE_SYMBOL(unwind_next); -static void notrace unwind(struct task_struct *tsk, - struct unwind_state *state, +static void notrace unwind(struct unwind_state *state, stack_trace_consume_fn consume_entry, void *cookie) { while (1) { @@ -166,7 +217,7 @@ static void notrace unwind(struct task_struct *tsk, if (!consume_entry(cookie, state->pc)) break; - ret = unwind_next(tsk, state); + ret = unwind_next(state); if (ret < 0) break; } @@ -212,15 +263,15 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry, { struct unwind_state state; - if (regs) - unwind_init(&state, regs->regs[29], regs->pc); - else if (task == current) - unwind_init(&state, - (unsigned long)__builtin_frame_address(1), - (unsigned long)__builtin_return_address(0)); - else - unwind_init(&state, thread_saved_fp(task), - thread_saved_pc(task)); - - unwind(task, &state, consume_entry, cookie); + if (regs) { + if (task != current) + return; + unwind_init_from_regs(&state, regs); + } else if (task == current) { + unwind_init_from_caller(&state); + } else { + unwind_init_from_task(&state, task); + } + + unwind(&state, consume_entry, cookie); } diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 2b0887e58a7c..9135fe0f3df5 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -52,7 +52,7 @@ void notrace __cpu_suspend_exit(void) /* Restore CnP bit in TTBR1_EL1 */ if (system_supports_cnp()) - cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir); /* * PSTATE was not saved over suspend/resume, re-enable any detected diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 9ac7a81b79be..b7fed33981f7 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -579,11 +579,11 @@ static void ctr_read_handler(unsigned long esr, struct pt_regs *regs) if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) { /* Hide DIC so that we can trap the unnecessary maintenance...*/ - val &= ~BIT(CTR_DIC_SHIFT); + val &= ~BIT(CTR_EL0_DIC_SHIFT); /* ... and fake IminLine to reduce the number of traps. */ - val &= ~CTR_IMINLINE_MASK; - val |= (PAGE_SHIFT - 2) & CTR_IMINLINE_MASK; + val &= ~CTR_EL0_IminLine_MASK; + val |= (PAGE_SHIFT - 2) & CTR_EL0_IminLine_MASK; } pt_regs_write_reg(regs, rt, val); diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index f6e25d7c346a..bafbf78fab77 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -24,7 +24,13 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so # preparation in build-time C")). ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \ - -Bsymbolic --build-id=sha1 -n $(btildflags-y) -T + -Bsymbolic --build-id=sha1 -n $(btildflags-y) + +ifdef CONFIG_LD_ORPHAN_WARN + ldflags-y += --orphan-handling=warn +endif + +ldflags-y += -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index a5e61e09ea92..e69fb4aaaf3e 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -11,6 +11,7 @@ #include <linux/const.h> #include <asm/page.h> #include <asm/vdso.h> +#include <asm-generic/vmlinux.lds.h> OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64") OUTPUT_ARCH(aarch64) @@ -49,11 +50,24 @@ SECTIONS .dynamic : { *(.dynamic) } :text :dynamic - .rodata : { *(.rodata*) } :text + .rela.dyn : ALIGN(8) { *(.rela .rela*) } + + .rodata : { + *(.rodata*) + *(.got) + *(.got.plt) + *(.plt) + *(.plt.*) + *(.iplt) + *(.igot .igot.plt) + } :text _end = .; PROVIDE(end = .); + DWARF_DEBUG + ELF_DETAILS + /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 05ba1aae1b6f..36c8f66cad25 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -104,6 +104,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__ VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1 VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096 VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1 +VDSO_LDFLAGS += --orphan-handling=warn # Borrow vdsomunge.c from the arm vDSO diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S index 3348ce5ea306..8d95d7d35057 100644 --- a/arch/arm64/kernel/vdso32/vdso.lds.S +++ b/arch/arm64/kernel/vdso32/vdso.lds.S @@ -11,6 +11,7 @@ #include <linux/const.h> #include <asm/page.h> #include <asm/vdso.h> +#include <asm-generic/vmlinux.lds.h> OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm") OUTPUT_ARCH(arm) @@ -35,12 +36,30 @@ SECTIONS .dynamic : { *(.dynamic) } :text :dynamic - .rodata : { *(.rodata*) } :text + .rodata : { + *(.rodata*) + *(.got) + *(.got.plt) + *(.plt) + *(.rel.iplt) + *(.iplt) + *(.igot.plt) + } :text - .text : { *(.text*) } :text =0xe7f001f2 + .text : { + *(.text*) + *(.glue_7) + *(.glue_7t) + *(.vfp11_veneer) + *(.v4_bx) + } :text =0xe7f001f2 - .got : { *(.got) } - .rel.plt : { *(.rel.plt) } + .rel.dyn : { *(.rel*) } + + .ARM.exidx : { *(.ARM.exidx*) } + DWARF_DEBUG + ELF_DETAILS + .ARM.attributes 0 : { *(.ARM.attributes) } /DISCARD/ : { *(.note.GNU-stack) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 2d4a8f995175..45131e354e27 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -115,7 +115,8 @@ jiffies = jiffies_64; __entry_tramp_text_start = .; \ *(.entry.tramp.text) \ . = ALIGN(PAGE_SIZE); \ - __entry_tramp_text_end = .; + __entry_tramp_text_end = .; \ + *(.entry.tramp.rodata) #else #define TRAMP_TEXT #endif @@ -198,8 +199,7 @@ SECTIONS } idmap_pg_dir = .; - . += IDMAP_DIR_SIZE; - idmap_pg_end = .; + . += PAGE_SIZE; #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 tramp_pg_dir = .; @@ -235,6 +235,10 @@ SECTIONS __inittext_end = .; __initdata_begin = .; + init_idmap_pg_dir = .; + . += INIT_IDMAP_DIR_SIZE; + init_idmap_pg_end = .; + .init.data : { INIT_DATA INIT_SETUP(16) @@ -253,21 +257,17 @@ SECTIONS HYPERVISOR_RELOC_SECTION .rela.dyn : ALIGN(8) { + __rela_start = .; *(.rela .rela*) + __rela_end = .; } - __rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR); - __rela_size = SIZEOF(.rela.dyn); - -#ifdef CONFIG_RELR .relr.dyn : ALIGN(8) { + __relr_start = .; *(.relr.dyn) + __relr_end = .; } - __relr_offset = ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR); - __relr_size = SIZEOF(.relr.dyn); -#endif - . = ALIGN(SEGMENT_ALIGN); __initdata_end = .; __init_end = .; diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index fd55014b3497..fa6e466ed57f 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -176,25 +176,25 @@ ) #define PVM_ID_AA64ISAR1_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM) \ ) #define PVM_ID_AA64ISAR2_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) \ + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) \ ) u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 35a4331ba5f3..6b94c3e6ff26 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -173,10 +173,10 @@ static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW; if (!vcpu_has_ptrauth(vcpu)) - allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI)); + allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI)); return id_aa64isar1_el1_sys_val & allow_mask; } @@ -186,8 +186,8 @@ static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu) u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW; if (!vcpu_has_ptrauth(vcpu)) - allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) | - ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3)); + allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3)); return id_aa64isar2_el1_sys_val & allow_mask; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c06c0477fab5..c4fb3874b5e2 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1136,17 +1136,17 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI)); + val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI)); break; case SYS_ID_AA64ISAR2_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) | - ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3)); + val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3)); if (!cpus_have_final_cap(ARM64_HAS_WFXT)) - val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_WFXT); + val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT); break; case SYS_ID_AA64DFR0_EL1: /* Limit debug to ARMv8.0 */ diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S index eeb9e45bcce8..1b7c93ae7e63 100644 --- a/arch/arm64/lib/mte.S +++ b/arch/arm64/lib/mte.S @@ -18,7 +18,7 @@ */ .macro multitag_transfer_size, reg, tmp mrs_s \reg, SYS_GMID_EL1 - ubfx \reg, \reg, #SYS_GMID_EL1_BS_SHIFT, #SYS_GMID_EL1_BS_SIZE + ubfx \reg, \reg, #GMID_EL1_BS_SHIFT, #GMID_EL1_BS_SIZE mov \tmp, #4 lsl \reg, \tmp, \reg .endm diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 21c907987080..081058d4e436 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -194,44 +194,3 @@ SYM_FUNC_START(__pi_dcache_clean_pop) ret SYM_FUNC_END(__pi_dcache_clean_pop) SYM_FUNC_ALIAS(dcache_clean_pop, __pi_dcache_clean_pop) - -/* - * __dma_flush_area(start, size) - * - * clean & invalidate D / U line - * - * - start - virtual start address of region - * - size - size in question - */ -SYM_FUNC_START(__pi___dma_flush_area) - add x1, x0, x1 - dcache_by_line_op civac, sy, x0, x1, x2, x3 - ret -SYM_FUNC_END(__pi___dma_flush_area) -SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area) - -/* - * __dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -SYM_FUNC_START(__pi___dma_map_area) - add x1, x0, x1 - b __pi_dcache_clean_poc -SYM_FUNC_END(__pi___dma_map_area) -SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area) - -/* - * __dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -SYM_FUNC_START(__pi___dma_unmap_area) - add x1, x0, x1 - cmp w2, #DMA_TO_DEVICE - b.ne __pi_dcache_inval_poc - ret -SYM_FUNC_END(__pi___dma_unmap_area) -SYM_FUNC_ALIAS(__dma_unmap_area, __pi___dma_unmap_area) diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 0dea80bf6de4..24913271e898 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -23,15 +23,6 @@ void copy_highpage(struct page *to, struct page *from) if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) { set_bit(PG_mte_tagged, &to->flags); - page_kasan_tag_reset(to); - /* - * We need smp_wmb() in between setting the flags and clearing the - * tags because if another thread reads page->flags and builds a - * tagged address out of it, there is an actual dependency to the - * memory access, but on the current thread we do not guarantee that - * the new page->flags are visible before the tags were updated. - */ - smp_wmb(); mte_copy_page_tags(kto, kfrom); } } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 6099c81b9322..599cf81f5685 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -14,20 +14,29 @@ #include <asm/xen/xen-ops.h> void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) + enum dma_data_direction dir) { - __dma_map_area(phys_to_virt(paddr), size, dir); + unsigned long start = (unsigned long)phys_to_virt(paddr); + + dcache_clean_poc(start, start + size); } void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) + enum dma_data_direction dir) { - __dma_unmap_area(phys_to_virt(paddr), size, dir); + unsigned long start = (unsigned long)phys_to_virt(paddr); + + if (dir == DMA_TO_DEVICE) + return; + + dcache_inval_poc(start, start + size); } void arch_dma_prep_coherent(struct page *page, size_t size) { - __dma_flush_area(page_address(page), size); + unsigned long start = (unsigned long)page_address(page); + + dcache_clean_inval_poc(start, start + size); } #ifdef CONFIG_IOMMU_DMA diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index 489455309695..228d681a8715 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -16,13 +16,6 @@ get_ex_fixup(const struct exception_table_entry *ex) return ((unsigned long)&ex->fixup + ex->fixup); } -static bool ex_handler_fixup(const struct exception_table_entry *ex, - struct pt_regs *regs) -{ - regs->pc = get_ex_fixup(ex); - return true; -} - static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, struct pt_regs *regs) { @@ -72,11 +65,10 @@ bool fixup_exception(struct pt_regs *regs) return false; switch (ex->type) { - case EX_TYPE_FIXUP: - return ex_handler_fixup(ex, regs); case EX_TYPE_BPF: return ex_handler_bpf(ex, regs); case EX_TYPE_UACCESS_ERR_ZERO: + case EX_TYPE_KACCESS_ERR_ZERO: return ex_handler_uaccess_err_zero(ex, regs); case EX_TYPE_LOAD_UNALIGNED_ZEROPAD: return ex_handler_load_unaligned_zeropad(ex, regs); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c5e11768e5c1..cdf3ffa0c223 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -927,6 +927,5 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, void tag_clear_highpage(struct page *page) { mte_zero_clear_page_tags(page_address(page)); - page_kasan_tag_reset(page); set_bit(PG_mte_tagged, &page->flags); } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 3618ef3f6d81..5307ffdefb8d 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -100,16 +100,6 @@ int pud_huge(pud_t pud) #endif } -/* - * Select all bits except the pfn - */ -static inline pgprot_t pte_pgprot(pte_t pte) -{ - unsigned long pfn = pte_pfn(pte); - - return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); -} - static int find_num_contig(struct mm_struct *mm, unsigned long addr, pte_t *ptep, size_t *pgsize) { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 339ee84e5a61..b6ef26fc8ebe 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -389,7 +389,7 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); - if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) + if (!defer_reserve_crashkernel()) reserve_crashkernel(); high_memory = __va(memblock_end_of_DRAM() - 1) + 1; @@ -438,7 +438,7 @@ void __init bootmem_init(void) * request_standard_resources() depends on crashkernel's memory being * reserved, so do it here. */ - if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)) + if (defer_reserve_crashkernel()) reserve_crashkernel(); memblock_dump_all(); diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index b21f91cd830d..c5af103d4ad4 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -1,96 +1,22 @@ // SPDX-License-Identifier: GPL-2.0-only -/* - * Based on arch/arm/mm/ioremap.c - * - * (C) Copyright 1995 1996 Linus Torvalds - * Hacked for ARM by Phil Blundell <philb@gnu.org> - * Hacked to allow all architectures to build, and various cleanups - * by Russell King - * Copyright (C) 2012 ARM Ltd. - */ -#include <linux/export.h> #include <linux/mm.h> -#include <linux/vmalloc.h> #include <linux/io.h> -#include <asm/fixmap.h> -#include <asm/tlbflush.h> - -static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size, - pgprot_t prot, void *caller) +bool ioremap_allowed(phys_addr_t phys_addr, size_t size, unsigned long prot) { - unsigned long last_addr; - unsigned long offset = phys_addr & ~PAGE_MASK; - int err; - unsigned long addr; - struct vm_struct *area; + unsigned long last_addr = phys_addr + size - 1; - /* - * Page align the mapping address and size, taking account of any - * offset. - */ - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(size + offset); + /* Don't allow outside PHYS_MASK */ + if (last_addr & ~PHYS_MASK) + return false; - /* - * Don't allow wraparound, zero size or outside PHYS_MASK. - */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr || (last_addr & ~PHYS_MASK)) - return NULL; - - /* - * Don't allow RAM to be mapped. - */ + /* Don't allow RAM to be mapped. */ if (WARN_ON(pfn_is_map_memory(__phys_to_pfn(phys_addr)))) - return NULL; - - area = get_vm_area_caller(size, VM_IOREMAP, caller); - if (!area) - return NULL; - addr = (unsigned long)area->addr; - area->phys_addr = phys_addr; - - err = ioremap_page_range(addr, addr + size, phys_addr, prot); - if (err) { - vunmap((void *)addr); - return NULL; - } - - return (void __iomem *)(offset + addr); -} - -void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot) -{ - return __ioremap_caller(phys_addr, size, prot, - __builtin_return_address(0)); -} -EXPORT_SYMBOL(__ioremap); - -void iounmap(volatile void __iomem *io_addr) -{ - unsigned long addr = (unsigned long)io_addr & PAGE_MASK; - - /* - * We could get an address outside vmalloc range in case - * of ioremap_cache() reusing a RAM mapping. - */ - if (is_vmalloc_addr((void *)addr)) - vunmap((void *)addr); -} -EXPORT_SYMBOL(iounmap); - -void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) -{ - /* For normal memory we already have a cacheable mapping. */ - if (pfn_is_map_memory(__phys_to_pfn(phys_addr))) - return (void __iomem *)__phys_to_virt(phys_addr); + return false; - return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL), - __builtin_return_address(0)); + return true; } -EXPORT_SYMBOL(ioremap_cache); /* * Must be called after early_fixmap_init diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index c12cd700598f..e969e68de005 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -236,7 +236,7 @@ static void __init kasan_init_shadow(void) */ memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir)); dsb(ishst); - cpu_replace_ttbr1(lm_alias(tmp_pg_dir)); + cpu_replace_ttbr1(lm_alias(tmp_pg_dir), idmap_pg_dir); clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); @@ -280,7 +280,7 @@ static void __init kasan_init_shadow(void) PAGE_KERNEL_RO)); memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE); - cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir); } static void __init kasan_init_depth(void) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 626ec32873c6..db7c4e6ae57b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -43,15 +43,27 @@ #define NO_CONT_MAPPINGS BIT(1) #define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */ -u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN); -u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; +int idmap_t0sz __ro_after_init; -u64 __section(".mmuoff.data.write") vabits_actual; +#if VA_BITS > 48 +u64 vabits_actual __ro_after_init = VA_BITS_MIN; EXPORT_SYMBOL(vabits_actual); +#endif + +u64 kimage_vaddr __ro_after_init = (u64)&_text; +EXPORT_SYMBOL(kimage_vaddr); u64 kimage_voffset __ro_after_init; EXPORT_SYMBOL(kimage_voffset); +u32 __boot_cpu_mode[] = { BOOT_CPU_MODE_EL2, BOOT_CPU_MODE_EL1 }; + +/* + * The booting CPU updates the failed status @__early_cpu_boot_status, + * with MMU turned off. + */ +long __section(".mmuoff.data.write") __early_cpu_boot_status; + /* * Empty_zero_page is a special page that is used for zero-initialized data * and COW. @@ -388,6 +400,13 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, } while (pgdp++, addr = next, addr != end); } +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +extern __alias(__create_pgd_mapping) +void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), int flags); +#endif + static phys_addr_t __pgd_pgtable_alloc(int shift) { void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL); @@ -529,8 +548,7 @@ static void __init map_mem(pgd_t *pgdp) #ifdef CONFIG_KEXEC_CORE if (crash_mem_map) { - if (IS_ENABLED(CONFIG_ZONE_DMA) || - IS_ENABLED(CONFIG_ZONE_DMA32)) + if (defer_reserve_crashkernel()) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; else if (crashk_res.end) memblock_mark_nomap(crashk_res.start, @@ -571,8 +589,7 @@ static void __init map_mem(pgd_t *pgdp) * through /sys/kernel/kexec_crash_size interface. */ #ifdef CONFIG_KEXEC_CORE - if (crash_mem_map && - !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) { + if (crash_mem_map && !defer_reserve_crashkernel()) { if (crashk_res.end) { __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1, @@ -665,13 +682,9 @@ static int __init map_entry_trampoline(void) __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, pa_start + i * PAGE_SIZE, prot); - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { - extern char __entry_tramp_data_start[]; - - __set_fixmap(FIX_ENTRY_TRAMP_DATA, - __pa_symbol(__entry_tramp_data_start), - PAGE_KERNEL_RO); - } + if (IS_ENABLED(CONFIG_RELOCATABLE)) + __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, + pa_start + i * PAGE_SIZE, PAGE_KERNEL_RO); return 0; } @@ -762,22 +775,57 @@ static void __init map_kernel(pgd_t *pgdp) kasan_copy_shadow(pgdp); } +static void __init create_idmap(void) +{ + u64 start = __pa_symbol(__idmap_text_start); + u64 size = __pa_symbol(__idmap_text_end) - start; + pgd_t *pgd = idmap_pg_dir; + u64 pgd_phys; + + /* check if we need an additional level of translation */ + if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) { + pgd_phys = early_pgtable_alloc(PAGE_SHIFT); + set_pgd(&idmap_pg_dir[start >> VA_BITS], + __pgd(pgd_phys | P4D_TYPE_TABLE)); + pgd = __va(pgd_phys); + } + __create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX, + early_pgtable_alloc, 0); + + if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { + extern u32 __idmap_kpti_flag; + u64 pa = __pa_symbol(&__idmap_kpti_flag); + + /* + * The KPTI G-to-nG conversion code needs a read-write mapping + * of its synchronization flag in the ID map. + */ + __create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL, + early_pgtable_alloc, 0); + } +} + void __init paging_init(void) { pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir)); + extern pgd_t init_idmap_pg_dir[]; + + idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0)); map_kernel(pgdp); map_mem(pgdp); pgd_clear_fixmap(); - cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir); init_mm.pgd = swapper_pg_dir; memblock_phys_free(__pa_symbol(init_pg_dir), __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); memblock_allow_resize(); + + create_idmap(); } /* diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index a9e50e930484..4334dec93bd4 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -53,15 +53,6 @@ bool mte_restore_tags(swp_entry_t entry, struct page *page) if (!tags) return false; - page_kasan_tag_reset(page); - /* - * We need smp_wmb() in between setting the flags and clearing the - * tags because if another thread reads page->flags and builds a - * tagged address out of it, there is an actual dependency to the - * memory access, but on the current thread we do not guarantee that - * the new page->flags are visible before the tags were updated. - */ - smp_wmb(); mte_restore_page_tags(page_address(page), tags); return true; diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 50bbed947bec..7837a69524c5 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -14,6 +14,7 @@ #include <asm/asm-offsets.h> #include <asm/asm_pointer_auth.h> #include <asm/hwcap.h> +#include <asm/kernel-pgtable.h> #include <asm/pgtable-hwdef.h> #include <asm/cpufeature.h> #include <asm/alternative.h> @@ -200,34 +201,64 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1) .popsection #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + +#define KPTI_NG_PTE_FLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) + .pushsection ".idmap.text", "awx" - .macro __idmap_kpti_get_pgtable_ent, type - dc cvac, cur_\()\type\()p // Ensure any existing dirty - dmb sy // lines are written back before - ldr \type, [cur_\()\type\()p] // loading the entry - tbz \type, #0, skip_\()\type // Skip invalid and - tbnz \type, #11, skip_\()\type // non-global entries + .macro kpti_mk_tbl_ng, type, num_entries + add end_\type\()p, cur_\type\()p, #\num_entries * 8 +.Ldo_\type: + ldr \type, [cur_\type\()p] // Load the entry + tbz \type, #0, .Lnext_\type // Skip invalid and + tbnz \type, #11, .Lnext_\type // non-global entries + orr \type, \type, #PTE_NG // Same bit for blocks and pages + str \type, [cur_\type\()p] // Update the entry + .ifnc \type, pte + tbnz \type, #1, .Lderef_\type + .endif +.Lnext_\type: + add cur_\type\()p, cur_\type\()p, #8 + cmp cur_\type\()p, end_\type\()p + b.ne .Ldo_\type .endm - .macro __idmap_kpti_put_pgtable_ent_ng, type - orr \type, \type, #PTE_NG // Same bit for blocks and pages - str \type, [cur_\()\type\()p] // Update the entry and ensure - dmb sy // that it is visible to all - dc civac, cur_\()\type\()p // CPUs. + /* + * Dereference the current table entry and map it into the temporary + * fixmap slot associated with the current level. + */ + .macro kpti_map_pgtbl, type, level + str xzr, [temp_pte, #8 * (\level + 1)] // break before make + dsb nshst + add pte, temp_pte, #PAGE_SIZE * (\level + 1) + lsr pte, pte, #12 + tlbi vaae1, pte + dsb nsh + isb + + phys_to_pte pte, cur_\type\()p + add cur_\type\()p, temp_pte, #PAGE_SIZE * (\level + 1) + orr pte, pte, pte_flags + str pte, [temp_pte, #8 * (\level + 1)] + dsb nshst .endm /* - * void __kpti_install_ng_mappings(int cpu, int num_cpus, phys_addr_t swapper) + * void __kpti_install_ng_mappings(int cpu, int num_secondaries, phys_addr_t temp_pgd, + * unsigned long temp_pte_va) * * Called exactly once from stop_machine context by each CPU found during boot. */ -__idmap_kpti_flag: - .long 1 + .pushsection ".data", "aw", %progbits +SYM_DATA(__idmap_kpti_flag, .long 1) + .popsection + SYM_FUNC_START(idmap_kpti_install_ng_mappings) cpu .req w0 + temp_pte .req x0 num_cpus .req w1 - swapper_pa .req x2 + pte_flags .req x1 + temp_pgd_phys .req x2 swapper_ttb .req x3 flag_ptr .req x4 cur_pgdp .req x5 @@ -235,17 +266,16 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings) pgd .req x7 cur_pudp .req x8 end_pudp .req x9 - pud .req x10 cur_pmdp .req x11 end_pmdp .req x12 - pmd .req x13 cur_ptep .req x14 end_ptep .req x15 pte .req x16 + valid .req x17 + mov x5, x3 // preserve temp_pte arg mrs swapper_ttb, ttbr1_el1 - restore_ttbr1 swapper_ttb - adr flag_ptr, __idmap_kpti_flag + adr_l flag_ptr, __idmap_kpti_flag cbnz cpu, __idmap_kpti_secondary @@ -256,98 +286,71 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings) eor w17, w17, num_cpus cbnz w17, 1b - /* We need to walk swapper, so turn off the MMU. */ - pre_disable_mmu_workaround - mrs x17, sctlr_el1 - bic x17, x17, #SCTLR_ELx_M - msr sctlr_el1, x17 + /* Switch to the temporary page tables on this CPU only */ + __idmap_cpu_set_reserved_ttbr1 x8, x9 + offset_ttbr1 temp_pgd_phys, x8 + msr ttbr1_el1, temp_pgd_phys isb + mov temp_pte, x5 + mov pte_flags, #KPTI_NG_PTE_FLAGS + /* Everybody is enjoying the idmap, so we can rewrite swapper. */ /* PGD */ - mov cur_pgdp, swapper_pa - add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8) -do_pgd: __idmap_kpti_get_pgtable_ent pgd - tbnz pgd, #1, walk_puds -next_pgd: - __idmap_kpti_put_pgtable_ent_ng pgd -skip_pgd: - add cur_pgdp, cur_pgdp, #8 - cmp cur_pgdp, end_pgdp - b.ne do_pgd - - /* Publish the updated tables and nuke all the TLBs */ - dsb sy - tlbi vmalle1is - dsb ish - isb + adrp cur_pgdp, swapper_pg_dir + kpti_map_pgtbl pgd, 0 + kpti_mk_tbl_ng pgd, PTRS_PER_PGD - /* We're done: fire up the MMU again */ - mrs x17, sctlr_el1 - orr x17, x17, #SCTLR_ELx_M - set_sctlr_el1 x17 + /* Ensure all the updated entries are visible to secondary CPUs */ + dsb ishst + + /* We're done: fire up swapper_pg_dir again */ + __idmap_cpu_set_reserved_ttbr1 x8, x9 + msr ttbr1_el1, swapper_ttb + isb /* Set the flag to zero to indicate that we're all done */ str wzr, [flag_ptr] ret +.Lderef_pgd: /* PUD */ -walk_puds: - .if CONFIG_PGTABLE_LEVELS > 3 + .if CONFIG_PGTABLE_LEVELS > 3 + pud .req x10 pte_to_phys cur_pudp, pgd - add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8) -do_pud: __idmap_kpti_get_pgtable_ent pud - tbnz pud, #1, walk_pmds -next_pud: - __idmap_kpti_put_pgtable_ent_ng pud -skip_pud: - add cur_pudp, cur_pudp, 8 - cmp cur_pudp, end_pudp - b.ne do_pud - b next_pgd - .else /* CONFIG_PGTABLE_LEVELS <= 3 */ - mov pud, pgd - b walk_pmds -next_pud: - b next_pgd + kpti_map_pgtbl pud, 1 + kpti_mk_tbl_ng pud, PTRS_PER_PUD + b .Lnext_pgd + .else /* CONFIG_PGTABLE_LEVELS <= 3 */ + pud .req pgd + .set .Lnext_pud, .Lnext_pgd .endif +.Lderef_pud: /* PMD */ -walk_pmds: - .if CONFIG_PGTABLE_LEVELS > 2 + .if CONFIG_PGTABLE_LEVELS > 2 + pmd .req x13 pte_to_phys cur_pmdp, pud - add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8) -do_pmd: __idmap_kpti_get_pgtable_ent pmd - tbnz pmd, #1, walk_ptes -next_pmd: - __idmap_kpti_put_pgtable_ent_ng pmd -skip_pmd: - add cur_pmdp, cur_pmdp, #8 - cmp cur_pmdp, end_pmdp - b.ne do_pmd - b next_pud - .else /* CONFIG_PGTABLE_LEVELS <= 2 */ - mov pmd, pud - b walk_ptes -next_pmd: - b next_pud + kpti_map_pgtbl pmd, 2 + kpti_mk_tbl_ng pmd, PTRS_PER_PMD + b .Lnext_pud + .else /* CONFIG_PGTABLE_LEVELS <= 2 */ + pmd .req pgd + .set .Lnext_pmd, .Lnext_pgd .endif +.Lderef_pmd: /* PTE */ -walk_ptes: pte_to_phys cur_ptep, pmd - add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8) -do_pte: __idmap_kpti_get_pgtable_ent pte - __idmap_kpti_put_pgtable_ent_ng pte -skip_pte: - add cur_ptep, cur_ptep, #8 - cmp cur_ptep, end_ptep - b.ne do_pte - b next_pmd + kpti_map_pgtbl pte, 3 + kpti_mk_tbl_ng pte, PTRS_PER_PTE + b .Lnext_pmd .unreq cpu + .unreq temp_pte .unreq num_cpus - .unreq swapper_pa + .unreq pte_flags + .unreq temp_pgd_phys .unreq cur_pgdp .unreq end_pgdp .unreq pgd @@ -360,6 +363,7 @@ skip_pte: .unreq cur_ptep .unreq end_ptep .unreq pte + .unreq valid /* Secondary CPUs end up here */ __idmap_kpti_secondary: @@ -379,7 +383,6 @@ __idmap_kpti_secondary: cbnz w16, 1b /* All done, act like nothing happened */ - offset_ttbr1 swapper_ttb, x16 msr ttbr1_el1, swapper_ttb isb ret @@ -395,6 +398,8 @@ SYM_FUNC_END(idmap_kpti_install_ng_mappings) * * Initialise the processor for turning the MMU on. * + * Input: + * x0 - actual number of VA bits (ignored unless VA_BITS > 48) * Output: * Return in x0 the value of the SCTLR_EL1 register. */ @@ -464,12 +469,11 @@ SYM_FUNC_START(__cpu_setup) tcr_clear_errata_bits tcr, x9, x5 #ifdef CONFIG_ARM64_VA_BITS_52 - ldr_l x9, vabits_actual - sub x9, xzr, x9 + sub x9, xzr, x0 add x9, x9, #64 tcr_set_t1sz tcr, x9 #else - ldr_l x9, idmap_t0sz + idmap_get_t0sz x9 #endif tcr_set_t0sz tcr, x9 diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 507b20373953..779653771507 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -36,6 +36,7 @@ HAS_RNG HAS_SB HAS_STAGE2_FWB HAS_SYSREG_GIC_CPUIF +HAS_TIDCP1 HAS_TLB_RANGE HAS_VIRT_HOST_EXTN HAS_WFXT @@ -61,6 +62,7 @@ WORKAROUND_1418040 WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 +WORKAROUND_1742098 WORKAROUND_1902691 WORKAROUND_2038923 WORKAROUND_2064142 diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk index 5c55509eb43f..db461921d256 100755 --- a/arch/arm64/tools/gen-sysreg.awk +++ b/arch/arm64/tools/gen-sysreg.awk @@ -88,7 +88,7 @@ END { # skip blank lines and comment lines /^$/ { next } -/^#/ { next } +/^[\t ]*#/ { next } /^SysregFields/ { change_block("SysregFields", "None", "SysregFields") diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index ff5e552f7420..9ae483ec1e56 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -46,6 +46,89 @@ # feature that introduces them (eg, FEAT_LS64_ACCDATA introduces enumeration # item ACCDATA) though it may be more taseful to do something else. +Sysreg ID_AA64ZFR0_EL1 3 0 0 4 4 +Res0 63:60 +Enum 59:56 F64MM + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 55:52 F32MM + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 51:48 +Enum 47:44 I8MM + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 43:40 SM4 + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 39:36 +Enum 35:32 SHA3 + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 31:24 +Enum 23:20 BF16 + 0b0000 NI + 0b0001 IMP + 0b0010 EBF16 +EndEnum +Enum 19:16 BitPerm + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 15:8 +Enum 7:4 AES + 0b0000 NI + 0b0001 IMP + 0b0010 PMULL128 +EndEnum +Enum 3:0 SVEver + 0b0000 IMP + 0b0001 SVE2 +EndEnum +EndSysreg + +Sysreg ID_AA64SMFR0_EL1 3 0 0 4 5 +Enum 63 FA64 + 0b0 NI + 0b1 IMP +EndEnum +Res0 62:60 +Field 59:56 SMEver +Enum 55:52 I16I64 + 0b0000 NI + 0b1111 IMP +EndEnum +Res0 51:49 +Enum 48 F64F64 + 0b0 NI + 0b1 IMP +EndEnum +Res0 47:40 +Enum 39:36 I8I32 + 0b0000 NI + 0b1111 IMP +EndEnum +Enum 35 F16F32 + 0b0 NI + 0b1 IMP +EndEnum +Enum 34 B16F32 + 0b0 NI + 0b1 IMP +EndEnum +Res0 33 +Enum 32 F32F32 + 0b0 NI + 0b1 IMP +EndEnum +Res0 31:0 +EndSysreg + Sysreg ID_AA64ISAR0_EL1 3 0 0 6 0 Enum 63:60 RNDR 0b0000 NI @@ -114,6 +197,122 @@ EndEnum Res0 3:0 EndSysreg +Sysreg ID_AA64ISAR1_EL1 3 0 0 6 1 +Enum 63:60 LS64 + 0b0000 NI + 0b0001 LS64 + 0b0010 LS64_V + 0b0011 LS64_ACCDATA +EndEnum +Enum 59:56 XS + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 55:52 I8MM + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 51:48 DGH + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 47:44 BF16 + 0b0000 NI + 0b0001 IMP + 0b0010 EBF16 +EndEnum +Enum 43:40 SPECRES + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 39:36 SB + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 35:32 FRINTTS + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 31:28 GPI + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 27:24 GPA + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 LRCPC + 0b0000 NI + 0b0001 IMP + 0b0010 LRCPC2 +EndEnum +Enum 19:16 FCMA + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 15:12 JSCVT + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 11:8 API + 0b0000 NI + 0b0001 PAuth + 0b0010 EPAC + 0b0011 PAuth2 + 0b0100 FPAC + 0b0101 FPACCOMBINE +EndEnum +Enum 7:4 APA + 0b0000 NI + 0b0001 PAuth + 0b0010 EPAC + 0b0011 PAuth2 + 0b0100 FPAC + 0b0101 FPACCOMBINE +EndEnum +Enum 3:0 DPB + 0b0000 NI + 0b0001 IMP + 0b0010 DPB2 +EndEnum +EndSysreg + +Sysreg ID_AA64ISAR2_EL1 3 0 0 6 2 +Res0 63:28 +Enum 27:24 PAC_frac + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 BC + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 19:16 MOPS + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 15:12 APA3 + 0b0000 NI + 0b0001 PAuth + 0b0010 EPAC + 0b0011 PAuth2 + 0b0100 FPAC + 0b0101 FPACCOMBINE +EndEnum +Enum 11:8 GPA3 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 7:4 RPRES + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 3:0 WFxT + 0b0000 NI + 0b0010 IMP +EndEnum +EndSysreg + Sysreg SCTLR_EL1 3 0 1 0 0 Field 63 TIDCP Field 62 SPINMASK @@ -257,6 +456,11 @@ Field 5:3 Ctype2 Field 2:0 Ctype1 EndSysreg +Sysreg GMID_EL1 3 1 0 0 4 +Res0 63:4 +Field 3:0 BS +EndSysreg + Sysreg SMIDR_EL1 3 1 0 0 6 Res0 63:32 Field 31:24 IMPLEMENTER @@ -273,6 +477,33 @@ Field 3:1 Level Field 0 InD EndSysreg +Sysreg CTR_EL0 3 3 0 0 1 +Res0 63:38 +Field 37:32 TminLine +Res1 31 +Res0 30 +Field 29 DIC +Field 28 IDC +Field 27:24 CWG +Field 23:20 ERG +Field 19:16 DminLine +Enum 15:14 L1Ip + 0b00 VPIPT + # This is named as AIVIVT in the ARM but documented as reserved + 0b01 RESERVED + 0b10 VIPT + 0b11 PIPT +EndEnum +Res0 13:4 +Field 3:0 IminLine +EndSysreg + +Sysreg DCZID_EL0 3 3 0 0 7 +Res0 63:5 +Field 4 DZP +Field 3:0 BS +EndSysreg + Sysreg SVCR 3 3 4 2 2 Res0 63:2 Field 1 ZA @@ -367,3 +598,36 @@ EndSysreg Sysreg TTBR1_EL1 3 0 2 0 1 Fields TTBRx_EL1 EndSysreg + +Sysreg LORSA_EL1 3 0 10 4 0 +Res0 63:52 +Field 51:16 SA +Res0 15:1 +Field 0 Valid +EndSysreg + +Sysreg LOREA_EL1 3 0 10 4 1 +Res0 63:52 +Field 51:48 EA_51_48 +Field 47:16 EA_47_16 +Res0 15:0 +EndSysreg + +Sysreg LORN_EL1 3 0 10 4 2 +Res0 63:8 +Field 7:0 Num +EndSysreg + +Sysreg LORC_EL1 3 0 10 4 3 +Res0 63:10 +Field 9:2 DS +Res0 1 +Field 0 EN +EndSysreg + +Sysreg LORID_EL1 3 0 10 4 7 +Res0 63:24 +Field 23:16 LD +Res0 15:8 +Field 7:0 LR +EndSysreg diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b2ec65522c58..5aa4c2ecf5c7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -278,6 +278,7 @@ config X86 select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select TRACE_IRQFLAGS_SUPPORT + select TRACE_IRQFLAGS_NMI_SUPPORT select USER_STACKTRACE_SUPPORT select VIRT_TO_BUS select HAVE_ARCH_KCSAN if X86_64 diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 340399f69954..bdfe08f1a930 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -1,8 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -config TRACE_IRQFLAGS_NMI_SUPPORT - def_bool y - config EARLY_PRINTK_USB bool diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm index be7f512109f7..747aa537389b 100644 --- a/drivers/cpuidle/Kconfig.arm +++ b/drivers/cpuidle/Kconfig.arm @@ -3,7 +3,8 @@ # ARM CPU Idle drivers # config ARM_CPUIDLE - bool "Generic ARM/ARM64 CPU idle Driver" + bool "Generic ARM CPU idle Driver" + depends on ARM select DT_IDLE_STATES select CPU_IDLE_MULTIPLE_DRIVERS help diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 96e09fa40909..03b1309875ae 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1139,7 +1139,7 @@ static void cci_pmu_start(struct perf_event *event, int pmu_flags) /* * To handle interrupt latency, we always reprogram the period - * regardlesss of PERF_EF_RELOAD. + * regardless of PERF_EF_RELOAD. */ if (pmu_flags & PERF_EF_RELOAD) WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); @@ -1261,7 +1261,7 @@ static int validate_group(struct perf_event *event) */ .used_mask = mask, }; - memset(mask, 0, BITS_TO_LONGS(cci_pmu->num_cntrs) * sizeof(unsigned long)); + bitmap_zero(mask, cci_pmu->num_cntrs); if (!validate_event(event->pmu, &fake_pmu, leader)) return -EINVAL; @@ -1629,10 +1629,9 @@ static struct cci_pmu *cci_pmu_alloc(struct device *dev) GFP_KERNEL); if (!cci_pmu->hw_events.events) return ERR_PTR(-ENOMEM); - cci_pmu->hw_events.used_mask = devm_kcalloc(dev, - BITS_TO_LONGS(CCI_PMU_MAX_HW_CNTRS(model)), - sizeof(*cci_pmu->hw_events.used_mask), - GFP_KERNEL); + cci_pmu->hw_events.used_mask = devm_bitmap_zalloc(dev, + CCI_PMU_MAX_HW_CNTRS(model), + GFP_KERNEL); if (!cci_pmu->hw_events.used_mask) return ERR_PTR(-ENOMEM); diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index 40b352e8aa7f..728d13d8e98a 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -1250,7 +1250,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) ccn->dt.cmp_mask[CCN_IDX_MASK_OPCODE].h = ~(0x1f << 9); /* Get a convenient /sys/event_source/devices/ name */ - ccn->dt.id = ida_simple_get(&arm_ccn_pmu_ida, 0, 0, GFP_KERNEL); + ccn->dt.id = ida_alloc(&arm_ccn_pmu_ida, GFP_KERNEL); if (ccn->dt.id == 0) { name = "ccn"; } else { @@ -1312,7 +1312,7 @@ error_pmu_register: &ccn->dt.node); error_set_affinity: error_choose_name: - ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id); + ida_free(&arm_ccn_pmu_ida, ccn->dt.id); for (i = 0; i < ccn->num_xps; i++) writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL); writel(0, ccn->dt.base + CCN_DT_PMCR); @@ -1329,7 +1329,7 @@ static void arm_ccn_pmu_cleanup(struct arm_ccn *ccn) writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL); writel(0, ccn->dt.base + CCN_DT_PMCR); perf_pmu_unregister(&ccn->dt.pmu); - ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id); + ida_free(&arm_ccn_pmu_ida, ccn->dt.id); } static int arm_ccn_for_each_valid_region(struct arm_ccn *ccn, diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index db670b265897..b65a7d9640e1 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -39,6 +39,24 @@ #include <asm/mmu.h> #include <asm/sysreg.h> +/* + * Cache if the event is allowed to trace Context information. + * This allows us to perform the check, i.e, perfmon_capable(), + * in the context of the event owner, once, during the event_init(). + */ +#define SPE_PMU_HW_FLAGS_CX BIT(0) + +static void set_spe_event_has_cx(struct perf_event *event) +{ + if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable()) + event->hw.flags |= SPE_PMU_HW_FLAGS_CX; +} + +static bool get_spe_event_has_cx(struct perf_event *event) +{ + return !!(event->hw.flags & SPE_PMU_HW_FLAGS_CX); +} + #define ARM_SPE_BUF_PAD_BYTE 0 struct arm_spe_pmu_buf { @@ -272,7 +290,7 @@ static u64 arm_spe_event_to_pmscr(struct perf_event *event) if (!attr->exclude_kernel) reg |= BIT(SYS_PMSCR_EL1_E1SPE_SHIFT); - if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable()) + if (get_spe_event_has_cx(event)) reg |= BIT(SYS_PMSCR_EL1_CX_SHIFT); return reg; @@ -709,10 +727,10 @@ static int arm_spe_pmu_event_init(struct perf_event *event) !(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT)) return -EOPNOTSUPP; + set_spe_event_has_cx(event); reg = arm_spe_event_to_pmscr(event); if (!perfmon_capable() && (reg & (BIT(SYS_PMSCR_EL1_PA_SHIFT) | - BIT(SYS_PMSCR_EL1_CX_SHIFT) | BIT(SYS_PMSCR_EL1_PCT_SHIFT)))) return -EACCES; diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index b1b2a55de77f..8e058e08fe81 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -611,7 +611,7 @@ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base, .dev = dev, }; - pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL); + pmu->id = ida_alloc(&ddr_ida, GFP_KERNEL); return pmu->id; } @@ -765,7 +765,7 @@ ddr_perf_err: cpuhp_instance_err: cpuhp_remove_multi_state(pmu->cpuhp_state); cpuhp_state_err: - ida_simple_remove(&ddr_ida, pmu->id); + ida_free(&ddr_ida, pmu->id); dev_warn(&pdev->dev, "i.MX8 DDR Perf PMU failed (%d), disabled\n", ret); return ret; } @@ -779,7 +779,7 @@ static int ddr_perf_remove(struct platform_device *pdev) perf_pmu_unregister(&pmu->pmu); - ida_simple_remove(&ddr_ida, pmu->id); + ida_free(&ddr_ida, pmu->id); return 0; } diff --git a/drivers/perf/hisilicon/Kconfig b/drivers/perf/hisilicon/Kconfig index 5546218b5598..171bfc1b6bc2 100644 --- a/drivers/perf/hisilicon/Kconfig +++ b/drivers/perf/hisilicon/Kconfig @@ -14,3 +14,13 @@ config HISI_PCIE_PMU RCiEP devices. Adds the PCIe PMU into perf events system for monitoring latency, bandwidth etc. + +config HNS3_PMU + tristate "HNS3 PERF PMU" + depends on ARM64 || COMPILE_TEST + depends on PCI + help + Provide support for HNS3 performance monitoring unit (PMU) RCiEP + devices. + Adds the HNS3 PMU into perf events system for monitoring latency, + bandwidth etc. diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index 6be83517acaa..4d2c9abe3372 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o +obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 62299ab5a9be..50d0c0a2f1fe 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -516,21 +516,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, ddrc_pmu->index_id); - ddrc_pmu->pmu = (struct pmu) { - .name = name, - .module = THIS_MODULE, - .task_ctx_nr = perf_invalid_context, - .event_init = hisi_uncore_pmu_event_init, - .pmu_enable = hisi_uncore_pmu_enable, - .pmu_disable = hisi_uncore_pmu_disable, - .add = hisi_uncore_pmu_add, - .del = hisi_uncore_pmu_del, - .start = hisi_uncore_pmu_start, - .stop = hisi_uncore_pmu_stop, - .read = hisi_uncore_pmu_read, - .attr_groups = ddrc_pmu->pmu_events.attr_groups, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - }; + hisi_pmu_init(&ddrc_pmu->pmu, name, ddrc_pmu->pmu_events.attr_groups, THIS_MODULE); ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 393513150106..13017b3412a5 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -519,21 +519,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", hha_pmu->sccl_id, hha_pmu->index_id); - hha_pmu->pmu = (struct pmu) { - .name = name, - .module = THIS_MODULE, - .task_ctx_nr = perf_invalid_context, - .event_init = hisi_uncore_pmu_event_init, - .pmu_enable = hisi_uncore_pmu_enable, - .pmu_disable = hisi_uncore_pmu_disable, - .add = hisi_uncore_pmu_add, - .del = hisi_uncore_pmu_del, - .start = hisi_uncore_pmu_start, - .stop = hisi_uncore_pmu_stop, - .read = hisi_uncore_pmu_read, - .attr_groups = hha_pmu->pmu_events.attr_groups, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - }; + hisi_pmu_init(&hha_pmu->pmu, name, hha_pmu->pmu_events.attr_groups, THIS_MODULE); ret = perf_pmu_register(&hha_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 560ab964c8b5..2995f3630d49 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -557,21 +557,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) */ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", l3c_pmu->sccl_id, l3c_pmu->ccl_id); - l3c_pmu->pmu = (struct pmu) { - .name = name, - .module = THIS_MODULE, - .task_ctx_nr = perf_invalid_context, - .event_init = hisi_uncore_pmu_event_init, - .pmu_enable = hisi_uncore_pmu_enable, - .pmu_disable = hisi_uncore_pmu_disable, - .add = hisi_uncore_pmu_add, - .del = hisi_uncore_pmu_del, - .start = hisi_uncore_pmu_start, - .stop = hisi_uncore_pmu_stop, - .read = hisi_uncore_pmu_read, - .attr_groups = l3c_pmu->pmu_events.attr_groups, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - }; + hisi_pmu_init(&l3c_pmu->pmu, name, l3c_pmu->pmu_events.attr_groups, THIS_MODULE); ret = perf_pmu_register(&l3c_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index a0ee84d97c41..47d3cc9b6eec 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -412,21 +412,7 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) return ret; } - pa_pmu->pmu = (struct pmu) { - .module = THIS_MODULE, - .task_ctx_nr = perf_invalid_context, - .event_init = hisi_uncore_pmu_event_init, - .pmu_enable = hisi_uncore_pmu_enable, - .pmu_disable = hisi_uncore_pmu_disable, - .add = hisi_uncore_pmu_add, - .del = hisi_uncore_pmu_del, - .start = hisi_uncore_pmu_start, - .stop = hisi_uncore_pmu_stop, - .read = hisi_uncore_pmu_read, - .attr_groups = pa_pmu->pmu_events.attr_groups, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - }; - + hisi_pmu_init(&pa_pmu->pmu, name, pa_pmu->pmu_events.attr_groups, THIS_MODULE); ret = perf_pmu_register(&pa_pmu->pmu, name, -1); if (ret) { dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 980b9ee6eb14..fbc8a93d5eac 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -531,4 +531,22 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) } EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu); +void hisi_pmu_init(struct pmu *pmu, const char *name, + const struct attribute_group **attr_groups, struct module *module) +{ + pmu->name = name; + pmu->module = module; + pmu->task_ctx_nr = perf_invalid_context; + pmu->event_init = hisi_uncore_pmu_event_init; + pmu->pmu_enable = hisi_uncore_pmu_enable; + pmu->pmu_disable = hisi_uncore_pmu_disable; + pmu->add = hisi_uncore_pmu_add; + pmu->del = hisi_uncore_pmu_del; + pmu->start = hisi_uncore_pmu_start; + pmu->stop = hisi_uncore_pmu_stop; + pmu->read = hisi_uncore_pmu_read; + pmu->attr_groups = attr_groups; +} +EXPORT_SYMBOL_GPL(hisi_pmu_init); + MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 96eeddad55ff..b59de33cd059 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -121,4 +121,6 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, struct platform_device *pdev); +void hisi_pmu_init(struct pmu *pmu, const char *name, + const struct attribute_group **attr_groups, struct module *module); #endif /* __HISI_UNCORE_PMU_H__ */ diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index 6aedc303ff56..b9c79f17230c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -445,20 +445,7 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev) return ret; } - sllc_pmu->pmu = (struct pmu) { - .module = THIS_MODULE, - .task_ctx_nr = perf_invalid_context, - .event_init = hisi_uncore_pmu_event_init, - .pmu_enable = hisi_uncore_pmu_enable, - .pmu_disable = hisi_uncore_pmu_disable, - .add = hisi_uncore_pmu_add, - .del = hisi_uncore_pmu_del, - .start = hisi_uncore_pmu_start, - .stop = hisi_uncore_pmu_stop, - .read = hisi_uncore_pmu_read, - .attr_groups = sllc_pmu->pmu_events.attr_groups, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - }; + hisi_pmu_init(&sllc_pmu->pmu, name, sllc_pmu->pmu_events.attr_groups, THIS_MODULE); ret = perf_pmu_register(&sllc_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hns3_pmu.c b/drivers/perf/hisilicon/hns3_pmu.c new file mode 100644 index 000000000000..e0457d84af6b --- /dev/null +++ b/drivers/perf/hisilicon/hns3_pmu.c @@ -0,0 +1,1671 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This driver adds support for HNS3 PMU iEP device. Related perf events are + * bandwidth, latency, packet rate, interrupt rate etc. + * + * Copyright (C) 2022 HiSilicon Limited + */ +#include <linux/bitfield.h> +#include <linux/bitmap.h> +#include <linux/bug.h> +#include <linux/cpuhotplug.h> +#include <linux/cpumask.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/iopoll.h> +#include <linux/io-64-nonatomic-hi-lo.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/pci-epf.h> +#include <linux/perf_event.h> +#include <linux/smp.h> + +/* registers offset address */ +#define HNS3_PMU_REG_GLOBAL_CTRL 0x0000 +#define HNS3_PMU_REG_CLOCK_FREQ 0x0020 +#define HNS3_PMU_REG_BDF 0x0fe0 +#define HNS3_PMU_REG_VERSION 0x0fe4 +#define HNS3_PMU_REG_DEVICE_ID 0x0fe8 + +#define HNS3_PMU_REG_EVENT_OFFSET 0x1000 +#define HNS3_PMU_REG_EVENT_SIZE 0x1000 +#define HNS3_PMU_REG_EVENT_CTRL_LOW 0x00 +#define HNS3_PMU_REG_EVENT_CTRL_HIGH 0x04 +#define HNS3_PMU_REG_EVENT_INTR_STATUS 0x08 +#define HNS3_PMU_REG_EVENT_INTR_MASK 0x0c +#define HNS3_PMU_REG_EVENT_COUNTER 0x10 +#define HNS3_PMU_REG_EVENT_EXT_COUNTER 0x18 +#define HNS3_PMU_REG_EVENT_QID_CTRL 0x28 +#define HNS3_PMU_REG_EVENT_QID_PARA 0x2c + +#define HNS3_PMU_FILTER_SUPPORT_GLOBAL BIT(0) +#define HNS3_PMU_FILTER_SUPPORT_PORT BIT(1) +#define HNS3_PMU_FILTER_SUPPORT_PORT_TC BIT(2) +#define HNS3_PMU_FILTER_SUPPORT_FUNC BIT(3) +#define HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE BIT(4) +#define HNS3_PMU_FILTER_SUPPORT_FUNC_INTR BIT(5) + +#define HNS3_PMU_FILTER_ALL_TC 0xf +#define HNS3_PMU_FILTER_ALL_QUEUE 0xffff + +#define HNS3_PMU_CTRL_SUBEVENT_S 4 +#define HNS3_PMU_CTRL_FILTER_MODE_S 24 + +#define HNS3_PMU_GLOBAL_START BIT(0) + +#define HNS3_PMU_EVENT_STATUS_RESET BIT(11) +#define HNS3_PMU_EVENT_EN BIT(12) +#define HNS3_PMU_EVENT_OVERFLOW_RESTART BIT(15) + +#define HNS3_PMU_QID_PARA_FUNC_S 0 +#define HNS3_PMU_QID_PARA_QUEUE_S 16 + +#define HNS3_PMU_QID_CTRL_REQ_ENABLE BIT(0) +#define HNS3_PMU_QID_CTRL_DONE BIT(1) +#define HNS3_PMU_QID_CTRL_MISS BIT(2) + +#define HNS3_PMU_INTR_MASK_OVERFLOW BIT(1) + +#define HNS3_PMU_MAX_HW_EVENTS 8 + +/* + * Each hardware event contains two registers (counter and ext_counter) for + * bandwidth, packet rate, latency and interrupt rate. These two registers will + * be triggered to run at the same when a hardware event is enabled. The meaning + * of counter and ext_counter of different event type are different, their + * meaning show as follow: + * + * +----------------+------------------+---------------+ + * | event type | counter | ext_counter | + * +----------------+------------------+---------------+ + * | bandwidth | byte number | cycle number | + * +----------------+------------------+---------------+ + * | packet rate | packet number | cycle number | + * +----------------+------------------+---------------+ + * | latency | cycle number | packet number | + * +----------------+------------------+---------------+ + * | interrupt rate | interrupt number | cycle number | + * +----------------+------------------+---------------+ + * + * The cycle number indicates increment of counter of hardware timer, the + * frequency of hardware timer can be read from hw_clk_freq file. + * + * Performance of each hardware event is calculated by: counter / ext_counter. + * + * Since processing of data is preferred to be done in userspace, we expose + * ext_counter as a separate event for userspace and use bit 16 to indicate it. + * For example, event 0x00001 and 0x10001 are actually one event for hardware + * because bit 0-15 are same. If the bit 16 of one event is 0 means to read + * counter register, otherwise means to read ext_counter register. + */ +/* bandwidth events */ +#define HNS3_PMU_EVT_BW_SSU_EGU_BYTE_NUM 0x00001 +#define HNS3_PMU_EVT_BW_SSU_EGU_TIME 0x10001 +#define HNS3_PMU_EVT_BW_SSU_RPU_BYTE_NUM 0x00002 +#define HNS3_PMU_EVT_BW_SSU_RPU_TIME 0x10002 +#define HNS3_PMU_EVT_BW_SSU_ROCE_BYTE_NUM 0x00003 +#define HNS3_PMU_EVT_BW_SSU_ROCE_TIME 0x10003 +#define HNS3_PMU_EVT_BW_ROCE_SSU_BYTE_NUM 0x00004 +#define HNS3_PMU_EVT_BW_ROCE_SSU_TIME 0x10004 +#define HNS3_PMU_EVT_BW_TPU_SSU_BYTE_NUM 0x00005 +#define HNS3_PMU_EVT_BW_TPU_SSU_TIME 0x10005 +#define HNS3_PMU_EVT_BW_RPU_RCBRX_BYTE_NUM 0x00006 +#define HNS3_PMU_EVT_BW_RPU_RCBRX_TIME 0x10006 +#define HNS3_PMU_EVT_BW_RCBTX_TXSCH_BYTE_NUM 0x00008 +#define HNS3_PMU_EVT_BW_RCBTX_TXSCH_TIME 0x10008 +#define HNS3_PMU_EVT_BW_WR_FBD_BYTE_NUM 0x00009 +#define HNS3_PMU_EVT_BW_WR_FBD_TIME 0x10009 +#define HNS3_PMU_EVT_BW_WR_EBD_BYTE_NUM 0x0000a +#define HNS3_PMU_EVT_BW_WR_EBD_TIME 0x1000a +#define HNS3_PMU_EVT_BW_RD_FBD_BYTE_NUM 0x0000b +#define HNS3_PMU_EVT_BW_RD_FBD_TIME 0x1000b +#define HNS3_PMU_EVT_BW_RD_EBD_BYTE_NUM 0x0000c +#define HNS3_PMU_EVT_BW_RD_EBD_TIME 0x1000c +#define HNS3_PMU_EVT_BW_RD_PAY_M0_BYTE_NUM 0x0000d +#define HNS3_PMU_EVT_BW_RD_PAY_M0_TIME 0x1000d +#define HNS3_PMU_EVT_BW_RD_PAY_M1_BYTE_NUM 0x0000e +#define HNS3_PMU_EVT_BW_RD_PAY_M1_TIME 0x1000e +#define HNS3_PMU_EVT_BW_WR_PAY_M0_BYTE_NUM 0x0000f +#define HNS3_PMU_EVT_BW_WR_PAY_M0_TIME 0x1000f +#define HNS3_PMU_EVT_BW_WR_PAY_M1_BYTE_NUM 0x00010 +#define HNS3_PMU_EVT_BW_WR_PAY_M1_TIME 0x10010 + +/* packet rate events */ +#define HNS3_PMU_EVT_PPS_IGU_SSU_PACKET_NUM 0x00100 +#define HNS3_PMU_EVT_PPS_IGU_SSU_TIME 0x10100 +#define HNS3_PMU_EVT_PPS_SSU_EGU_PACKET_NUM 0x00101 +#define HNS3_PMU_EVT_PPS_SSU_EGU_TIME 0x10101 +#define HNS3_PMU_EVT_PPS_SSU_RPU_PACKET_NUM 0x00102 +#define HNS3_PMU_EVT_PPS_SSU_RPU_TIME 0x10102 +#define HNS3_PMU_EVT_PPS_SSU_ROCE_PACKET_NUM 0x00103 +#define HNS3_PMU_EVT_PPS_SSU_ROCE_TIME 0x10103 +#define HNS3_PMU_EVT_PPS_ROCE_SSU_PACKET_NUM 0x00104 +#define HNS3_PMU_EVT_PPS_ROCE_SSU_TIME 0x10104 +#define HNS3_PMU_EVT_PPS_TPU_SSU_PACKET_NUM 0x00105 +#define HNS3_PMU_EVT_PPS_TPU_SSU_TIME 0x10105 +#define HNS3_PMU_EVT_PPS_RPU_RCBRX_PACKET_NUM 0x00106 +#define HNS3_PMU_EVT_PPS_RPU_RCBRX_TIME 0x10106 +#define HNS3_PMU_EVT_PPS_RCBTX_TPU_PACKET_NUM 0x00107 +#define HNS3_PMU_EVT_PPS_RCBTX_TPU_TIME 0x10107 +#define HNS3_PMU_EVT_PPS_RCBTX_TXSCH_PACKET_NUM 0x00108 +#define HNS3_PMU_EVT_PPS_RCBTX_TXSCH_TIME 0x10108 +#define HNS3_PMU_EVT_PPS_WR_FBD_PACKET_NUM 0x00109 +#define HNS3_PMU_EVT_PPS_WR_FBD_TIME 0x10109 +#define HNS3_PMU_EVT_PPS_WR_EBD_PACKET_NUM 0x0010a +#define HNS3_PMU_EVT_PPS_WR_EBD_TIME 0x1010a +#define HNS3_PMU_EVT_PPS_RD_FBD_PACKET_NUM 0x0010b +#define HNS3_PMU_EVT_PPS_RD_FBD_TIME 0x1010b +#define HNS3_PMU_EVT_PPS_RD_EBD_PACKET_NUM 0x0010c +#define HNS3_PMU_EVT_PPS_RD_EBD_TIME 0x1010c +#define HNS3_PMU_EVT_PPS_RD_PAY_M0_PACKET_NUM 0x0010d +#define HNS3_PMU_EVT_PPS_RD_PAY_M0_TIME 0x1010d +#define HNS3_PMU_EVT_PPS_RD_PAY_M1_PACKET_NUM 0x0010e +#define HNS3_PMU_EVT_PPS_RD_PAY_M1_TIME 0x1010e +#define HNS3_PMU_EVT_PPS_WR_PAY_M0_PACKET_NUM 0x0010f +#define HNS3_PMU_EVT_PPS_WR_PAY_M0_TIME 0x1010f +#define HNS3_PMU_EVT_PPS_WR_PAY_M1_PACKET_NUM 0x00110 +#define HNS3_PMU_EVT_PPS_WR_PAY_M1_TIME 0x10110 +#define HNS3_PMU_EVT_PPS_NICROH_TX_PRE_PACKET_NUM 0x00111 +#define HNS3_PMU_EVT_PPS_NICROH_TX_PRE_TIME 0x10111 +#define HNS3_PMU_EVT_PPS_NICROH_RX_PRE_PACKET_NUM 0x00112 +#define HNS3_PMU_EVT_PPS_NICROH_RX_PRE_TIME 0x10112 + +/* latency events */ +#define HNS3_PMU_EVT_DLY_TX_PUSH_TIME 0x00202 +#define HNS3_PMU_EVT_DLY_TX_PUSH_PACKET_NUM 0x10202 +#define HNS3_PMU_EVT_DLY_TX_TIME 0x00204 +#define HNS3_PMU_EVT_DLY_TX_PACKET_NUM 0x10204 +#define HNS3_PMU_EVT_DLY_SSU_TX_NIC_TIME 0x00206 +#define HNS3_PMU_EVT_DLY_SSU_TX_NIC_PACKET_NUM 0x10206 +#define HNS3_PMU_EVT_DLY_SSU_TX_ROCE_TIME 0x00207 +#define HNS3_PMU_EVT_DLY_SSU_TX_ROCE_PACKET_NUM 0x10207 +#define HNS3_PMU_EVT_DLY_SSU_RX_NIC_TIME 0x00208 +#define HNS3_PMU_EVT_DLY_SSU_RX_NIC_PACKET_NUM 0x10208 +#define HNS3_PMU_EVT_DLY_SSU_RX_ROCE_TIME 0x00209 +#define HNS3_PMU_EVT_DLY_SSU_RX_ROCE_PACKET_NUM 0x10209 +#define HNS3_PMU_EVT_DLY_RPU_TIME 0x0020e +#define HNS3_PMU_EVT_DLY_RPU_PACKET_NUM 0x1020e +#define HNS3_PMU_EVT_DLY_TPU_TIME 0x0020f +#define HNS3_PMU_EVT_DLY_TPU_PACKET_NUM 0x1020f +#define HNS3_PMU_EVT_DLY_RPE_TIME 0x00210 +#define HNS3_PMU_EVT_DLY_RPE_PACKET_NUM 0x10210 +#define HNS3_PMU_EVT_DLY_TPE_TIME 0x00211 +#define HNS3_PMU_EVT_DLY_TPE_PACKET_NUM 0x10211 +#define HNS3_PMU_EVT_DLY_TPE_PUSH_TIME 0x00212 +#define HNS3_PMU_EVT_DLY_TPE_PUSH_PACKET_NUM 0x10212 +#define HNS3_PMU_EVT_DLY_WR_FBD_TIME 0x00213 +#define HNS3_PMU_EVT_DLY_WR_FBD_PACKET_NUM 0x10213 +#define HNS3_PMU_EVT_DLY_WR_EBD_TIME 0x00214 +#define HNS3_PMU_EVT_DLY_WR_EBD_PACKET_NUM 0x10214 +#define HNS3_PMU_EVT_DLY_RD_FBD_TIME 0x00215 +#define HNS3_PMU_EVT_DLY_RD_FBD_PACKET_NUM 0x10215 +#define HNS3_PMU_EVT_DLY_RD_EBD_TIME 0x00216 +#define HNS3_PMU_EVT_DLY_RD_EBD_PACKET_NUM 0x10216 +#define HNS3_PMU_EVT_DLY_RD_PAY_M0_TIME 0x00217 +#define HNS3_PMU_EVT_DLY_RD_PAY_M0_PACKET_NUM 0x10217 +#define HNS3_PMU_EVT_DLY_RD_PAY_M1_TIME 0x00218 +#define HNS3_PMU_EVT_DLY_RD_PAY_M1_PACKET_NUM 0x10218 +#define HNS3_PMU_EVT_DLY_WR_PAY_M0_TIME 0x00219 +#define HNS3_PMU_EVT_DLY_WR_PAY_M0_PACKET_NUM 0x10219 +#define HNS3_PMU_EVT_DLY_WR_PAY_M1_TIME 0x0021a +#define HNS3_PMU_EVT_DLY_WR_PAY_M1_PACKET_NUM 0x1021a +#define HNS3_PMU_EVT_DLY_MSIX_WRITE_TIME 0x0021c +#define HNS3_PMU_EVT_DLY_MSIX_WRITE_PACKET_NUM 0x1021c + +/* interrupt rate events */ +#define HNS3_PMU_EVT_PPS_MSIX_NIC_INTR_NUM 0x00300 +#define HNS3_PMU_EVT_PPS_MSIX_NIC_TIME 0x10300 + +/* filter mode supported by each bandwidth event */ +#define HNS3_PMU_FILTER_BW_SSU_EGU 0x07 +#define HNS3_PMU_FILTER_BW_SSU_RPU 0x1f +#define HNS3_PMU_FILTER_BW_SSU_ROCE 0x0f +#define HNS3_PMU_FILTER_BW_ROCE_SSU 0x0f +#define HNS3_PMU_FILTER_BW_TPU_SSU 0x1f +#define HNS3_PMU_FILTER_BW_RPU_RCBRX 0x11 +#define HNS3_PMU_FILTER_BW_RCBTX_TXSCH 0x11 +#define HNS3_PMU_FILTER_BW_WR_FBD 0x1b +#define HNS3_PMU_FILTER_BW_WR_EBD 0x11 +#define HNS3_PMU_FILTER_BW_RD_FBD 0x01 +#define HNS3_PMU_FILTER_BW_RD_EBD 0x1b +#define HNS3_PMU_FILTER_BW_RD_PAY_M0 0x01 +#define HNS3_PMU_FILTER_BW_RD_PAY_M1 0x01 +#define HNS3_PMU_FILTER_BW_WR_PAY_M0 0x01 +#define HNS3_PMU_FILTER_BW_WR_PAY_M1 0x01 + +/* filter mode supported by each packet rate event */ +#define HNS3_PMU_FILTER_PPS_IGU_SSU 0x07 +#define HNS3_PMU_FILTER_PPS_SSU_EGU 0x07 +#define HNS3_PMU_FILTER_PPS_SSU_RPU 0x1f +#define HNS3_PMU_FILTER_PPS_SSU_ROCE 0x0f +#define HNS3_PMU_FILTER_PPS_ROCE_SSU 0x0f +#define HNS3_PMU_FILTER_PPS_TPU_SSU 0x1f +#define HNS3_PMU_FILTER_PPS_RPU_RCBRX 0x11 +#define HNS3_PMU_FILTER_PPS_RCBTX_TPU 0x1f +#define HNS3_PMU_FILTER_PPS_RCBTX_TXSCH 0x11 +#define HNS3_PMU_FILTER_PPS_WR_FBD 0x1b +#define HNS3_PMU_FILTER_PPS_WR_EBD 0x11 +#define HNS3_PMU_FILTER_PPS_RD_FBD 0x01 +#define HNS3_PMU_FILTER_PPS_RD_EBD 0x1b +#define HNS3_PMU_FILTER_PPS_RD_PAY_M0 0x01 +#define HNS3_PMU_FILTER_PPS_RD_PAY_M1 0x01 +#define HNS3_PMU_FILTER_PPS_WR_PAY_M0 0x01 +#define HNS3_PMU_FILTER_PPS_WR_PAY_M1 0x01 +#define HNS3_PMU_FILTER_PPS_NICROH_TX_PRE 0x01 +#define HNS3_PMU_FILTER_PPS_NICROH_RX_PRE 0x01 + +/* filter mode supported by each latency event */ +#define HNS3_PMU_FILTER_DLY_TX_PUSH 0x01 +#define HNS3_PMU_FILTER_DLY_TX 0x01 +#define HNS3_PMU_FILTER_DLY_SSU_TX_NIC 0x07 +#define HNS3_PMU_FILTER_DLY_SSU_TX_ROCE 0x07 +#define HNS3_PMU_FILTER_DLY_SSU_RX_NIC 0x07 +#define HNS3_PMU_FILTER_DLY_SSU_RX_ROCE 0x07 +#define HNS3_PMU_FILTER_DLY_RPU 0x11 +#define HNS3_PMU_FILTER_DLY_TPU 0x1f +#define HNS3_PMU_FILTER_DLY_RPE 0x01 +#define HNS3_PMU_FILTER_DLY_TPE 0x0b +#define HNS3_PMU_FILTER_DLY_TPE_PUSH 0x1b +#define HNS3_PMU_FILTER_DLY_WR_FBD 0x1b +#define HNS3_PMU_FILTER_DLY_WR_EBD 0x11 +#define HNS3_PMU_FILTER_DLY_RD_FBD 0x01 +#define HNS3_PMU_FILTER_DLY_RD_EBD 0x1b +#define HNS3_PMU_FILTER_DLY_RD_PAY_M0 0x01 +#define HNS3_PMU_FILTER_DLY_RD_PAY_M1 0x01 +#define HNS3_PMU_FILTER_DLY_WR_PAY_M0 0x01 +#define HNS3_PMU_FILTER_DLY_WR_PAY_M1 0x01 +#define HNS3_PMU_FILTER_DLY_MSIX_WRITE 0x01 + +/* filter mode supported by each interrupt rate event */ +#define HNS3_PMU_FILTER_INTR_MSIX_NIC 0x01 + +enum hns3_pmu_hw_filter_mode { + HNS3_PMU_HW_FILTER_GLOBAL, + HNS3_PMU_HW_FILTER_PORT, + HNS3_PMU_HW_FILTER_PORT_TC, + HNS3_PMU_HW_FILTER_FUNC, + HNS3_PMU_HW_FILTER_FUNC_QUEUE, + HNS3_PMU_HW_FILTER_FUNC_INTR, +}; + +struct hns3_pmu_event_attr { + u32 event; + u16 filter_support; +}; + +struct hns3_pmu { + struct perf_event *hw_events[HNS3_PMU_MAX_HW_EVENTS]; + struct hlist_node node; + struct pci_dev *pdev; + struct pmu pmu; + void __iomem *base; + int irq; + int on_cpu; + u32 identifier; + u32 hw_clk_freq; /* hardware clock frequency of PMU */ + /* maximum and minimum bdf allowed by PMU */ + u16 bdf_min; + u16 bdf_max; +}; + +#define to_hns3_pmu(p) (container_of((p), struct hns3_pmu, pmu)) + +#define GET_PCI_DEVFN(bdf) ((bdf) & 0xff) + +#define FILTER_CONDITION_PORT(port) ((1 << (port)) & 0xff) +#define FILTER_CONDITION_PORT_TC(port, tc) (((port) << 3) | ((tc) & 0x07)) +#define FILTER_CONDITION_FUNC_INTR(func, intr) (((intr) << 8) | (func)) + +#define HNS3_PMU_FILTER_ATTR(_name, _config, _start, _end) \ + static inline u64 hns3_pmu_get_##_name(struct perf_event *event) \ + { \ + return FIELD_GET(GENMASK_ULL(_end, _start), \ + event->attr._config); \ + } + +HNS3_PMU_FILTER_ATTR(subevent, config, 0, 7); +HNS3_PMU_FILTER_ATTR(event_type, config, 8, 15); +HNS3_PMU_FILTER_ATTR(ext_counter_used, config, 16, 16); +HNS3_PMU_FILTER_ATTR(port, config1, 0, 3); +HNS3_PMU_FILTER_ATTR(tc, config1, 4, 7); +HNS3_PMU_FILTER_ATTR(bdf, config1, 8, 23); +HNS3_PMU_FILTER_ATTR(queue, config1, 24, 39); +HNS3_PMU_FILTER_ATTR(intr, config1, 40, 51); +HNS3_PMU_FILTER_ATTR(global, config1, 52, 52); + +#define HNS3_BW_EVT_BYTE_NUM(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_BW_##_name##_BYTE_NUM, \ + HNS3_PMU_FILTER_BW_##_name}) +#define HNS3_BW_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_BW_##_name##_TIME, \ + HNS3_PMU_FILTER_BW_##_name}) +#define HNS3_PPS_EVT_PACKET_NUM(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_PPS_##_name##_PACKET_NUM, \ + HNS3_PMU_FILTER_PPS_##_name}) +#define HNS3_PPS_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_PPS_##_name##_TIME, \ + HNS3_PMU_FILTER_PPS_##_name}) +#define HNS3_DLY_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_DLY_##_name##_TIME, \ + HNS3_PMU_FILTER_DLY_##_name}) +#define HNS3_DLY_EVT_PACKET_NUM(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_DLY_##_name##_PACKET_NUM, \ + HNS3_PMU_FILTER_DLY_##_name}) +#define HNS3_INTR_EVT_INTR_NUM(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_PPS_##_name##_INTR_NUM, \ + HNS3_PMU_FILTER_INTR_##_name}) +#define HNS3_INTR_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_PPS_##_name##_TIME, \ + HNS3_PMU_FILTER_INTR_##_name}) + +static ssize_t hns3_pmu_format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + + return sysfs_emit(buf, "%s\n", (char *)eattr->var); +} + +static ssize_t hns3_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hns3_pmu_event_attr *event; + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + event = eattr->var; + + return sysfs_emit(buf, "config=0x%x\n", event->event); +} + +static ssize_t hns3_pmu_filter_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hns3_pmu_event_attr *event; + struct dev_ext_attribute *eattr; + int len; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + event = eattr->var; + + len = sysfs_emit_at(buf, 0, "filter mode supported: "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_GLOBAL) + len += sysfs_emit_at(buf, len, "global "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT) + len += sysfs_emit_at(buf, len, "port "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT_TC) + len += sysfs_emit_at(buf, len, "port-tc "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC) + len += sysfs_emit_at(buf, len, "func "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE) + len += sysfs_emit_at(buf, len, "func-queue "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_INTR) + len += sysfs_emit_at(buf, len, "func-intr "); + + len += sysfs_emit_at(buf, len, "\n"); + + return len; +} + +#define HNS3_PMU_ATTR(_name, _func, _config) \ + (&((struct dev_ext_attribute[]) { \ + { __ATTR(_name, 0444, _func, NULL), (void *)_config } \ + })[0].attr.attr) + +#define HNS3_PMU_FORMAT_ATTR(_name, _format) \ + HNS3_PMU_ATTR(_name, hns3_pmu_format_show, (void *)_format) +#define HNS3_PMU_EVENT_ATTR(_name, _event) \ + HNS3_PMU_ATTR(_name, hns3_pmu_event_show, (void *)_event) +#define HNS3_PMU_FLT_MODE_ATTR(_name, _event) \ + HNS3_PMU_ATTR(_name, hns3_pmu_filter_mode_show, (void *)_event) + +#define HNS3_PMU_BW_EVT_PAIR(_name, _macro) \ + HNS3_PMU_EVENT_ATTR(_name##_byte_num, HNS3_BW_EVT_BYTE_NUM(_macro)), \ + HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_BW_EVT_TIME(_macro)) +#define HNS3_PMU_PPS_EVT_PAIR(_name, _macro) \ + HNS3_PMU_EVENT_ATTR(_name##_packet_num, HNS3_PPS_EVT_PACKET_NUM(_macro)), \ + HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_PPS_EVT_TIME(_macro)) +#define HNS3_PMU_DLY_EVT_PAIR(_name, _macro) \ + HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_DLY_EVT_TIME(_macro)), \ + HNS3_PMU_EVENT_ATTR(_name##_packet_num, HNS3_DLY_EVT_PACKET_NUM(_macro)) +#define HNS3_PMU_INTR_EVT_PAIR(_name, _macro) \ + HNS3_PMU_EVENT_ATTR(_name##_intr_num, HNS3_INTR_EVT_INTR_NUM(_macro)), \ + HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_INTR_EVT_TIME(_macro)) + +#define HNS3_PMU_BW_FLT_MODE_PAIR(_name, _macro) \ + HNS3_PMU_FLT_MODE_ATTR(_name##_byte_num, HNS3_BW_EVT_BYTE_NUM(_macro)), \ + HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_BW_EVT_TIME(_macro)) +#define HNS3_PMU_PPS_FLT_MODE_PAIR(_name, _macro) \ + HNS3_PMU_FLT_MODE_ATTR(_name##_packet_num, HNS3_PPS_EVT_PACKET_NUM(_macro)), \ + HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_PPS_EVT_TIME(_macro)) +#define HNS3_PMU_DLY_FLT_MODE_PAIR(_name, _macro) \ + HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_DLY_EVT_TIME(_macro)), \ + HNS3_PMU_FLT_MODE_ATTR(_name##_packet_num, HNS3_DLY_EVT_PACKET_NUM(_macro)) +#define HNS3_PMU_INTR_FLT_MODE_PAIR(_name, _macro) \ + HNS3_PMU_FLT_MODE_ATTR(_name##_intr_num, HNS3_INTR_EVT_INTR_NUM(_macro)), \ + HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_INTR_EVT_TIME(_macro)) + +static u8 hns3_pmu_hw_filter_modes[] = { + HNS3_PMU_HW_FILTER_GLOBAL, + HNS3_PMU_HW_FILTER_PORT, + HNS3_PMU_HW_FILTER_PORT_TC, + HNS3_PMU_HW_FILTER_FUNC, + HNS3_PMU_HW_FILTER_FUNC_QUEUE, + HNS3_PMU_HW_FILTER_FUNC_INTR, +}; + +#define HNS3_PMU_SET_HW_FILTER(_hwc, _mode) \ + ((_hwc)->addr_filters = (void *)&hns3_pmu_hw_filter_modes[(_mode)]) + +static ssize_t identifier_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "0x%x\n", hns3_pmu->identifier); +} +static DEVICE_ATTR_RO(identifier); + +static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%d\n", hns3_pmu->on_cpu); +} +static DEVICE_ATTR_RO(cpumask); + +static ssize_t bdf_min_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + u16 bdf = hns3_pmu->bdf_min; + + return sysfs_emit(buf, "%02x:%02x.%x\n", PCI_BUS_NUM(bdf), + PCI_SLOT(bdf), PCI_FUNC(bdf)); +} +static DEVICE_ATTR_RO(bdf_min); + +static ssize_t bdf_max_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + u16 bdf = hns3_pmu->bdf_max; + + return sysfs_emit(buf, "%02x:%02x.%x\n", PCI_BUS_NUM(bdf), + PCI_SLOT(bdf), PCI_FUNC(bdf)); +} +static DEVICE_ATTR_RO(bdf_max); + +static ssize_t hw_clk_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%u\n", hns3_pmu->hw_clk_freq); +} +static DEVICE_ATTR_RO(hw_clk_freq); + +static struct attribute *hns3_pmu_events_attr[] = { + /* bandwidth events */ + HNS3_PMU_BW_EVT_PAIR(bw_ssu_egu, SSU_EGU), + HNS3_PMU_BW_EVT_PAIR(bw_ssu_rpu, SSU_RPU), + HNS3_PMU_BW_EVT_PAIR(bw_ssu_roce, SSU_ROCE), + HNS3_PMU_BW_EVT_PAIR(bw_roce_ssu, ROCE_SSU), + HNS3_PMU_BW_EVT_PAIR(bw_tpu_ssu, TPU_SSU), + HNS3_PMU_BW_EVT_PAIR(bw_rpu_rcbrx, RPU_RCBRX), + HNS3_PMU_BW_EVT_PAIR(bw_rcbtx_txsch, RCBTX_TXSCH), + HNS3_PMU_BW_EVT_PAIR(bw_wr_fbd, WR_FBD), + HNS3_PMU_BW_EVT_PAIR(bw_wr_ebd, WR_EBD), + HNS3_PMU_BW_EVT_PAIR(bw_rd_fbd, RD_FBD), + HNS3_PMU_BW_EVT_PAIR(bw_rd_ebd, RD_EBD), + HNS3_PMU_BW_EVT_PAIR(bw_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_BW_EVT_PAIR(bw_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_BW_EVT_PAIR(bw_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_BW_EVT_PAIR(bw_wr_pay_m1, WR_PAY_M1), + + /* packet rate events */ + HNS3_PMU_PPS_EVT_PAIR(pps_igu_ssu, IGU_SSU), + HNS3_PMU_PPS_EVT_PAIR(pps_ssu_egu, SSU_EGU), + HNS3_PMU_PPS_EVT_PAIR(pps_ssu_rpu, SSU_RPU), + HNS3_PMU_PPS_EVT_PAIR(pps_ssu_roce, SSU_ROCE), + HNS3_PMU_PPS_EVT_PAIR(pps_roce_ssu, ROCE_SSU), + HNS3_PMU_PPS_EVT_PAIR(pps_tpu_ssu, TPU_SSU), + HNS3_PMU_PPS_EVT_PAIR(pps_rpu_rcbrx, RPU_RCBRX), + HNS3_PMU_PPS_EVT_PAIR(pps_rcbtx_tpu, RCBTX_TPU), + HNS3_PMU_PPS_EVT_PAIR(pps_rcbtx_txsch, RCBTX_TXSCH), + HNS3_PMU_PPS_EVT_PAIR(pps_wr_fbd, WR_FBD), + HNS3_PMU_PPS_EVT_PAIR(pps_wr_ebd, WR_EBD), + HNS3_PMU_PPS_EVT_PAIR(pps_rd_fbd, RD_FBD), + HNS3_PMU_PPS_EVT_PAIR(pps_rd_ebd, RD_EBD), + HNS3_PMU_PPS_EVT_PAIR(pps_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_PPS_EVT_PAIR(pps_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_PPS_EVT_PAIR(pps_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_PPS_EVT_PAIR(pps_wr_pay_m1, WR_PAY_M1), + HNS3_PMU_PPS_EVT_PAIR(pps_intr_nicroh_tx_pre, NICROH_TX_PRE), + HNS3_PMU_PPS_EVT_PAIR(pps_intr_nicroh_rx_pre, NICROH_RX_PRE), + + /* latency events */ + HNS3_PMU_DLY_EVT_PAIR(dly_tx_push_to_mac, TX_PUSH), + HNS3_PMU_DLY_EVT_PAIR(dly_tx_normal_to_mac, TX), + HNS3_PMU_DLY_EVT_PAIR(dly_ssu_tx_th_nic, SSU_TX_NIC), + HNS3_PMU_DLY_EVT_PAIR(dly_ssu_tx_th_roce, SSU_TX_ROCE), + HNS3_PMU_DLY_EVT_PAIR(dly_ssu_rx_th_nic, SSU_RX_NIC), + HNS3_PMU_DLY_EVT_PAIR(dly_ssu_rx_th_roce, SSU_RX_ROCE), + HNS3_PMU_DLY_EVT_PAIR(dly_rpu, RPU), + HNS3_PMU_DLY_EVT_PAIR(dly_tpu, TPU), + HNS3_PMU_DLY_EVT_PAIR(dly_rpe, RPE), + HNS3_PMU_DLY_EVT_PAIR(dly_tpe_normal, TPE), + HNS3_PMU_DLY_EVT_PAIR(dly_tpe_push, TPE_PUSH), + HNS3_PMU_DLY_EVT_PAIR(dly_wr_fbd, WR_FBD), + HNS3_PMU_DLY_EVT_PAIR(dly_wr_ebd, WR_EBD), + HNS3_PMU_DLY_EVT_PAIR(dly_rd_fbd, RD_FBD), + HNS3_PMU_DLY_EVT_PAIR(dly_rd_ebd, RD_EBD), + HNS3_PMU_DLY_EVT_PAIR(dly_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_DLY_EVT_PAIR(dly_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_DLY_EVT_PAIR(dly_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_DLY_EVT_PAIR(dly_wr_pay_m1, WR_PAY_M1), + HNS3_PMU_DLY_EVT_PAIR(dly_msix_write, MSIX_WRITE), + + /* interrupt rate events */ + HNS3_PMU_INTR_EVT_PAIR(pps_intr_msix_nic, MSIX_NIC), + + NULL +}; + +static struct attribute *hns3_pmu_filter_mode_attr[] = { + /* bandwidth events */ + HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_egu, SSU_EGU), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_rpu, SSU_RPU), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_roce, SSU_ROCE), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_roce_ssu, ROCE_SSU), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_tpu_ssu, TPU_SSU), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rpu_rcbrx, RPU_RCBRX), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rcbtx_txsch, RCBTX_TXSCH), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_fbd, WR_FBD), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_ebd, WR_EBD), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_fbd, RD_FBD), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_ebd, RD_EBD), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_pay_m1, WR_PAY_M1), + + /* packet rate events */ + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_igu_ssu, IGU_SSU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_egu, SSU_EGU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_rpu, SSU_RPU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_roce, SSU_ROCE), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_roce_ssu, ROCE_SSU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_tpu_ssu, TPU_SSU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rpu_rcbrx, RPU_RCBRX), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rcbtx_tpu, RCBTX_TPU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rcbtx_txsch, RCBTX_TXSCH), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_fbd, WR_FBD), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_ebd, WR_EBD), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_fbd, RD_FBD), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_ebd, RD_EBD), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_pay_m1, WR_PAY_M1), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_intr_nicroh_tx_pre, NICROH_TX_PRE), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_intr_nicroh_rx_pre, NICROH_RX_PRE), + + /* latency events */ + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tx_push_to_mac, TX_PUSH), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tx_normal_to_mac, TX), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_tx_th_nic, SSU_TX_NIC), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_tx_th_roce, SSU_TX_ROCE), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_rx_th_nic, SSU_RX_NIC), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_rx_th_roce, SSU_RX_ROCE), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rpu, RPU), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpu, TPU), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rpe, RPE), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpe_normal, TPE), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpe_push, TPE_PUSH), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_fbd, WR_FBD), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_ebd, WR_EBD), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_fbd, RD_FBD), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_ebd, RD_EBD), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_pay_m1, WR_PAY_M1), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_msix_write, MSIX_WRITE), + + /* interrupt rate events */ + HNS3_PMU_INTR_FLT_MODE_PAIR(pps_intr_msix_nic, MSIX_NIC), + + NULL +}; + +static struct attribute_group hns3_pmu_events_group = { + .name = "events", + .attrs = hns3_pmu_events_attr, +}; + +static struct attribute_group hns3_pmu_filter_mode_group = { + .name = "filtermode", + .attrs = hns3_pmu_filter_mode_attr, +}; + +static struct attribute *hns3_pmu_format_attr[] = { + HNS3_PMU_FORMAT_ATTR(subevent, "config:0-7"), + HNS3_PMU_FORMAT_ATTR(event_type, "config:8-15"), + HNS3_PMU_FORMAT_ATTR(ext_counter_used, "config:16"), + HNS3_PMU_FORMAT_ATTR(port, "config1:0-3"), + HNS3_PMU_FORMAT_ATTR(tc, "config1:4-7"), + HNS3_PMU_FORMAT_ATTR(bdf, "config1:8-23"), + HNS3_PMU_FORMAT_ATTR(queue, "config1:24-39"), + HNS3_PMU_FORMAT_ATTR(intr, "config1:40-51"), + HNS3_PMU_FORMAT_ATTR(global, "config1:52"), + NULL +}; + +static struct attribute_group hns3_pmu_format_group = { + .name = "format", + .attrs = hns3_pmu_format_attr, +}; + +static struct attribute *hns3_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +static struct attribute_group hns3_pmu_cpumask_attr_group = { + .attrs = hns3_pmu_cpumask_attrs, +}; + +static struct attribute *hns3_pmu_identifier_attrs[] = { + &dev_attr_identifier.attr, + NULL +}; + +static struct attribute_group hns3_pmu_identifier_attr_group = { + .attrs = hns3_pmu_identifier_attrs, +}; + +static struct attribute *hns3_pmu_bdf_range_attrs[] = { + &dev_attr_bdf_min.attr, + &dev_attr_bdf_max.attr, + NULL +}; + +static struct attribute_group hns3_pmu_bdf_range_attr_group = { + .attrs = hns3_pmu_bdf_range_attrs, +}; + +static struct attribute *hns3_pmu_hw_clk_freq_attrs[] = { + &dev_attr_hw_clk_freq.attr, + NULL +}; + +static struct attribute_group hns3_pmu_hw_clk_freq_attr_group = { + .attrs = hns3_pmu_hw_clk_freq_attrs, +}; + +static const struct attribute_group *hns3_pmu_attr_groups[] = { + &hns3_pmu_events_group, + &hns3_pmu_filter_mode_group, + &hns3_pmu_format_group, + &hns3_pmu_cpumask_attr_group, + &hns3_pmu_identifier_attr_group, + &hns3_pmu_bdf_range_attr_group, + &hns3_pmu_hw_clk_freq_attr_group, + NULL +}; + +static u32 hns3_pmu_get_event(struct perf_event *event) +{ + return hns3_pmu_get_ext_counter_used(event) << 16 | + hns3_pmu_get_event_type(event) << 8 | + hns3_pmu_get_subevent(event); +} + +static u32 hns3_pmu_get_real_event(struct perf_event *event) +{ + return hns3_pmu_get_event_type(event) << 8 | + hns3_pmu_get_subevent(event); +} + +static u32 hns3_pmu_get_offset(u32 offset, u32 idx) +{ + return offset + HNS3_PMU_REG_EVENT_OFFSET + + HNS3_PMU_REG_EVENT_SIZE * idx; +} + +static u32 hns3_pmu_readl(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx) +{ + u32 offset = hns3_pmu_get_offset(reg_offset, idx); + + return readl(hns3_pmu->base + offset); +} + +static void hns3_pmu_writel(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx, + u32 val) +{ + u32 offset = hns3_pmu_get_offset(reg_offset, idx); + + writel(val, hns3_pmu->base + offset); +} + +static u64 hns3_pmu_readq(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx) +{ + u32 offset = hns3_pmu_get_offset(reg_offset, idx); + + return readq(hns3_pmu->base + offset); +} + +static void hns3_pmu_writeq(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx, + u64 val) +{ + u32 offset = hns3_pmu_get_offset(reg_offset, idx); + + writeq(val, hns3_pmu->base + offset); +} + +static bool hns3_pmu_cmp_event(struct perf_event *target, + struct perf_event *event) +{ + return hns3_pmu_get_real_event(target) == hns3_pmu_get_real_event(event); +} + +static int hns3_pmu_find_related_event_idx(struct hns3_pmu *hns3_pmu, + struct perf_event *event) +{ + struct perf_event *sibling; + int hw_event_used = 0; + int idx; + + for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) { + sibling = hns3_pmu->hw_events[idx]; + if (!sibling) + continue; + + hw_event_used++; + + if (!hns3_pmu_cmp_event(sibling, event)) + continue; + + /* Related events is used in group */ + if (sibling->group_leader == event->group_leader) + return idx; + } + + /* No related event and all hardware events are used up */ + if (hw_event_used >= HNS3_PMU_MAX_HW_EVENTS) + return -EBUSY; + + /* No related event and there is extra hardware events can be use */ + return -ENOENT; +} + +static int hns3_pmu_get_event_idx(struct hns3_pmu *hns3_pmu) +{ + int idx; + + for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) { + if (!hns3_pmu->hw_events[idx]) + return idx; + } + + return -EBUSY; +} + +static bool hns3_pmu_valid_bdf(struct hns3_pmu *hns3_pmu, u16 bdf) +{ + struct pci_dev *pdev; + + if (bdf < hns3_pmu->bdf_min || bdf > hns3_pmu->bdf_max) { + pci_err(hns3_pmu->pdev, "Invalid EP device: %#x!\n", bdf); + return false; + } + + pdev = pci_get_domain_bus_and_slot(pci_domain_nr(hns3_pmu->pdev->bus), + PCI_BUS_NUM(bdf), + GET_PCI_DEVFN(bdf)); + if (!pdev) { + pci_err(hns3_pmu->pdev, "Nonexistent EP device: %#x!\n", bdf); + return false; + } + + pci_dev_put(pdev); + return true; +} + +static void hns3_pmu_set_qid_para(struct hns3_pmu *hns3_pmu, u32 idx, u16 bdf, + u16 queue) +{ + u32 val; + + val = GET_PCI_DEVFN(bdf); + val |= (u32)queue << HNS3_PMU_QID_PARA_QUEUE_S; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_PARA, idx, val); +} + +static bool hns3_pmu_qid_req_start(struct hns3_pmu *hns3_pmu, u32 idx) +{ + bool queue_id_valid = false; + u32 reg_qid_ctrl, val; + int err; + + /* enable queue id request */ + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_CTRL, idx, + HNS3_PMU_QID_CTRL_REQ_ENABLE); + + reg_qid_ctrl = hns3_pmu_get_offset(HNS3_PMU_REG_EVENT_QID_CTRL, idx); + err = readl_poll_timeout(hns3_pmu->base + reg_qid_ctrl, val, + val & HNS3_PMU_QID_CTRL_DONE, 1, 1000); + if (err == -ETIMEDOUT) { + pci_err(hns3_pmu->pdev, "QID request timeout!\n"); + goto out; + } + + queue_id_valid = !(val & HNS3_PMU_QID_CTRL_MISS); + +out: + /* disable qid request and clear status */ + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_CTRL, idx, 0); + + return queue_id_valid; +} + +static bool hns3_pmu_valid_queue(struct hns3_pmu *hns3_pmu, u32 idx, u16 bdf, + u16 queue) +{ + hns3_pmu_set_qid_para(hns3_pmu, idx, bdf, queue); + + return hns3_pmu_qid_req_start(hns3_pmu, idx); +} + +static struct hns3_pmu_event_attr *hns3_pmu_get_pmu_event(u32 event) +{ + struct hns3_pmu_event_attr *pmu_event; + struct dev_ext_attribute *eattr; + struct device_attribute *dattr; + struct attribute *attr; + u32 i; + + for (i = 0; i < ARRAY_SIZE(hns3_pmu_events_attr) - 1; i++) { + attr = hns3_pmu_events_attr[i]; + dattr = container_of(attr, struct device_attribute, attr); + eattr = container_of(dattr, struct dev_ext_attribute, attr); + pmu_event = eattr->var; + + if (event == pmu_event->event) + return pmu_event; + } + + return NULL; +} + +static int hns3_pmu_set_func_mode(struct perf_event *event, + struct hns3_pmu *hns3_pmu) +{ + struct hw_perf_event *hwc = &event->hw; + u16 bdf = hns3_pmu_get_bdf(event); + + if (!hns3_pmu_valid_bdf(hns3_pmu, bdf)) + return -ENOENT; + + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC); + + return 0; +} + +static int hns3_pmu_set_func_queue_mode(struct perf_event *event, + struct hns3_pmu *hns3_pmu) +{ + u16 queue_id = hns3_pmu_get_queue(event); + struct hw_perf_event *hwc = &event->hw; + u16 bdf = hns3_pmu_get_bdf(event); + + if (!hns3_pmu_valid_bdf(hns3_pmu, bdf)) + return -ENOENT; + + if (!hns3_pmu_valid_queue(hns3_pmu, hwc->idx, bdf, queue_id)) { + pci_err(hns3_pmu->pdev, "Invalid queue: %u\n", queue_id); + return -ENOENT; + } + + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC_QUEUE); + + return 0; +} + +static bool +hns3_pmu_is_enabled_global_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u8 global = hns3_pmu_get_global(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_GLOBAL)) + return false; + + return global; +} + +static bool hns3_pmu_is_enabled_func_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u16 queue_id = hns3_pmu_get_queue(event); + u16 bdf = hns3_pmu_get_bdf(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC)) + return false; + else if (queue_id != HNS3_PMU_FILTER_ALL_QUEUE) + return false; + + return bdf; +} + +static bool +hns3_pmu_is_enabled_func_queue_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u16 queue_id = hns3_pmu_get_queue(event); + u16 bdf = hns3_pmu_get_bdf(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE)) + return false; + else if (queue_id == HNS3_PMU_FILTER_ALL_QUEUE) + return false; + + return bdf; +} + +static bool hns3_pmu_is_enabled_port_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u8 tc_id = hns3_pmu_get_tc(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT)) + return false; + + return tc_id == HNS3_PMU_FILTER_ALL_TC; +} + +static bool +hns3_pmu_is_enabled_port_tc_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u8 tc_id = hns3_pmu_get_tc(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT_TC)) + return false; + + return tc_id != HNS3_PMU_FILTER_ALL_TC; +} + +static bool +hns3_pmu_is_enabled_func_intr_mode(struct perf_event *event, + struct hns3_pmu *hns3_pmu, + struct hns3_pmu_event_attr *pmu_event) +{ + u16 bdf = hns3_pmu_get_bdf(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_INTR)) + return false; + + return hns3_pmu_valid_bdf(hns3_pmu, bdf); +} + +static int hns3_pmu_select_filter_mode(struct perf_event *event, + struct hns3_pmu *hns3_pmu) +{ + u32 event_id = hns3_pmu_get_event(event); + struct hw_perf_event *hwc = &event->hw; + struct hns3_pmu_event_attr *pmu_event; + + pmu_event = hns3_pmu_get_pmu_event(event_id); + if (!pmu_event) { + pci_err(hns3_pmu->pdev, "Invalid pmu event\n"); + return -ENOENT; + } + + if (hns3_pmu_is_enabled_global_mode(event, pmu_event)) { + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_GLOBAL); + return 0; + } + + if (hns3_pmu_is_enabled_func_mode(event, pmu_event)) + return hns3_pmu_set_func_mode(event, hns3_pmu); + + if (hns3_pmu_is_enabled_func_queue_mode(event, pmu_event)) + return hns3_pmu_set_func_queue_mode(event, hns3_pmu); + + if (hns3_pmu_is_enabled_port_mode(event, pmu_event)) { + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_PORT); + return 0; + } + + if (hns3_pmu_is_enabled_port_tc_mode(event, pmu_event)) { + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_PORT_TC); + return 0; + } + + if (hns3_pmu_is_enabled_func_intr_mode(event, hns3_pmu, pmu_event)) { + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC_INTR); + return 0; + } + + return -ENOENT; +} + +static bool hns3_pmu_validate_event_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct perf_event *event_group[HNS3_PMU_MAX_HW_EVENTS]; + int counters = 1; + int num; + + event_group[0] = leader; + if (!is_software_event(leader)) { + if (leader->pmu != event->pmu) + return false; + + if (leader != event && !hns3_pmu_cmp_event(leader, event)) + event_group[counters++] = event; + } + + for_each_sibling_event(sibling, event->group_leader) { + if (is_software_event(sibling)) + continue; + + if (sibling->pmu != event->pmu) + return false; + + for (num = 0; num < counters; num++) { + if (hns3_pmu_cmp_event(event_group[num], sibling)) + break; + } + + if (num == counters) + event_group[counters++] = sibling; + } + + return counters <= HNS3_PMU_MAX_HW_EVENTS; +} + +static u32 hns3_pmu_get_filter_condition(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u16 intr_id = hns3_pmu_get_intr(event); + u8 port_id = hns3_pmu_get_port(event); + u16 bdf = hns3_pmu_get_bdf(event); + u8 tc_id = hns3_pmu_get_tc(event); + u8 filter_mode; + + filter_mode = *(u8 *)hwc->addr_filters; + switch (filter_mode) { + case HNS3_PMU_HW_FILTER_PORT: + return FILTER_CONDITION_PORT(port_id); + case HNS3_PMU_HW_FILTER_PORT_TC: + return FILTER_CONDITION_PORT_TC(port_id, tc_id); + case HNS3_PMU_HW_FILTER_FUNC: + case HNS3_PMU_HW_FILTER_FUNC_QUEUE: + return GET_PCI_DEVFN(bdf); + case HNS3_PMU_HW_FILTER_FUNC_INTR: + return FILTER_CONDITION_FUNC_INTR(GET_PCI_DEVFN(bdf), intr_id); + default: + break; + } + + return 0; +} + +static void hns3_pmu_config_filter(struct perf_event *event) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + u8 event_type = hns3_pmu_get_event_type(event); + u8 subevent_id = hns3_pmu_get_subevent(event); + u16 queue_id = hns3_pmu_get_queue(event); + struct hw_perf_event *hwc = &event->hw; + u8 filter_mode = *(u8 *)hwc->addr_filters; + u16 bdf = hns3_pmu_get_bdf(event); + u32 idx = hwc->idx; + u32 val; + + val = event_type; + val |= subevent_id << HNS3_PMU_CTRL_SUBEVENT_S; + val |= filter_mode << HNS3_PMU_CTRL_FILTER_MODE_S; + val |= HNS3_PMU_EVENT_OVERFLOW_RESTART; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); + + val = hns3_pmu_get_filter_condition(event); + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_HIGH, idx, val); + + if (filter_mode == HNS3_PMU_HW_FILTER_FUNC_QUEUE) + hns3_pmu_set_qid_para(hns3_pmu, idx, bdf, queue_id); +} + +static void hns3_pmu_enable_counter(struct hns3_pmu *hns3_pmu, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx); + val |= HNS3_PMU_EVENT_EN; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); +} + +static void hns3_pmu_disable_counter(struct hns3_pmu *hns3_pmu, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx); + val &= ~HNS3_PMU_EVENT_EN; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); +} + +static void hns3_pmu_enable_intr(struct hns3_pmu *hns3_pmu, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx); + val &= ~HNS3_PMU_INTR_MASK_OVERFLOW; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx, val); +} + +static void hns3_pmu_disable_intr(struct hns3_pmu *hns3_pmu, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx); + val |= HNS3_PMU_INTR_MASK_OVERFLOW; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx, val); +} + +static void hns3_pmu_clear_intr_status(struct hns3_pmu *hns3_pmu, u32 idx) +{ + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx); + val |= HNS3_PMU_EVENT_STATUS_RESET; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx); + val &= ~HNS3_PMU_EVENT_STATUS_RESET; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); +} + +static u64 hns3_pmu_read_counter(struct perf_event *event) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + + return hns3_pmu_readq(hns3_pmu, event->hw.event_base, event->hw.idx); +} + +static void hns3_pmu_write_counter(struct perf_event *event, u64 value) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + u32 idx = event->hw.idx; + + hns3_pmu_writeq(hns3_pmu, HNS3_PMU_REG_EVENT_COUNTER, idx, value); + hns3_pmu_writeq(hns3_pmu, HNS3_PMU_REG_EVENT_EXT_COUNTER, idx, value); +} + +static void hns3_pmu_init_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + local64_set(&hwc->prev_count, 0); + hns3_pmu_write_counter(event, 0); +} + +static int hns3_pmu_event_init(struct perf_event *event) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx; + int ret; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* Sampling is not supported */ + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EOPNOTSUPP; + + event->cpu = hns3_pmu->on_cpu; + + idx = hns3_pmu_get_event_idx(hns3_pmu); + if (idx < 0) { + pci_err(hns3_pmu->pdev, "Up to %u events are supported!\n", + HNS3_PMU_MAX_HW_EVENTS); + return -EBUSY; + } + + hwc->idx = idx; + + ret = hns3_pmu_select_filter_mode(event, hns3_pmu); + if (ret) { + pci_err(hns3_pmu->pdev, "Invalid filter, ret = %d.\n", ret); + return ret; + } + + if (!hns3_pmu_validate_event_group(event)) { + pci_err(hns3_pmu->pdev, "Invalid event group.\n"); + return -EINVAL; + } + + if (hns3_pmu_get_ext_counter_used(event)) + hwc->event_base = HNS3_PMU_REG_EVENT_EXT_COUNTER; + else + hwc->event_base = HNS3_PMU_REG_EVENT_COUNTER; + + return 0; +} + +static void hns3_pmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 new_cnt, prev_cnt, delta; + + do { + prev_cnt = local64_read(&hwc->prev_count); + new_cnt = hns3_pmu_read_counter(event); + } while (local64_cmpxchg(&hwc->prev_count, prev_cnt, new_cnt) != + prev_cnt); + + delta = new_cnt - prev_cnt; + local64_add(delta, &event->count); +} + +static void hns3_pmu_start(struct perf_event *event, int flags) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + hwc->state = 0; + + hns3_pmu_config_filter(event); + hns3_pmu_init_counter(event); + hns3_pmu_enable_intr(hns3_pmu, hwc); + hns3_pmu_enable_counter(hns3_pmu, hwc); + + perf_event_update_userpage(event); +} + +static void hns3_pmu_stop(struct perf_event *event, int flags) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + hns3_pmu_disable_counter(hns3_pmu, hwc); + hns3_pmu_disable_intr(hns3_pmu, hwc); + + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + + if (hwc->state & PERF_HES_UPTODATE) + return; + + /* Read hardware counter and update the perf counter statistics */ + hns3_pmu_read(event); + hwc->state |= PERF_HES_UPTODATE; +} + +static int hns3_pmu_add(struct perf_event *event, int flags) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + + /* Check all working events to find a related event. */ + idx = hns3_pmu_find_related_event_idx(hns3_pmu, event); + if (idx < 0 && idx != -ENOENT) + return idx; + + /* Current event shares an enabled hardware event with related event */ + if (idx >= 0 && idx < HNS3_PMU_MAX_HW_EVENTS) { + hwc->idx = idx; + goto start_count; + } + + idx = hns3_pmu_get_event_idx(hns3_pmu); + if (idx < 0) + return idx; + + hwc->idx = idx; + hns3_pmu->hw_events[idx] = event; + +start_count: + if (flags & PERF_EF_START) + hns3_pmu_start(event, PERF_EF_RELOAD); + + return 0; +} + +static void hns3_pmu_del(struct perf_event *event, int flags) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + hns3_pmu_stop(event, PERF_EF_UPDATE); + hns3_pmu->hw_events[hwc->idx] = NULL; + perf_event_update_userpage(event); +} + +static void hns3_pmu_enable(struct pmu *pmu) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(pmu); + u32 val; + + val = readl(hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL); + val |= HNS3_PMU_GLOBAL_START; + writel(val, hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL); +} + +static void hns3_pmu_disable(struct pmu *pmu) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(pmu); + u32 val; + + val = readl(hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL); + val &= ~HNS3_PMU_GLOBAL_START; + writel(val, hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL); +} + +static int hns3_pmu_alloc_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu) +{ + u16 device_id; + char *name; + u32 val; + + hns3_pmu->base = pcim_iomap_table(pdev)[BAR_2]; + if (!hns3_pmu->base) { + pci_err(pdev, "ioremap failed\n"); + return -ENOMEM; + } + + hns3_pmu->hw_clk_freq = readl(hns3_pmu->base + HNS3_PMU_REG_CLOCK_FREQ); + + val = readl(hns3_pmu->base + HNS3_PMU_REG_BDF); + hns3_pmu->bdf_min = val & 0xffff; + hns3_pmu->bdf_max = val >> 16; + + val = readl(hns3_pmu->base + HNS3_PMU_REG_DEVICE_ID); + device_id = val & 0xffff; + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hns3_pmu_sicl_%u", device_id); + if (!name) + return -ENOMEM; + + hns3_pmu->pdev = pdev; + hns3_pmu->on_cpu = -1; + hns3_pmu->identifier = readl(hns3_pmu->base + HNS3_PMU_REG_VERSION); + hns3_pmu->pmu = (struct pmu) { + .name = name, + .module = THIS_MODULE, + .event_init = hns3_pmu_event_init, + .pmu_enable = hns3_pmu_enable, + .pmu_disable = hns3_pmu_disable, + .add = hns3_pmu_add, + .del = hns3_pmu_del, + .start = hns3_pmu_start, + .stop = hns3_pmu_stop, + .read = hns3_pmu_read, + .task_ctx_nr = perf_invalid_context, + .attr_groups = hns3_pmu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + return 0; +} + +static irqreturn_t hns3_pmu_irq(int irq, void *data) +{ + struct hns3_pmu *hns3_pmu = data; + u32 intr_status, idx; + + for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) { + intr_status = hns3_pmu_readl(hns3_pmu, + HNS3_PMU_REG_EVENT_INTR_STATUS, + idx); + + /* + * As each counter will restart from 0 when it is overflowed, + * extra processing is no need, just clear interrupt status. + */ + if (intr_status) + hns3_pmu_clear_intr_status(hns3_pmu, idx); + } + + return IRQ_HANDLED; +} + +static int hns3_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hns3_pmu *hns3_pmu; + + hns3_pmu = hlist_entry_safe(node, struct hns3_pmu, node); + if (!hns3_pmu) + return -ENODEV; + + if (hns3_pmu->on_cpu == -1) { + hns3_pmu->on_cpu = cpu; + irq_set_affinity(hns3_pmu->irq, cpumask_of(cpu)); + } + + return 0; +} + +static int hns3_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hns3_pmu *hns3_pmu; + unsigned int target; + + hns3_pmu = hlist_entry_safe(node, struct hns3_pmu, node); + if (!hns3_pmu) + return -ENODEV; + + /* Nothing to do if this CPU doesn't own the PMU */ + if (hns3_pmu->on_cpu != cpu) + return 0; + + /* Choose a new CPU from all online cpus */ + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(&hns3_pmu->pmu, cpu, target); + hns3_pmu->on_cpu = target; + irq_set_affinity(hns3_pmu->irq, cpumask_of(target)); + + return 0; +} + +static void hns3_pmu_free_irq(void *data) +{ + struct pci_dev *pdev = data; + + pci_free_irq_vectors(pdev); +} + +static int hns3_pmu_irq_register(struct pci_dev *pdev, + struct hns3_pmu *hns3_pmu) +{ + int irq, ret; + + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI); + if (ret < 0) { + pci_err(pdev, "failed to enable MSI vectors, ret = %d.\n", ret); + return ret; + } + + ret = devm_add_action(&pdev->dev, hns3_pmu_free_irq, pdev); + if (ret) { + pci_err(pdev, "failed to add free irq action, ret = %d.\n", ret); + return ret; + } + + irq = pci_irq_vector(pdev, 0); + ret = devm_request_irq(&pdev->dev, irq, hns3_pmu_irq, 0, + hns3_pmu->pmu.name, hns3_pmu); + if (ret) { + pci_err(pdev, "failed to register irq, ret = %d.\n", ret); + return ret; + } + + hns3_pmu->irq = irq; + + return 0; +} + +static int hns3_pmu_init_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu) +{ + int ret; + + ret = hns3_pmu_alloc_pmu(pdev, hns3_pmu); + if (ret) + return ret; + + ret = hns3_pmu_irq_register(pdev, hns3_pmu); + if (ret) + return ret; + + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, + &hns3_pmu->node); + if (ret) { + pci_err(pdev, "failed to register hotplug, ret = %d.\n", ret); + return ret; + } + + ret = perf_pmu_register(&hns3_pmu->pmu, hns3_pmu->pmu.name, -1); + if (ret) { + pci_err(pdev, "failed to register perf PMU, ret = %d.\n", ret); + cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, + &hns3_pmu->node); + } + + return ret; +} + +static void hns3_pmu_uninit_pmu(struct pci_dev *pdev) +{ + struct hns3_pmu *hns3_pmu = pci_get_drvdata(pdev); + + perf_pmu_unregister(&hns3_pmu->pmu); + cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, + &hns3_pmu->node); +} + +static int hns3_pmu_init_dev(struct pci_dev *pdev) +{ + int ret; + + ret = pcim_enable_device(pdev); + if (ret) { + pci_err(pdev, "failed to enable pci device, ret = %d.\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(BAR_2), "hns3_pmu"); + if (ret < 0) { + pci_err(pdev, "failed to request pci region, ret = %d.\n", ret); + return ret; + } + + pci_set_master(pdev); + + return 0; +} + +static int hns3_pmu_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct hns3_pmu *hns3_pmu; + int ret; + + hns3_pmu = devm_kzalloc(&pdev->dev, sizeof(*hns3_pmu), GFP_KERNEL); + if (!hns3_pmu) + return -ENOMEM; + + ret = hns3_pmu_init_dev(pdev); + if (ret) + return ret; + + ret = hns3_pmu_init_pmu(pdev, hns3_pmu); + if (ret) { + pci_clear_master(pdev); + return ret; + } + + pci_set_drvdata(pdev, hns3_pmu); + + return ret; +} + +static void hns3_pmu_remove(struct pci_dev *pdev) +{ + hns3_pmu_uninit_pmu(pdev); + pci_clear_master(pdev); + pci_set_drvdata(pdev, NULL); +} + +static const struct pci_device_id hns3_pmu_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa22b) }, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, hns3_pmu_ids); + +static struct pci_driver hns3_pmu_driver = { + .name = "hns3_pmu", + .id_table = hns3_pmu_ids, + .probe = hns3_pmu_probe, + .remove = hns3_pmu_remove, +}; + +static int __init hns3_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, + "AP_PERF_ARM_HNS3_PMU_ONLINE", + hns3_pmu_online_cpu, + hns3_pmu_offline_cpu); + if (ret) { + pr_err("failed to setup HNS3 PMU hotplug, ret = %d.\n", ret); + return ret; + } + + ret = pci_register_driver(&hns3_pmu_driver); + if (ret) { + pr_err("failed to register pci driver, ret = %d.\n", ret); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE); + } + + return ret; +} +module_init(hns3_pmu_module_init); + +static void __exit hns3_pmu_module_exit(void) +{ + pci_unregister_driver(&hns3_pmu_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE); +} +module_exit(hns3_pmu_module_exit); + +MODULE_DESCRIPTION("HNS3 PMU driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c index 282d3a071a67..69c3050a4348 100644 --- a/drivers/perf/marvell_cn10k_tad_pmu.c +++ b/drivers/perf/marvell_cn10k_tad_pmu.c @@ -2,10 +2,6 @@ /* Marvell CN10K LLC-TAD perf driver * * Copyright (C) 2021 Marvell - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #define pr_fmt(fmt) "tad_pmu: " fmt @@ -18,9 +14,9 @@ #include <linux/perf_event.h> #include <linux/platform_device.h> -#define TAD_PFC_OFFSET 0x0 +#define TAD_PFC_OFFSET 0x800 #define TAD_PFC(counter) (TAD_PFC_OFFSET | (counter << 3)) -#define TAD_PRF_OFFSET 0x100 +#define TAD_PRF_OFFSET 0x900 #define TAD_PRF(counter) (TAD_PRF_OFFSET | (counter << 3)) #define TAD_PRF_CNTSEL_MASK 0xFF #define TAD_MAX_COUNTERS 8 @@ -100,9 +96,7 @@ static void tad_pmu_event_counter_start(struct perf_event *event, int flags) * which sets TAD()_PRF()[CNTSEL] != 0 */ for (i = 0; i < tad_pmu->region_cnt; i++) { - reg_val = readq_relaxed(tad_pmu->regions[i].base + - TAD_PRF(counter_idx)); - reg_val |= (event_idx & 0xFF); + reg_val = event_idx & 0xFF; writeq_relaxed(reg_val, tad_pmu->regions[i].base + TAD_PRF(counter_idx)); } diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index b2b8d2074ed0..2c961839903d 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -121,7 +121,7 @@ u64 riscv_pmu_event_update(struct perf_event *event) return delta; } -static void riscv_pmu_stop(struct perf_event *event, int flags) +void riscv_pmu_stop(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); @@ -175,7 +175,7 @@ int riscv_pmu_event_set_period(struct perf_event *event) return overflow; } -static void riscv_pmu_start(struct perf_event *event, int flags) +void riscv_pmu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index dca3537a8dcc..79a3de515ece 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -17,10 +17,30 @@ #include <linux/irqdomain.h> #include <linux/of_irq.h> #include <linux/of.h> +#include <linux/cpu_pm.h> #include <asm/sbi.h> #include <asm/hwcap.h> +PMU_FORMAT_ATTR(event, "config:0-47"); +PMU_FORMAT_ATTR(firmware, "config:63"); + +static struct attribute *riscv_arch_formats_attr[] = { + &format_attr_event.attr, + &format_attr_firmware.attr, + NULL, +}; + +static struct attribute_group riscv_pmu_format_group = { + .name = "format", + .attrs = riscv_arch_formats_attr, +}; + +static const struct attribute_group *riscv_pmu_attr_groups[] = { + &riscv_pmu_format_group, + NULL, +}; + union sbi_pmu_ctr_info { unsigned long value; struct { @@ -666,12 +686,15 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde child = of_get_compatible_child(cpu, "riscv,cpu-intc"); if (!child) { pr_err("Failed to find INTC node\n"); + of_node_put(cpu); return -ENODEV; } domain = irq_find_host(child); of_node_put(child); - if (domain) + if (domain) { + of_node_put(cpu); break; + } } if (!domain) { pr_err("Failed to find INTC IRQ root domain\n"); @@ -693,6 +716,73 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde return 0; } +#ifdef CONFIG_CPU_PM +static int riscv_pm_pmu_notify(struct notifier_block *b, unsigned long cmd, + void *v) +{ + struct riscv_pmu *rvpmu = container_of(b, struct riscv_pmu, riscv_pm_nb); + struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); + int enabled = bitmap_weight(cpuc->used_hw_ctrs, RISCV_MAX_COUNTERS); + struct perf_event *event; + int idx; + + if (!enabled) + return NOTIFY_OK; + + for (idx = 0; idx < RISCV_MAX_COUNTERS; idx++) { + event = cpuc->events[idx]; + if (!event) + continue; + + switch (cmd) { + case CPU_PM_ENTER: + /* + * Stop and update the counter + */ + riscv_pmu_stop(event, PERF_EF_UPDATE); + break; + case CPU_PM_EXIT: + case CPU_PM_ENTER_FAILED: + /* + * Restore and enable the counter. + * + * Requires RCU read locking to be functional, + * wrap the call within RCU_NONIDLE to make the + * RCU subsystem aware this cpu is not idle from + * an RCU perspective for the riscv_pmu_start() call + * duration. + */ + RCU_NONIDLE(riscv_pmu_start(event, PERF_EF_RELOAD)); + break; + default: + break; + } + } + + return NOTIFY_OK; +} + +static int riscv_pm_pmu_register(struct riscv_pmu *pmu) +{ + pmu->riscv_pm_nb.notifier_call = riscv_pm_pmu_notify; + return cpu_pm_register_notifier(&pmu->riscv_pm_nb); +} + +static void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) +{ + cpu_pm_unregister_notifier(&pmu->riscv_pm_nb); +} +#else +static inline int riscv_pm_pmu_register(struct riscv_pmu *pmu) { return 0; } +static inline void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) { } +#endif + +static void riscv_pmu_destroy(struct riscv_pmu *pmu) +{ + riscv_pm_pmu_unregister(pmu); + cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); +} + static int pmu_sbi_device_probe(struct platform_device *pdev) { struct riscv_pmu *pmu = NULL; @@ -720,6 +810,7 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE; } + pmu->pmu.attr_groups = riscv_pmu_attr_groups; pmu->num_counters = num_counters; pmu->ctr_start = pmu_sbi_ctr_start; pmu->ctr_stop = pmu_sbi_ctr_stop; @@ -733,14 +824,19 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) if (ret) return ret; + ret = riscv_pm_pmu_register(pmu); + if (ret) + goto out_unregister; + ret = perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW); - if (ret) { - cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); - return ret; - } + if (ret) + goto out_unregister; return 0; +out_unregister: + riscv_pmu_destroy(pmu); + out_free: kfree(pmu); return ret; diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index fd7e8fbaeef1..961f4d88f9ef 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -38,6 +38,10 @@ #define wmb() do { kcsan_wmb(); __wmb(); } while (0) #endif +#ifdef __dma_mb +#define dma_mb() do { kcsan_mb(); __dma_mb(); } while (0) +#endif + #ifdef __dma_rmb #define dma_rmb() do { kcsan_rmb(); __dma_rmb(); } while (0) #endif @@ -65,6 +69,10 @@ #define wmb() mb() #endif +#ifndef dma_mb +#define dma_mb() mb() +#endif + #ifndef dma_rmb #define dma_rmb() rmb() #endif diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 98954dda5734..72974cb81343 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -964,7 +964,34 @@ static inline void iounmap(volatile void __iomem *addr) #elif defined(CONFIG_GENERIC_IOREMAP) #include <linux/pgtable.h> -void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot); +/* + * Arch code can implement the following two hooks when using GENERIC_IOREMAP + * ioremap_allowed() return a bool, + * - true means continue to remap + * - false means skip remap and return directly + * iounmap_allowed() return a bool, + * - true means continue to vunmap + * - false means skip vunmap and return directly + */ +#ifndef ioremap_allowed +#define ioremap_allowed ioremap_allowed +static inline bool ioremap_allowed(phys_addr_t phys_addr, size_t size, + unsigned long prot) +{ + return true; +} +#endif + +#ifndef iounmap_allowed +#define iounmap_allowed iounmap_allowed +static inline bool iounmap_allowed(void *addr) +{ + return true; +} +#endif + +void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, + unsigned long prot); void iounmap(volatile void __iomem *addr); static inline void __iomem *ioremap(phys_addr_t addr, size_t size) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index b66c5f389159..154daff0b46b 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -229,6 +229,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, + CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 2d2ccae933c2..0ace7759acd2 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -348,7 +348,7 @@ struct vm_area_struct; #define GFP_DMA32 __GFP_DMA32 #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | \ - __GFP_SKIP_KASAN_POISON) + __GFP_SKIP_KASAN_POISON | __GFP_SKIP_KASAN_UNPOISON) #define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) #define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index de29821231c9..4ddaf6ad73ef 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -461,4 +461,16 @@ static inline int split_folio_to_list(struct folio *folio, return split_huge_page_to_list(&folio->page, list); } +/* + * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to + * limitations in the implementation like arm64 MTE can override this to + * false + */ +#ifndef arch_thp_swp_supported +static inline bool arch_thp_swp_supported(void) +{ + return true; +} +#endif + #endif /* _LINUX_HUGE_MM_H */ diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 46f9b6fe306e..bf66fe011fa8 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -56,9 +56,13 @@ struct riscv_pmu { struct cpu_hw_events __percpu *hw_events; struct hlist_node node; + struct notifier_block riscv_pm_nb; }; #define to_riscv_pmu(p) (container_of(p, struct riscv_pmu, pmu)) + +void riscv_pmu_start(struct perf_event *event, int flags); +void riscv_pmu_stop(struct perf_event *event, int flags); unsigned long riscv_pmu_ctr_read_csr(unsigned long csr); int riscv_pmu_event_set_period(struct perf_event *event); uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event); diff --git a/mm/ioremap.c b/mm/ioremap.c index 5fe598ecd9b7..8652426282cc 100644 --- a/mm/ioremap.c +++ b/mm/ioremap.c @@ -11,29 +11,35 @@ #include <linux/io.h> #include <linux/export.h> -void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot) +void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, + unsigned long prot) { unsigned long offset, vaddr; phys_addr_t last_addr; struct vm_struct *area; /* Disallow wrap-around or zero size */ - last_addr = addr + size - 1; - if (!size || last_addr < addr) + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) return NULL; /* Page-align mappings */ - offset = addr & (~PAGE_MASK); - addr -= offset; + offset = phys_addr & (~PAGE_MASK); + phys_addr -= offset; size = PAGE_ALIGN(size + offset); + if (!ioremap_allowed(phys_addr, size, prot)) + return NULL; + area = get_vm_area_caller(size, VM_IOREMAP, __builtin_return_address(0)); if (!area) return NULL; vaddr = (unsigned long)area->addr; + area->phys_addr = phys_addr; - if (ioremap_page_range(vaddr, vaddr + size, addr, __pgprot(prot))) { + if (ioremap_page_range(vaddr, vaddr + size, phys_addr, + __pgprot(prot))) { free_vm_area(area); return NULL; } @@ -44,6 +50,12 @@ EXPORT_SYMBOL(ioremap_prot); void iounmap(volatile void __iomem *addr) { - vunmap((void *)((unsigned long)addr & PAGE_MASK)); + void *vaddr = (void *)((unsigned long)addr & PAGE_MASK); + + if (!iounmap_allowed(vaddr)) + return; + + if (is_vmalloc_addr(vaddr)) + vunmap(vaddr); } EXPORT_SYMBOL(iounmap); diff --git a/mm/kasan/common.c b/mm/kasan/common.c index c40c0e7b3b5f..78be2beb7453 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -108,9 +108,10 @@ void __kasan_unpoison_pages(struct page *page, unsigned int order, bool init) return; tag = kasan_random_tag(); + kasan_unpoison(set_tag(page_address(page), tag), + PAGE_SIZE << order, init); for (i = 0; i < (1 << order); i++) page_kasan_tag_set(page + i, tag); - kasan_unpoison(page_address(page), PAGE_SIZE << order, init); } void __kasan_poison_pages(struct page *page, unsigned int order, bool init) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b5b14b78c4fd..b0bcab50f0a3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2361,7 +2361,7 @@ static inline bool check_new_pcp(struct page *page, unsigned int order) } #endif /* CONFIG_DEBUG_VM */ -static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags) +static inline bool should_skip_kasan_unpoison(gfp_t flags) { /* Don't skip if a software KASAN mode is enabled. */ if (IS_ENABLED(CONFIG_KASAN_GENERIC) || @@ -2373,12 +2373,10 @@ static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags) return true; /* - * With hardware tag-based KASAN enabled, skip if either: - * - * 1. Memory tags have already been cleared via tag_clear_highpage(). - * 2. Skipping has been requested via __GFP_SKIP_KASAN_UNPOISON. + * With hardware tag-based KASAN enabled, skip if this has been + * requested via __GFP_SKIP_KASAN_UNPOISON. */ - return init_tags || (flags & __GFP_SKIP_KASAN_UNPOISON); + return flags & __GFP_SKIP_KASAN_UNPOISON; } static inline bool should_skip_init(gfp_t flags) @@ -2397,6 +2395,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order, bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags) && !should_skip_init(gfp_flags); bool init_tags = init && (gfp_flags & __GFP_ZEROTAGS); + int i; set_page_private(page, 0); set_page_refcounted(page); @@ -2422,8 +2421,6 @@ inline void post_alloc_hook(struct page *page, unsigned int order, * should be initialized as well). */ if (init_tags) { - int i; - /* Initialize both memory and tags. */ for (i = 0; i != 1 << order; ++i) tag_clear_highpage(page + i); @@ -2431,13 +2428,17 @@ inline void post_alloc_hook(struct page *page, unsigned int order, /* Note that memory is already initialized by the loop above. */ init = false; } - if (!should_skip_kasan_unpoison(gfp_flags, init_tags)) { + if (!should_skip_kasan_unpoison(gfp_flags)) { /* Unpoison shadow memory or set memory tags. */ kasan_unpoison_pages(page, order, init); /* Note that memory is already initialized by KASAN. */ if (kasan_has_integrated_init()) init = false; + } else { + /* Ensure page_address() dereferencing does not fault. */ + for (i = 0; i != 1 << order; ++i) + page_kasan_tag_reset(page + i); } /* If memory is still not initialized, do it now. */ if (init) diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 2a65a89b5b4d..10b94d64cc25 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -307,7 +307,7 @@ swp_entry_t folio_alloc_swap(struct folio *folio) entry.val = 0; if (folio_test_large(folio)) { - if (IS_ENABLED(CONFIG_THP_SWAP)) + if (IS_ENABLED(CONFIG_THP_SWAP) && arch_thp_swp_supported()) get_swap_pages(1, &entry, folio_nr_pages(folio)); goto out; } |