Diffstat (limited to 'arch/riscv')
37 files changed, 401 insertions(+), 331 deletions(-)
diff --git a/arch/riscv/Kbuild b/arch/riscv/Kbuild index afa83e307a2e..d25ad1c19f88 100644 --- a/arch/riscv/Kbuild +++ b/arch/riscv/Kbuild @@ -5,7 +5,7 @@ obj-$(CONFIG_BUILTIN_DTB) += boot/dts/ obj-y += errata/ obj-$(CONFIG_KVM) += kvm/ -obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/ +obj-$(CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY) += purgatory/ # for cleaning subdir- += boot diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 4c07b9189c86..f9fbfbf11ad2 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -53,7 +53,7 @@ config RISCV select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT select ARCH_WANT_HUGE_PMD_SHARE if 64BIT select ARCH_WANT_LD_ORPHAN_WARN if !XIP_KERNEL - select ARCH_WANT_OPTIMIZE_VMEMMAP + select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU select BUILDTIME_TABLE_SORT if MMU @@ -570,24 +570,30 @@ config TOOLCHAIN_HAS_ZIHINTPAUSE config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI def_bool y # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc - depends on AS_IS_GNU && AS_VERSION >= 23800 - help - Newer binutils versions default to ISA spec version 20191213 which - moves some instructions from the I extension to the Zicsr and Zifencei - extensions. + # https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=98416dbb0a62579d4a7a4a76bab51b5b52fec2cd + depends on AS_IS_GNU && AS_VERSION >= 23600 + help + Binutils-2.38 and GCC-12.1.0 bumped the default ISA spec to the newer + 20191213 version, which moves some instructions from the I extension to + the Zicsr and Zifencei extensions. This requires explicitly specifying + Zicsr and Zifencei when binutils >= 2.38 or GCC >= 12.1.0. Zicsr + and Zifencei are supported in binutils from version 2.36 onwards. + To make life easier, and avoid forcing toolchains that default to a + newer ISA spec to version 2.2, relax the check to binutils >= 2.36. + For clang < 17 or GCC < 11.3.0, for which this is not possible or need + special treatment, this is dealt with in TOOLCHAIN_NEEDS_OLD_ISA_SPEC. config TOOLCHAIN_NEEDS_OLD_ISA_SPEC def_bool y depends on TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI # https://github.com/llvm/llvm-project/commit/22e199e6afb1263c943c0c0d4498694e15bf8a16 - depends on CC_IS_CLANG && CLANG_VERSION < 170000 + # https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d29f5d6ab513c52fd872f532c492e35ae9fd6671 + depends on (CC_IS_CLANG && CLANG_VERSION < 170000) || (CC_IS_GCC && GCC_VERSION < 110300) help - Certain versions of clang do not support zicsr and zifencei via -march - but newer versions of binutils require it for the reasons noted in the - help text of CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI. This - option causes an older ISA spec compatible with these older versions - of clang to be passed to GAS, which has the same result as passing zicsr - and zifencei to -march. + Certain versions of clang and GCC do not support zicsr and zifencei via + -march. This option causes an older ISA spec compatible with these older + versions of clang and GCC to be passed to GAS, which has the same result + as passing zicsr and zifencei to -march. config FPU bool "FPU support" @@ -650,48 +656,30 @@ config RISCV_BOOT_SPINWAIT If unsure what to do here, say N. 
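The Zicsr/Zifencei requirement described in the Kconfig help text above is easy to reproduce outside the kernel. The minimal C sketch below (the file name and compiler invocation are illustrative, not part of this patch) uses one instruction from each extension: under the 20191213 ISA spec that binutils >= 2.38 and GCC >= 12.1.0 default to, assembling it with -march=rv64ima fails on both csrr and fence.i, while -march=rv64ima_zicsr_zifencei succeeds.

    /*
     * zicsr_zifencei_probe.c - hypothetical standalone probe, e.g.:
     *   riscv64-linux-gnu-gcc -march=rv64ima_zicsr_zifencei -mabi=lp64 \
     *       -c zicsr_zifencei_probe.c
     */
    unsigned long read_cycle_csr(void)
    {
            unsigned long cycles;

            /* csrr moved from the base I extension to Zicsr in spec 20191213 */
            __asm__ __volatile__ ("csrr %0, cycle" : "=r" (cycles));
            return cycles;
    }

    void flush_local_icache(void)
    {
            /* fence.i moved from the base I extension to Zifencei likewise */
            __asm__ __volatile__ ("fence.i" : : : "memory");
    }
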
-config KEXEC - bool "Kexec system call" - depends on MMU +config ARCH_SUPPORTS_KEXEC + def_bool MMU + +config ARCH_SELECTS_KEXEC + def_bool y + depends on KEXEC select HOTPLUG_CPU if SMP - select KEXEC_CORE - help - kexec is a system call that implements the ability to shutdown your - current kernel, and to start another kernel. It is like a reboot - but it is independent of the system firmware. And like a reboot - you can start any kernel with it, not just Linux. - The name comes from the similarity to the exec system call. +config ARCH_SUPPORTS_KEXEC_FILE + def_bool 64BIT && MMU -config KEXEC_FILE - bool "kexec file based systmem call" - depends on 64BIT && MMU +config ARCH_SELECTS_KEXEC_FILE + def_bool y + depends on KEXEC_FILE select HAVE_IMA_KEXEC if IMA - select KEXEC_CORE select KEXEC_ELF - help - This is new version of kexec system call. This system call is - file based and takes file descriptors as system call argument - for kernel and initramfs as opposed to list of segments as - accepted by previous system call. - - If you don't know what to do here, say Y. -config ARCH_HAS_KEXEC_PURGATORY +config ARCH_SUPPORTS_KEXEC_PURGATORY def_bool KEXEC_FILE depends on CRYPTO=y depends on CRYPTO_SHA256=y -config CRASH_DUMP - bool "Build kdump crash kernel" - help - Generate crash dump after being started by kexec. This should - be normally only set in special crash dump kernels which are - loaded in the main kernel with kexec-tools into a specially - reserved region and then later executed after a crash by - kdump/kexec. - - For more details see Documentation/admin-guide/kdump/kdump.rst +config ARCH_SUPPORTS_CRASH_DUMP + def_bool y config COMPAT bool "Kernel support for 32-bit U-mode" diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 0a0107460a5c..ab86ec3b9eab 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -192,7 +192,7 @@ CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_BTRFS_FS=m CONFIG_BTRFS_FS_POSIX_ACL=y -CONFIG_AUTOFS4_FS=y +CONFIG_AUTOFS_FS=y CONFIG_OVERLAY_FS=m CONFIG_ISO9660_FS=y CONFIG_JOLIET=y diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig index e36fffd6fb18..146c46d0525b 100644 --- a/arch/riscv/configs/nommu_k210_defconfig +++ b/arch/riscv/configs/nommu_k210_defconfig @@ -21,7 +21,7 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_IO_URING is not set # CONFIG_ADVISE_SYSCALLS is not set # CONFIG_KALLSYMS is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set CONFIG_SLUB=y diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig index c1ad85f0a4f7..95d8d1808f19 100644 --- a/arch/riscv/configs/nommu_k210_sdcard_defconfig +++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig @@ -13,7 +13,7 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_IO_URING is not set # CONFIG_ADVISE_SYSCALLS is not set # CONFIG_KALLSYMS is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set CONFIG_SLUB=y diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index 38760e4296cf..89b601e253a6 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -98,7 +98,7 @@ CONFIG_RPMSG_CTRL=y CONFIG_RPMSG_VIRTIO=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_AUTOFS4_FS=y +CONFIG_AUTOFS_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git 
a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h index f71ce21ff684..d5604d2073bc 100644 --- a/arch/riscv/include/asm/acpi.h +++ b/arch/riscv/include/asm/acpi.h @@ -19,7 +19,7 @@ typedef u64 phys_cpuid_t; #define PHYS_CPUID_INVALID INVALID_HARTID /* ACPI table mapping after acpi_permanent_mmap is set */ -void *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); #define acpi_os_ioremap acpi_os_ioremap #define acpi_strict 1 /* No out-of-spec workarounds on RISC-V */ diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 8091b8bf4883..c4dca559bb97 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -15,20 +15,19 @@ static inline void local_flush_icache_all(void) #define PG_dcache_clean PG_arch_1 -static inline void flush_dcache_page(struct page *page) +static inline void flush_dcache_folio(struct folio *folio) { - /* - * HugeTLB pages are always fully mapped and only head page will be - * set PG_dcache_clean (see comments in flush_icache_pte()). - */ - if (PageHuge(page)) - page = compound_head(page); - - if (test_bit(PG_dcache_clean, &page->flags)) - clear_bit(PG_dcache_clean, &page->flags); + if (test_bit(PG_dcache_clean, &folio->flags)) + clear_bit(PG_dcache_clean, &folio->flags); } +#define flush_dcache_folio flush_dcache_folio #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 +static inline void flush_dcache_page(struct page *page) +{ + flush_dcache_folio(page_folio(page)); +} + /* * RISC-V doesn't have an instruction to flush parts of the instruction cache, * so instead we just flush the whole thing. @@ -37,6 +36,10 @@ static inline void flush_dcache_page(struct page *page) #define flush_icache_user_page(vma, pg, addr, len) \ flush_icache_mm(vma->vm_mm, 0) +#ifdef CONFIG_64BIT +#define flush_cache_vmap(start, end) flush_tlb_kernel_range(start, end) +#endif + #ifndef CONFIG_SMP #define flush_icache_all() local_flush_icache_all() diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h index 29e9a0d84b16..8a6a128ec57f 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h @@ -21,12 +21,6 @@ extern void efi_init(void); int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md, bool); -#define arch_efi_call_virt_setup() ({ \ - sync_kernel_mappings(efi_mm.pgd); \ - efi_virtmap_load(); \ - }) -#define arch_efi_call_virt_teardown() efi_virtmap_unload() - #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE) /* Load initrd anywhere in system RAM */ @@ -46,8 +40,8 @@ static inline unsigned long efi_get_kimg_min_align(void) #define EFI_KIMG_PREFERRED_ADDRESS efi_get_kimg_min_align() -void efi_virtmap_load(void); -void efi_virtmap_unload(void); +void arch_efi_call_virt_setup(void); +void arch_efi_call_virt_teardown(void); unsigned long stext_offset(void); diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index ce1ebda1a49a..34e24f078cc1 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -2,6 +2,7 @@ #ifndef _ASM_RISCV_HUGETLB_H #define _ASM_RISCV_HUGETLB_H +#include <asm/cacheflush.h> #include <asm/page.h> static inline void arch_clear_hugepage_flags(struct page *page) diff --git a/arch/riscv/include/asm/insn.h b/arch/riscv/include/asm/insn.h index 4e1505cef8aa..fce00400c9bc 100644 --- a/arch/riscv/include/asm/insn.h +++ 
b/arch/riscv/include/asm/insn.h @@ -110,6 +110,7 @@ #define RVC_INSN_FUNCT4_OPOFF 12 #define RVC_INSN_FUNCT3_MASK GENMASK(15, 13) #define RVC_INSN_FUNCT3_OPOFF 13 +#define RVC_INSN_J_RS1_MASK GENMASK(11, 7) #define RVC_INSN_J_RS2_MASK GENMASK(6, 2) #define RVC_INSN_OPCODE_MASK GENMASK(1, 0) #define RVC_ENCODE_FUNCT3(f_) (RVC_FUNCT3_##f_ << RVC_INSN_FUNCT3_OPOFF) @@ -245,8 +246,6 @@ __RISCV_INSN_FUNCS(c_jal, RVC_MASK_C_JAL, RVC_MATCH_C_JAL) __RISCV_INSN_FUNCS(auipc, RVG_MASK_AUIPC, RVG_MATCH_AUIPC) __RISCV_INSN_FUNCS(jalr, RVG_MASK_JALR, RVG_MATCH_JALR) __RISCV_INSN_FUNCS(jal, RVG_MASK_JAL, RVG_MATCH_JAL) -__RISCV_INSN_FUNCS(c_jr, RVC_MASK_C_JR, RVC_MATCH_C_JR) -__RISCV_INSN_FUNCS(c_jalr, RVC_MASK_C_JALR, RVC_MATCH_C_JALR) __RISCV_INSN_FUNCS(c_j, RVC_MASK_C_J, RVC_MATCH_C_J) __RISCV_INSN_FUNCS(beq, RVG_MASK_BEQ, RVG_MATCH_BEQ) __RISCV_INSN_FUNCS(bne, RVG_MASK_BNE, RVG_MATCH_BNE) @@ -273,6 +272,18 @@ static __always_inline bool riscv_insn_is_branch(u32 code) return (code & RV_INSN_OPCODE_MASK) == RVG_OPCODE_BRANCH; } +static __always_inline bool riscv_insn_is_c_jr(u32 code) +{ + return (code & RVC_MASK_C_JR) == RVC_MATCH_C_JR && + (code & RVC_INSN_J_RS1_MASK) != 0; +} + +static __always_inline bool riscv_insn_is_c_jalr(u32 code) +{ + return (code & RVC_MASK_C_JALR) == RVC_MATCH_C_JALR && + (code & RVC_INSN_J_RS1_MASK) != 0; +} + #define RV_IMM_SIGN(x) (-(((x) >> 31) & 1)) #define RVC_IMM_SIGN(x) (-(((x) >> 12) & 1)) #define RV_X(X, s, mask) (((X) >> (s)) & (mask)) diff --git a/arch/riscv/include/asm/mmio.h b/arch/riscv/include/asm/mmio.h index aff6c33ab0c0..4c58ee7f95ec 100644 --- a/arch/riscv/include/asm/mmio.h +++ b/arch/riscv/include/asm/mmio.h @@ -101,9 +101,9 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) * Relaxed I/O memory access primitives. These follow the Device memory * ordering rules but do not guarantee any ordering relative to Normal memory * accesses. These are defined to order the indicated access (either a read or - * write) with all other I/O memory accesses. Since the platform specification - * defines that all I/O regions are strongly ordered on channel 2, no explicit - * fences are required to enforce this ordering. + * write) with all other I/O memory accesses to the same peripheral. Since the + * platform specification defines that all I/O regions are strongly ordered on + * channel 0, no explicit fences are required to enforce this ordering. */ /* FIXME: These are now the same as asm-generic */ #define __io_rbr() do {} while (0) @@ -125,14 +125,14 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) #endif /* - * I/O memory access primitives. Reads are ordered relative to any - * following Normal memory access. Writes are ordered relative to any prior - * Normal memory access. The memory barriers here are necessary as RISC-V + * I/O memory access primitives. Reads are ordered relative to any following + * Normal memory read and delay() loop. Writes are ordered relative to any + * prior Normal memory write. The memory barriers here are necessary as RISC-V * doesn't define any ordering between the memory space and the I/O space. 
*/ #define __io_br() do {} while (0) -#define __io_ar(v) __asm__ __volatile__ ("fence i,r" : : : "memory") -#define __io_bw() __asm__ __volatile__ ("fence w,o" : : : "memory") +#define __io_ar(v) ({ __asm__ __volatile__ ("fence i,ir" : : : "memory"); }) +#define __io_bw() ({ __asm__ __volatile__ ("fence w,o" : : : "memory"); }) #define __io_aw() mmiowb_set_pending() #define readb(c) ({ u8 __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; }) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index 59dc12b5b7e8..d169a4f41a2e 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -153,10 +153,10 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) #endif /* __PAGETABLE_PMD_FOLDED */ -#define __pte_free_tlb(tlb, pte, buf) \ -do { \ - pgtable_pte_page_dtor(pte); \ - tlb_remove_page((tlb), pte); \ +#define __pte_free_tlb(tlb, pte, buf) \ +do { \ + pagetable_pte_dtor(page_ptdesc(pte)); \ + tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));\ } while (0) #endif /* CONFIG_MMU */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 75970ee2bda2..a6f47c092bdc 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -188,6 +188,8 @@ extern struct pt_alloc_ops pt_ops __initdata; #define PAGE_KERNEL_IO __pgprot(_PAGE_IOREMAP) extern pgd_t swapper_pg_dir[]; +extern pgd_t trampoline_pg_dir[]; +extern pgd_t early_pg_dir[]; #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline int pmd_present(pmd_t pmd) @@ -445,8 +447,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) /* Commit new configuration to MMU hardware */ -static inline void update_mmu_cache(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) +static inline void update_mmu_cache_range(struct vm_fault *vmf, + struct vm_area_struct *vma, unsigned long address, + pte_t *ptep, unsigned int nr) { /* * The kernel assumes that TLBs don't cache invalid entries, but @@ -455,8 +458,11 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * Relying on flush_tlb_fix_spurious_fault would suffice, but * the extra traps reduce performance. So, eagerly SFENCE.VMA. 
*/ - local_flush_tlb_page(address); + while (nr--) + local_flush_tlb_page(address + nr * PAGE_SIZE); } +#define update_mmu_cache(vma, addr, ptep) \ + update_mmu_cache_range(NULL, vma, addr, ptep, 1) #define __HAVE_ARCH_UPDATE_MMU_TLB #define update_mmu_tlb update_mmu_cache @@ -487,8 +493,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) void flush_icache_pte(pte_t pte); -static inline void __set_pte_at(struct mm_struct *mm, - unsigned long addr, pte_t *ptep, pte_t pteval) +static inline void __set_pte_at(pte_t *ptep, pte_t pteval) { if (pte_present(pteval) && pte_exec(pteval)) flush_icache_pte(pteval); @@ -496,17 +501,25 @@ static inline void __set_pte_at(struct mm_struct *mm, set_pte(ptep, pteval); } -static inline void set_pte_at(struct mm_struct *mm, - unsigned long addr, pte_t *ptep, pte_t pteval) +static inline void set_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval, unsigned int nr) { - page_table_check_pte_set(mm, addr, ptep, pteval); - __set_pte_at(mm, addr, ptep, pteval); + page_table_check_ptes_set(mm, ptep, pteval, nr); + + for (;;) { + __set_pte_at(ptep, pteval); + if (--nr == 0) + break; + ptep++; + pte_val(pteval) += 1 << _PAGE_PFN_SHIFT; + } } +#define set_ptes set_ptes static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - __set_pte_at(mm, addr, ptep, __pte(0)); + __set_pte_at(ptep, __pte(0)); } #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS @@ -515,7 +528,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, pte_t entry, int dirty) { if (!pte_same(*ptep, entry)) - set_pte_at(vma->vm_mm, address, ptep, entry); + __set_pte_at(ptep, entry); /* * update_mmu_cache will unconditionally execute, handling both * the case that the PTE changed and the spurious fault case. 
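The new set_ptes() above walks nr contiguous pages by adding 1 << _PAGE_PFN_SHIFT to the raw PTE value on each iteration. A self-contained userspace sketch of that stepping follows (simplified pte_t and constants assumed from the RISC-V PTE layout, where the PFN field starts at bit 10 and the permission flags occupy bits 0-9):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT      12
    #define _PAGE_PFN_SHIFT 10      /* PFN field offset in a RISC-V PTE */

    /* Minimal stand-in for the kernel's pte_t, for illustration only. */
    typedef uint64_t pte_t;

    /*
     * Advance a PTE to the next physical page: adding 1 << _PAGE_PFN_SHIFT
     * increments the PFN field by one, i.e. the mapped physical address by
     * PAGE_SIZE, while leaving the flag bits below bit 10 untouched.
     */
    static pte_t pte_next_page(pte_t pte)
    {
            return pte + (1ULL << _PAGE_PFN_SHIFT);
    }

    int main(void)
    {
            /* PTE for physical address 0x80200000 with some low flag bits set */
            pte_t pte = ((0x80200000ULL >> PAGE_SHIFT) << _PAGE_PFN_SHIFT) | 0xcf;

            for (int i = 0; i < 3; i++, pte = pte_next_page(pte))
                    printf("page %d -> pa 0x%llx flags 0x%llx\n", i,
                           (unsigned long long)(pte >> _PAGE_PFN_SHIFT) << PAGE_SHIFT,
                           (unsigned long long)(pte & 0x3ff));
            return 0;
    }
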
@@ -529,7 +542,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, { pte_t pte = __pte(atomic_long_xchg((atomic_long_t *)ptep, 0)); - page_table_check_pte_clear(mm, address, pte); + page_table_check_pte_clear(mm, pte); return pte; } @@ -687,15 +700,15 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { - page_table_check_pmd_set(mm, addr, pmdp, pmd); - return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)); + page_table_check_pmd_set(mm, pmdp, pmd); + return __set_pte_at((pte_t *)pmdp, pmd_pte(pmd)); } static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud) { - page_table_check_pud_set(mm, addr, pudp, pud); - return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud)); + page_table_check_pud_set(mm, pudp, pud); + return __set_pte_at((pte_t *)pudp, pud_pte(pud)); } #ifdef CONFIG_PAGE_TABLE_CHECK @@ -742,7 +755,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, { pmd_t pmd = __pmd(atomic_long_xchg((atomic_long_t *)pmdp, 0)); - page_table_check_pmd_clear(mm, address, pmd); + page_table_check_pmd_clear(mm, pmd); return pmd; } @@ -758,7 +771,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { - page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd); + page_table_check_pmd_set(vma->vm_mm, pmdp, pmd); return __pmd(atomic_long_xchg((atomic_long_t *)pmdp, pmd_val(pmd))); } diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index 3d78930cab51..c5ee07b3df07 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -70,8 +70,9 @@ static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) "csrr %1, " __stringify(CSR_VTYPE) "\n\t" "csrr %2, " __stringify(CSR_VL) "\n\t" "csrr %3, " __stringify(CSR_VCSR) "\n\t" + "csrr %4, " __stringify(CSR_VLENB) "\n\t" : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl), - "=r" (dest->vcsr) : :); + "=r" (dest->vcsr), "=r" (dest->vlenb) : :); } static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src) diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h index 58d3e447f191..924d01b56c9a 100644 --- a/arch/riscv/include/asm/vmalloc.h +++ b/arch/riscv/include/asm/vmalloc.h @@ -3,12 +3,14 @@ #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +extern bool pgtable_l4_enabled, pgtable_l5_enabled; + #define IOREMAP_MAX_ORDER (PUD_SHIFT) #define arch_vmap_pud_supported arch_vmap_pud_supported static inline bool arch_vmap_pud_supported(pgprot_t prot) { - return true; + return pgtable_l4_enabled || pgtable_l5_enabled; } #define arch_vmap_pmd_supported arch_vmap_pmd_supported diff --git a/arch/riscv/include/uapi/asm/bitsperlong.h b/arch/riscv/include/uapi/asm/bitsperlong.h new file mode 100644 index 000000000000..7d0b32e3b701 --- /dev/null +++ b/arch/riscv/include/uapi/asm/bitsperlong.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Copyright (C) 2012 ARM Ltd. 
+ * Copyright (C) 2015 Regents of the University of California + */ + +#ifndef _UAPI_ASM_RISCV_BITSPERLONG_H +#define _UAPI_ASM_RISCV_BITSPERLONG_H + +#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8) + +#include <asm-generic/bitsperlong.h> + +#endif /* _UAPI_ASM_RISCV_BITSPERLONG_H */ diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h index e17c550986a6..283800130614 100644 --- a/arch/riscv/include/uapi/asm/ptrace.h +++ b/arch/riscv/include/uapi/asm/ptrace.h @@ -97,6 +97,7 @@ struct __riscv_v_ext_state { unsigned long vl; unsigned long vtype; unsigned long vcsr; + unsigned long vlenb; void *datap; /* * In signal handler, datap will be set a correct user stack offset diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c index 5ee03ebab80e..56cb2c986c48 100644 --- a/arch/riscv/kernel/acpi.c +++ b/arch/riscv/kernel/acpi.c @@ -215,9 +215,9 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size) early_iounmap(map, size); } -void *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) { - return memremap(phys, size, MEMREMAP_WB); + return (void __iomem *)memremap(phys, size, MEMREMAP_WB); } #ifdef CONFIG_PCI diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile index 189345773e7e..b86e5e2c3aea 100644 --- a/arch/riscv/kernel/compat_vdso/Makefile +++ b/arch/riscv/kernel/compat_vdso/Makefile @@ -11,7 +11,13 @@ compat_vdso-syms += flush_icache COMPAT_CC := $(CC) COMPAT_LD := $(LD) -COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32 +# binutils 2.35 does not support the zifencei extension, but in the ISA +# spec 20191213, G stands for IMAFD_ZICSR_ZIFENCEI. +ifdef CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI + COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32 +else + COMPAT_CC_FLAGS := -march=rv32imafd -mabi=ilp32 +endif COMPAT_LD_FLAGS := -melf32lriscv # Disable attributes, as they're useless and break the build. diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index a2fc952318e9..35b854cf078e 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -17,6 +17,11 @@ #include <asm/smp.h> #include <asm/pgtable.h> +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == cpuid_to_hartid_map(cpu); +} + /* * Returns the hart ID of the given device tree node, or -ENODEV if the node * isn't an enabled and valid RISC-V hart node. diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index bdcf460ea53d..a8f66c015229 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -318,18 +318,13 @@ void __init riscv_fill_hwcap(void) } /* - * Linux requires the following extensions, so we may as well - * always set them. - */ - set_bit(RISCV_ISA_EXT_ZICSR, isainfo->isa); - set_bit(RISCV_ISA_EXT_ZIFENCEI, isainfo->isa); - - /* * These ones were as they were part of the base ISA when the * port & dt-bindings were upstreamed, and so can be set * unconditionally where `i` is in riscv,isa on DT systems. 
*/ if (acpi_disabled) { + set_bit(RISCV_ISA_EXT_ZICSR, isainfo->isa); + set_bit(RISCV_ISA_EXT_ZIFENCEI, isainfo->isa); set_bit(RISCV_ISA_EXT_ZICNTR, isainfo->isa); set_bit(RISCV_ISA_EXT_ZIHPM, isainfo->isa); } diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/crash_core.c index b351a3c01355..55f1d7856b54 100644 --- a/arch/riscv/kernel/crash_core.c +++ b/arch/riscv/kernel/crash_core.c @@ -18,4 +18,6 @@ void arch_crash_save_vmcoreinfo(void) vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); #endif vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); + vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n", + kernel_map.va_kernel_pa_offset); } diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 5372b708fae2..f4099059ed8f 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -260,7 +260,7 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, cmdline = modified_cmdline; } -#ifdef CONFIG_ARCH_HAS_KEXEC_PURGATORY +#ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY /* Add purgatory to the image */ kbuf.top_down = true; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; @@ -274,14 +274,14 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, sizeof(kernel_start), 0); if (ret) pr_err("Error update purgatory ret=%d\n", ret); -#endif /* CONFIG_ARCH_HAS_KEXEC_PURGATORY */ +#endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */ /* Add the initrd to the image */ if (initrd != NULL) { kbuf.buffer = initrd; kbuf.bufsz = kbuf.memsz = initrd_len; kbuf.buf_align = PAGE_SIZE; - kbuf.top_down = false; + kbuf.top_down = true; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); if (ret) @@ -425,6 +425,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, * sym, instead of searching the whole relsec. */ case R_RISCV_PCREL_HI20: + case R_RISCV_CALL_PLT: case R_RISCV_CALL: *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) | ENCODE_UJTYPE_IMM(val - addr); diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index d0577cc6a081..a8efa053c4a5 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -84,6 +84,9 @@ void do_softirq_own_stack(void) : [sp] "r" (sp) : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", "t4", "t5", "t6", +#ifndef CONFIG_FRAME_POINTER + "s0", +#endif "memory"); } else #endif diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 1d572cf3140f..487303e3ef22 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -25,9 +25,6 @@ enum riscv_regset { #ifdef CONFIG_FPU REGSET_F, #endif -#ifdef CONFIG_RISCV_ISA_V - REGSET_V, -#endif }; static int riscv_gpr_get(struct task_struct *target, @@ -84,61 +81,6 @@ static int riscv_fpr_set(struct task_struct *target, } #endif -#ifdef CONFIG_RISCV_ISA_V -static int riscv_vr_get(struct task_struct *target, - const struct user_regset *regset, - struct membuf to) -{ - struct __riscv_v_ext_state *vstate = &target->thread.vstate; - - if (!riscv_v_vstate_query(task_pt_regs(target))) - return -EINVAL; - - /* - * Ensure the vector registers have been saved to the memory before - * copying them to membuf. - */ - if (target == current) - riscv_v_vstate_save(current, task_pt_regs(current)); - - /* Copy vector header from vstate. */ - membuf_write(&to, vstate, offsetof(struct __riscv_v_ext_state, datap)); - membuf_zero(&to, sizeof(vstate->datap)); - - /* Copy all the vector registers from vstate. 
*/ - return membuf_write(&to, vstate->datap, riscv_v_vsize); -} - -static int riscv_vr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret, size; - struct __riscv_v_ext_state *vstate = &target->thread.vstate; - - if (!riscv_v_vstate_query(task_pt_regs(target))) - return -EINVAL; - - /* Copy rest of the vstate except datap */ - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate, 0, - offsetof(struct __riscv_v_ext_state, datap)); - if (unlikely(ret)) - return ret; - - /* Skip copy datap. */ - size = sizeof(vstate->datap); - count -= size; - ubuf += size; - - /* Copy all the vector registers. */ - pos = 0; - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate->datap, - 0, riscv_v_vsize); - return ret; -} -#endif - static const struct user_regset riscv_user_regset[] = { [REGSET_X] = { .core_note_type = NT_PRSTATUS, @@ -158,17 +100,6 @@ static const struct user_regset riscv_user_regset[] = { .set = riscv_fpr_set, }, #endif -#ifdef CONFIG_RISCV_ISA_V - [REGSET_V] = { - .core_note_type = NT_RISCV_VECTOR, - .align = 16, - .n = ((32 * RISCV_MAX_VLENB) + - sizeof(struct __riscv_v_ext_state)) / sizeof(__u32), - .size = sizeof(__u32), - .regset_get = riscv_vr_get, - .set = riscv_vr_set, - }, -#endif }; static const struct user_regset_view riscv_user_native_view = { diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 85bbce0f758c..40420afbb1a0 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -61,11 +61,6 @@ int riscv_hartid_to_cpuid(unsigned long hartid) return -ENOENT; } -bool arch_match_cpu_phys_id(int cpu, u64 phys_id) -{ - return phys_id == cpuid_to_hartid_map(cpu); -} - static void ipi_stop(void) { set_cpu_online(smp_processor_id(), false); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index f910dfccbf5d..f798c853bede 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -297,7 +297,7 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs) { if (user_mode(regs)) { - ulong syscall = regs->a7; + long syscall = regs->a7; regs->epc += 4; regs->orig_a0 = regs->a0; @@ -306,9 +306,9 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs) syscall = syscall_enter_from_user_mode(regs, syscall); - if (syscall < NR_syscalls) + if (syscall >= 0 && syscall < NR_syscalls) syscall_handler(regs, syscall); - else + else if (syscall != -1) regs->a0 = -ENOSYS; syscall_exit_to_user_mode(regs); @@ -372,6 +372,9 @@ asmlinkage void noinstr do_irq(struct pt_regs *regs) : [sp] "r" (sp), [regs] "r" (regs) : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", "t4", "t5", "t6", +#ifndef CONFIG_FRAME_POINTER + "s0", +#endif "memory"); } else #endif diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index ec486e5369d9..09b47ebacf2e 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -17,8 +17,11 @@ ENTRY(__asm_copy_from_user) li t6, SR_SUM csrs CSR_STATUS, t6 - /* Save for return value */ - mv t5, a2 + /* + * Save the terminal address which will be used to compute the number + * of bytes copied in case of a fixup exception. 
+ */ + add t5, a0, a2 /* * Register allocation for code below: @@ -176,7 +179,7 @@ ENTRY(__asm_copy_from_user) 10: /* Disable access to user memory */ csrc CSR_STATUS, t6 - mv a0, t5 + sub a0, t5, a0 ret ENDPROC(__asm_copy_to_user) ENDPROC(__asm_copy_from_user) @@ -228,7 +231,7 @@ ENTRY(__clear_user) 11: /* Disable access to user memory */ csrc CSR_STATUS, t6 - mv a0, a1 + sub a0, a3, a0 ret ENDPROC(__clear_user) EXPORT_SYMBOL(__clear_user) diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index fbc59b3f69f2..f1387272a551 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -82,18 +82,11 @@ void flush_icache_mm(struct mm_struct *mm, bool local) #ifdef CONFIG_MMU void flush_icache_pte(pte_t pte) { - struct page *page = pte_page(pte); + struct folio *folio = page_folio(pte_page(pte)); - /* - * HugeTLB pages are always fully mapped, so only setting head page's - * PG_dcache_clean flag is enough. - */ - if (PageHuge(page)) - page = compound_head(page); - - if (!test_bit(PG_dcache_clean, &page->flags)) { + if (!test_bit(PG_dcache_clean, &folio->flags)) { flush_icache_all(); - set_bit(PG_dcache_clean, &page->flags); + set_bit(PG_dcache_clean, &folio->flags); } } #endif /* CONFIG_MMU */ diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 6ea2cce4cc17..6115d7514972 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -283,7 +283,6 @@ void handle_page_fault(struct pt_regs *regs) flags |= FAULT_FLAG_WRITE; else if (cause == EXC_INST_PAGE_FAULT) flags |= FAULT_FLAG_INSTRUCTION; -#ifdef CONFIG_PER_VMA_LOCK if (!(flags & FAULT_FLAG_USER)) goto lock_mmap; @@ -297,7 +296,8 @@ void handle_page_fault(struct pt_regs *regs) } fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs); - vma_end_read(vma); + if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) + vma_end_read(vma); if (!(fault & VM_FAULT_RETRY)) { count_vm_vma_lock_event(VMA_LOCK_SUCCESS); @@ -311,7 +311,6 @@ void handle_page_fault(struct pt_regs *regs) return; } lock_mmap: -#endif /* CONFIG_PER_VMA_LOCK */ retry: vma = lock_mm_and_find_vma(mm, addr, regs); @@ -368,9 +367,7 @@ retry: mmap_read_unlock(mm); -#ifdef CONFIG_PER_VMA_LOCK done: -#endif if (unlikely(fault & VM_FAULT_ERROR)) { tsk->thread.bad_cause = cause; mm_fault_error(regs, addr, fault); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 70fb31960b63..c07ff3e2c90a 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -26,12 +26,13 @@ #include <linux/kfence.h> #include <asm/fixmap.h> -#include <asm/tlbflush.h> -#include <asm/sections.h> -#include <asm/soc.h> #include <asm/io.h> -#include <asm/ptdump.h> #include <asm/numa.h> +#include <asm/pgtable.h> +#include <asm/ptdump.h> +#include <asm/sections.h> +#include <asm/soc.h> +#include <asm/tlbflush.h> #include "../kernel/head.h" @@ -214,8 +215,13 @@ static void __init setup_bootmem(void) memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); phys_ram_end = memblock_end_of_DRAM(); + + /* + * Make sure we align the start of the memory on a PMD boundary so that + * at worst, we map the linear mapping with PMD mappings. 
+ */ if (!IS_ENABLED(CONFIG_XIP_KERNEL)) - phys_ram_base = memblock_start_of_DRAM(); + phys_ram_base = memblock_start_of_DRAM() & PMD_MASK; /* * In 64-bit, any use of __va/__pa before this point is wrong as we @@ -353,12 +359,10 @@ static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va) static phys_addr_t __init alloc_pte_late(uintptr_t va) { - unsigned long vaddr; + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0); - vaddr = __get_free_page(GFP_KERNEL); - BUG_ON(!vaddr || !pgtable_pte_page_ctor(virt_to_page((void *)vaddr))); - - return __pa(vaddr); + BUG_ON(!ptdesc || !pagetable_pte_ctor(ptdesc)); + return __pa((pte_t *)ptdesc_address(ptdesc)); } static void __init create_pte_mapping(pte_t *ptep, @@ -436,12 +440,10 @@ static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va) static phys_addr_t __init alloc_pmd_late(uintptr_t va) { - unsigned long vaddr; - - vaddr = __get_free_page(GFP_KERNEL); - BUG_ON(!vaddr || !pgtable_pmd_page_ctor(virt_to_page((void *)vaddr))); + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0); - return __pa(vaddr); + BUG_ON(!ptdesc || !pagetable_pmd_ctor(ptdesc)); + return __pa((pmd_t *)ptdesc_address(ptdesc)); } static void __init create_pmd_mapping(pmd_t *pmdp, @@ -1346,7 +1348,7 @@ static void __init reserve_crashkernel(void) */ crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE, search_start, - min(search_end, (unsigned long) SZ_4G)); + min(search_end, (unsigned long)(SZ_4G - 1))); if (crash_base == 0) { /* Try again without restricting region to 32bit addressible memory */ crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE, diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 8fc0efcf905c..a01bc15dce24 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -22,7 +22,6 @@ * region is not and then we have to go down to the PUD level. */ -extern pgd_t early_pg_dir[PTRS_PER_PGD]; pgd_t tmp_pg_dir[PTRS_PER_PGD] __page_aligned_bss; p4d_t tmp_p4d[PTRS_PER_P4D] __page_aligned_bss; pud_t tmp_pud[PTRS_PER_PUD] __page_aligned_bss; diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index ea3d61de065b..161d0b34c2cb 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -102,6 +102,7 @@ static const struct mm_walk_ops pageattr_ops = { .pmd_entry = pageattr_pmd_entry, .pte_entry = pageattr_pte_entry, .pte_hole = pageattr_pte_hole, + .walk_lock = PGWALK_RDLOCK, }; static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h index bf9802a63061..d21c6c92a683 100644 --- a/arch/riscv/net/bpf_jit.h +++ b/arch/riscv/net/bpf_jit.h @@ -69,7 +69,7 @@ struct rv_jit_context { struct bpf_prog *prog; u16 *insns; /* RV insns */ int ninsns; - int body_len; + int prologue_len; int epilogue_offset; int *offset; /* BPF to RV */ int nexentries; @@ -216,8 +216,8 @@ static inline int rv_offset(int insn, int off, struct rv_jit_context *ctx) int from, to; off++; /* BPF branch is from PC+1, RV is from PC */ - from = (insn > 0) ? ctx->offset[insn - 1] : 0; - to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0; + from = (insn > 0) ? ctx->offset[insn - 1] : ctx->prologue_len; + to = (insn + off > 0) ? 
ctx->offset[insn + off - 1] : ctx->prologue_len; return ninsns_rvoff(to - from); } @@ -431,11 +431,21 @@ static inline u32 rv_mulhu(u8 rd, u8 rs1, u8 rs2) return rv_r_insn(1, rs2, rs1, 3, rd, 0x33); } +static inline u32 rv_div(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 4, rd, 0x33); +} + static inline u32 rv_divu(u8 rd, u8 rs1, u8 rs2) { return rv_r_insn(1, rs2, rs1, 5, rd, 0x33); } +static inline u32 rv_rem(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 6, rd, 0x33); +} + static inline u32 rv_remu(u8 rd, u8 rs1, u8 rs2) { return rv_r_insn(1, rs2, rs1, 7, rd, 0x33); @@ -501,6 +511,16 @@ static inline u32 rv_ble(u8 rs1, u8 rs2, u16 imm12_1) return rv_bge(rs2, rs1, imm12_1); } +static inline u32 rv_lb(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 0, rd, 0x03); +} + +static inline u32 rv_lh(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 1, rd, 0x03); +} + static inline u32 rv_lw(u8 rd, u16 imm11_0, u8 rs1) { return rv_i_insn(imm11_0, rs1, 2, rd, 0x03); @@ -766,11 +786,21 @@ static inline u32 rv_mulw(u8 rd, u8 rs1, u8 rs2) return rv_r_insn(1, rs2, rs1, 0, rd, 0x3b); } +static inline u32 rv_divw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 4, rd, 0x3b); +} + static inline u32 rv_divuw(u8 rd, u8 rs1, u8 rs2) { return rv_r_insn(1, rs2, rs1, 5, rd, 0x3b); } +static inline u32 rv_remw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 6, rd, 0x3b); +} + static inline u32 rv_remuw(u8 rd, u8 rs1, u8 rs2) { return rv_r_insn(1, rs2, rs1, 7, rd, 0x3b); diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index c648864c8cd1..8423f4ddf8f5 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -13,6 +13,8 @@ #include <asm/patch.h> #include "bpf_jit.h" +#define RV_FENTRY_NINSNS 2 + #define RV_REG_TCC RV_REG_A6 #define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if program do calls */ @@ -241,7 +243,7 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) if (!is_tail_call) emit_mv(RV_REG_A0, RV_REG_A5, ctx); emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, - is_tail_call ? 20 : 0, /* skip reserved nops and TCC init */ + is_tail_call ? 
(RV_FENTRY_NINSNS + 1) * 4 : 0, /* skip reserved nops and TCC init */ ctx); } @@ -578,7 +580,8 @@ static int add_exception_handler(const struct bpf_insn *insn, unsigned long pc; off_t offset; - if (!ctx->insns || !ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM) + if (!ctx->insns || !ctx->prog->aux->extable || + (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX)) return 0; if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries)) @@ -618,32 +621,7 @@ static int add_exception_handler(const struct bpf_insn *insn, return 0; } -static int gen_call_or_nops(void *target, void *ip, u32 *insns) -{ - s64 rvoff; - int i, ret; - struct rv_jit_context ctx; - - ctx.ninsns = 0; - ctx.insns = (u16 *)insns; - - if (!target) { - for (i = 0; i < 4; i++) - emit(rv_nop(), &ctx); - return 0; - } - - rvoff = (s64)(target - (ip + 4)); - emit(rv_sd(RV_REG_SP, -8, RV_REG_RA), &ctx); - ret = emit_jump_and_link(RV_REG_RA, rvoff, false, &ctx); - if (ret) - return ret; - emit(rv_ld(RV_REG_RA, -8, RV_REG_SP), &ctx); - - return 0; -} - -static int gen_jump_or_nops(void *target, void *ip, u32 *insns) +static int gen_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call) { s64 rvoff; struct rv_jit_context ctx; @@ -658,38 +636,35 @@ static int gen_jump_or_nops(void *target, void *ip, u32 *insns) } rvoff = (s64)(target - ip); - return emit_jump_and_link(RV_REG_ZERO, rvoff, false, &ctx); + return emit_jump_and_link(is_call ? RV_REG_T0 : RV_REG_ZERO, rvoff, false, &ctx); } int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, void *old_addr, void *new_addr) { - u32 old_insns[4], new_insns[4]; + u32 old_insns[RV_FENTRY_NINSNS], new_insns[RV_FENTRY_NINSNS]; bool is_call = poke_type == BPF_MOD_CALL; - int (*gen_insns)(void *target, void *ip, u32 *insns); - int ninsns = is_call ? 4 : 2; int ret; - if (!is_bpf_text_address((unsigned long)ip)) + if (!is_kernel_text((unsigned long)ip) && + !is_bpf_text_address((unsigned long)ip)) return -ENOTSUPP; - gen_insns = is_call ? gen_call_or_nops : gen_jump_or_nops; - - ret = gen_insns(old_addr, ip, old_insns); + ret = gen_jump_or_nops(old_addr, ip, old_insns, is_call); if (ret) return ret; - if (memcmp(ip, old_insns, ninsns * 4)) + if (memcmp(ip, old_insns, RV_FENTRY_NINSNS * 4)) return -EFAULT; - ret = gen_insns(new_addr, ip, new_insns); + ret = gen_jump_or_nops(new_addr, ip, new_insns, is_call); if (ret) return ret; cpus_read_lock(); mutex_lock(&text_mutex); - if (memcmp(ip, new_insns, ninsns * 4)) - ret = patch_text(ip, new_insns, ninsns); + if (memcmp(ip, new_insns, RV_FENTRY_NINSNS * 4)) + ret = patch_text(ip, new_insns, RV_FENTRY_NINSNS); mutex_unlock(&text_mutex); cpus_read_unlock(); @@ -787,8 +762,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, int i, ret, offset; int *branches_off = NULL; int stack_size = 0, nregs = m->nr_args; - int retaddr_off, fp_off, retval_off, args_off; - int nregs_off, ip_off, run_ctx_off, sreg_off; + int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; @@ -796,13 +770,27 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, bool save_ret; u32 insn; - /* Generated trampoline stack layout: + /* Two types of generated trampoline stack layout: * - * FP - 8 [ RA of parent func ] return address of parent + * 1. 
trampoline called from function entry + * -------------------------------------- + * FP + 8 [ RA to parent func ] return address to parent * function - * FP - retaddr_off [ RA of traced func ] return address of traced + * FP + 0 [ FP of parent func ] frame pointer of parent * function - * FP - fp_off [ FP of parent func ] + * FP - 8 [ T0 to traced func ] return address of traced + * function + * FP - 16 [ FP of traced func ] frame pointer of traced + * function + * -------------------------------------- + * + * 2. trampoline called directly + * -------------------------------------- + * FP - 8 [ RA to caller func ] return address to caller + * function + * FP - 16 [ FP of caller func ] frame pointer of caller + * function + * -------------------------------------- * * FP - retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or * BPF_TRAMP_F_RET_FENTRY_RET @@ -833,14 +821,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, if (nregs > 8) return -ENOTSUPP; - /* room for parent function return address */ - stack_size += 8; - - stack_size += 8; - retaddr_off = stack_size; - - stack_size += 8; - fp_off = stack_size; + /* room of trampoline frame to store return address and frame pointer */ + stack_size += 16; save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); if (save_ret) { @@ -867,12 +849,29 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, stack_size = round_up(stack_size, 16); - emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx); - - emit_sd(RV_REG_SP, stack_size - retaddr_off, RV_REG_RA, ctx); - emit_sd(RV_REG_SP, stack_size - fp_off, RV_REG_FP, ctx); - - emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx); + if (func_addr) { + /* For the trampoline called from function entry, + * the frame of traced function and the frame of + * trampoline need to be considered. + */ + emit_addi(RV_REG_SP, RV_REG_SP, -16, ctx); + emit_sd(RV_REG_SP, 8, RV_REG_RA, ctx); + emit_sd(RV_REG_SP, 0, RV_REG_FP, ctx); + emit_addi(RV_REG_FP, RV_REG_SP, 16, ctx); + + emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx); + emit_sd(RV_REG_SP, stack_size - 8, RV_REG_T0, ctx); + emit_sd(RV_REG_SP, stack_size - 16, RV_REG_FP, ctx); + emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx); + } else { + /* For the trampoline called directly, just handle + * the frame of trampoline. 
+ */ + emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx); + emit_sd(RV_REG_SP, stack_size - 8, RV_REG_RA, ctx); + emit_sd(RV_REG_SP, stack_size - 16, RV_REG_FP, ctx); + emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx); + } /* callee saved register S1 to pass start time */ emit_sd(RV_REG_FP, -sreg_off, RV_REG_S1, ctx); @@ -890,7 +889,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, /* skip to actual body of traced function */ if (flags & BPF_TRAMP_F_SKIP_FRAME) - orig_call += 16; + orig_call += RV_FENTRY_NINSNS * 4; if (flags & BPF_TRAMP_F_CALL_ORIG) { emit_imm(RV_REG_A0, (const s64)im, ctx); @@ -967,17 +966,30 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx); - if (flags & BPF_TRAMP_F_SKIP_FRAME) - /* return address of parent function */ - emit_ld(RV_REG_RA, stack_size - 8, RV_REG_SP, ctx); - else - /* return address of traced function */ - emit_ld(RV_REG_RA, stack_size - retaddr_off, RV_REG_SP, ctx); + if (func_addr) { + /* trampoline called from function entry */ + emit_ld(RV_REG_T0, stack_size - 8, RV_REG_SP, ctx); + emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx); + emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx); - emit_ld(RV_REG_FP, stack_size - fp_off, RV_REG_SP, ctx); - emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx); + emit_ld(RV_REG_RA, 8, RV_REG_SP, ctx); + emit_ld(RV_REG_FP, 0, RV_REG_SP, ctx); + emit_addi(RV_REG_SP, RV_REG_SP, 16, ctx); - emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx); + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* return to parent function */ + emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx); + else + /* return to traced function */ + emit_jalr(RV_REG_ZERO, RV_REG_T0, 0, ctx); + } else { + /* trampoline called directly */ + emit_ld(RV_REG_RA, stack_size - 8, RV_REG_SP, ctx); + emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx); + emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx); + + emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx); + } ret = ctx->ninsns; out: @@ -1035,7 +1047,19 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, emit_zext_32(rd, ctx); break; } - emit_mv(rd, rs, ctx); + switch (insn->off) { + case 0: + emit_mv(rd, rs, ctx); + break; + case 8: + case 16: + emit_slli(RV_REG_T1, rs, 64 - insn->off, ctx); + emit_srai(rd, RV_REG_T1, 64 - insn->off, ctx); + break; + case 32: + emit_addiw(rd, rs, 0, ctx); + break; + } if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -1083,13 +1107,19 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, break; case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: - emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx); + if (off) + emit(is64 ? rv_div(rd, rd, rs) : rv_divw(rd, rd, rs), ctx); + else + emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx); if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X: - emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx); + if (off) + emit(is64 ? rv_rem(rd, rd, rs) : rv_remw(rd, rd, rs), ctx); + else + emit(is64 ? 
rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx); if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -1138,6 +1168,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, break; case BPF_ALU | BPF_END | BPF_FROM_BE: + case BPF_ALU64 | BPF_END | BPF_FROM_LE: emit_li(RV_REG_T2, 0, ctx); emit_andi(RV_REG_T1, rd, 0xff, ctx); @@ -1260,16 +1291,24 @@ out_be: case BPF_ALU | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_K: emit_imm(RV_REG_T1, imm, ctx); - emit(is64 ? rv_divu(rd, rd, RV_REG_T1) : - rv_divuw(rd, rd, RV_REG_T1), ctx); + if (off) + emit(is64 ? rv_div(rd, rd, RV_REG_T1) : + rv_divw(rd, rd, RV_REG_T1), ctx); + else + emit(is64 ? rv_divu(rd, rd, RV_REG_T1) : + rv_divuw(rd, rd, RV_REG_T1), ctx); if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_K: emit_imm(RV_REG_T1, imm, ctx); - emit(is64 ? rv_remu(rd, rd, RV_REG_T1) : - rv_remuw(rd, rd, RV_REG_T1), ctx); + if (off) + emit(is64 ? rv_rem(rd, rd, RV_REG_T1) : + rv_remw(rd, rd, RV_REG_T1), ctx); + else + emit(is64 ? rv_remu(rd, rd, RV_REG_T1) : + rv_remuw(rd, rd, RV_REG_T1), ctx); if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -1303,7 +1342,11 @@ out_be: /* JUMP off */ case BPF_JMP | BPF_JA: - rvoff = rv_offset(i, off, ctx); + case BPF_JMP32 | BPF_JA: + if (BPF_CLASS(code) == BPF_JMP) + rvoff = rv_offset(i, off, ctx); + else + rvoff = rv_offset(i, imm, ctx); ret = emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx); if (ret) return ret; @@ -1475,7 +1518,7 @@ out_be: return 1; } - /* LDX: dst = *(size *)(src + off) */ + /* LDX: dst = *(unsigned size *)(src + off) */ case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_W: @@ -1484,14 +1527,28 @@ out_be: case BPF_LDX | BPF_PROBE_MEM | BPF_H: case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + /* LDSX: dst = *(signed size *)(src + off) */ + case BPF_LDX | BPF_MEMSX | BPF_B: + case BPF_LDX | BPF_MEMSX | BPF_H: + case BPF_LDX | BPF_MEMSX | BPF_W: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: { int insn_len, insns_start; + bool sign_ext; + + sign_ext = BPF_MODE(insn->code) == BPF_MEMSX || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX; switch (BPF_SIZE(code)) { case BPF_B: if (is_12b_int(off)) { insns_start = ctx->ninsns; - emit(rv_lbu(rd, off, rs), ctx); + if (sign_ext) + emit(rv_lb(rd, off, rs), ctx); + else + emit(rv_lbu(rd, off, rs), ctx); insn_len = ctx->ninsns - insns_start; break; } @@ -1499,15 +1556,19 @@ out_be: emit_imm(RV_REG_T1, off, ctx); emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); insns_start = ctx->ninsns; - emit(rv_lbu(rd, 0, RV_REG_T1), ctx); + if (sign_ext) + emit(rv_lb(rd, 0, RV_REG_T1), ctx); + else + emit(rv_lbu(rd, 0, RV_REG_T1), ctx); insn_len = ctx->ninsns - insns_start; - if (insn_is_zext(&insn[1])) - return 1; break; case BPF_H: if (is_12b_int(off)) { insns_start = ctx->ninsns; - emit(rv_lhu(rd, off, rs), ctx); + if (sign_ext) + emit(rv_lh(rd, off, rs), ctx); + else + emit(rv_lhu(rd, off, rs), ctx); insn_len = ctx->ninsns - insns_start; break; } @@ -1515,15 +1576,19 @@ out_be: emit_imm(RV_REG_T1, off, ctx); emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); insns_start = ctx->ninsns; - emit(rv_lhu(rd, 0, RV_REG_T1), ctx); + if (sign_ext) + emit(rv_lh(rd, 0, RV_REG_T1), ctx); + else + emit(rv_lhu(rd, 0, RV_REG_T1), ctx); insn_len = ctx->ninsns - insns_start; - if (insn_is_zext(&insn[1])) - return 1; break; 
case BPF_W: if (is_12b_int(off)) { insns_start = ctx->ninsns; - emit(rv_lwu(rd, off, rs), ctx); + if (sign_ext) + emit(rv_lw(rd, off, rs), ctx); + else + emit(rv_lwu(rd, off, rs), ctx); insn_len = ctx->ninsns - insns_start; break; } @@ -1531,10 +1596,11 @@ out_be: emit_imm(RV_REG_T1, off, ctx); emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); insns_start = ctx->ninsns; - emit(rv_lwu(rd, 0, RV_REG_T1), ctx); + if (sign_ext) + emit(rv_lw(rd, 0, RV_REG_T1), ctx); + else + emit(rv_lwu(rd, 0, RV_REG_T1), ctx); insn_len = ctx->ninsns - insns_start; - if (insn_is_zext(&insn[1])) - return 1; break; case BPF_DW: if (is_12b_int(off)) { @@ -1555,6 +1621,9 @@ out_be: ret = add_exception_handler(insn, ctx, rd, insn_len); if (ret) return ret; + + if (BPF_SIZE(code) != BPF_DW && insn_is_zext(&insn[1])) + return 1; break; } /* speculation barrier */ @@ -1691,8 +1760,8 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx) store_offset = stack_adjust - 8; - /* reserve 4 nop insns */ - for (i = 0; i < 4; i++) + /* nops reserved for auipc+jalr pair */ + for (i = 0; i < RV_FENTRY_NINSNS; i++) emit(rv_nop(), ctx); /* First instruction is always setting the tail-call-counter diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 737baf8715da..7a26a3e1c73c 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -44,7 +44,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) unsigned int prog_size = 0, extable_size = 0; bool tmp_blinded = false, extra_pass = false; struct bpf_prog *tmp, *orig_prog = prog; - int pass = 0, prev_ninsns = 0, prologue_len, i; + int pass = 0, prev_ninsns = 0, i; struct rv_jit_data *jit_data; struct rv_jit_context *ctx; @@ -83,6 +83,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog = orig_prog; goto out_offset; } + + if (build_body(ctx, extra_pass, NULL)) { + prog = orig_prog; + goto out_offset; + } + for (i = 0; i < prog->len; i++) { prev_ninsns += 32; ctx->offset[i] = prev_ninsns; @@ -91,12 +97,15 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) for (i = 0; i < NR_JIT_ITERATIONS; i++) { pass++; ctx->ninsns = 0; + + bpf_jit_build_prologue(ctx); + ctx->prologue_len = ctx->ninsns; + if (build_body(ctx, extra_pass, ctx->offset)) { prog = orig_prog; goto out_offset; } - ctx->body_len = ctx->ninsns; - bpf_jit_build_prologue(ctx); + ctx->epilogue_offset = ctx->ninsns; bpf_jit_build_epilogue(ctx); @@ -162,10 +171,8 @@ skip_init_ctx: if (!prog->is_func || extra_pass) { bpf_jit_binary_lock_ro(jit_data->header); - prologue_len = ctx->epilogue_offset - ctx->body_len; for (i = 0; i < prog->len; i++) - ctx->offset[i] = ninsns_rvoff(prologue_len + - ctx->offset[i]); + ctx->offset[i] = ninsns_rvoff(ctx->offset[i]); bpf_prog_fill_jited_linfo(prog, ctx->offset); out_offset: kfree(ctx->offset); |
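The bpf_jit_core.c hunk above works because ctx->offset[] now records cumulative instruction offsets that already include the prologue, so rv_offset() (patched earlier in this diff, in bpf_jit.h) can fall back to prologue_len rather than 0 when a branch reaches back to BPF instruction 0. A standalone sketch of that offset arithmetic follows; insn counts are 16-bit halfword units, hence the shift in ninsns_rvoff(), and the sample numbers are made up:

    #include <stdio.h>

    /*
     * Each "insn" unit in the RISC-V BPF JIT is a 16-bit halfword (RVC),
     * so a byte offset is the unit count shifted left by one.
     */
    static int ninsns_rvoff(int ninsns)
    {
            return ninsns << 1;
    }

    /*
     * Sketch of the patched rv_offset(): because ctx->offset[] entries now
     * include the prologue, the "before program start" fallback must be
     * prologue_len instead of 0, or jumps targeting BPF insn 0 would land
     * inside the prologue.
     */
    static int rv_offset(const int *offset, int prologue_len, int insn, int off)
    {
            int from, to;

            off++;  /* BPF branches are from PC+1; RISC-V branches from PC */
            from = (insn > 0) ? offset[insn - 1] : prologue_len;
            to = (insn + off > 0) ? offset[insn + off - 1] : prologue_len;
            return ninsns_rvoff(to - from);
    }

    int main(void)
    {
            int offset[] = { 14, 20, 26 };  /* cumulative units incl. prologue */
            int prologue_len = 10;

            /* backward jump from BPF insn 2 to insn 0: -10 units = -20 bytes */
            printf("branch bytes: %d\n", rv_offset(offset, prologue_len, 2, -3));
            return 0;
    }
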