| author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-26 21:28:35 +0300 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-26 21:28:35 +0300 |
| commit | 8b83369ddcb3fb9cab5c1088987ce477565bb630 (patch) | |
| tree | 825b1bc89bf731c15c5463befe4e9f14870f0811 /arch/riscv/mm | |
| parent | 8f47d753d4ecc6d3e306e22d885d6772625a3423 (diff) | |
| parent | d7fbcf40df86bb67193d9faf52138fc1202decb2 (diff) | |
| download | linux-8b83369ddcb3fb9cab5c1088987ce477565bb630.tar.xz | |
Merge tag 'riscv-for-linus-5.12-mw0' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux
Pull RISC-V updates from Palmer Dabbelt:
"A handful of new RISC-V related patches for this merge window:
- A check to ensure drivers are properly using uaccess. This isn't
manifesting with any of the drivers I'm currently using, but may
catch errors in new drivers.
- Some preliminary support for the FU740, along with the HiFive
Unmatched it will appear on.
- NUMA support for RISC-V, which involves making the arm64 code
generic.
- Support for kasan on the vmalloc region.
- A handful of new drivers for the Kendryte K210, along with the DT
plumbing required to boot on a handful of K210-based boards.
- Support for allocating ASIDs.
- Preliminary support for kernels larger than 128MiB.
- Various other improvements to our KASAN support, including the
utilization of huge pages when allocating the KASAN regions.
We may have already found a bug with the KASAN_VMALLOC code, but it's
passing my tests. There's a fix in the works, but that will probably
miss the merge window"
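The uaccess check called out in the first bullet lands in arch/riscv/mm/fault.c (see the diff further down): a kernel-mode fault on a user address while SR_SUM is clear now dies instead of silently faulting the page in, since the uaccess helpers are what set SR_SUM around legitimate user accesses. A condensed sketch of that hunk (error path only, not the full fault handler):

    /* Condensed from the do_page_fault() change in arch/riscv/mm/fault.c below. */
    if (!user_mode(regs) && addr < TASK_SIZE &&
        unlikely(!(regs->status & SR_SUM)))
            die_kernel_fault("access to user memory without uaccess routines",
                             addr, regs);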
* tag 'riscv-for-linus-5.12-mw0' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (75 commits)
riscv: Improve kasan population by using hugepages when possible
riscv: Improve kasan population function
riscv: Use KASAN_SHADOW_INIT define for kasan memory initialization
riscv: Improve kasan definitions
riscv: Get rid of MAX_EARLY_MAPPING_SIZE
soc: canaan: Sort the Makefile alphabetically
riscv: Disable KSAN_SANITIZE for vDSO
riscv: Remove unnecessary declaration
riscv: Add Canaan Kendryte K210 SD card defconfig
riscv: Update Canaan Kendryte K210 defconfig
riscv: Add Kendryte KD233 board device tree
riscv: Add SiPeed MAIXDUINO board device tree
riscv: Add SiPeed MAIX GO board device tree
riscv: Add SiPeed MAIX DOCK board device tree
riscv: Add SiPeed MAIX BiT board device tree
riscv: Update Canaan Kendryte K210 device tree
dt-bindings: add resets property to dw-apb-timer
dt-bindings: fix sifive gpio properties
dt-bindings: update sifive uart compatible string
dt-bindings: update sifive clint compatible string
...
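As for the NUMA item above, the mm-side change shows up in arch/riscv/mm/init.c, where paging_init() is split so that NUMA, sparsemem, and zone setup run in a new misc_mem_init() stage. Condensed from the init.c hunk below:

    /* New stage added by the init.c hunk below; arch_numa_init() is the
     * previously arm64-only NUMA init made generic by this series. */
    void __init misc_mem_init(void)
    {
            arch_numa_init();
            sparse_init();
            zone_sizes_init();
            memblock_dump_all();
    }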
Diffstat (limited to 'arch/riscv/mm')
-rw-r--r-- | arch/riscv/mm/Makefile     |   3 |
-rw-r--r-- | arch/riscv/mm/context.c    | 265 |
-rw-r--r-- | arch/riscv/mm/fault.c      |  38 |
-rw-r--r-- | arch/riscv/mm/init.c       | 108 |
-rw-r--r-- | arch/riscv/mm/kasan_init.c | 176 |
5 files changed, 467 insertions, 123 deletions
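Most of the context.c growth accounted for above is the new generation-based ASID allocator, which lets switch_mm() avoid flushing the whole local TLB on every switch. A condensed sketch of the fast path from set_mm_asid() in the hunk below (the locked slow path, generation rollover, and the no-ASID fallback are omitted):

    /* Condensed from set_mm_asid() in the context.c hunk below. */
    cntx = atomic_long_read(&mm->context.id);

    /* Fast path: this mm's ASID is from the current generation, so just
     * claim the per-CPU active_context slot without taking the lock. */
    old_active_cntx = atomic_long_read(&per_cpu(active_context, cpu));
    if (old_active_cntx &&
        ((cntx & ~asid_mask) == atomic_long_read(&current_version)) &&
        atomic_long_cmpxchg_relaxed(&per_cpu(active_context, cpu),
                                    old_active_cntx, cntx))
            goto switch_mm_fast;

    /* Slow path (not shown): take context_lock, allocate a fresh ASID via
     * __new_context(), and honour any pending TLB flush for this CPU. */

    switch_mm_fast:
            /* Program satp with the page-table root plus the ASID. */
            csr_write(CSR_SATP, virt_to_pfn(mm->pgd) |
                      ((cntx & asid_mask) << SATP_ASID_SHIFT) |
                      SATP_MODE);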
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index c0185e556ca5..7ebaef10ea1b 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -2,7 +2,8 @@ CFLAGS_init.o := -mcmodel=medany ifdef CONFIG_FTRACE -CFLAGS_REMOVE_init.o = -pg +CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE) endif KCOV_INSTRUMENT_init.o := n diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 613ec81a8979..68aa312fc352 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -2,13 +2,273 @@ /* * Copyright (C) 2012 Regents of the University of California * Copyright (C) 2017 SiFive + * Copyright (C) 2021 Western Digital Corporation or its affiliates. */ +#include <linux/bitops.h> +#include <linux/cpumask.h> #include <linux/mm.h> +#include <linux/percpu.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/static_key.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> #include <asm/mmu_context.h> +#ifdef CONFIG_MMU + +static DEFINE_STATIC_KEY_FALSE(use_asid_allocator); + +static unsigned long asid_bits; +static unsigned long num_asids; +static unsigned long asid_mask; + +static atomic_long_t current_version; + +static DEFINE_RAW_SPINLOCK(context_lock); +static cpumask_t context_tlb_flush_pending; +static unsigned long *context_asid_map; + +static DEFINE_PER_CPU(atomic_long_t, active_context); +static DEFINE_PER_CPU(unsigned long, reserved_context); + +static bool check_update_reserved_context(unsigned long cntx, + unsigned long newcntx) +{ + int cpu; + bool hit = false; + + /* + * Iterate over the set of reserved CONTEXT looking for a match. + * If we find one, then we can update our mm to use new CONTEXT + * (i.e. the same CONTEXT in the current_version) but we can't + * exit the loop early, since we need to ensure that all copies + * of the old CONTEXT are updated to reflect the mm. Failure to do + * so could result in us missing the reserved CONTEXT in a future + * version. + */ + for_each_possible_cpu(cpu) { + if (per_cpu(reserved_context, cpu) == cntx) { + hit = true; + per_cpu(reserved_context, cpu) = newcntx; + } + } + + return hit; +} + +static void __flush_context(void) +{ + int i; + unsigned long cntx; + + /* Must be called with context_lock held */ + lockdep_assert_held(&context_lock); + + /* Update the list of reserved ASIDs and the ASID bitmap. */ + bitmap_clear(context_asid_map, 0, num_asids); + + /* Mark already active ASIDs as used */ + for_each_possible_cpu(i) { + cntx = atomic_long_xchg_relaxed(&per_cpu(active_context, i), 0); + /* + * If this CPU has already been through a rollover, but + * hasn't run another task in the meantime, we must preserve + * its reserved CONTEXT, as this is the only trace we have of + * the process it is still running. 
+ */ + if (cntx == 0) + cntx = per_cpu(reserved_context, i); + + __set_bit(cntx & asid_mask, context_asid_map); + per_cpu(reserved_context, i) = cntx; + } + + /* Mark ASID #0 as used because it is used at boot-time */ + __set_bit(0, context_asid_map); + + /* Queue a TLB invalidation for each CPU on next context-switch */ + cpumask_setall(&context_tlb_flush_pending); +} + +static unsigned long __new_context(struct mm_struct *mm) +{ + static u32 cur_idx = 1; + unsigned long cntx = atomic_long_read(&mm->context.id); + unsigned long asid, ver = atomic_long_read(¤t_version); + + /* Must be called with context_lock held */ + lockdep_assert_held(&context_lock); + + if (cntx != 0) { + unsigned long newcntx = ver | (cntx & asid_mask); + + /* + * If our current CONTEXT was active during a rollover, we + * can continue to use it and this was just a false alarm. + */ + if (check_update_reserved_context(cntx, newcntx)) + return newcntx; + + /* + * We had a valid CONTEXT in a previous life, so try to + * re-use it if possible. + */ + if (!__test_and_set_bit(cntx & asid_mask, context_asid_map)) + return newcntx; + } + + /* + * Allocate a free ASID. If we can't find one then increment + * current_version and flush all ASIDs. + */ + asid = find_next_zero_bit(context_asid_map, num_asids, cur_idx); + if (asid != num_asids) + goto set_asid; + + /* We're out of ASIDs, so increment current_version */ + ver = atomic_long_add_return_relaxed(num_asids, ¤t_version); + + /* Flush everything */ + __flush_context(); + + /* We have more ASIDs than CPUs, so this will always succeed */ + asid = find_next_zero_bit(context_asid_map, num_asids, 1); + +set_asid: + __set_bit(asid, context_asid_map); + cur_idx = asid; + return asid | ver; +} + +static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) +{ + unsigned long flags; + bool need_flush_tlb = false; + unsigned long cntx, old_active_cntx; + + cntx = atomic_long_read(&mm->context.id); + + /* + * If our active_context is non-zero and the context matches the + * current_version, then we update the active_context entry with a + * relaxed cmpxchg. + * + * Following is how we handle racing with a concurrent rollover: + * + * - We get a zero back from the cmpxchg and end up waiting on the + * lock. Taking the lock synchronises with the rollover and so + * we are forced to see the updated verion. + * + * - We get a valid context back from the cmpxchg then we continue + * using old ASID because __flush_context() would have marked ASID + * of active_context as used and next context switch we will + * allocate new context. + */ + old_active_cntx = atomic_long_read(&per_cpu(active_context, cpu)); + if (old_active_cntx && + ((cntx & ~asid_mask) == atomic_long_read(¤t_version)) && + atomic_long_cmpxchg_relaxed(&per_cpu(active_context, cpu), + old_active_cntx, cntx)) + goto switch_mm_fast; + + raw_spin_lock_irqsave(&context_lock, flags); + + /* Check that our ASID belongs to the current_version. 
*/ + cntx = atomic_long_read(&mm->context.id); + if ((cntx & ~asid_mask) != atomic_long_read(¤t_version)) { + cntx = __new_context(mm); + atomic_long_set(&mm->context.id, cntx); + } + + if (cpumask_test_and_clear_cpu(cpu, &context_tlb_flush_pending)) + need_flush_tlb = true; + + atomic_long_set(&per_cpu(active_context, cpu), cntx); + + raw_spin_unlock_irqrestore(&context_lock, flags); + +switch_mm_fast: + csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | + ((cntx & asid_mask) << SATP_ASID_SHIFT) | + SATP_MODE); + + if (need_flush_tlb) + local_flush_tlb_all(); +} + +static void set_mm_noasid(struct mm_struct *mm) +{ + /* Switch the page table and blindly nuke entire local TLB */ + csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | SATP_MODE); + local_flush_tlb_all(); +} + +static inline void set_mm(struct mm_struct *mm, unsigned int cpu) +{ + if (static_branch_unlikely(&use_asid_allocator)) + set_mm_asid(mm, cpu); + else + set_mm_noasid(mm); +} + +static int asids_init(void) +{ + unsigned long old; + + /* Figure-out number of ASID bits in HW */ + old = csr_read(CSR_SATP); + asid_bits = old | (SATP_ASID_MASK << SATP_ASID_SHIFT); + csr_write(CSR_SATP, asid_bits); + asid_bits = (csr_read(CSR_SATP) >> SATP_ASID_SHIFT) & SATP_ASID_MASK; + asid_bits = fls_long(asid_bits); + csr_write(CSR_SATP, old); + + /* + * In the process of determining number of ASID bits (above) + * we polluted the TLB of current HART so let's do TLB flushed + * to remove unwanted TLB enteries. + */ + local_flush_tlb_all(); + + /* Pre-compute ASID details */ + num_asids = 1 << asid_bits; + asid_mask = num_asids - 1; + + /* + * Use ASID allocator only if number of HW ASIDs are + * at-least twice more than CPUs + */ + if (num_asids > (2 * num_possible_cpus())) { + atomic_long_set(¤t_version, num_asids); + + context_asid_map = kcalloc(BITS_TO_LONGS(num_asids), + sizeof(*context_asid_map), GFP_KERNEL); + if (!context_asid_map) + panic("Failed to allocate bitmap for %lu ASIDs\n", + num_asids); + + __set_bit(0, context_asid_map); + + static_branch_enable(&use_asid_allocator); + + pr_info("ASID allocator using %lu bits (%lu entries)\n", + asid_bits, num_asids); + } else { + pr_info("ASID allocator disabled\n"); + } + + return 0; +} +early_initcall(asids_init); +#else +static inline void set_mm(struct mm_struct *mm, unsigned int cpu) +{ + /* Nothing to do here when there is no MMU */ +} +#endif + /* * When necessary, performs a deferred icache flush for the given MM context, * on the local CPU. 
RISC-V has no direct mechanism for instruction cache @@ -58,10 +318,7 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, cpumask_clear_cpu(cpu, mm_cpumask(prev)); cpumask_set_cpu(cpu, mm_cpumask(next)); -#ifdef CONFIG_MMU - csr_write(CSR_SATP, virt_to_pfn(next->pgd) | SATP_MODE); - local_flush_tlb_all(); -#endif + set_mm(next, cpu); flush_icache_deferred(next); } diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 3c8b9e433c67..8f17519208c7 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -13,14 +13,30 @@ #include <linux/perf_event.h> #include <linux/signal.h> #include <linux/uaccess.h> +#include <linux/kprobes.h> #include <asm/ptrace.h> #include <asm/tlbflush.h> #include "../kernel/head.h" +static void die_kernel_fault(const char *msg, unsigned long addr, + struct pt_regs *regs) +{ + bust_spinlocks(1); + + pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", msg, + addr); + + bust_spinlocks(0); + die(regs, "Oops"); + do_exit(SIGKILL); +} + static inline void no_context(struct pt_regs *regs, unsigned long addr) { + const char *msg; + /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs)) return; @@ -29,12 +45,8 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr) * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ - bust_spinlocks(1); - pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", - (addr < PAGE_SIZE) ? "NULL pointer dereference" : - "paging request", addr); - die(regs, "Oops"); - do_exit(SIGKILL); + msg = (addr < PAGE_SIZE) ? "NULL pointer dereference" : "paging request"; + die_kernel_fault(msg, addr, regs); } static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) @@ -202,6 +214,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs) tsk = current; mm = tsk->mm; + if (kprobe_page_fault(regs, cause)) + return; + /* * Fault-in kernel-space virtual memory on-demand. * The 'reference' page table is init_mm.pgd. @@ -225,6 +240,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs) * in an atomic region, then we must not take the fault. 
*/ if (unlikely(faulthandler_disabled() || !mm)) { + tsk->thread.bad_cause = cause; no_context(regs, addr); return; } @@ -232,6 +248,11 @@ asmlinkage void do_page_fault(struct pt_regs *regs) if (user_mode(regs)) flags |= FAULT_FLAG_USER; + if (!user_mode(regs) && addr < TASK_SIZE && + unlikely(!(regs->status & SR_SUM))) + die_kernel_fault("access to user memory without uaccess routines", + addr, regs); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); if (cause == EXC_STORE_PAGE_FAULT) @@ -242,16 +263,19 @@ retry: mmap_read_lock(mm); vma = find_vma(mm, addr); if (unlikely(!vma)) { + tsk->thread.bad_cause = cause; bad_area(regs, mm, code, addr); return; } if (likely(vma->vm_start <= addr)) goto good_area; if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { + tsk->thread.bad_cause = cause; bad_area(regs, mm, code, addr); return; } if (unlikely(expand_stack(vma, addr))) { + tsk->thread.bad_cause = cause; bad_area(regs, mm, code, addr); return; } @@ -264,6 +288,7 @@ good_area: code = SEGV_ACCERR; if (unlikely(access_error(cause, vma))) { + tsk->thread.bad_cause = cause; bad_area(regs, mm, code, addr); return; } @@ -297,6 +322,7 @@ good_area: mmap_read_unlock(mm); if (unlikely(fault & VM_FAULT_ERROR)) { + tsk->thread.bad_cause = cause; mm_fault_error(regs, addr, fault); return; } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index f9f9568d689e..9dfb8b2dffd6 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -21,6 +21,7 @@ #include <asm/soc.h> #include <asm/io.h> #include <asm/ptdump.h> +#include <asm/numa.h> #include "../kernel/head.h" @@ -105,55 +106,6 @@ void __init mem_init(void) print_vm_layout(); } -#ifdef CONFIG_BLK_DEV_INITRD -static void __init setup_initrd(void) -{ - phys_addr_t start; - unsigned long size; - - /* Ignore the virtul address computed during device tree parsing */ - initrd_start = initrd_end = 0; - - if (!phys_initrd_size) - return; - /* - * Round the memory region to page boundaries as per free_initrd_mem() - * This allows us to detect whether the pages overlapping the initrd - * are in use, but more importantly, reserves the entire set of pages - * as we don't want these pages allocated for other purposes. 
- */ - start = round_down(phys_initrd_start, PAGE_SIZE); - size = phys_initrd_size + (phys_initrd_start - start); - size = round_up(size, PAGE_SIZE); - - if (!memblock_is_region_memory(start, size)) { - pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region", - (u64)start, size); - goto disable; - } - - if (memblock_is_region_reserved(start, size)) { - pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region\n", - (u64)start, size); - goto disable; - } - - memblock_reserve(start, size); - /* Now convert initrd to virtual addresses */ - initrd_start = (unsigned long)__va(phys_initrd_start); - initrd_end = initrd_start + phys_initrd_size; - initrd_below_start_ok = 1; - - pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n", - (void *)(initrd_start), size); - return; -disable: - pr_cont(" - disabling initrd\n"); - initrd_start = 0; - initrd_end = 0; -} -#endif /* CONFIG_BLK_DEV_INITRD */ - void __init setup_bootmem(void) { phys_addr_t mem_start = 0; @@ -198,20 +150,19 @@ void __init setup_bootmem(void) dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); -#ifdef CONFIG_BLK_DEV_INITRD - setup_initrd(); -#endif /* CONFIG_BLK_DEV_INITRD */ - + reserve_initrd_mem(); /* - * Avoid using early_init_fdt_reserve_self() since __pa() does + * If DTB is built in, no need to reserve its memblock. + * Otherwise, do reserve it but avoid using + * early_init_fdt_reserve_self() since __pa() does * not work for DTB pointers that are fixmap addresses */ - memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); + if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) + memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); early_init_fdt_scan_reserved_mem(); dma_contiguous_reserve(dma32_phys_limit); memblock_allow_resize(); - memblock_dump_all(); } #ifdef CONFIG_MMU @@ -226,8 +177,6 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; -#define MAX_EARLY_MAPPING_SIZE SZ_128M - pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) @@ -302,13 +251,7 @@ static void __init create_pte_mapping(pte_t *ptep, pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss; pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; - -#if MAX_EARLY_MAPPING_SIZE < PGDIR_SIZE -#define NUM_EARLY_PMDS 1UL -#else -#define NUM_EARLY_PMDS (1UL + MAX_EARLY_MAPPING_SIZE / PGDIR_SIZE) -#endif -pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE); +pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); static pmd_t *__init get_pmd_virt_early(phys_addr_t pa) @@ -330,11 +273,9 @@ static pmd_t *get_pmd_virt_late(phys_addr_t pa) static phys_addr_t __init alloc_pmd_early(uintptr_t va) { - uintptr_t pmd_num; + BUG_ON((va - PAGE_OFFSET) >> PGDIR_SHIFT); - pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT; - BUG_ON(pmd_num >= NUM_EARLY_PMDS); - return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD]; + return (uintptr_t)early_pmd; } static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va) @@ -452,7 +393,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) uintptr_t va, pa, end_va; uintptr_t load_pa = (uintptr_t)(&_start); uintptr_t load_sz = (uintptr_t)(&_end) - load_pa; - uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE); + uintptr_t map_size; #ifndef __PAGETABLE_PMD_FOLDED pmd_t 
fix_bmap_spmd, fix_bmap_epmd; #endif @@ -464,12 +405,11 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) * Enforce boot alignment requirements of RV32 and * RV64 by only allowing PMD or PGD mappings. */ - BUG_ON(map_size == PAGE_SIZE); + map_size = PMD_SIZE; /* Sanity check alignment and size */ BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0); BUG_ON((load_pa % map_size) != 0); - BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE); pt_ops.alloc_pte = alloc_pte_early; pt_ops.get_pte_virt = get_pte_virt_early; @@ -511,6 +451,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) /* Setup early PMD for DTB */ create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, (uintptr_t)early_dtb_pmd, PGDIR_SIZE, PAGE_TABLE); +#ifndef CONFIG_BUILTIN_DTB /* Create two consecutive PMD mappings for FDT early scan */ pa = dtb_pa & ~(PMD_SIZE - 1); create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, @@ -518,7 +459,11 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE, pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); +#else /* CONFIG_BUILTIN_DTB */ + dtb_early_va = __va(dtb_pa); +#endif /* CONFIG_BUILTIN_DTB */ #else +#ifndef CONFIG_BUILTIN_DTB /* Create two consecutive PGD mappings for FDT early scan */ pa = dtb_pa & ~(PGDIR_SIZE - 1); create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, @@ -526,6 +471,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE, pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL); dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1)); +#else /* CONFIG_BUILTIN_DTB */ + dtb_early_va = __va(dtb_pa); +#endif /* CONFIG_BUILTIN_DTB */ #endif dtb_early_pa = dtb_pa; @@ -616,15 +564,7 @@ static void __init setup_vm_final(void) #else asmlinkage void __init setup_vm(uintptr_t dtb_pa) { -#ifdef CONFIG_BUILTIN_DTB - dtb_early_va = soc_lookup_builtin_dtb(); - if (!dtb_early_va) { - /* Fallback to first available DTS */ - dtb_early_va = (void *) __dtb_start; - } -#else dtb_early_va = (void *)dtb_pa; -#endif dtb_early_pa = dtb_pa; } @@ -665,9 +605,15 @@ void mark_rodata_ro(void) void __init paging_init(void) { setup_vm_final(); - sparse_init(); setup_zero_page(); +} + +void __init misc_mem_init(void) +{ + arch_numa_init(); + sparse_init(); zone_sizes_init(); + memblock_dump_all(); } #ifdef CONFIG_SPARSEMEM_VMEMMAP diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index a8a2ffd9114a..3fc18f469efb 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -9,6 +9,19 @@ #include <linux/pgtable.h> #include <asm/tlbflush.h> #include <asm/fixmap.h> +#include <asm/pgalloc.h> + +static __init void *early_alloc(size_t size, int node) +{ + void *ptr = memblock_alloc_try_nid(size, size, + __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node); + + if (!ptr) + panic("%pS: Failed to allocate %zu bytes align=%zx nid=%d from=%llx\n", + __func__, size, size, node, (u64)__pa(MAX_DMA_ADDRESS)); + + return ptr; +} extern pgd_t early_pg_dir[PTRS_PER_PGD]; asmlinkage void __init kasan_early_init(void) @@ -47,40 +60,133 @@ asmlinkage void __init kasan_early_init(void) local_flush_tlb_all(); } -static void __init populate(void *start, void *end) +static void kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end) +{ + phys_addr_t phys_addr; + pte_t *ptep, *base_pte; + + if (pmd_none(*pmd)) + base_pte = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE); + else + base_pte = (pte_t 
*)pmd_page_vaddr(*pmd); + + ptep = base_pte + pte_index(vaddr); + + do { + if (pte_none(*ptep)) { + phys_addr = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); + set_pte(ptep, pfn_pte(PFN_DOWN(phys_addr), PAGE_KERNEL)); + } + } while (ptep++, vaddr += PAGE_SIZE, vaddr != end); + + set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE)); +} + +static void kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned long end) +{ + phys_addr_t phys_addr; + pmd_t *pmdp, *base_pmd; + unsigned long next; + + base_pmd = (pmd_t *)pgd_page_vaddr(*pgd); + if (base_pmd == lm_alias(kasan_early_shadow_pmd)) + base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); + + pmdp = base_pmd + pmd_index(vaddr); + + do { + next = pmd_addr_end(vaddr, end); + + if (pmd_none(*pmdp) && IS_ALIGNED(vaddr, PMD_SIZE) && (next - vaddr) >= PMD_SIZE) { + phys_addr = memblock_phys_alloc(PMD_SIZE, PMD_SIZE); + if (phys_addr) { + set_pmd(pmdp, pfn_pmd(PFN_DOWN(phys_addr), PAGE_KERNEL)); + continue; + } + } + + kasan_populate_pte(pmdp, vaddr, next); + } while (pmdp++, vaddr = next, vaddr != end); + + /* + * Wait for the whole PGD to be populated before setting the PGD in + * the page table, otherwise, if we did set the PGD before populating + * it entirely, memblock could allocate a page at a physical address + * where KASAN is not populated yet and then we'd get a page fault. + */ + set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); +} + +static void kasan_populate_pgd(unsigned long vaddr, unsigned long end) +{ + phys_addr_t phys_addr; + pgd_t *pgdp = pgd_offset_k(vaddr); + unsigned long next; + + do { + next = pgd_addr_end(vaddr, end); + + /* + * pgdp can't be none since kasan_early_init initialized all KASAN + * shadow region with kasan_early_shadow_pmd: if this is stillthe case, + * that means we can try to allocate a hugepage as a replacement. 
+ */ + if (pgd_page_vaddr(*pgdp) == (unsigned long)lm_alias(kasan_early_shadow_pmd) && + IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) { + phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE); + if (phys_addr) { + set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL)); + continue; + } + } + + kasan_populate_pmd(pgdp, vaddr, next); + } while (pgdp++, vaddr = next, vaddr != end); +} + +static void __init kasan_populate(void *start, void *end) { - unsigned long i, offset; unsigned long vaddr = (unsigned long)start & PAGE_MASK; unsigned long vend = PAGE_ALIGN((unsigned long)end); - unsigned long n_pages = (vend - vaddr) / PAGE_SIZE; - unsigned long n_ptes = - ((n_pages + PTRS_PER_PTE) & -PTRS_PER_PTE) / PTRS_PER_PTE; - unsigned long n_pmds = - ((n_ptes + PTRS_PER_PMD) & -PTRS_PER_PMD) / PTRS_PER_PMD; - - pte_t *pte = - memblock_alloc(n_ptes * PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE); - pmd_t *pmd = - memblock_alloc(n_pmds * PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); - pgd_t *pgd = pgd_offset_k(vaddr); - - for (i = 0; i < n_pages; i++) { - phys_addr_t phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); - set_pte(&pte[i], pfn_pte(PHYS_PFN(phys), PAGE_KERNEL)); - } - - for (i = 0, offset = 0; i < n_ptes; i++, offset += PTRS_PER_PTE) - set_pmd(&pmd[i], - pfn_pmd(PFN_DOWN(__pa(&pte[offset])), - __pgprot(_PAGE_TABLE))); - for (i = 0, offset = 0; i < n_pmds; i++, offset += PTRS_PER_PMD) - set_pgd(&pgd[i], - pfn_pgd(PFN_DOWN(__pa(&pmd[offset])), - __pgprot(_PAGE_TABLE))); + kasan_populate_pgd(vaddr, vend); local_flush_tlb_all(); - memset(start, 0, end - start); + memset(start, KASAN_SHADOW_INIT, end - start); +} + +void __init kasan_shallow_populate(void *start, void *end) +{ + unsigned long vaddr = (unsigned long)start & PAGE_MASK; + unsigned long vend = PAGE_ALIGN((unsigned long)end); + unsigned long pfn; + int index; + void *p; + pud_t *pud_dir, *pud_k; + pgd_t *pgd_dir, *pgd_k; + p4d_t *p4d_dir, *p4d_k; + + while (vaddr < vend) { + index = pgd_index(vaddr); + pfn = csr_read(CSR_SATP) & SATP_PPN; + pgd_dir = (pgd_t *)pfn_to_virt(pfn) + index; + pgd_k = init_mm.pgd + index; + pgd_dir = pgd_offset_k(vaddr); + set_pgd(pgd_dir, *pgd_k); + + p4d_dir = p4d_offset(pgd_dir, vaddr); + p4d_k = p4d_offset(pgd_k, vaddr); + + vaddr = (vaddr + PUD_SIZE) & PUD_MASK; + pud_dir = pud_offset(p4d_dir, vaddr); + pud_k = pud_offset(p4d_k, vaddr); + + if (pud_present(*pud_dir)) { + p = early_alloc(PAGE_SIZE, NUMA_NO_NODE); + pud_populate(&init_mm, pud_dir, p); + } + vaddr += PAGE_SIZE; + } } void __init kasan_init(void) @@ -90,7 +196,15 @@ void __init kasan_init(void) kasan_populate_early_shadow((void *)KASAN_SHADOW_START, (void *)kasan_mem_to_shadow((void *) - VMALLOC_END)); + VMEMMAP_END)); + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) + kasan_shallow_populate( + (void *)kasan_mem_to_shadow((void *)VMALLOC_START), + (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); + else + kasan_populate_early_shadow( + (void *)kasan_mem_to_shadow((void *)VMALLOC_START), + (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); for_each_mem_range(i, &_start, &_end) { void *start = (void *)__va(_start); @@ -99,7 +213,7 @@ void __init kasan_init(void) if (start >= end) break; - populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end)); + kasan_populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end)); }; for (i = 0; i < PTRS_PER_PTE; i++) @@ -108,6 +222,6 @@ void __init kasan_init(void) __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_ACCESSED))); - memset(kasan_early_shadow_page, 0, PAGE_SIZE); + 
memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE); init_task.kasan_depth = 0; }
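A closing note on the kasan_init.c rework above: kasan_populate() now walks the shadow range top-down and tries the largest mapping first, falling back to finer granularity only when alignment or allocation rules it out, which is the huge-page improvement mentioned in the pull text. Condensed from kasan_populate_pgd() in the hunk above:

    /* Condensed from kasan_populate_pgd() above: if this part of the shadow
     * is still covered by the early shadow and the range is PGDIR-aligned,
     * try one big allocation for the whole PGD-sized chunk... */
    if (pgd_page_vaddr(*pgdp) == (unsigned long)lm_alias(kasan_early_shadow_pmd) &&
        IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) {
            phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
            if (phys_addr) {
                    set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL));
                    continue;
            }
    }

    /* ...otherwise descend: kasan_populate_pmd() tries PMD-sized (2 MiB)
     * mappings the same way before falling back to 4 KiB PTEs. */
    kasan_populate_pmd(pgdp, vaddr, next);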