Diffstat (limited to 'arch/riscv/mm')
-rw-r--r--	arch/riscv/mm/fault.c      | 356
-rw-r--r--	arch/riscv/mm/init.c       | 276
-rw-r--r--	arch/riscv/mm/kasan_init.c |  10
-rw-r--r--	arch/riscv/mm/pageattr.c   |   6
-rw-r--r--	arch/riscv/mm/ptdump.c     |  48
5 files changed, 425 insertions(+), 271 deletions(-)
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 716d64e36f83..1359e21c0c62 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -19,6 +19,167 @@
 
 #include "../kernel/head.h"
 
+static inline void no_context(struct pt_regs *regs, unsigned long addr)
+{
+	/* Are we prepared to handle this kernel fault? */
+	if (fixup_exception(regs))
+		return;
+
+	/*
+	 * Oops. The kernel tried to access some bad page. We'll have to
+	 * terminate things with extreme prejudice.
+	 */
+	bust_spinlocks(1);
+	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
+		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
+		"paging request", addr);
+	die(regs, "Oops");
+	do_exit(SIGKILL);
+}
+
+static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
+{
+	if (fault & VM_FAULT_OOM) {
+		/*
+		 * We ran out of memory, call the OOM killer, and return to userspace
+		 * (which will retry the fault, or kill us if we got oom-killed).
+		 */
+		if (!user_mode(regs)) {
+			no_context(regs, addr);
+			return;
+		}
+		pagefault_out_of_memory();
+		return;
+	} else if (fault & VM_FAULT_SIGBUS) {
+		/* Kernel mode? Handle exceptions or die */
+		if (!user_mode(regs)) {
+			no_context(regs, addr);
+			return;
+		}
+		do_trap(regs, SIGBUS, BUS_ADRERR, addr);
+		return;
+	}
+	BUG();
+}
+
+static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
+{
+	/*
+	 * Something tried to access memory that isn't in our memory map.
+	 * Fix it, but check if it's kernel or user first.
+	 */
+	mmap_read_unlock(mm);
+	/* User mode accesses just cause a SIGSEGV */
+	if (user_mode(regs)) {
+		do_trap(regs, SIGSEGV, code, addr);
+		return;
+	}
+
+	no_context(regs, addr);
+}
+
+static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
+{
+	pgd_t *pgd, *pgd_k;
+	pud_t *pud, *pud_k;
+	p4d_t *p4d, *p4d_k;
+	pmd_t *pmd, *pmd_k;
+	pte_t *pte_k;
+	int index;
+
+	/* User mode accesses just cause a SIGSEGV */
+	if (user_mode(regs))
+		return do_trap(regs, SIGSEGV, code, addr);
+
+	/*
+	 * Synchronize this task's top level page-table
+	 * with the 'reference' page table.
+	 *
+	 * Do _not_ use "tsk->active_mm->pgd" here.
+	 * We might be inside an interrupt in the middle
+	 * of a task switch.
+	 */
+	index = pgd_index(addr);
+	pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
+	pgd_k = init_mm.pgd + index;
+
+	if (!pgd_present(*pgd_k)) {
+		no_context(regs, addr);
+		return;
+	}
+	set_pgd(pgd, *pgd_k);
+
+	p4d = p4d_offset(pgd, addr);
+	p4d_k = p4d_offset(pgd_k, addr);
+	if (!p4d_present(*p4d_k)) {
+		no_context(regs, addr);
+		return;
+	}
+
+	pud = pud_offset(p4d, addr);
+	pud_k = pud_offset(p4d_k, addr);
+	if (!pud_present(*pud_k)) {
+		no_context(regs, addr);
+		return;
+	}
+
+	/*
+	 * Since the vmalloc area is global, it is unnecessary
+	 * to copy individual PTEs
+	 */
+	pmd = pmd_offset(pud, addr);
+	pmd_k = pmd_offset(pud_k, addr);
+	if (!pmd_present(*pmd_k)) {
+		no_context(regs, addr);
+		return;
+	}
+	set_pmd(pmd, *pmd_k);
+
+	/*
+	 * Make sure the actual PTE exists as well to
+	 * catch kernel vmalloc-area accesses to non-mapped
+	 * addresses. If we don't do this, this will just
+	 * silently loop forever.
+	 */
+	pte_k = pte_offset_kernel(pmd_k, addr);
+	if (!pte_present(*pte_k)) {
+		no_context(regs, addr);
+		return;
+	}
+
+	/*
+	 * The kernel assumes that TLBs don't cache invalid
+	 * entries, but in RISC-V, SFENCE.VMA specifies an
+	 * ordering constraint, not a cache flush; it is
+	 * necessary even after writing invalid entries.
+	 */
+	local_flush_tlb_page(addr);
+}
+
+static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
+{
+	switch (cause) {
+	case EXC_INST_PAGE_FAULT:
+		if (!(vma->vm_flags & VM_EXEC)) {
+			return true;
+		}
+		break;
+	case EXC_LOAD_PAGE_FAULT:
+		if (!(vma->vm_flags & VM_READ)) {
+			return true;
+		}
+		break;
+	case EXC_STORE_PAGE_FAULT:
+		if (!(vma->vm_flags & VM_WRITE)) {
+			return true;
+		}
+		break;
+	default:
+		panic("%s: unhandled cause %lu", __func__, cause);
+	}
+	return false;
+}
+
 /*
  * This routine handles page faults.  It determines the address and the
  * problem, and then passes it off to one of the appropriate routines.
@@ -48,8 +209,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
 	 * only copy the information from the master page table,
 	 * nothing more.
 	 */
-	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
-		goto vmalloc_fault;
+	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
+		vmalloc_fault(regs, code, addr);
+		return;
+	}
 
 	/* Enable interrupts if they were enabled in the parent context. */
 	if (likely(regs->status & SR_PIE))
@@ -59,25 +222,37 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
 	 * If we're in an interrupt, have no user context, or are running
 	 * in an atomic region, then we must not take the fault.
 	 */
-	if (unlikely(faulthandler_disabled() || !mm))
-		goto no_context;
+	if (unlikely(faulthandler_disabled() || !mm)) {
+		no_context(regs, addr);
+		return;
+	}
 
 	if (user_mode(regs))
 		flags |= FAULT_FLAG_USER;
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
 
+	if (cause == EXC_STORE_PAGE_FAULT)
+		flags |= FAULT_FLAG_WRITE;
+	else if (cause == EXC_INST_PAGE_FAULT)
+		flags |= FAULT_FLAG_INSTRUCTION;
 retry:
 	mmap_read_lock(mm);
 	vma = find_vma(mm, addr);
-	if (unlikely(!vma))
-		goto bad_area;
+	if (unlikely(!vma)) {
+		bad_area(regs, mm, code, addr);
+		return;
+	}
 	if (likely(vma->vm_start <= addr))
 		goto good_area;
-	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
-		goto bad_area;
-	if (unlikely(expand_stack(vma, addr)))
-		goto bad_area;
+	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+		bad_area(regs, mm, code, addr);
+		return;
+	}
+	if (unlikely(expand_stack(vma, addr))) {
+		bad_area(regs, mm, code, addr);
+		return;
+	}
 
 	/*
 	 * Ok, we have a good vm_area for this memory access, so
@@ -86,22 +261,9 @@ retry:
 good_area:
 	code = SEGV_ACCERR;
 
-	switch (cause) {
-	case EXC_INST_PAGE_FAULT:
-		if (!(vma->vm_flags & VM_EXEC))
-			goto bad_area;
-		break;
-	case EXC_LOAD_PAGE_FAULT:
-		if (!(vma->vm_flags & VM_READ))
-			goto bad_area;
-		break;
-	case EXC_STORE_PAGE_FAULT:
-		if (!(vma->vm_flags & VM_WRITE))
-			goto bad_area;
-		flags |= FAULT_FLAG_WRITE;
-		break;
-	default:
-		panic("%s: unhandled cause %lu", __func__, cause);
+	if (unlikely(access_error(cause, vma))) {
+		bad_area(regs, mm, code, addr);
+		return;
 	}
 
 	/*
@@ -119,144 +281,22 @@ good_area:
 	if (fault_signal_pending(fault, regs))
 		return;
 
-	if (unlikely(fault & VM_FAULT_ERROR)) {
-		if (fault & VM_FAULT_OOM)
-			goto out_of_memory;
-		else if (fault & VM_FAULT_SIGBUS)
-			goto do_sigbus;
-		BUG();
-	}
-
-	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_RETRY) {
-			flags |= FAULT_FLAG_TRIED;
+	if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
+		flags |= FAULT_FLAG_TRIED;
 
-			/*
-			 * No need to mmap_read_unlock(mm) as we would
-			 * have already released it in __lock_page_or_retry
-			 * in mm/filemap.c.
-			 */
-			goto retry;
-		}
+		/*
+		 * No need to mmap_read_unlock(mm) as we would
+		 * have already released it in __lock_page_or_retry
+		 * in mm/filemap.c.
+		 */
+		goto retry;
 	}
 
 	mmap_read_unlock(mm);
-	return;
 
-	/*
-	 * Something tried to access memory that isn't in our memory map.
-	 * Fix it, but check if it's kernel or user first.
-	 */
-bad_area:
-	mmap_read_unlock(mm);
-	/* User mode accesses just cause a SIGSEGV */
-	if (user_mode(regs)) {
-		do_trap(regs, SIGSEGV, code, addr);
+	if (unlikely(fault & VM_FAULT_ERROR)) {
+		mm_fault_error(regs, addr, fault);
 		return;
 	}
-
-no_context:
-	/* Are we prepared to handle this kernel fault? */
-	if (fixup_exception(regs))
-		return;
-
-	/*
-	 * Oops. The kernel tried to access some bad page. We'll have to
-	 * terminate things with extreme prejudice.
-	 */
-	bust_spinlocks(1);
-	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
-		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
-		"paging request", addr);
-	die(regs, "Oops");
-	do_exit(SIGKILL);
-
-	/*
-	 * We ran out of memory, call the OOM killer, and return the userspace
-	 * (which will retry the fault, or kill us if we got oom-killed).
-	 */
-out_of_memory:
-	mmap_read_unlock(mm);
-	if (!user_mode(regs))
-		goto no_context;
-	pagefault_out_of_memory();
-	return;
-
-do_sigbus:
-	mmap_read_unlock(mm);
-	/* Kernel mode? Handle exceptions or die */
-	if (!user_mode(regs))
-		goto no_context;
-	do_trap(regs, SIGBUS, BUS_ADRERR, addr);
 	return;
-
-vmalloc_fault:
-	{
-		pgd_t *pgd, *pgd_k;
-		pud_t *pud, *pud_k;
-		p4d_t *p4d, *p4d_k;
-		pmd_t *pmd, *pmd_k;
-		pte_t *pte_k;
-		int index;
-
-		/* User mode accesses just cause a SIGSEGV */
-		if (user_mode(regs))
-			return do_trap(regs, SIGSEGV, code, addr);
-
-		/*
-		 * Synchronize this task's top level page-table
-		 * with the 'reference' page table.
-		 *
-		 * Do _not_ use "tsk->active_mm->pgd" here.
-		 * We might be inside an interrupt in the middle
-		 * of a task switch.
-		 */
-		index = pgd_index(addr);
-		pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
-		pgd_k = init_mm.pgd + index;
-
-		if (!pgd_present(*pgd_k))
-			goto no_context;
-		set_pgd(pgd, *pgd_k);
-
-		p4d = p4d_offset(pgd, addr);
-		p4d_k = p4d_offset(pgd_k, addr);
-		if (!p4d_present(*p4d_k))
-			goto no_context;
-
-		pud = pud_offset(p4d, addr);
-		pud_k = pud_offset(p4d_k, addr);
-		if (!pud_present(*pud_k))
-			goto no_context;
-
-		/*
-		 * Since the vmalloc area is global, it is unnecessary
-		 * to copy individual PTEs
-		 */
-		pmd = pmd_offset(pud, addr);
-		pmd_k = pmd_offset(pud_k, addr);
-		if (!pmd_present(*pmd_k))
-			goto no_context;
-		set_pmd(pmd, *pmd_k);
-
-		/*
-		 * Make sure the actual PTE exists as well to
-		 * catch kernel vmalloc-area accesses to non-mapped
-		 * addresses. If we don't do this, this will just
-		 * silently loop forever.
-		 */
-		pte_k = pte_offset_kernel(pmd_k, addr);
-		if (!pte_present(*pte_k))
-			goto no_context;
-
-		/*
-		 * The kernel assumes that TLBs don't cache invalid
-		 * entries, but in RISC-V, SFENCE.VMA specifies an
-		 * ordering constraint, not a cache flush; it is
-		 * necessary even after writing invalid entries.
-		 */
-		local_flush_tlb_page(addr);
-
-		return;
-	}
 }
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index f750e012dbe5..87c305c566ac 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -13,6 +13,7 @@
 #include <linux/of_fdt.h>
 #include <linux/libfdt.h>
 #include <linux/set_memory.h>
+#include <linux/dma-map-ops.h>
 
 #include <asm/fixmap.h>
 #include <asm/tlbflush.h>
@@ -28,15 +29,27 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
 EXPORT_SYMBOL(empty_zero_page);
 
 extern char _start[];
-void *dtb_early_va;
+#define DTB_EARLY_BASE_VA	PGDIR_SIZE
+void *dtb_early_va __initdata;
+uintptr_t dtb_early_pa __initdata;
+
+struct pt_alloc_ops {
+	pte_t *(*get_pte_virt)(phys_addr_t pa);
+	phys_addr_t (*alloc_pte)(uintptr_t va);
+#ifndef __PAGETABLE_PMD_FOLDED
+	pmd_t *(*get_pmd_virt)(phys_addr_t pa);
+	phys_addr_t (*alloc_pmd)(uintptr_t va);
+#endif
+};
+
+static phys_addr_t dma32_phys_limit __ro_after_init;
 
 static void __init zone_sizes_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, };
 
 #ifdef CONFIG_ZONE_DMA32
-	max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G,
-			(unsigned long) PFN_PHYS(max_low_pfn)));
+	max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit);
 #endif
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 
@@ -141,25 +154,23 @@ disable:
 }
 #endif /* CONFIG_BLK_DEV_INITRD */
 
-static phys_addr_t dtb_early_pa __initdata;
-
 void __init setup_bootmem(void)
 {
-	struct memblock_region *reg;
 	phys_addr_t mem_size = 0;
 	phys_addr_t total_mem = 0;
-	phys_addr_t mem_start, end = 0;
+	phys_addr_t mem_start, start, end = 0;
 	phys_addr_t vmlinux_end = __pa_symbol(&_end);
 	phys_addr_t vmlinux_start = __pa_symbol(&_start);
+	u64 i;
 
 	/* Find the memory region containing the kernel */
-	for_each_memblock(memory, reg) {
-		end = reg->base + reg->size;
+	for_each_mem_range(i, &start, &end) {
+		phys_addr_t size = end - start;
 		if (!total_mem)
-			mem_start = reg->base;
-		if (reg->base <= vmlinux_start && vmlinux_end <= end)
-			BUG_ON(reg->size == 0);
-		total_mem = total_mem + reg->size;
+			mem_start = start;
+		if (start <= vmlinux_start && vmlinux_end <= end)
+			BUG_ON(size == 0);
+		total_mem = total_mem + size;
 	}
 
 	/*
@@ -176,6 +187,7 @@ void __init setup_bootmem(void)
 	max_pfn = PFN_DOWN(memblock_end_of_DRAM());
 	max_low_pfn = max_pfn;
+	dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
 	set_max_mapnr(max_low_pfn);
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -189,20 +201,14 @@ void __init setup_bootmem(void)
 	memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
 
 	early_init_fdt_scan_reserved_mem();
+	dma_contiguous_reserve(dma32_phys_limit);
 	memblock_allow_resize();
 	memblock_dump_all();
-
-	for_each_memblock(memory, reg) {
-		unsigned long start_pfn = memblock_region_memory_base_pfn(reg);
-		unsigned long end_pfn = memblock_region_memory_end_pfn(reg);
-
-		memblock_set_node(PFN_PHYS(start_pfn),
-				  PFN_PHYS(end_pfn - start_pfn),
-				  &memblock.memory, 0);
-	}
 }
 
 #ifdef CONFIG_MMU
+static struct pt_alloc_ops pt_ops;
+
 unsigned long va_pa_offset;
 EXPORT_SYMBOL(va_pa_offset);
 unsigned long pfn_base;
@@ -211,7 +217,6 @@ EXPORT_SYMBOL(pfn_base);
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
-static bool mmu_enabled;
 
 #define MAX_EARLY_MAPPING_SIZE	SZ_128M
 
@@ -233,27 +238,46 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
 	local_flush_tlb_page(addr);
 }
 
-static pte_t *__init get_pte_virt(phys_addr_t pa)
+static inline pte_t *__init get_pte_virt_early(phys_addr_t pa)
+{
+	return (pte_t *)((uintptr_t)pa);
+}
+
+static inline pte_t *__init get_pte_virt_fixmap(phys_addr_t pa)
 {
-	if (mmu_enabled) {
-		clear_fixmap(FIX_PTE);
-		return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
-	} else {
-		return (pte_t *)((uintptr_t)pa);
-	}
+	clear_fixmap(FIX_PTE);
+	return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
 }
 
-static phys_addr_t __init alloc_pte(uintptr_t va)
+static inline pte_t *get_pte_virt_late(phys_addr_t pa)
+{
+	return (pte_t *) __va(pa);
+}
+
+static inline phys_addr_t __init alloc_pte_early(uintptr_t va)
 {
 	/*
 	 * We only create PMD or PGD early mappings so we
 	 * should never reach here with MMU disabled.
 	 */
-	BUG_ON(!mmu_enabled);
+	BUG();
+}
 
+static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va)
+{
 	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
 }
 
+static phys_addr_t alloc_pte_late(uintptr_t va)
+{
+	unsigned long vaddr;
+
+	vaddr = __get_free_page(GFP_KERNEL);
+	if (!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr)))
+		BUG();
+	return __pa(vaddr);
+}
+
 static void __init create_pte_mapping(pte_t *ptep,
 				      uintptr_t va, phys_addr_t pa,
 				      phys_addr_t sz, pgprot_t prot)
@@ -278,28 +302,46 @@ pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
 #endif
 pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE);
 
-static pmd_t *__init get_pmd_virt(phys_addr_t pa)
+static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
 {
-	if (mmu_enabled) {
-		clear_fixmap(FIX_PMD);
-		return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
-	} else {
-		return (pmd_t *)((uintptr_t)pa);
-	}
+	/* Before MMU is enabled */
+	return (pmd_t *)((uintptr_t)pa);
 }
 
-static phys_addr_t __init alloc_pmd(uintptr_t va)
+static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa)
 {
-	uintptr_t pmd_num;
+	clear_fixmap(FIX_PMD);
+	return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
+}
+
+static pmd_t *get_pmd_virt_late(phys_addr_t pa)
+{
+	return (pmd_t *) __va(pa);
+}
 
-	if (mmu_enabled)
-		return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+static phys_addr_t __init alloc_pmd_early(uintptr_t va)
+{
+	uintptr_t pmd_num;
 
 	pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
 	BUG_ON(pmd_num >= NUM_EARLY_PMDS);
 	return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD];
 }
 
+static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
+{
+	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static phys_addr_t alloc_pmd_late(uintptr_t va)
+{
+	unsigned long vaddr;
+
+	vaddr = __get_free_page(GFP_KERNEL);
+	BUG_ON(!vaddr);
+	return __pa(vaddr);
+}
+
 static void __init create_pmd_mapping(pmd_t *pmdp,
 				      uintptr_t va, phys_addr_t pa,
 				      phys_addr_t sz, pgprot_t prot)
@@ -315,34 +357,34 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
 	}
 
 	if (pmd_none(pmdp[pmd_idx])) {
-		pte_phys = alloc_pte(va);
+		pte_phys = pt_ops.alloc_pte(va);
 		pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE);
-		ptep = get_pte_virt(pte_phys);
+		ptep = pt_ops.get_pte_virt(pte_phys);
 		memset(ptep, 0, PAGE_SIZE);
 	} else {
 		pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx]));
-		ptep = get_pte_virt(pte_phys);
+		ptep = pt_ops.get_pte_virt(pte_phys);
 	}
 
 	create_pte_mapping(ptep, va, pa, sz, prot);
 }
 
 #define pgd_next_t		pmd_t
-#define alloc_pgd_next(__va)	alloc_pmd(__va)
-#define get_pgd_next_virt(__pa)	get_pmd_virt(__pa)
+#define alloc_pgd_next(__va)	pt_ops.alloc_pmd(__va)
+#define get_pgd_next_virt(__pa)	pt_ops.get_pmd_virt(__pa)
 #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
 	create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
 #define fixmap_pgd_next		fixmap_pmd
 #else
 #define pgd_next_t		pte_t
-#define alloc_pgd_next(__va)	alloc_pte(__va)
-#define get_pgd_next_virt(__pa)	get_pte_virt(__pa)
+#define alloc_pgd_next(__va)	pt_ops.alloc_pte(__va)
+#define get_pgd_next_virt(__pa)	pt_ops.get_pte_virt(__pa)
 #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
 	create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
 #define fixmap_pgd_next		fixmap_pte
 #endif
 
-static void __init create_pgd_mapping(pgd_t *pgdp,
+void __init create_pgd_mapping(pgd_t *pgdp,
 				      uintptr_t va, phys_addr_t pa,
 				      phys_addr_t sz, pgprot_t prot)
 {
@@ -398,10 +440,13 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
 
 asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 {
-	uintptr_t va, end_va;
+	uintptr_t va, pa, end_va;
 	uintptr_t load_pa = (uintptr_t)(&_start);
 	uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
 	uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE);
+#ifndef __PAGETABLE_PMD_FOLDED
+	pmd_t fix_bmap_spmd, fix_bmap_epmd;
+#endif
 
 	va_pa_offset = PAGE_OFFSET - load_pa;
 	pfn_base = PFN_DOWN(load_pa);
@@ -417,6 +462,12 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 	BUG_ON((load_pa % map_size) != 0);
 	BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE);
 
+	pt_ops.alloc_pte = alloc_pte_early;
+	pt_ops.get_pte_virt = get_pte_virt_early;
+#ifndef __PAGETABLE_PMD_FOLDED
+	pt_ops.alloc_pmd = alloc_pmd_early;
+	pt_ops.get_pmd_virt = get_pmd_virt_early;
+#endif
 	/* Setup early PGD for fixmap */
 	create_pgd_mapping(early_pg_dir, FIXADDR_START,
 			   (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
@@ -447,42 +498,71 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 				   load_pa + (va - PAGE_OFFSET),
 				   map_size, PAGE_KERNEL_EXEC);
 
-	/* Create fixed mapping for early FDT parsing */
-	end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE;
-	for (va = __fix_to_virt(FIX_FDT); va < end_va; va += PAGE_SIZE)
-		create_pte_mapping(fixmap_pte, va,
-				   dtb_pa + (va - __fix_to_virt(FIX_FDT)),
-				   PAGE_SIZE, PAGE_KERNEL);
-
-	/* Save pointer to DTB for early FDT parsing */
-	dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK);
-	/* Save physical address for memblock reservation */
+	/* Create two consecutive PGD mappings for FDT early scan */
+	pa = dtb_pa & ~(PGDIR_SIZE - 1);
+	create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
+			   pa, PGDIR_SIZE, PAGE_KERNEL);
+	create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE,
+			   pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL);
+	dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1));
	dtb_early_pa = dtb_pa;
+
+	/*
+	 * The boot-time fixmap can only handle a PMD_SIZE mapping. Thus, the
+	 * boot-ioremap range cannot span multiple PMDs.
+	 */
+	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+
+#ifndef __PAGETABLE_PMD_FOLDED
+	/*
+	 * The early ioremap fixmap is already created as it lies within the
+	 * first 2MB of the fixmap region. We always map PMD_SIZE, so both
+	 * FIX_BTMAP_END and FIX_BTMAP_BEGIN should lie in the same PMD.
+	 * Verify that and warn the user if not.
+	 */
+	fix_bmap_spmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_BEGIN))];
+	fix_bmap_epmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_END))];
+	if (pmd_val(fix_bmap_spmd) != pmd_val(fix_bmap_epmd)) {
+		WARN_ON(1);
+		pr_warn("fixmap btmap start [%08lx] != end [%08lx]\n",
+			pmd_val(fix_bmap_spmd), pmd_val(fix_bmap_epmd));
+		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+			fix_to_virt(FIX_BTMAP_BEGIN));
+		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
+			fix_to_virt(FIX_BTMAP_END));
+
+		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
+		pr_warn("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
+	}
+#endif
 }
 
 static void __init setup_vm_final(void)
 {
 	uintptr_t va, map_size;
 	phys_addr_t pa, start, end;
-	struct memblock_region *reg;
-
-	/* Set mmu_enabled flag */
-	mmu_enabled = true;
+	u64 i;
 
+	/*
+	 * The MMU is enabled at this point, but the page table setup is not complete
+	 * yet. Fixmap-based page table alloc functions should be used until then.
+	 */
+	pt_ops.alloc_pte = alloc_pte_fixmap;
+	pt_ops.get_pte_virt = get_pte_virt_fixmap;
+#ifndef __PAGETABLE_PMD_FOLDED
+	pt_ops.alloc_pmd = alloc_pmd_fixmap;
+	pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
+#endif
 	/* Setup swapper PGD for fixmap */
 	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
 			   __pa_symbol(fixmap_pgd_next),
 			   PGDIR_SIZE, PAGE_TABLE);
 
 	/* Map all memory banks */
-	for_each_memblock(memory, reg) {
-		start = reg->base;
-		end = start + reg->size;
-
+	for_each_mem_range(i, &start, &end) {
 		if (start >= end)
 			break;
-		if (memblock_is_nomap(reg))
-			continue;
 		if (start <= __pa(PAGE_OFFSET) &&
 		    __pa(PAGE_OFFSET) < end)
 			start = __pa(PAGE_OFFSET);
@@ -502,6 +582,14 @@ static void __init setup_vm_final(void)
 	/* Move to swapper page table */
 	csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
 	local_flush_tlb_all();
+
+	/* Generic page allocation functions must be used to set up the page table */
+	pt_ops.alloc_pte = alloc_pte_late;
+	pt_ops.get_pte_virt = get_pte_virt_late;
+#ifndef __PAGETABLE_PMD_FOLDED
+	pt_ops.alloc_pmd = alloc_pmd_late;
+	pt_ops.get_pmd_virt = get_pmd_virt_late;
+#endif
 }
 #else
 asmlinkage void __init setup_vm(uintptr_t dtb_pa)
@@ -524,48 +612,33 @@ static inline void setup_vm_final(void)
 #endif /* CONFIG_MMU */
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
-void mark_rodata_ro(void)
+void protect_kernel_text_data(void)
 {
-	unsigned long text_start = (unsigned long)_text;
-	unsigned long text_end = (unsigned long)_etext;
+	unsigned long text_start = (unsigned long)_start;
+	unsigned long init_text_start = (unsigned long)__init_text_begin;
+	unsigned long init_data_start = (unsigned long)__init_data_begin;
 	unsigned long rodata_start = (unsigned long)__start_rodata;
 	unsigned long data_start = (unsigned long)_data;
 	unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
 
-	set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
-	set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
+	set_memory_ro(text_start, (init_text_start - text_start) >> PAGE_SHIFT);
+	set_memory_ro(init_text_start, (init_data_start - init_text_start) >> PAGE_SHIFT);
+	set_memory_nx(init_data_start, (rodata_start - init_data_start) >> PAGE_SHIFT);
+	/* rodata section is marked readonly in mark_rodata_ro */
 	set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
 	set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
-
-	debug_checkwx();
 }
-#endif
 
-static void __init resource_init(void)
+void mark_rodata_ro(void)
 {
-	struct memblock_region *region;
-
-	for_each_memblock(memory, region) {
-		struct resource *res;
-
-		res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
-		if (!res)
-			panic("%s: Failed to allocate %zu bytes\n", __func__,
-			      sizeof(struct resource));
+	unsigned long rodata_start = (unsigned long)__start_rodata;
+	unsigned long data_start = (unsigned long)_data;
 
-		if (memblock_is_nomap(region)) {
-			res->name = "reserved";
-			res->flags = IORESOURCE_MEM;
-		} else {
-			res->name = "System RAM";
-			res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
-		}
-		res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
-		res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
+	set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
 
-		request_resource(&iomem_resource, res);
-	}
+	debug_checkwx();
 }
+#endif
 
 void __init paging_init(void)
 {
@@ -573,7 +646,6 @@ void __init paging_init(void)
 	sparse_init();
 	setup_zero_page();
 	zone_sizes_init();
-	resource_init();
 }
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index 87b4ab3d3c77..12ddd1f6bf70 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -85,16 +85,16 @@ static void __init populate(void *start, void *end)
 
 void __init kasan_init(void)
 {
-	struct memblock_region *reg;
-	unsigned long i;
+	phys_addr_t _start, _end;
+	u64 i;
 
 	kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
 				    (void *)kasan_mem_to_shadow((void *)
 								VMALLOC_END));
 
-	for_each_memblock(memory, reg) {
-		void *start = (void *)__va(reg->base);
-		void *end = (void *)__va(reg->base + reg->size);
+	for_each_mem_range(i, &_start, &_end) {
+		void *start = (void *)_start;
+		void *end = (void *)_end;
 
 		if (start >= end)
 			break;
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index 19fecb362d81..c64ec224fd0c 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -128,6 +128,12 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
 	return ret;
 }
 
+int set_memory_rw_nx(unsigned long addr, int numpages)
+{
+	return __set_memory(addr, numpages, __pgprot(_PAGE_READ | _PAGE_WRITE),
+			    __pgprot(_PAGE_EXEC));
+}
+
 int set_memory_ro(unsigned long addr, int numpages)
 {
 	return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c
index 0831c2e61a8f..ace74dec7492 100644
--- a/arch/riscv/mm/ptdump.c
+++ b/arch/riscv/mm/ptdump.c
@@ -3,6 +3,7 @@
  * Copyright (C) 2019 SiFive
  */
 
+#include <linux/efi.h>
 #include <linux/init.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
@@ -49,6 +50,14 @@ struct addr_marker {
 	const char *name;
 };
 
+/* Private information for debugfs */
+struct ptd_mm_info {
+	struct mm_struct		*mm;
+	const struct addr_marker	*markers;
+	unsigned long			base_addr;
+	unsigned long			end;
+};
+
 static struct addr_marker address_markers[] = {
 #ifdef CONFIG_KASAN
 	{KASAN_SHADOW_START,	"Kasan shadow start"},
@@ -68,6 +77,28 @@ static struct addr_marker address_markers[] = {
 	{-1, NULL},
 };
 
+static struct ptd_mm_info kernel_ptd_info = {
+	.mm = &init_mm,
+	.markers = address_markers,
+	.base_addr = KERN_VIRT_START,
+	.end = ULONG_MAX,
+};
+
+#ifdef CONFIG_EFI
+static struct addr_marker efi_addr_markers[] = {
+	{ 0,		"UEFI runtime start" },
+	{ SZ_1G,	"UEFI runtime end" },
+	{ -1,		NULL }
+};
+
+static struct ptd_mm_info efi_ptd_info = {
+	.mm = &efi_mm,
+	.markers = efi_addr_markers,
+	.base_addr = 0,
+	.end = SZ_2G,
+};
+#endif
+
 /* Page Table Entry */
 struct prot_bits {
 	u64 mask;
@@ -245,22 +276,22 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr,
 	}
 }
 
-static void ptdump_walk(struct seq_file *s)
+static void ptdump_walk(struct seq_file *s, struct ptd_mm_info *pinfo)
 {
 	struct pg_state st = {
 		.seq = s,
-		.marker = address_markers,
+		.marker = pinfo->markers,
 		.level = -1,
 		.ptdump = {
 			.note_page = note_page,
 			.range = (struct ptdump_range[]) {
-				{KERN_VIRT_START, ULONG_MAX},
+				{pinfo->base_addr, pinfo->end},
 				{0, 0}
 			}
 		}
 	};
 
-	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+	ptdump_walk_pgd(&st.ptdump, pinfo->mm, NULL);
 }
 
 void ptdump_check_wx(void)
@@ -293,7 +324,7 @@ void ptdump_check_wx(void)
 
 static int ptdump_show(struct seq_file *m, void *v)
 {
-	ptdump_walk(m);
+	ptdump_walk(m, m->private);
 
 	return 0;
 }
@@ -308,8 +339,13 @@ static int ptdump_init(void)
 		for (j = 0; j < ARRAY_SIZE(pte_bits); j++)
 			pg_level[i].mask |= pte_bits[j].mask;
 
-	debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
+	debugfs_create_file("kernel_page_tables", 0400, NULL, &kernel_ptd_info,
 			    &ptdump_fops);
+#ifdef CONFIG_EFI
+	if (efi_enabled(EFI_RUNTIME_SERVICES))
+		debugfs_create_file("efi_page_tables", 0400, NULL, &efi_ptd_info,
+				    &ptdump_fops);
+#endif
 
 	return 0;
 }