diff options
Diffstat (limited to 'arch/arm64/mm')
-rw-r--r-- | arch/arm64/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/arm64/mm/contpte.c | 2 | ||||
-rw-r--r-- | arch/arm64/mm/extable.c | 40 | ||||
-rw-r--r-- | arch/arm64/mm/fault.c | 34 | ||||
-rw-r--r-- | arch/arm64/mm/hugetlbpage.c | 160 | ||||
-rw-r--r-- | arch/arm64/mm/init.c | 38 | ||||
-rw-r--r-- | arch/arm64/mm/ioremap.c | 3 | ||||
-rw-r--r-- | arch/arm64/mm/kasan_init.c | 6 | ||||
-rw-r--r-- | arch/arm64/mm/mmap.c | 2 | ||||
-rw-r--r-- | arch/arm64/mm/mmu.c | 117 | ||||
-rw-r--r-- | arch/arm64/mm/pageattr.c | 6 | ||||
-rw-r--r-- | arch/arm64/mm/pgd.c | 4 | ||||
-rw-r--r-- | arch/arm64/mm/physaddr.c | 2 | ||||
-rw-r--r-- | arch/arm64/mm/proc.S | 23 | ||||
-rw-r--r-- | arch/arm64/mm/ptdump.c | 54 | ||||
-rw-r--r-- | arch/arm64/mm/trans_pgd.c | 9 |
16 files changed, 280 insertions, 222 deletions
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index fc92170a8f37..c26489cf96cd 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -5,7 +5,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ context.o proc.o pageattr.o fixmap.o obj-$(CONFIG_ARM64_CONTPTE) += contpte.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_CORE) += ptdump.o +obj-$(CONFIG_PTDUMP) += ptdump.o obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o obj-$(CONFIG_TRANS_TABLE) += trans_pgd-asm.o diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c index 55107d27d3f8..bcac4f55f9c1 100644 --- a/arch/arm64/mm/contpte.c +++ b/arch/arm64/mm/contpte.c @@ -335,7 +335,7 @@ int contpte_ptep_clear_flush_young(struct vm_area_struct *vma, * eliding the trailing DSB applies here. */ addr = ALIGN_DOWN(addr, CONT_PTE_SIZE); - __flush_tlb_range_nosync(vma, addr, addr + CONT_PTE_SIZE, + __flush_tlb_range_nosync(vma->vm_mm, addr, addr + CONT_PTE_SIZE, PAGE_SIZE, true, 3); } diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index 228d681a8715..6e0528831cd3 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -8,8 +8,33 @@ #include <linux/uaccess.h> #include <asm/asm-extable.h> +#include <asm/esr.h> #include <asm/ptrace.h> +static bool cpy_faulted_on_uaccess(const struct exception_table_entry *ex, + unsigned long esr) +{ + bool uaccess_is_write = FIELD_GET(EX_DATA_UACCESS_WRITE, ex->data); + bool fault_on_write = esr & ESR_ELx_WNR; + + return uaccess_is_write == fault_on_write; +} + +bool insn_may_access_user(unsigned long addr, unsigned long esr) +{ + const struct exception_table_entry *ex = search_exception_tables(addr); + + if (!ex) + return false; + + switch (ex->type) { + case EX_TYPE_UACCESS_CPY: + return cpy_faulted_on_uaccess(ex, esr); + default: + return true; + } +} + static inline unsigned long get_ex_fixup(const struct exception_table_entry *ex) { @@ -29,6 +54,17 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, return true; } +static bool ex_handler_uaccess_cpy(const struct exception_table_entry *ex, + struct pt_regs *regs, unsigned long esr) +{ + /* Do not fix up faults on kernel memory accesses */ + if (!cpy_faulted_on_uaccess(ex, esr)) + return false; + + regs->pc = get_ex_fixup(ex); + return true; +} + static bool ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex, struct pt_regs *regs) @@ -56,7 +92,7 @@ ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex, return true; } -bool fixup_exception(struct pt_regs *regs) +bool fixup_exception(struct pt_regs *regs, unsigned long esr) { const struct exception_table_entry *ex; @@ -70,6 +106,8 @@ bool fixup_exception(struct pt_regs *regs) case EX_TYPE_UACCESS_ERR_ZERO: case EX_TYPE_KACCESS_ERR_ZERO: return ex_handler_uaccess_err_zero(ex, regs); + case EX_TYPE_UACCESS_CPY: + return ex_handler_uaccess_cpy(ex, regs, esr); case EX_TYPE_LOAD_UNALIGNED_ZEROPAD: return ex_handler_load_unaligned_zeropad(ex, regs); } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ef63651099a9..11eb8d1adc84 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -375,7 +375,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr, * Are we prepared to handle this kernel fault? * We are almost certainly not prepared to handle instruction faults. */ - if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) + if (!is_el1_instruction_abort(esr) && fixup_exception(regs, esr)) return; if (WARN_RATELIMIT(is_spurious_el1_translation_fault(addr, esr, regs), @@ -487,17 +487,29 @@ static void do_bad_area(unsigned long far, unsigned long esr, } } -static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma, - unsigned int mm_flags) +static bool fault_from_pkey(struct vm_area_struct *vma, unsigned int mm_flags) { - unsigned long iss2 = ESR_ELx_ISS2(esr); - if (!system_supports_poe()) return false; - if (esr_fsc_is_permission_fault(esr) && (iss2 & ESR_ELx_Overlay)) - return true; - + /* + * We do not check whether an Overlay fault has occurred because we + * cannot make a decision based solely on its value: + * + * - If Overlay is set, a fault did occur due to POE, but it may be + * spurious in those cases where we update POR_EL0 without ISB (e.g. + * on context-switch). We would then need to manually check POR_EL0 + * against vma_pkey(vma), which is exactly what + * arch_vma_access_permitted() does. + * + * - If Overlay is not set, we may still need to report a pkey fault. + * This is the case if an access was made within a mapping but with no + * page mapped, and POR_EL0 forbids the access (according to + * vma_pkey()). Such access will result in a SIGSEGV regardless + * because core code checks arch_vma_access_permitted(), but in order + * to report the correct error code - SEGV_PKUERR - we must handle + * that case here. + */ return !arch_vma_access_permitted(vma, mm_flags & FAULT_FLAG_WRITE, mm_flags & FAULT_FLAG_INSTRUCTION, @@ -606,7 +618,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, die_kernel_fault("execution of user memory", addr, esr, regs); - if (!search_exception_tables(regs->pc)) + if (!insn_may_access_user(regs->pc, esr)) die_kernel_fault("access to user memory outside uaccess routines", addr, esr, regs); } @@ -635,7 +647,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, goto bad_area; } - if (fault_from_pkey(esr, vma, mm_flags)) { + if (fault_from_pkey(vma, mm_flags)) { pkey = vma_pkey(vma); vma_end_read(vma); fault = 0; @@ -679,7 +691,7 @@ retry: goto bad_area; } - if (fault_from_pkey(esr, vma, mm_flags)) { + if (fault_from_pkey(vma, mm_flags)) { pkey = vma_pkey(vma); mmap_read_unlock(mm); fault = 0; diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 3215adf48a1b..0c8737f4f2ce 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -100,20 +100,11 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr, static inline int num_contig_ptes(unsigned long size, size_t *pgsize) { - int contig_ptes = 0; + int contig_ptes = 1; *pgsize = size; switch (size) { -#ifndef __PAGETABLE_PMD_FOLDED - case PUD_SIZE: - if (pud_sect_supported()) - contig_ptes = 1; - break; -#endif - case PMD_SIZE: - contig_ptes = 1; - break; case CONT_PMD_SIZE: *pgsize = PMD_SIZE; contig_ptes = CONT_PMDS; @@ -122,6 +113,8 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize) *pgsize = PAGE_SIZE; contig_ptes = CONT_PTES; break; + default: + WARN_ON(!__hugetlb_valid_size(size)); } return contig_ptes; @@ -136,7 +129,7 @@ pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep) if (!pte_present(orig_pte) || !pte_cont(orig_pte)) return orig_pte; - ncontig = num_contig_ptes(page_size(pte_page(orig_pte)), &pgsize); + ncontig = find_num_contig(mm, addr, ptep, &pgsize); for (i = 0; i < ncontig; i++, ptep++) { pte_t pte = __ptep_get(ptep); @@ -163,24 +156,22 @@ static pte_t get_clear_contig(struct mm_struct *mm, unsigned long pgsize, unsigned long ncontig) { - pte_t orig_pte = __ptep_get(ptep); - unsigned long i; - - for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { - pte_t pte = __ptep_get_and_clear(mm, addr, ptep); - - /* - * If HW_AFDBM is enabled, then the HW could turn on - * the dirty or accessed bit for any page in the set, - * so check them all. - */ - if (pte_dirty(pte)) - orig_pte = pte_mkdirty(orig_pte); - - if (pte_young(pte)) - orig_pte = pte_mkyoung(orig_pte); + pte_t pte, tmp_pte; + bool present; + + pte = __ptep_get_and_clear_anysz(mm, ptep, pgsize); + present = pte_present(pte); + while (--ncontig) { + ptep++; + tmp_pte = __ptep_get_and_clear_anysz(mm, ptep, pgsize); + if (present) { + if (pte_dirty(tmp_pte)) + pte = pte_mkdirty(pte); + if (pte_young(tmp_pte)) + pte = pte_mkyoung(pte); + } } - return orig_pte; + return pte; } static pte_t get_clear_contig_flush(struct mm_struct *mm, @@ -191,8 +182,9 @@ static pte_t get_clear_contig_flush(struct mm_struct *mm, { pte_t orig_pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig); struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); + unsigned long end = addr + (pgsize * ncontig); - flush_tlb_range(&vma, addr, addr + (pgsize * ncontig)); + __flush_hugetlb_tlb_range(&vma, addr, end, pgsize, true); return orig_pte; } @@ -215,9 +207,12 @@ static void clear_flush(struct mm_struct *mm, unsigned long i, saddr = addr; for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) - __ptep_get_and_clear(mm, addr, ptep); + __ptep_get_and_clear_anysz(mm, ptep, pgsize); - flush_tlb_range(&vma, saddr, addr); + if (mm == &init_mm) + flush_tlb_kernel_range(saddr, addr); + else + __flush_hugetlb_tlb_range(&vma, saddr, addr, pgsize, true); } void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, @@ -226,30 +221,20 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, size_t pgsize; int i; int ncontig; - unsigned long pfn, dpfn; - pgprot_t hugeprot; ncontig = num_contig_ptes(sz, &pgsize); if (!pte_present(pte)) { for (i = 0; i < ncontig; i++, ptep++, addr += pgsize) - __set_ptes(mm, addr, ptep, pte, 1); - return; - } - - if (!pte_cont(pte)) { - __set_ptes(mm, addr, ptep, pte, 1); + __set_ptes_anysz(mm, ptep, pte, 1, pgsize); return; } - pfn = pte_pfn(pte); - dpfn = pgsize >> PAGE_SHIFT; - hugeprot = pte_pgprot(pte); - - clear_flush(mm, addr, ptep, pgsize, ncontig); + /* Only need to "break" if transitioning valid -> valid. */ + if (pte_cont(pte) && pte_valid(__ptep_get(ptep))) + clear_flush(mm, addr, ptep, pgsize, ncontig); - for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) - __set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1); + __set_ptes_anysz(mm, ptep, pte, ncontig, pgsize); } pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, @@ -342,7 +327,9 @@ unsigned long hugetlb_mask_last_page(struct hstate *h) switch (hp_size) { #ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: - return PGDIR_SIZE - PUD_SIZE; + if (pud_sect_supported()) + return PGDIR_SIZE - PUD_SIZE; + break; #endif case CONT_PMD_SIZE: return PUD_SIZE - CONT_PMD_SIZE; @@ -364,23 +351,21 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) switch (pagesize) { #ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: - entry = pud_pte(pud_mkhuge(pte_pud(entry))); + if (pud_sect_supported()) + return pud_pte(pud_mkhuge(pte_pud(entry))); break; #endif case CONT_PMD_SIZE: - entry = pmd_pte(pmd_mkcont(pte_pmd(entry))); - fallthrough; + return pmd_pte(pmd_mkhuge(pmd_mkcont(pte_pmd(entry)))); case PMD_SIZE: - entry = pmd_pte(pmd_mkhuge(pte_pmd(entry))); - break; + return pmd_pte(pmd_mkhuge(pte_pmd(entry))); case CONT_PTE_SIZE: - entry = pte_mkcont(entry); - break; + return pte_mkcont(entry); default: - pr_warn("%s: unrecognized huge page size 0x%lx\n", - __func__, pagesize); break; } + pr_warn("%s: unrecognized huge page size 0x%lx\n", + __func__, pagesize); return entry; } @@ -396,18 +381,13 @@ void huge_pte_clear(struct mm_struct *mm, unsigned long addr, __pte_clear(mm, addr, ptep); } -pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz) { int ncontig; size_t pgsize; - pte_t orig_pte = __ptep_get(ptep); - - if (!pte_cont(orig_pte)) - return __ptep_get_and_clear(mm, addr, ptep); - - ncontig = find_num_contig(mm, addr, ptep, &pgsize); + ncontig = num_contig_ptes(sz, &pgsize); return get_clear_contig(mm, addr, ptep, pgsize, ncontig); } @@ -444,23 +424,23 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty) { - int ncontig, i; + int ncontig; size_t pgsize = 0; - unsigned long pfn = pte_pfn(pte), dpfn; struct mm_struct *mm = vma->vm_mm; - pgprot_t hugeprot; pte_t orig_pte; + VM_WARN_ON(!pte_present(pte)); + if (!pte_cont(pte)) return __ptep_set_access_flags(vma, addr, ptep, pte, dirty); - ncontig = find_num_contig(mm, addr, ptep, &pgsize); - dpfn = pgsize >> PAGE_SHIFT; + ncontig = num_contig_ptes(huge_page_size(hstate_vma(vma)), &pgsize); if (!__cont_access_flags_changed(ptep, pte, ncontig)) return 0; orig_pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig); + VM_WARN_ON(!pte_present(orig_pte)); /* Make sure we don't lose the dirty or young state */ if (pte_dirty(orig_pte)) @@ -469,38 +449,31 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, if (pte_young(orig_pte)) pte = pte_mkyoung(pte); - hugeprot = pte_pgprot(pte); - for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) - __set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1); - + __set_ptes_anysz(mm, ptep, pte, ncontig, pgsize); return 1; } void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - unsigned long pfn, dpfn; - pgprot_t hugeprot; - int ncontig, i; + int ncontig; size_t pgsize; pte_t pte; - if (!pte_cont(__ptep_get(ptep))) { + pte = __ptep_get(ptep); + VM_WARN_ON(!pte_present(pte)); + + if (!pte_cont(pte)) { __ptep_set_wrprotect(mm, addr, ptep); return; } ncontig = find_num_contig(mm, addr, ptep, &pgsize); - dpfn = pgsize >> PAGE_SHIFT; pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig); pte = pte_wrprotect(pte); - hugeprot = pte_pgprot(pte); - pfn = pte_pfn(pte); - - for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) - __set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1); + __set_ptes_anysz(mm, ptep, pte, ncontig, pgsize); } pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, @@ -510,15 +483,24 @@ pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, size_t pgsize; int ncontig; - if (!pte_cont(__ptep_get(ptep))) - return ptep_clear_flush(vma, addr, ptep); - - ncontig = find_num_contig(mm, addr, ptep, &pgsize); + ncontig = num_contig_ptes(huge_page_size(hstate_vma(vma)), &pgsize); return get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig); } static int __init hugetlbpage_init(void) { + /* + * HugeTLB pages are supported on maximum four page table + * levels (PUD, CONT PMD, PMD, CONT PTE) for a given base + * page size, corresponding to hugetlb_add_hstate() calls + * here. + * + * HUGE_MAX_HSTATE should at least match maximum supported + * HugeTLB page sizes on the platform. Any new addition to + * supported HugeTLB page sizes will also require changing + * HUGE_MAX_HSTATE as well. + */ + BUILD_BUG_ON(HUGE_MAX_HSTATE < 4); if (pud_sect_supported()) hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); @@ -537,6 +519,8 @@ bool __init arch_hugetlb_valid_size(unsigned long size) pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { + unsigned long psize = huge_page_size(hstate_vma(vma)); + if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) { /* * Break-before-make (BBM) is required for all user space mappings @@ -546,7 +530,7 @@ pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr if (pte_user_exec(__ptep_get(ptep))) return huge_ptep_clear_flush(vma, addr, ptep); } - return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize); } void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index ccdef53872a0..0c8c35dd645e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -98,21 +98,19 @@ static void __init arch_reserve_crashkernel(void) { unsigned long long low_size = 0; unsigned long long crash_base, crash_size; - char *cmdline = boot_command_line; bool high = false; int ret; if (!IS_ENABLED(CONFIG_CRASH_RESERVE)) return; - ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, &low_size, &high); if (ret) return; - reserve_crashkernel_generic(cmdline, crash_size, crash_base, - low_size, high); + reserve_crashkernel_generic(crash_size, crash_base, low_size, high); } static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit) @@ -277,26 +275,6 @@ void __init arm64_memblock_init(void) } } - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { - extern u16 memstart_offset_seed; - u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); - int parange = cpuid_feature_extract_unsigned_field( - mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT); - s64 range = linear_region_size - - BIT(id_aa64mmfr0_parange_to_phys_shift(parange)); - - /* - * If the size of the linear region exceeds, by a sufficient - * margin, the size of the region that the physical memory can - * span, randomize the linear region as well. - */ - if (memstart_offset_seed > 0 && range >= (s64)ARM64_MEMSTART_ALIGN) { - range /= ARM64_MEMSTART_ALIGN; - memstart_addr -= ARM64_MEMSTART_ALIGN * - ((range * memstart_offset_seed) >> 16); - } - } - /* * Register the kernel text, kernel data, initrd, and initial * pagetables with memblock. @@ -309,8 +287,6 @@ void __init arm64_memblock_init(void) } early_init_fdt_scan_reserved_mem(); - - high_memory = __va(memblock_end_of_DRAM() - 1) + 1; } void __init bootmem_init(void) @@ -359,12 +335,7 @@ void __init bootmem_init(void) memblock_dump_all(); } -/* - * mem_init() marks the free areas in the mem_map and tells us how much memory - * is free. This is done after various parts of the system have claimed their - * memory after the kernel image. - */ -void __init mem_init(void) +void __init arch_mm_preinit(void) { unsigned int flags = SWIOTLB_VERBOSE; bool swiotlb = max_pfn > PFN_DOWN(arm64_dma_phys_limit); @@ -388,9 +359,6 @@ void __init mem_init(void) swiotlb_init(swiotlb, flags); swiotlb_update_mem_attributes(); - /* this will put all unused low memory onto the freelists */ - memblock_free_all(); - /* * Check boundaries twice: Some fundamental inconsistencies can be * detected at build time already. diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 6cc0b7e7eb03..10e246f11271 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -15,10 +15,9 @@ int arm64_ioremap_prot_hook_register(ioremap_prot_hook_t hook) } void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot) + pgprot_t pgprot) { unsigned long last_addr = phys_addr + size - 1; - pgprot_t pgprot = __pgprot(prot); /* Don't allow outside PHYS_MASK */ if (last_addr & ~PHYS_MASK) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index b65a29440a0c..d541ce45daeb 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -190,7 +190,7 @@ static void __init kasan_pgd_populate(unsigned long addr, unsigned long end, */ static bool __init root_level_aligned(u64 addr) { - int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 1) * (PAGE_SHIFT - 3); + int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 1) * PTDESC_TABLE_SHIFT; return (addr % (PAGE_SIZE << shift)) == 0; } @@ -245,7 +245,7 @@ static int __init root_level_idx(u64 addr) */ u64 vabits = IS_ENABLED(CONFIG_ARM64_64K_PAGES) ? VA_BITS : vabits_actual; - int shift = (ARM64_HW_PGTABLE_LEVELS(vabits) - 1) * (PAGE_SHIFT - 3); + int shift = (ARM64_HW_PGTABLE_LEVELS(vabits) - 1) * PTDESC_TABLE_SHIFT; return (addr & ~_PAGE_OFFSET(vabits)) >> (shift + PAGE_SHIFT); } @@ -269,7 +269,7 @@ static void __init clone_next_level(u64 addr, pgd_t *tmp_pg_dir, pud_t *pud) */ static int __init next_level_idx(u64 addr) { - int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 2) * (PAGE_SHIFT - 3); + int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 2) * PTDESC_TABLE_SHIFT; return (addr >> (shift + PAGE_SHIFT)) % PTRS_PER_PTE; } diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 07aeab8a7606..c86c348857c4 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -83,7 +83,7 @@ arch_initcall(adjust_protection_map); pgprot_t vm_get_page_prot(unsigned long vm_flags) { - pteval_t prot; + ptdesc_t prot; /* Short circuit GCS to avoid bloating the table. */ if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) { diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e2739b69e11b..00ab1d648db6 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -46,6 +46,13 @@ #define NO_CONT_MAPPINGS BIT(1) #define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */ +enum pgtable_type { + TABLE_PTE, + TABLE_PMD, + TABLE_PUD, + TABLE_P4D, +}; + u64 kimage_voffset __ro_after_init; EXPORT_SYMBOL(kimage_voffset); @@ -107,7 +114,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, } EXPORT_SYMBOL(phys_mem_access_prot); -static phys_addr_t __init early_pgtable_alloc(int shift) +static phys_addr_t __init early_pgtable_alloc(enum pgtable_type pgtable_type) { phys_addr_t phys; @@ -192,7 +199,7 @@ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), + phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags) { unsigned long next; @@ -207,7 +214,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, if (flags & NO_EXEC_MAPPINGS) pmdval |= PMD_TABLE_PXN; BUG_ON(!pgtable_alloc); - pte_phys = pgtable_alloc(PAGE_SHIFT); + pte_phys = pgtable_alloc(TABLE_PTE); ptep = pte_set_fixmap(pte_phys); init_clear_pgtable(ptep); ptep += pte_index(addr); @@ -243,7 +250,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), int flags) + phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags) { unsigned long next; @@ -277,7 +284,8 @@ static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), int flags) + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags) { unsigned long next; pud_t pud = READ_ONCE(*pudp); @@ -294,7 +302,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, if (flags & NO_EXEC_MAPPINGS) pudval |= PUD_TABLE_PXN; BUG_ON(!pgtable_alloc); - pmd_phys = pgtable_alloc(PMD_SHIFT); + pmd_phys = pgtable_alloc(TABLE_PMD); pmdp = pmd_set_fixmap(pmd_phys); init_clear_pgtable(pmdp); pmdp += pmd_index(addr); @@ -325,7 +333,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), + phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags) { unsigned long next; @@ -339,7 +347,7 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, if (flags & NO_EXEC_MAPPINGS) p4dval |= P4D_TABLE_PXN; BUG_ON(!pgtable_alloc); - pud_phys = pgtable_alloc(PUD_SHIFT); + pud_phys = pgtable_alloc(TABLE_PUD); pudp = pud_set_fixmap(pud_phys); init_clear_pgtable(pudp); pudp += pud_index(addr); @@ -383,7 +391,7 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), + phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags) { unsigned long next; @@ -397,7 +405,7 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, if (flags & NO_EXEC_MAPPINGS) pgdval |= PGD_TABLE_PXN; BUG_ON(!pgtable_alloc); - p4d_phys = pgtable_alloc(P4D_SHIFT); + p4d_phys = pgtable_alloc(TABLE_P4D); p4dp = p4d_set_fixmap(p4d_phys); init_clear_pgtable(p4dp); p4dp += p4d_index(addr); @@ -427,7 +435,7 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), + phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags) { unsigned long addr, end, next; @@ -455,7 +463,7 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), + phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags) { mutex_lock(&fixmap_lock); @@ -468,37 +476,48 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, extern __alias(__create_pgd_mapping_locked) void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), int flags); + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags); #endif -static phys_addr_t __pgd_pgtable_alloc(int shift) +static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm, + enum pgtable_type pgtable_type) { /* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */ - void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL & ~__GFP_ZERO); + struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & ~__GFP_ZERO, 0); + phys_addr_t pa; + + BUG_ON(!ptdesc); + pa = page_to_phys(ptdesc_page(ptdesc)); + + switch (pgtable_type) { + case TABLE_PTE: + BUG_ON(!pagetable_pte_ctor(mm, ptdesc)); + break; + case TABLE_PMD: + BUG_ON(!pagetable_pmd_ctor(mm, ptdesc)); + break; + case TABLE_PUD: + pagetable_pud_ctor(ptdesc); + break; + case TABLE_P4D: + pagetable_p4d_ctor(ptdesc); + break; + } - BUG_ON(!ptr); - return __pa(ptr); + return pa; } -static phys_addr_t pgd_pgtable_alloc(int shift) +static phys_addr_t __maybe_unused +pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type) { - phys_addr_t pa = __pgd_pgtable_alloc(shift); - struct ptdesc *ptdesc = page_ptdesc(phys_to_page(pa)); - - /* - * Call proper page table ctor in case later we need to - * call core mm functions like apply_to_page_range() on - * this pre-allocated page table. - * - * We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is - * folded, and if so pagetable_pte_ctor() becomes nop. - */ - if (shift == PAGE_SHIFT) - BUG_ON(!pagetable_pte_ctor(ptdesc)); - else if (shift == PMD_SHIFT) - BUG_ON(!pagetable_pmd_ctor(ptdesc)); + return __pgd_pgtable_alloc(&init_mm, pgtable_type); +} - return pa; +static phys_addr_t +pgd_pgtable_alloc_special_mm(enum pgtable_type pgtable_type) +{ + return __pgd_pgtable_alloc(NULL, pgtable_type); } /* @@ -530,7 +549,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(mm->pgd, phys, virt, size, prot, - pgd_pgtable_alloc, flags); + pgd_pgtable_alloc_special_mm, flags); } static void update_mapping_prot(phys_addr_t phys, unsigned long virt, @@ -744,7 +763,7 @@ static int __init map_entry_trampoline(void) memset(tramp_pg_dir, 0, PGD_SIZE); __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, entry_tramp_text_size(), prot, - __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS); + pgd_pgtable_alloc_init_mm, NO_BLOCK_MAPPINGS); /* Map both the text and data into the kernel page table */ for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++) @@ -1169,15 +1188,19 @@ int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, unsigned long addr, unsigned long next) { vmemmap_verify((pte_t *)pmdp, node, addr, next); - return 1; + + return pmd_sect(READ_ONCE(*pmdp)); } int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END)); + /* [start, end] should be within one section */ + WARN_ON_ONCE(end - start > PAGES_PER_SECTION * sizeof(struct page)); - if (!IS_ENABLED(CONFIG_ARM64_4K_PAGES)) + if (!IS_ENABLED(CONFIG_ARM64_4K_PAGES) || + (end - start < PAGES_PER_SECTION * sizeof(struct page))) return vmemmap_populate_basepages(start, end, node, altmap); else return vmemmap_populate_hugepages(start, end, node, altmap); @@ -1282,7 +1305,8 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr) next = addr; end = addr + PUD_SIZE; do { - pmd_free_pte_page(pmdp, next); + if (pmd_present(pmdp_get(pmdp))) + pmd_free_pte_page(pmdp, next); } while (pmdp++, next += PMD_SIZE, next != end); pud_clear(pudp); @@ -1346,7 +1370,7 @@ int arch_add_memory(int nid, u64 start, u64 size, flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), - size, params->pgprot, __pgd_pgtable_alloc, + size, params->pgprot, pgd_pgtable_alloc_init_mm, flags); memblock_clear_nomap(start, size); @@ -1357,7 +1381,8 @@ int arch_add_memory(int nid, u64 start, u64 size, __remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size); else { - max_pfn = PFN_UP(start + size); + /* Address of hotplugged memory can be smaller */ + max_pfn = max(max_pfn, PFN_UP(start + size)); max_low_pfn = max_pfn; } @@ -1554,9 +1579,8 @@ void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp) #ifdef CONFIG_ARCH_HAS_PKEYS int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val) { - u64 new_por = POE_RXW; + u64 new_por; u64 old_por; - u64 pkey_shift; if (!system_supports_poe()) return -ENOSPC; @@ -1570,7 +1594,7 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long i return -EINVAL; /* Set the bits we need in POR: */ - new_por = POE_RXW; + new_por = POE_RWX; if (init_val & PKEY_DISABLE_WRITE) new_por &= ~POE_W; if (init_val & PKEY_DISABLE_ACCESS) @@ -1581,12 +1605,11 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long i new_por &= ~POE_X; /* Shift the bits in to the correct place in POR for pkey: */ - pkey_shift = pkey * POR_BITS_PER_PKEY; - new_por <<= pkey_shift; + new_por = POR_ELx_PERM_PREP(pkey, new_por); /* Get old POR and mask off any old bits in place: */ old_por = read_sysreg_s(SYS_POR_EL0); - old_por &= ~(POE_MASK << pkey_shift); + old_por &= ~(POE_MASK << POR_ELx_PERM_SHIFT(pkey)); /* Write old part along with new part: */ write_sysreg_s(old_por | new_por, SYS_POR_EL0); diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 39fd1f7ff02a..04d4a8f676db 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -96,8 +96,8 @@ static int change_memory_common(unsigned long addr, int numpages, * we are operating on does not result in such splitting. * * Let's restrict ourselves to mappings created by vmalloc (or vmap). - * Those are guaranteed to consist entirely of page mappings, and - * splitting is never needed. + * Disallow VM_ALLOW_HUGE_VMAP mappings to guarantee that only page + * mappings are updated and splitting is never needed. * * So check whether the [addr, addr + size) interval is entirely * covered by precisely one VM area that has the VM_ALLOC flag set. @@ -105,7 +105,7 @@ static int change_memory_common(unsigned long addr, int numpages, area = find_vm_area((void *)addr); if (!area || end > (unsigned long)kasan_reset_tag(area->addr) + area->size || - !(area->flags & VM_ALLOC)) + ((area->flags & (VM_ALLOC | VM_ALLOW_HUGE_VMAP)) != VM_ALLOC)) return -EINVAL; if (!numpages) diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 0c501cabc238..8160cff35089 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -33,7 +33,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) gfp_t gfp = GFP_PGTABLE_USER; if (pgdir_is_page_size()) - return (pgd_t *)__get_free_page(gfp); + return __pgd_alloc(mm, 0); else return kmem_cache_alloc(pgd_cache, gfp); } @@ -41,7 +41,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) void pgd_free(struct mm_struct *mm, pgd_t *pgd) { if (pgdir_is_page_size()) - free_page((unsigned long)pgd); + __pgd_free(mm, pgd); else kmem_cache_free(pgd_cache, pgd); } diff --git a/arch/arm64/mm/physaddr.c b/arch/arm64/mm/physaddr.c index cde44c13dda1..7d94e09b01b3 100644 --- a/arch/arm64/mm/physaddr.c +++ b/arch/arm64/mm/physaddr.c @@ -10,7 +10,7 @@ phys_addr_t __virt_to_phys(unsigned long x) { WARN(!__is_lm_address(__tag_reset(x)), - "virt_to_phys used for non-linear address: %pK (%pS)\n", + "virt_to_phys used for non-linear address: %p (%pS)\n", (void *)x, (void *)x); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index b8edc5765441..54dccfd6aa11 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -501,7 +501,7 @@ alternative_else_nop_endif #ifdef CONFIG_ARM64_HAFT cmp x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT b.lt 1f - orr tcr2, tcr2, TCR2_EL1x_HAFT + orr tcr2, tcr2, TCR2_EL1_HAFT #endif /* CONFIG_ARM64_HAFT */ 1: #endif /* CONFIG_ARM64_HW_AFDBM */ @@ -512,27 +512,12 @@ alternative_else_nop_endif ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4 cbz x1, .Lskip_indirection - /* - * The PROT_* macros describing the various memory types may resolve to - * C expressions if they include the PTE_MAYBE_* macros, and so they - * can only be used from C code. The PIE_E* constants below are also - * defined in terms of those macros, but will mask out those - * PTE_MAYBE_* constants, whether they are set or not. So #define them - * as 0x0 here so we can evaluate the PIE_E* constants in asm context. - */ - -#define PTE_MAYBE_NG 0 -#define PTE_MAYBE_SHARED 0 - - mov_q x0, PIE_E0 + mov_q x0, PIE_E0_ASM msr REG_PIRE0_EL1, x0 - mov_q x0, PIE_E1 + mov_q x0, PIE_E1_ASM msr REG_PIR_EL1, x0 -#undef PTE_MAYBE_NG -#undef PTE_MAYBE_SHARED - - orr tcr2, tcr2, TCR2_EL1x_PIE + orr tcr2, tcr2, TCR2_EL1_PIE .Lskip_indirection: diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index 688fbe0271ca..421a5de806c6 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -80,8 +80,8 @@ static const struct ptdump_prot_bits pte_bits[] = { .set = "CON", .clear = " ", }, { - .mask = PTE_TABLE_BIT | PTE_VALID, - .val = PTE_VALID, + .mask = PMD_TYPE_MASK, + .val = PMD_TYPE_SECT, .set = "BLK", .clear = " ", }, { @@ -189,12 +189,12 @@ static void note_prot_wx(struct ptdump_pg_state *st, unsigned long addr) } void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, - u64 val) + pteval_t val) { struct ptdump_pg_state *st = container_of(pt_st, struct ptdump_pg_state, ptdump); struct ptdump_pg_level *pg_level = st->pg_level; static const char units[] = "KMGTPE"; - u64 prot = 0; + ptdesc_t prot = 0; /* check if the current level has been folded dynamically */ if (st->mm && ((level == 1 && mm_p4d_folded(st->mm)) || @@ -251,6 +251,38 @@ void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, } +void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte) +{ + note_page(pt_st, addr, 4, pte_val(pte)); +} + +void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd) +{ + note_page(pt_st, addr, 3, pmd_val(pmd)); +} + +void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud) +{ + note_page(pt_st, addr, 2, pud_val(pud)); +} + +void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d) +{ + note_page(pt_st, addr, 1, p4d_val(p4d)); +} + +void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd) +{ + note_page(pt_st, addr, 0, pgd_val(pgd)); +} + +void note_page_flush(struct ptdump_state *pt_st) +{ + pte_t pte_zero = {0}; + + note_page(pt_st, 0, -1, pte_val(pte_zero)); +} + void ptdump_walk(struct seq_file *s, struct ptdump_info *info) { unsigned long end = ~0UL; @@ -266,7 +298,12 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info) .pg_level = &kernel_pg_levels[0], .level = -1, .ptdump = { - .note_page = note_page, + .note_page_pte = note_page_pte, + .note_page_pmd = note_page_pmd, + .note_page_pud = note_page_pud, + .note_page_p4d = note_page_p4d, + .note_page_pgd = note_page_pgd, + .note_page_flush = note_page_flush, .range = (struct ptdump_range[]){ {info->base_addr, end}, {0, 0} @@ -303,7 +340,12 @@ bool ptdump_check_wx(void) .level = -1, .check_wx = true, .ptdump = { - .note_page = note_page, + .note_page_pte = note_page_pte, + .note_page_pmd = note_page_pmd, + .note_page_pud = note_page_pud, + .note_page_p4d = note_page_p4d, + .note_page_pgd = note_page_pgd, + .note_page_flush = note_page_flush, .range = (struct ptdump_range[]) { {_PAGE_OFFSET(vabits_actual), ~0UL}, {0, 0} diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c index 0f7b484cb2ff..18543b603c77 100644 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@ -57,7 +57,7 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr) */ BUG_ON(!pfn_valid(pte_pfn(pte))); - __set_pte(dst_ptep, pte_mkpresent(pte_mkwrite_novma(pte))); + __set_pte(dst_ptep, pte_mkvalid(pte_mkwrite_novma(pte))); } } @@ -162,6 +162,13 @@ static int copy_p4d(struct trans_pgd_info *info, pgd_t *dst_pgdp, unsigned long next; unsigned long addr = start; + if (pgd_none(READ_ONCE(*dst_pgdp))) { + dst_p4dp = trans_alloc(info); + if (!dst_p4dp) + return -ENOMEM; + pgd_populate(NULL, dst_pgdp, dst_p4dp); + } + dst_p4dp = p4d_offset(dst_pgdp, start); src_p4dp = p4d_offset(src_pgdp, start); do { |