summaryrefslogtreecommitdiff
path: root/arch/arm64/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/mm')
-rw-r--r--arch/arm64/mm/Makefile2
-rw-r--r--arch/arm64/mm/contpte.c2
-rw-r--r--arch/arm64/mm/extable.c40
-rw-r--r--arch/arm64/mm/fault.c34
-rw-r--r--arch/arm64/mm/hugetlbpage.c160
-rw-r--r--arch/arm64/mm/init.c38
-rw-r--r--arch/arm64/mm/ioremap.c3
-rw-r--r--arch/arm64/mm/kasan_init.c6
-rw-r--r--arch/arm64/mm/mmap.c2
-rw-r--r--arch/arm64/mm/mmu.c117
-rw-r--r--arch/arm64/mm/pageattr.c6
-rw-r--r--arch/arm64/mm/pgd.c4
-rw-r--r--arch/arm64/mm/physaddr.c2
-rw-r--r--arch/arm64/mm/proc.S23
-rw-r--r--arch/arm64/mm/ptdump.c54
-rw-r--r--arch/arm64/mm/trans_pgd.c9
16 files changed, 280 insertions, 222 deletions
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index fc92170a8f37..c26489cf96cd 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -5,7 +5,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
context.o proc.o pageattr.o fixmap.o
obj-$(CONFIG_ARM64_CONTPTE) += contpte.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
+obj-$(CONFIG_PTDUMP) += ptdump.o
obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o
obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o
obj-$(CONFIG_TRANS_TABLE) += trans_pgd-asm.o
diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c
index 55107d27d3f8..bcac4f55f9c1 100644
--- a/arch/arm64/mm/contpte.c
+++ b/arch/arm64/mm/contpte.c
@@ -335,7 +335,7 @@ int contpte_ptep_clear_flush_young(struct vm_area_struct *vma,
* eliding the trailing DSB applies here.
*/
addr = ALIGN_DOWN(addr, CONT_PTE_SIZE);
- __flush_tlb_range_nosync(vma, addr, addr + CONT_PTE_SIZE,
+ __flush_tlb_range_nosync(vma->vm_mm, addr, addr + CONT_PTE_SIZE,
PAGE_SIZE, true, 3);
}
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
index 228d681a8715..6e0528831cd3 100644
--- a/arch/arm64/mm/extable.c
+++ b/arch/arm64/mm/extable.c
@@ -8,8 +8,33 @@
#include <linux/uaccess.h>
#include <asm/asm-extable.h>
+#include <asm/esr.h>
#include <asm/ptrace.h>
+static bool cpy_faulted_on_uaccess(const struct exception_table_entry *ex,
+ unsigned long esr)
+{
+ bool uaccess_is_write = FIELD_GET(EX_DATA_UACCESS_WRITE, ex->data);
+ bool fault_on_write = esr & ESR_ELx_WNR;
+
+ return uaccess_is_write == fault_on_write;
+}
+
+bool insn_may_access_user(unsigned long addr, unsigned long esr)
+{
+ const struct exception_table_entry *ex = search_exception_tables(addr);
+
+ if (!ex)
+ return false;
+
+ switch (ex->type) {
+ case EX_TYPE_UACCESS_CPY:
+ return cpy_faulted_on_uaccess(ex, esr);
+ default:
+ return true;
+ }
+}
+
static inline unsigned long
get_ex_fixup(const struct exception_table_entry *ex)
{
@@ -29,6 +54,17 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
return true;
}
+static bool ex_handler_uaccess_cpy(const struct exception_table_entry *ex,
+ struct pt_regs *regs, unsigned long esr)
+{
+ /* Do not fix up faults on kernel memory accesses */
+ if (!cpy_faulted_on_uaccess(ex, esr))
+ return false;
+
+ regs->pc = get_ex_fixup(ex);
+ return true;
+}
+
static bool
ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
struct pt_regs *regs)
@@ -56,7 +92,7 @@ ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
return true;
}
-bool fixup_exception(struct pt_regs *regs)
+bool fixup_exception(struct pt_regs *regs, unsigned long esr)
{
const struct exception_table_entry *ex;
@@ -70,6 +106,8 @@ bool fixup_exception(struct pt_regs *regs)
case EX_TYPE_UACCESS_ERR_ZERO:
case EX_TYPE_KACCESS_ERR_ZERO:
return ex_handler_uaccess_err_zero(ex, regs);
+ case EX_TYPE_UACCESS_CPY:
+ return ex_handler_uaccess_cpy(ex, regs, esr);
case EX_TYPE_LOAD_UNALIGNED_ZEROPAD:
return ex_handler_load_unaligned_zeropad(ex, regs);
}
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index ef63651099a9..11eb8d1adc84 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -375,7 +375,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr,
* Are we prepared to handle this kernel fault?
* We are almost certainly not prepared to handle instruction faults.
*/
- if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
+ if (!is_el1_instruction_abort(esr) && fixup_exception(regs, esr))
return;
if (WARN_RATELIMIT(is_spurious_el1_translation_fault(addr, esr, regs),
@@ -487,17 +487,29 @@ static void do_bad_area(unsigned long far, unsigned long esr,
}
}
-static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma,
- unsigned int mm_flags)
+static bool fault_from_pkey(struct vm_area_struct *vma, unsigned int mm_flags)
{
- unsigned long iss2 = ESR_ELx_ISS2(esr);
-
if (!system_supports_poe())
return false;
- if (esr_fsc_is_permission_fault(esr) && (iss2 & ESR_ELx_Overlay))
- return true;
-
+ /*
+ * We do not check whether an Overlay fault has occurred because we
+ * cannot make a decision based solely on its value:
+ *
+ * - If Overlay is set, a fault did occur due to POE, but it may be
+ * spurious in those cases where we update POR_EL0 without ISB (e.g.
+ * on context-switch). We would then need to manually check POR_EL0
+ * against vma_pkey(vma), which is exactly what
+ * arch_vma_access_permitted() does.
+ *
+ * - If Overlay is not set, we may still need to report a pkey fault.
+ * This is the case if an access was made within a mapping but with no
+ * page mapped, and POR_EL0 forbids the access (according to
+ * vma_pkey()). Such access will result in a SIGSEGV regardless
+ * because core code checks arch_vma_access_permitted(), but in order
+ * to report the correct error code - SEGV_PKUERR - we must handle
+ * that case here.
+ */
return !arch_vma_access_permitted(vma,
mm_flags & FAULT_FLAG_WRITE,
mm_flags & FAULT_FLAG_INSTRUCTION,
@@ -606,7 +618,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
die_kernel_fault("execution of user memory",
addr, esr, regs);
- if (!search_exception_tables(regs->pc))
+ if (!insn_may_access_user(regs->pc, esr))
die_kernel_fault("access to user memory outside uaccess routines",
addr, esr, regs);
}
@@ -635,7 +647,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
goto bad_area;
}
- if (fault_from_pkey(esr, vma, mm_flags)) {
+ if (fault_from_pkey(vma, mm_flags)) {
pkey = vma_pkey(vma);
vma_end_read(vma);
fault = 0;
@@ -679,7 +691,7 @@ retry:
goto bad_area;
}
- if (fault_from_pkey(esr, vma, mm_flags)) {
+ if (fault_from_pkey(vma, mm_flags)) {
pkey = vma_pkey(vma);
mmap_read_unlock(mm);
fault = 0;
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 3215adf48a1b..0c8737f4f2ce 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -100,20 +100,11 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr,
static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
{
- int contig_ptes = 0;
+ int contig_ptes = 1;
*pgsize = size;
switch (size) {
-#ifndef __PAGETABLE_PMD_FOLDED
- case PUD_SIZE:
- if (pud_sect_supported())
- contig_ptes = 1;
- break;
-#endif
- case PMD_SIZE:
- contig_ptes = 1;
- break;
case CONT_PMD_SIZE:
*pgsize = PMD_SIZE;
contig_ptes = CONT_PMDS;
@@ -122,6 +113,8 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
*pgsize = PAGE_SIZE;
contig_ptes = CONT_PTES;
break;
+ default:
+ WARN_ON(!__hugetlb_valid_size(size));
}
return contig_ptes;
@@ -136,7 +129,7 @@ pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
if (!pte_present(orig_pte) || !pte_cont(orig_pte))
return orig_pte;
- ncontig = num_contig_ptes(page_size(pte_page(orig_pte)), &pgsize);
+ ncontig = find_num_contig(mm, addr, ptep, &pgsize);
for (i = 0; i < ncontig; i++, ptep++) {
pte_t pte = __ptep_get(ptep);
@@ -163,24 +156,22 @@ static pte_t get_clear_contig(struct mm_struct *mm,
unsigned long pgsize,
unsigned long ncontig)
{
- pte_t orig_pte = __ptep_get(ptep);
- unsigned long i;
-
- for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
- pte_t pte = __ptep_get_and_clear(mm, addr, ptep);
-
- /*
- * If HW_AFDBM is enabled, then the HW could turn on
- * the dirty or accessed bit for any page in the set,
- * so check them all.
- */
- if (pte_dirty(pte))
- orig_pte = pte_mkdirty(orig_pte);
-
- if (pte_young(pte))
- orig_pte = pte_mkyoung(orig_pte);
+ pte_t pte, tmp_pte;
+ bool present;
+
+ pte = __ptep_get_and_clear_anysz(mm, ptep, pgsize);
+ present = pte_present(pte);
+ while (--ncontig) {
+ ptep++;
+ tmp_pte = __ptep_get_and_clear_anysz(mm, ptep, pgsize);
+ if (present) {
+ if (pte_dirty(tmp_pte))
+ pte = pte_mkdirty(pte);
+ if (pte_young(tmp_pte))
+ pte = pte_mkyoung(pte);
+ }
}
- return orig_pte;
+ return pte;
}
static pte_t get_clear_contig_flush(struct mm_struct *mm,
@@ -191,8 +182,9 @@ static pte_t get_clear_contig_flush(struct mm_struct *mm,
{
pte_t orig_pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig);
struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
+ unsigned long end = addr + (pgsize * ncontig);
- flush_tlb_range(&vma, addr, addr + (pgsize * ncontig));
+ __flush_hugetlb_tlb_range(&vma, addr, end, pgsize, true);
return orig_pte;
}
@@ -215,9 +207,12 @@ static void clear_flush(struct mm_struct *mm,
unsigned long i, saddr = addr;
for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
- __ptep_get_and_clear(mm, addr, ptep);
+ __ptep_get_and_clear_anysz(mm, ptep, pgsize);
- flush_tlb_range(&vma, saddr, addr);
+ if (mm == &init_mm)
+ flush_tlb_kernel_range(saddr, addr);
+ else
+ __flush_hugetlb_tlb_range(&vma, saddr, addr, pgsize, true);
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -226,30 +221,20 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
size_t pgsize;
int i;
int ncontig;
- unsigned long pfn, dpfn;
- pgprot_t hugeprot;
ncontig = num_contig_ptes(sz, &pgsize);
if (!pte_present(pte)) {
for (i = 0; i < ncontig; i++, ptep++, addr += pgsize)
- __set_ptes(mm, addr, ptep, pte, 1);
- return;
- }
-
- if (!pte_cont(pte)) {
- __set_ptes(mm, addr, ptep, pte, 1);
+ __set_ptes_anysz(mm, ptep, pte, 1, pgsize);
return;
}
- pfn = pte_pfn(pte);
- dpfn = pgsize >> PAGE_SHIFT;
- hugeprot = pte_pgprot(pte);
-
- clear_flush(mm, addr, ptep, pgsize, ncontig);
+ /* Only need to "break" if transitioning valid -> valid. */
+ if (pte_cont(pte) && pte_valid(__ptep_get(ptep)))
+ clear_flush(mm, addr, ptep, pgsize, ncontig);
- for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
- __set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1);
+ __set_ptes_anysz(mm, ptep, pte, ncontig, pgsize);
}
pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -342,7 +327,9 @@ unsigned long hugetlb_mask_last_page(struct hstate *h)
switch (hp_size) {
#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SIZE:
- return PGDIR_SIZE - PUD_SIZE;
+ if (pud_sect_supported())
+ return PGDIR_SIZE - PUD_SIZE;
+ break;
#endif
case CONT_PMD_SIZE:
return PUD_SIZE - CONT_PMD_SIZE;
@@ -364,23 +351,21 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
switch (pagesize) {
#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SIZE:
- entry = pud_pte(pud_mkhuge(pte_pud(entry)));
+ if (pud_sect_supported())
+ return pud_pte(pud_mkhuge(pte_pud(entry)));
break;
#endif
case CONT_PMD_SIZE:
- entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
- fallthrough;
+ return pmd_pte(pmd_mkhuge(pmd_mkcont(pte_pmd(entry))));
case PMD_SIZE:
- entry = pmd_pte(pmd_mkhuge(pte_pmd(entry)));
- break;
+ return pmd_pte(pmd_mkhuge(pte_pmd(entry)));
case CONT_PTE_SIZE:
- entry = pte_mkcont(entry);
- break;
+ return pte_mkcont(entry);
default:
- pr_warn("%s: unrecognized huge page size 0x%lx\n",
- __func__, pagesize);
break;
}
+ pr_warn("%s: unrecognized huge page size 0x%lx\n",
+ __func__, pagesize);
return entry;
}
@@ -396,18 +381,13 @@ void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
__pte_clear(mm, addr, ptep);
}
-pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long sz)
{
int ncontig;
size_t pgsize;
- pte_t orig_pte = __ptep_get(ptep);
-
- if (!pte_cont(orig_pte))
- return __ptep_get_and_clear(mm, addr, ptep);
-
- ncontig = find_num_contig(mm, addr, ptep, &pgsize);
+ ncontig = num_contig_ptes(sz, &pgsize);
return get_clear_contig(mm, addr, ptep, pgsize, ncontig);
}
@@ -444,23 +424,23 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty)
{
- int ncontig, i;
+ int ncontig;
size_t pgsize = 0;
- unsigned long pfn = pte_pfn(pte), dpfn;
struct mm_struct *mm = vma->vm_mm;
- pgprot_t hugeprot;
pte_t orig_pte;
+ VM_WARN_ON(!pte_present(pte));
+
if (!pte_cont(pte))
return __ptep_set_access_flags(vma, addr, ptep, pte, dirty);
- ncontig = find_num_contig(mm, addr, ptep, &pgsize);
- dpfn = pgsize >> PAGE_SHIFT;
+ ncontig = num_contig_ptes(huge_page_size(hstate_vma(vma)), &pgsize);
if (!__cont_access_flags_changed(ptep, pte, ncontig))
return 0;
orig_pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
+ VM_WARN_ON(!pte_present(orig_pte));
/* Make sure we don't lose the dirty or young state */
if (pte_dirty(orig_pte))
@@ -469,38 +449,31 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
if (pte_young(orig_pte))
pte = pte_mkyoung(pte);
- hugeprot = pte_pgprot(pte);
- for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
- __set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1);
-
+ __set_ptes_anysz(mm, ptep, pte, ncontig, pgsize);
return 1;
}
void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- unsigned long pfn, dpfn;
- pgprot_t hugeprot;
- int ncontig, i;
+ int ncontig;
size_t pgsize;
pte_t pte;
- if (!pte_cont(__ptep_get(ptep))) {
+ pte = __ptep_get(ptep);
+ VM_WARN_ON(!pte_present(pte));
+
+ if (!pte_cont(pte)) {
__ptep_set_wrprotect(mm, addr, ptep);
return;
}
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
- dpfn = pgsize >> PAGE_SHIFT;
pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
pte = pte_wrprotect(pte);
- hugeprot = pte_pgprot(pte);
- pfn = pte_pfn(pte);
-
- for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
- __set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1);
+ __set_ptes_anysz(mm, ptep, pte, ncontig, pgsize);
}
pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
@@ -510,15 +483,24 @@ pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
size_t pgsize;
int ncontig;
- if (!pte_cont(__ptep_get(ptep)))
- return ptep_clear_flush(vma, addr, ptep);
-
- ncontig = find_num_contig(mm, addr, ptep, &pgsize);
+ ncontig = num_contig_ptes(huge_page_size(hstate_vma(vma)), &pgsize);
return get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
}
static int __init hugetlbpage_init(void)
{
+ /*
+ * HugeTLB pages are supported on maximum four page table
+ * levels (PUD, CONT PMD, PMD, CONT PTE) for a given base
+ * page size, corresponding to hugetlb_add_hstate() calls
+ * here.
+ *
+ * HUGE_MAX_HSTATE should at least match maximum supported
+ * HugeTLB page sizes on the platform. Any new addition to
+ * supported HugeTLB page sizes will also require changing
+ * HUGE_MAX_HSTATE as well.
+ */
+ BUILD_BUG_ON(HUGE_MAX_HSTATE < 4);
if (pud_sect_supported())
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
@@ -537,6 +519,8 @@ bool __init arch_hugetlb_valid_size(unsigned long size)
pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
+ unsigned long psize = huge_page_size(hstate_vma(vma));
+
if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) {
/*
* Break-before-make (BBM) is required for all user space mappings
@@ -546,7 +530,7 @@ pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr
if (pte_user_exec(__ptep_get(ptep)))
return huge_ptep_clear_flush(vma, addr, ptep);
}
- return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize);
}
void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index ccdef53872a0..0c8c35dd645e 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -98,21 +98,19 @@ static void __init arch_reserve_crashkernel(void)
{
unsigned long long low_size = 0;
unsigned long long crash_base, crash_size;
- char *cmdline = boot_command_line;
bool high = false;
int ret;
if (!IS_ENABLED(CONFIG_CRASH_RESERVE))
return;
- ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
+ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
&crash_size, &crash_base,
&low_size, &high);
if (ret)
return;
- reserve_crashkernel_generic(cmdline, crash_size, crash_base,
- low_size, high);
+ reserve_crashkernel_generic(crash_size, crash_base, low_size, high);
}
static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit)
@@ -277,26 +275,6 @@ void __init arm64_memblock_init(void)
}
}
- if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
- extern u16 memstart_offset_seed;
- u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
- int parange = cpuid_feature_extract_unsigned_field(
- mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT);
- s64 range = linear_region_size -
- BIT(id_aa64mmfr0_parange_to_phys_shift(parange));
-
- /*
- * If the size of the linear region exceeds, by a sufficient
- * margin, the size of the region that the physical memory can
- * span, randomize the linear region as well.
- */
- if (memstart_offset_seed > 0 && range >= (s64)ARM64_MEMSTART_ALIGN) {
- range /= ARM64_MEMSTART_ALIGN;
- memstart_addr -= ARM64_MEMSTART_ALIGN *
- ((range * memstart_offset_seed) >> 16);
- }
- }
-
/*
* Register the kernel text, kernel data, initrd, and initial
* pagetables with memblock.
@@ -309,8 +287,6 @@ void __init arm64_memblock_init(void)
}
early_init_fdt_scan_reserved_mem();
-
- high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
}
void __init bootmem_init(void)
@@ -359,12 +335,7 @@ void __init bootmem_init(void)
memblock_dump_all();
}
-/*
- * mem_init() marks the free areas in the mem_map and tells us how much memory
- * is free. This is done after various parts of the system have claimed their
- * memory after the kernel image.
- */
-void __init mem_init(void)
+void __init arch_mm_preinit(void)
{
unsigned int flags = SWIOTLB_VERBOSE;
bool swiotlb = max_pfn > PFN_DOWN(arm64_dma_phys_limit);
@@ -388,9 +359,6 @@ void __init mem_init(void)
swiotlb_init(swiotlb, flags);
swiotlb_update_mem_attributes();
- /* this will put all unused low memory onto the freelists */
- memblock_free_all();
-
/*
* Check boundaries twice: Some fundamental inconsistencies can be
* detected at build time already.
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index 6cc0b7e7eb03..10e246f11271 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -15,10 +15,9 @@ int arm64_ioremap_prot_hook_register(ioremap_prot_hook_t hook)
}
void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
- unsigned long prot)
+ pgprot_t pgprot)
{
unsigned long last_addr = phys_addr + size - 1;
- pgprot_t pgprot = __pgprot(prot);
/* Don't allow outside PHYS_MASK */
if (last_addr & ~PHYS_MASK)
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index b65a29440a0c..d541ce45daeb 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -190,7 +190,7 @@ static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
*/
static bool __init root_level_aligned(u64 addr)
{
- int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 1) * (PAGE_SHIFT - 3);
+ int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 1) * PTDESC_TABLE_SHIFT;
return (addr % (PAGE_SIZE << shift)) == 0;
}
@@ -245,7 +245,7 @@ static int __init root_level_idx(u64 addr)
*/
u64 vabits = IS_ENABLED(CONFIG_ARM64_64K_PAGES) ? VA_BITS
: vabits_actual;
- int shift = (ARM64_HW_PGTABLE_LEVELS(vabits) - 1) * (PAGE_SHIFT - 3);
+ int shift = (ARM64_HW_PGTABLE_LEVELS(vabits) - 1) * PTDESC_TABLE_SHIFT;
return (addr & ~_PAGE_OFFSET(vabits)) >> (shift + PAGE_SHIFT);
}
@@ -269,7 +269,7 @@ static void __init clone_next_level(u64 addr, pgd_t *tmp_pg_dir, pud_t *pud)
*/
static int __init next_level_idx(u64 addr)
{
- int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 2) * (PAGE_SHIFT - 3);
+ int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 2) * PTDESC_TABLE_SHIFT;
return (addr >> (shift + PAGE_SHIFT)) % PTRS_PER_PTE;
}
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 07aeab8a7606..c86c348857c4 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -83,7 +83,7 @@ arch_initcall(adjust_protection_map);
pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
- pteval_t prot;
+ ptdesc_t prot;
/* Short circuit GCS to avoid bloating the table. */
if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) {
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index e2739b69e11b..00ab1d648db6 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -46,6 +46,13 @@
#define NO_CONT_MAPPINGS BIT(1)
#define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */
+enum pgtable_type {
+ TABLE_PTE,
+ TABLE_PMD,
+ TABLE_PUD,
+ TABLE_P4D,
+};
+
u64 kimage_voffset __ro_after_init;
EXPORT_SYMBOL(kimage_voffset);
@@ -107,7 +114,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
}
EXPORT_SYMBOL(phys_mem_access_prot);
-static phys_addr_t __init early_pgtable_alloc(int shift)
+static phys_addr_t __init early_pgtable_alloc(enum pgtable_type pgtable_type)
{
phys_addr_t phys;
@@ -192,7 +199,7 @@ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
unsigned long end, phys_addr_t phys,
pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int),
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
int flags)
{
unsigned long next;
@@ -207,7 +214,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
if (flags & NO_EXEC_MAPPINGS)
pmdval |= PMD_TABLE_PXN;
BUG_ON(!pgtable_alloc);
- pte_phys = pgtable_alloc(PAGE_SHIFT);
+ pte_phys = pgtable_alloc(TABLE_PTE);
ptep = pte_set_fixmap(pte_phys);
init_clear_pgtable(ptep);
ptep += pte_index(addr);
@@ -243,7 +250,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int), int flags)
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags)
{
unsigned long next;
@@ -277,7 +284,8 @@ static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
unsigned long end, phys_addr_t phys,
pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int), int flags)
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
+ int flags)
{
unsigned long next;
pud_t pud = READ_ONCE(*pudp);
@@ -294,7 +302,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
if (flags & NO_EXEC_MAPPINGS)
pudval |= PUD_TABLE_PXN;
BUG_ON(!pgtable_alloc);
- pmd_phys = pgtable_alloc(PMD_SHIFT);
+ pmd_phys = pgtable_alloc(TABLE_PMD);
pmdp = pmd_set_fixmap(pmd_phys);
init_clear_pgtable(pmdp);
pmdp += pmd_index(addr);
@@ -325,7 +333,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int),
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
int flags)
{
unsigned long next;
@@ -339,7 +347,7 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
if (flags & NO_EXEC_MAPPINGS)
p4dval |= P4D_TABLE_PXN;
BUG_ON(!pgtable_alloc);
- pud_phys = pgtable_alloc(PUD_SHIFT);
+ pud_phys = pgtable_alloc(TABLE_PUD);
pudp = pud_set_fixmap(pud_phys);
init_clear_pgtable(pudp);
pudp += pud_index(addr);
@@ -383,7 +391,7 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int),
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
int flags)
{
unsigned long next;
@@ -397,7 +405,7 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
if (flags & NO_EXEC_MAPPINGS)
pgdval |= PGD_TABLE_PXN;
BUG_ON(!pgtable_alloc);
- p4d_phys = pgtable_alloc(P4D_SHIFT);
+ p4d_phys = pgtable_alloc(TABLE_P4D);
p4dp = p4d_set_fixmap(p4d_phys);
init_clear_pgtable(p4dp);
p4dp += p4d_index(addr);
@@ -427,7 +435,7 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int),
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
int flags)
{
unsigned long addr, end, next;
@@ -455,7 +463,7 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int),
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
int flags)
{
mutex_lock(&fixmap_lock);
@@ -468,37 +476,48 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
extern __alias(__create_pgd_mapping_locked)
void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
phys_addr_t size, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int), int flags);
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
+ int flags);
#endif
-static phys_addr_t __pgd_pgtable_alloc(int shift)
+static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm,
+ enum pgtable_type pgtable_type)
{
/* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */
- void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL & ~__GFP_ZERO);
+ struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & ~__GFP_ZERO, 0);
+ phys_addr_t pa;
+
+ BUG_ON(!ptdesc);
+ pa = page_to_phys(ptdesc_page(ptdesc));
+
+ switch (pgtable_type) {
+ case TABLE_PTE:
+ BUG_ON(!pagetable_pte_ctor(mm, ptdesc));
+ break;
+ case TABLE_PMD:
+ BUG_ON(!pagetable_pmd_ctor(mm, ptdesc));
+ break;
+ case TABLE_PUD:
+ pagetable_pud_ctor(ptdesc);
+ break;
+ case TABLE_P4D:
+ pagetable_p4d_ctor(ptdesc);
+ break;
+ }
- BUG_ON(!ptr);
- return __pa(ptr);
+ return pa;
}
-static phys_addr_t pgd_pgtable_alloc(int shift)
+static phys_addr_t __maybe_unused
+pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type)
{
- phys_addr_t pa = __pgd_pgtable_alloc(shift);
- struct ptdesc *ptdesc = page_ptdesc(phys_to_page(pa));
-
- /*
- * Call proper page table ctor in case later we need to
- * call core mm functions like apply_to_page_range() on
- * this pre-allocated page table.
- *
- * We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is
- * folded, and if so pagetable_pte_ctor() becomes nop.
- */
- if (shift == PAGE_SHIFT)
- BUG_ON(!pagetable_pte_ctor(ptdesc));
- else if (shift == PMD_SHIFT)
- BUG_ON(!pagetable_pmd_ctor(ptdesc));
+ return __pgd_pgtable_alloc(&init_mm, pgtable_type);
+}
- return pa;
+static phys_addr_t
+pgd_pgtable_alloc_special_mm(enum pgtable_type pgtable_type)
+{
+ return __pgd_pgtable_alloc(NULL, pgtable_type);
}
/*
@@ -530,7 +549,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
- pgd_pgtable_alloc, flags);
+ pgd_pgtable_alloc_special_mm, flags);
}
static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
@@ -744,7 +763,7 @@ static int __init map_entry_trampoline(void)
memset(tramp_pg_dir, 0, PGD_SIZE);
__create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS,
entry_tramp_text_size(), prot,
- __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS);
+ pgd_pgtable_alloc_init_mm, NO_BLOCK_MAPPINGS);
/* Map both the text and data into the kernel page table */
for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++)
@@ -1169,15 +1188,19 @@ int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
unsigned long addr, unsigned long next)
{
vmemmap_verify((pte_t *)pmdp, node, addr, next);
- return 1;
+
+ return pmd_sect(READ_ONCE(*pmdp));
}
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
+ /* [start, end] should be within one section */
+ WARN_ON_ONCE(end - start > PAGES_PER_SECTION * sizeof(struct page));
- if (!IS_ENABLED(CONFIG_ARM64_4K_PAGES))
+ if (!IS_ENABLED(CONFIG_ARM64_4K_PAGES) ||
+ (end - start < PAGES_PER_SECTION * sizeof(struct page)))
return vmemmap_populate_basepages(start, end, node, altmap);
else
return vmemmap_populate_hugepages(start, end, node, altmap);
@@ -1282,7 +1305,8 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
next = addr;
end = addr + PUD_SIZE;
do {
- pmd_free_pte_page(pmdp, next);
+ if (pmd_present(pmdp_get(pmdp)))
+ pmd_free_pte_page(pmdp, next);
} while (pmdp++, next += PMD_SIZE, next != end);
pud_clear(pudp);
@@ -1346,7 +1370,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
- size, params->pgprot, __pgd_pgtable_alloc,
+ size, params->pgprot, pgd_pgtable_alloc_init_mm,
flags);
memblock_clear_nomap(start, size);
@@ -1357,7 +1381,8 @@ int arch_add_memory(int nid, u64 start, u64 size,
__remove_pgd_mapping(swapper_pg_dir,
__phys_to_virt(start), size);
else {
- max_pfn = PFN_UP(start + size);
+ /* Address of hotplugged memory can be smaller */
+ max_pfn = max(max_pfn, PFN_UP(start + size));
max_low_pfn = max_pfn;
}
@@ -1554,9 +1579,8 @@ void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp)
#ifdef CONFIG_ARCH_HAS_PKEYS
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val)
{
- u64 new_por = POE_RXW;
+ u64 new_por;
u64 old_por;
- u64 pkey_shift;
if (!system_supports_poe())
return -ENOSPC;
@@ -1570,7 +1594,7 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long i
return -EINVAL;
/* Set the bits we need in POR: */
- new_por = POE_RXW;
+ new_por = POE_RWX;
if (init_val & PKEY_DISABLE_WRITE)
new_por &= ~POE_W;
if (init_val & PKEY_DISABLE_ACCESS)
@@ -1581,12 +1605,11 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long i
new_por &= ~POE_X;
/* Shift the bits in to the correct place in POR for pkey: */
- pkey_shift = pkey * POR_BITS_PER_PKEY;
- new_por <<= pkey_shift;
+ new_por = POR_ELx_PERM_PREP(pkey, new_por);
/* Get old POR and mask off any old bits in place: */
old_por = read_sysreg_s(SYS_POR_EL0);
- old_por &= ~(POE_MASK << pkey_shift);
+ old_por &= ~(POE_MASK << POR_ELx_PERM_SHIFT(pkey));
/* Write old part along with new part: */
write_sysreg_s(old_por | new_por, SYS_POR_EL0);
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 39fd1f7ff02a..04d4a8f676db 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -96,8 +96,8 @@ static int change_memory_common(unsigned long addr, int numpages,
* we are operating on does not result in such splitting.
*
* Let's restrict ourselves to mappings created by vmalloc (or vmap).
- * Those are guaranteed to consist entirely of page mappings, and
- * splitting is never needed.
+ * Disallow VM_ALLOW_HUGE_VMAP mappings to guarantee that only page
+ * mappings are updated and splitting is never needed.
*
* So check whether the [addr, addr + size) interval is entirely
* covered by precisely one VM area that has the VM_ALLOC flag set.
@@ -105,7 +105,7 @@ static int change_memory_common(unsigned long addr, int numpages,
area = find_vm_area((void *)addr);
if (!area ||
end > (unsigned long)kasan_reset_tag(area->addr) + area->size ||
- !(area->flags & VM_ALLOC))
+ ((area->flags & (VM_ALLOC | VM_ALLOW_HUGE_VMAP)) != VM_ALLOC))
return -EINVAL;
if (!numpages)
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 0c501cabc238..8160cff35089 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -33,7 +33,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
gfp_t gfp = GFP_PGTABLE_USER;
if (pgdir_is_page_size())
- return (pgd_t *)__get_free_page(gfp);
+ return __pgd_alloc(mm, 0);
else
return kmem_cache_alloc(pgd_cache, gfp);
}
@@ -41,7 +41,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
if (pgdir_is_page_size())
- free_page((unsigned long)pgd);
+ __pgd_free(mm, pgd);
else
kmem_cache_free(pgd_cache, pgd);
}
diff --git a/arch/arm64/mm/physaddr.c b/arch/arm64/mm/physaddr.c
index cde44c13dda1..7d94e09b01b3 100644
--- a/arch/arm64/mm/physaddr.c
+++ b/arch/arm64/mm/physaddr.c
@@ -10,7 +10,7 @@
phys_addr_t __virt_to_phys(unsigned long x)
{
WARN(!__is_lm_address(__tag_reset(x)),
- "virt_to_phys used for non-linear address: %pK (%pS)\n",
+ "virt_to_phys used for non-linear address: %p (%pS)\n",
(void *)x,
(void *)x);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index b8edc5765441..54dccfd6aa11 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -501,7 +501,7 @@ alternative_else_nop_endif
#ifdef CONFIG_ARM64_HAFT
cmp x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT
b.lt 1f
- orr tcr2, tcr2, TCR2_EL1x_HAFT
+ orr tcr2, tcr2, TCR2_EL1_HAFT
#endif /* CONFIG_ARM64_HAFT */
1:
#endif /* CONFIG_ARM64_HW_AFDBM */
@@ -512,27 +512,12 @@ alternative_else_nop_endif
ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4
cbz x1, .Lskip_indirection
- /*
- * The PROT_* macros describing the various memory types may resolve to
- * C expressions if they include the PTE_MAYBE_* macros, and so they
- * can only be used from C code. The PIE_E* constants below are also
- * defined in terms of those macros, but will mask out those
- * PTE_MAYBE_* constants, whether they are set or not. So #define them
- * as 0x0 here so we can evaluate the PIE_E* constants in asm context.
- */
-
-#define PTE_MAYBE_NG 0
-#define PTE_MAYBE_SHARED 0
-
- mov_q x0, PIE_E0
+ mov_q x0, PIE_E0_ASM
msr REG_PIRE0_EL1, x0
- mov_q x0, PIE_E1
+ mov_q x0, PIE_E1_ASM
msr REG_PIR_EL1, x0
-#undef PTE_MAYBE_NG
-#undef PTE_MAYBE_SHARED
-
- orr tcr2, tcr2, TCR2_EL1x_PIE
+ orr tcr2, tcr2, TCR2_EL1_PIE
.Lskip_indirection:
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 688fbe0271ca..421a5de806c6 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -80,8 +80,8 @@ static const struct ptdump_prot_bits pte_bits[] = {
.set = "CON",
.clear = " ",
}, {
- .mask = PTE_TABLE_BIT | PTE_VALID,
- .val = PTE_VALID,
+ .mask = PMD_TYPE_MASK,
+ .val = PMD_TYPE_SECT,
.set = "BLK",
.clear = " ",
}, {
@@ -189,12 +189,12 @@ static void note_prot_wx(struct ptdump_pg_state *st, unsigned long addr)
}
void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
- u64 val)
+ pteval_t val)
{
struct ptdump_pg_state *st = container_of(pt_st, struct ptdump_pg_state, ptdump);
struct ptdump_pg_level *pg_level = st->pg_level;
static const char units[] = "KMGTPE";
- u64 prot = 0;
+ ptdesc_t prot = 0;
/* check if the current level has been folded dynamically */
if (st->mm && ((level == 1 && mm_p4d_folded(st->mm)) ||
@@ -251,6 +251,38 @@ void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
}
+void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte)
+{
+ note_page(pt_st, addr, 4, pte_val(pte));
+}
+
+void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd)
+{
+ note_page(pt_st, addr, 3, pmd_val(pmd));
+}
+
+void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud)
+{
+ note_page(pt_st, addr, 2, pud_val(pud));
+}
+
+void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d)
+{
+ note_page(pt_st, addr, 1, p4d_val(p4d));
+}
+
+void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd)
+{
+ note_page(pt_st, addr, 0, pgd_val(pgd));
+}
+
+void note_page_flush(struct ptdump_state *pt_st)
+{
+ pte_t pte_zero = {0};
+
+ note_page(pt_st, 0, -1, pte_val(pte_zero));
+}
+
void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
{
unsigned long end = ~0UL;
@@ -266,7 +298,12 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
.pg_level = &kernel_pg_levels[0],
.level = -1,
.ptdump = {
- .note_page = note_page,
+ .note_page_pte = note_page_pte,
+ .note_page_pmd = note_page_pmd,
+ .note_page_pud = note_page_pud,
+ .note_page_p4d = note_page_p4d,
+ .note_page_pgd = note_page_pgd,
+ .note_page_flush = note_page_flush,
.range = (struct ptdump_range[]){
{info->base_addr, end},
{0, 0}
@@ -303,7 +340,12 @@ bool ptdump_check_wx(void)
.level = -1,
.check_wx = true,
.ptdump = {
- .note_page = note_page,
+ .note_page_pte = note_page_pte,
+ .note_page_pmd = note_page_pmd,
+ .note_page_pud = note_page_pud,
+ .note_page_p4d = note_page_p4d,
+ .note_page_pgd = note_page_pgd,
+ .note_page_flush = note_page_flush,
.range = (struct ptdump_range[]) {
{_PAGE_OFFSET(vabits_actual), ~0UL},
{0, 0}
diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c
index 0f7b484cb2ff..18543b603c77 100644
--- a/arch/arm64/mm/trans_pgd.c
+++ b/arch/arm64/mm/trans_pgd.c
@@ -57,7 +57,7 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
*/
BUG_ON(!pfn_valid(pte_pfn(pte)));
- __set_pte(dst_ptep, pte_mkpresent(pte_mkwrite_novma(pte)));
+ __set_pte(dst_ptep, pte_mkvalid(pte_mkwrite_novma(pte)));
}
}
@@ -162,6 +162,13 @@ static int copy_p4d(struct trans_pgd_info *info, pgd_t *dst_pgdp,
unsigned long next;
unsigned long addr = start;
+ if (pgd_none(READ_ONCE(*dst_pgdp))) {
+ dst_p4dp = trans_alloc(info);
+ if (!dst_p4dp)
+ return -ENOMEM;
+ pgd_populate(NULL, dst_pgdp, dst_p4dp);
+ }
+
dst_p4dp = p4d_offset(dst_pgdp, start);
src_p4dp = p4d_offset(src_pgdp, start);
do {