diff options
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/Makefile | 47 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s32/Makefile | 9 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s32/hash_low.S (renamed from arch/powerpc/mm/hash_low_32.S) | 6 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s32/mmu.c (renamed from arch/powerpc/mm/ppc_mmu_32.c) | 76 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s32/mmu_context.c (renamed from arch/powerpc/mm/mmu_context_hash32.c) | 0 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s32/tlb.c (renamed from arch/powerpc/mm/tlb_hash32.c) | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/Makefile | 24 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/hash_4k.c (renamed from arch/powerpc/mm/hash64_4k.c) | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/hash_64k.c (renamed from arch/powerpc/mm/hash64_64k.c) | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/hash_hugepage.c (renamed from arch/powerpc/mm/hugepage-hash64.c) | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/hash_hugetlbpage.c (renamed from arch/powerpc/mm/hugetlbpage-hash64.c) | 31 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/hash_native.c (renamed from arch/powerpc/mm/hash_native_64.c) | 0 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/hash_pgtable.c (renamed from arch/powerpc/mm/pgtable-hash64.c) | 15 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/hash_tlb.c (renamed from arch/powerpc/mm/tlb_hash64.c) | 18 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/hash_utils.c (renamed from arch/powerpc/mm/hash_utils_64.c) | 145 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/iommu_api.c (renamed from arch/powerpc/mm/mmu_context_iommu.c) | 0 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/mmu_context.c (renamed from arch/powerpc/mm/mmu_context_book3s64.c) | 29 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/pgtable.c (renamed from arch/powerpc/mm/pgtable-book3s64.c) | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/pkeys.c (renamed from arch/powerpc/mm/pkeys.c) | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/radix_hugetlbpage.c (renamed from arch/powerpc/mm/hugetlbpage-radix.c) | 0 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/radix_pgtable.c (renamed from arch/powerpc/mm/pgtable-radix.c) | 117 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/radix_tlb.c (renamed from arch/powerpc/mm/tlb-radix.c) | 0 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/slb.c (renamed from arch/powerpc/mm/slb.c) | 31 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/subpage_prot.c (renamed from arch/powerpc/mm/subpage-prot.c) | 39 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/vphn.c (renamed from arch/powerpc/mm/vphn.c) | 6 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/vphn.h (renamed from arch/powerpc/mm/vphn.h) | 3 | ||||
-rw-r--r-- | arch/powerpc/mm/copro_fault.c | 18 | ||||
-rw-r--r-- | arch/powerpc/mm/dma-noncoherent.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/drmem.c | 6 | ||||
-rw-r--r-- | arch/powerpc/mm/fault.c | 49 | ||||
-rw-r--r-- | arch/powerpc/mm/highmem.c | 14 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 242 | ||||
-rw-r--r-- | arch/powerpc/mm/init-common.c | 26 | ||||
-rw-r--r-- | arch/powerpc/mm/init_32.c | 8 | ||||
-rw-r--r-- | arch/powerpc/mm/init_64.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/kasan/Makefile | 5 | ||||
-rw-r--r-- | arch/powerpc/mm/kasan/kasan_init_32.c | 183 | ||||
-rw-r--r-- | arch/powerpc/mm/mem.c | 17 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_decl.h | 9 | ||||
-rw-r--r-- | arch/powerpc/mm/nohash/40x.c (renamed from arch/powerpc/mm/40x_mmu.c) | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/nohash/44x.c (renamed from arch/powerpc/mm/44x_mmu.c) | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/nohash/8xx.c (renamed from arch/powerpc/mm/8xx_mmu.c) | 26 | ||||
-rw-r--r-- | arch/powerpc/mm/nohash/Makefile | 18 | ||||
-rw-r--r-- | arch/powerpc/mm/nohash/book3e_hugetlbpage.c (renamed from arch/powerpc/mm/hugetlbpage-book3e.c) | 52 | ||||
-rw-r--r-- | arch/powerpc/mm/nohash/book3e_pgtable.c (renamed from arch/powerpc/mm/pgtable-book3e.c) | 9 | ||||
-rw-r--r-- | arch/powerpc/mm/nohash/fsl_booke.c (renamed from arch/powerpc/mm/fsl_booke_mmu.c) | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/nohash/mmu_context.c (renamed from arch/powerpc/mm/mmu_context_nohash.c) | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/nohash/tlb.c (renamed from arch/powerpc/mm/tlb_nohash.c) | 19 | ||||
-rw-r--r-- | arch/powerpc/mm/nohash/tlb_low.S (renamed from arch/powerpc/mm/tlb_nohash_low.S) | 0 | ||||
-rw-r--r-- | arch/powerpc/mm/nohash/tlb_low_64e.S (renamed from arch/powerpc/mm/tlb_low_64e.S) | 31 | ||||
-rw-r--r-- | arch/powerpc/mm/numa.c | 35 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable.c | 114 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_32.c | 47 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_64.c | 13 | ||||
-rw-r--r-- | arch/powerpc/mm/ptdump/hashpagetable.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/ptdump/ptdump.c | 86 | ||||
-rw-r--r-- | arch/powerpc/mm/slice.c | 109 |
58 files changed, 1104 insertions, 655 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 3c1bd9fa23cd..0f499db315d6 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -5,53 +5,18 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE) - obj-y := fault.o mem.o pgtable.o mmap.o \ init_$(BITS).o pgtable_$(BITS).o \ + pgtable-frag.o \ init-common.o mmu_context.o drmem.o -obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ - tlb_nohash_low.o -obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o -hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o -obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o -obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o \ - $(hash64-y) mmu_context_book3s64.o \ - pgtable-book3s64.o pgtable-frag.o -obj-$(CONFIG_PPC32) += pgtable-frag.o -obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o -obj-$(CONFIG_PPC_BOOK3S_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o -obj-$(CONFIG_PPC_BOOK3S) += tlb_hash$(BITS).o -ifdef CONFIG_PPC_BOOK3S_64 -obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o -obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o -endif -obj-$(CONFIG_40x) += 40x_mmu.o -obj-$(CONFIG_44x) += 44x_mmu.o -obj-$(CONFIG_PPC_8xx) += 8xx_mmu.o -obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o +obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/ +obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/ +obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/ obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o -obj-$(CONFIG_PPC_SPLPAR) += vphn.o obj-$(CONFIG_PPC_MM_SLICES) += slice.o -obj-y += hugetlbpage.o -ifdef CONFIG_HUGETLB_PAGE -obj-$(CONFIG_PPC_BOOK3S_64) += hugetlbpage-hash64.o -obj-$(CONFIG_PPC_RADIX_MMU) += hugetlbpage-radix.o -obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o -endif -obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o -obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o -obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o obj-$(CONFIG_PPC_PTDUMP) += ptdump/ -obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o - -# Disable kcov instrumentation on sensitive code -# This is necessary for booting with kcov enabled on book3e machines -KCOV_INSTRUMENT_tlb_nohash.o := n -KCOV_INSTRUMENT_fsl_booke_mmu.o := n - -# Instrumenting the SLB fault path can lead to duplicate SLB entries -KCOV_INSTRUMENT_slb.o := n +obj-$(CONFIG_KASAN) += kasan/ diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile new file mode 100644 index 000000000000..1732eaa740a9 --- /dev/null +++ b/arch/powerpc/mm/book3s32/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 + +KASAN_SANITIZE_mmu.o := n + +ifdef CONFIG_KASAN +CFLAGS_mmu.o += -DDISABLE_BRANCH_PROFILING +endif + +obj-y += mmu.o hash_low.o mmu_context.o tlb.o diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/book3s32/hash_low.S index a6c491f18a04..e27792d0b744 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -309,13 +309,13 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64) _GLOBAL(create_hpte) /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ - rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */ - rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ + rlwinm r8,r5,32-9,30,30 /* _PAGE_RW -> PP msb */ + rlwinm r0,r5,32-6,30,30 /* _PAGE_DIRTY -> PP msb */ and r8,r8,r0 /* writable if _RW & _DIRTY */ rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ ori r8,r8,0xe04 /* clear out reserved bits */ - andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */ + andc r8,r5,r8 /* PP = user? (rw&dirty? 1: 3): 0 */ BEGIN_FTR_SECTION rlwinm r8,r8,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/book3s32/mmu.c index 5d9c3ff728c9..fc073cb2c517 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -34,11 +34,12 @@ #include <asm/code-patching.h> #include <asm/sections.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> -struct hash_pte *Hash, *Hash_end; -unsigned long Hash_size, Hash_mask; +struct hash_pte *Hash; +static unsigned long Hash_size, Hash_mask; unsigned long _SDR1; +static unsigned int hash_mb, hash_mb2; struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */ @@ -318,7 +319,6 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, */ void __init MMU_init_hw(void) { - unsigned int hmask, mb, mb2; unsigned int n_hpteg, lg_n_hpteg; if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) @@ -355,26 +355,34 @@ void __init MMU_init_hw(void) __func__, Hash_size, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; - Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size); + pr_info("Total memory = %lldMB; using %ldkB for hash table\n", + (unsigned long long)(total_memory >> 20), Hash_size >> 10); - printk("Total memory = %lldMB; using %ldkB for hash table (at %p)\n", - (unsigned long long)(total_memory >> 20), Hash_size >> 10, Hash); + Hash_mask = n_hpteg - 1; + hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; + if (lg_n_hpteg > 16) + hash_mb2 = 16 - LG_HPTEG_SIZE; +} + +void __init MMU_init_hw_patch(void) +{ + unsigned int hmask = Hash_mask >> (16 - LG_HPTEG_SIZE); + + if (ppc_md.progress) + ppc_md.progress("hash:patch", 0x345); + if (ppc_md.progress) + ppc_md.progress("hash:done", 0x205); + + /* WARNING: Make sure nothing can trigger a KASAN check past this point */ /* * Patch up the instructions in hashtable.S:create_hpte */ - if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345); - Hash_mask = n_hpteg - 1; - hmask = Hash_mask >> (16 - LG_HPTEG_SIZE); - mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; - if (lg_n_hpteg > 16) - mb2 = 16 - LG_HPTEG_SIZE; - modify_instruction_site(&patch__hash_page_A0, 0xffff, ((unsigned int)Hash - PAGE_OFFSET) >> 16); - modify_instruction_site(&patch__hash_page_A1, 0x7c0, mb << 6); - modify_instruction_site(&patch__hash_page_A2, 0x7c0, mb2 << 6); + modify_instruction_site(&patch__hash_page_A1, 0x7c0, hash_mb << 6); + modify_instruction_site(&patch__hash_page_A2, 0x7c0, hash_mb2 << 6); modify_instruction_site(&patch__hash_page_B, 0xffff, hmask); modify_instruction_site(&patch__hash_page_C, 0xffff, hmask); @@ -383,11 +391,9 @@ void __init MMU_init_hw(void) */ modify_instruction_site(&patch__flush_hash_A0, 0xffff, ((unsigned int)Hash - PAGE_OFFSET) >> 16); - modify_instruction_site(&patch__flush_hash_A1, 0x7c0, mb << 6); - modify_instruction_site(&patch__flush_hash_A2, 0x7c0, mb2 << 6); + modify_instruction_site(&patch__flush_hash_A1, 0x7c0, hash_mb << 6); + modify_instruction_site(&patch__flush_hash_A2, 0x7c0, hash_mb2 << 6); modify_instruction_site(&patch__flush_hash_B, 0xffff, hmask); - - if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205); } void setup_initial_memory_limit(phys_addr_t first_memblock_base, @@ -404,3 +410,33 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, else /* Anything else has 256M mapped */ memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000)); } + +void __init print_system_hash_info(void) +{ + pr_info("Hash_size = 0x%lx\n", Hash_size); + if (Hash_mask) + pr_info("Hash_mask = 0x%lx\n", Hash_mask); +} + +#ifdef CONFIG_PPC_KUEP +void __init setup_kuep(bool disabled) +{ + pr_info("Activating Kernel Userspace Execution Prevention\n"); + + if (cpu_has_feature(CPU_FTR_601)) + pr_warn("KUEP is not working on powerpc 601 (No NX bit in Seg Regs)\n"); + + if (disabled) + pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n"); +} +#endif + +#ifdef CONFIG_PPC_KUAP +void __init setup_kuap(bool disabled) +{ + pr_info("Activating Kernel Userspace Access Protection\n"); + + if (disabled) + pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n"); +} +#endif diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/book3s32/mmu_context.c index 921c1e33e941..921c1e33e941 100644 --- a/arch/powerpc/mm/mmu_context_hash32.c +++ b/arch/powerpc/mm/book3s32/mmu_context.c diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/book3s32/tlb.c index cf8472cf3d59..8d56f0417f87 100644 --- a/arch/powerpc/mm/tlb_hash32.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -32,7 +32,7 @@ #include <asm/tlbflush.h> #include <asm/tlb.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> /* * Called when unmapping pages to flush entries from the TLB/hash table. diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile new file mode 100644 index 000000000000..974b4fc19f4f --- /dev/null +++ b/arch/powerpc/mm/book3s64/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0 + +ccflags-y := $(NO_MINIMAL_TOC) + +CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE) + +obj-y += hash_pgtable.o hash_utils.o slb.o \ + mmu_context.o pgtable.o hash_tlb.o +obj-$(CONFIG_PPC_NATIVE) += hash_native.o +obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o +obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o +obj-$(CONFIG_PPC_64K_PAGES) += hash_64k.o +obj-$(CONFIG_PPC_SPLPAR) += vphn.o +obj-$(CONFIG_HUGETLB_PAGE) += hash_hugetlbpage.o +ifdef CONFIG_HUGETLB_PAGE +obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o +endif +obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o +obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o +obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o +obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o + +# Instrumenting the SLB fault path can lead to duplicate SLB entries +KCOV_INSTRUMENT_slb.o := n diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c index 6fa6765a10eb..22e787123cdf 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/book3s64/hash_4k.c @@ -1,6 +1,6 @@ /* * Copyright IBM Corporation, 2015 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU Lesser General Public License diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c index 3afa253d7f52..7084ce2951e6 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/book3s64/hash_64k.c @@ -1,6 +1,6 @@ /* * Copyright IBM Corporation, 2015 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU Lesser General Public License diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/book3s64/hash_hugepage.c index dfbc3b32f09b..440823797de7 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/book3s64/hash_hugepage.c @@ -1,6 +1,6 @@ /* * Copyright IBM Corporation, 2013 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2.1 of the GNU Lesser General Public License diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c index b0d9209d9a86..eefa89c6117b 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c @@ -15,6 +15,9 @@ #include <asm/cacheflush.h> #include <asm/machdep.h> +unsigned int hpage_shift; +EXPORT_SYMBOL(hpage_shift); + extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long pa, unsigned long rlags, unsigned long vflags, int psize, int ssize); @@ -34,7 +37,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, /* Search the Linux page table for a match with va */ vpn = hpt_vpn(ea, vsid, ssize); - /* At this point, we have a pte (old_pte) which can be used to build + /* + * At this point, we have a pte (old_pte) which can be used to build * or update an HPTE. There are 2 cases: * * 1. There is a valid (present) pte with no associated HPTE (this is @@ -55,8 +59,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, if (unlikely(!check_pte_access(access, old_pte))) return 1; - /* Try to lock the PTE, add ACCESSED and DIRTY if it was - * a write access */ + /* + * Try to lock the PTE, add ACCESSED and DIRTY if it was + * a write access + */ new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED; if (access & _PAGE_WRITE) new_pte |= _PAGE_DIRTY; @@ -74,8 +80,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, rpte = __real_pte(__pte(old_pte), ptep, offset); if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) - /* No CPU has hugepages but lacks no execute, so we - * don't need to worry about that case */ + /* + * No CPU has hugepages but lacks no execute, so we + * don't need to worry about that case + */ rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); /* Check if pte already has an hpte (case 2) */ @@ -145,3 +153,16 @@ void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr old_pte, pte); set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } + +void hugetlbpage_init_default(void) +{ + /* Set default large page size. Currently, we pick 16M or 1M + * depending on what is available + */ + if (mmu_psize_defs[MMU_PAGE_16M].shift) + hpage_shift = mmu_psize_defs[MMU_PAGE_16M].shift; + else if (mmu_psize_defs[MMU_PAGE_1M].shift) + hpage_shift = mmu_psize_defs[MMU_PAGE_1M].shift; + else if (mmu_psize_defs[MMU_PAGE_2M].shift) + hpage_shift = mmu_psize_defs[MMU_PAGE_2M].shift; +} diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/book3s64/hash_native.c index aaa28fd918fe..aaa28fd918fe 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/book3s64/hash_native.c diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index c08d49046a96..1fd025dba4a3 100644 --- a/arch/powerpc/mm/pgtable-hash64.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -19,7 +19,7 @@ #include <asm/mmu.h> #include <asm/tlb.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> #define CREATE_TRACE_POINTS #include <trace/events/thp.h> @@ -112,9 +112,16 @@ int __meminit hash__vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys) { - int rc = htab_bolt_mapping(start, start + page_size, phys, - pgprot_val(PAGE_KERNEL), - mmu_vmemmap_psize, mmu_kernel_ssize); + int rc; + + if ((start + page_size) >= H_VMEMMAP_END) { + pr_warn("Outside the supported range\n"); + return -1; + } + + rc = htab_bolt_mapping(start, start + page_size, phys, + pgprot_val(PAGE_KERNEL), + mmu_vmemmap_psize, mmu_kernel_ssize); if (rc < 0) { int rc2 = htab_remove_mapping(start, start + page_size, mmu_vmemmap_psize, diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/book3s64/hash_tlb.c index 87d71dd25441..d4f0101447b1 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/book3s64/hash_tlb.c @@ -55,7 +55,8 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, i = batch->index; - /* Get page size (maybe move back to caller). + /* + * Get page size (maybe move back to caller). * * NOTE: when using special 64K mappings in 4K environment like * for SPEs, we obtain the page size from the slice, which thus @@ -77,10 +78,12 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, #endif } else { psize = pte_pagesize_index(mm, addr, pte); - /* Mask the address for the standard page size. If we + /* + * Mask the address for the standard page size. If we * have a 64k page kernel, but the hardware does not * support 64k pages, this might be different from the - * hardware page size encoded in the slice table. */ + * hardware page size encoded in the slice table. + */ addr &= PAGE_MASK; offset = PTRS_PER_PTE; } @@ -161,7 +164,8 @@ void hash__tlb_flush(struct mmu_gather *tlb) { struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch); - /* If there's a TLB batch pending, then we must flush it because the + /* + * If there's a TLB batch pending, then we must flush it because the * pages are going to be freed and we really don't want to have a CPU * access a freed page because it has a stale TLB */ @@ -201,7 +205,8 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, BUG_ON(!mm->pgd); - /* Note: Normally, we should only ever use a batch within a + /* + * Note: Normally, we should only ever use a batch within a * PTE locked section. This violates the rule, but will work * since we don't actually modify the PTEs, we just flush the * hash while leaving the PTEs intact (including their reference @@ -238,7 +243,8 @@ void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr) unsigned long flags; addr = _ALIGN_DOWN(addr, PMD_SIZE); - /* Note: Normally, we should only ever use a batch within a + /* + * Note: Normally, we should only ever use a batch within a * PTE locked section. This violates the rule, but will work * since we don't actually modify the PTEs, we just flush the * hash while leaving the PTEs intact (including their reference diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/book3s64/hash_utils.c index 0a4f939a8161..919a861a8ec0 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -37,6 +37,7 @@ #include <linux/context_tracking.h> #include <linux/libfdt.h> #include <linux/pkeys.h> +#include <linux/hugetlb.h> #include <asm/debugfs.h> #include <asm/processor.h> @@ -65,6 +66,8 @@ #include <asm/pte-walk.h> #include <asm/asm-prototypes.h> +#include <mm/mmu_decl.h> + #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) #else @@ -128,7 +131,8 @@ static DEFINE_SPINLOCK(linear_map_hash_lock); struct mmu_hash_ops mmu_hash_ops; EXPORT_SYMBOL(mmu_hash_ops); -/* There are definitions of page sizes arrays to be used when none +/* + * These are definitions of page sizes arrays to be used when none * is provided by the firmware. */ @@ -145,7 +149,8 @@ static struct mmu_psize_def mmu_psize_defaults[] = { }, }; -/* POWER4, GPUL, POWER5 +/* + * POWER4, GPUL, POWER5 * * Support for 16Mb large pages */ @@ -479,7 +484,8 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, } #ifdef CONFIG_HUGETLB_PAGE -/* Scan for 16G memory blocks that have been set aside for huge pages +/* + * Scan for 16G memory blocks that have been set aside for huge pages * and reserve those blocks for 16G huge pages. */ static int __init htab_dt_scan_hugepage_blocks(unsigned long node, @@ -496,8 +502,10 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node, if (type == NULL || strcmp(type, "memory") != 0) return 0; - /* This property is the log base 2 of the number of virtual pages that - * will represent this memory block. */ + /* + * This property is the log base 2 of the number of virtual pages that + * will represent this memory block. + */ page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL); if (page_count_prop == NULL) return 0; @@ -673,7 +681,8 @@ static void __init htab_init_page_sizes(void) #endif /* CONFIG_PPC_64K_PAGES */ #ifdef CONFIG_SPARSEMEM_VMEMMAP - /* We try to use 16M pages for vmemmap if that is supported + /* + * We try to use 16M pages for vmemmap if that is supported * and we have at least 1G of RAM at boot */ if (mmu_psize_defs[MMU_PAGE_16M].shift && @@ -742,7 +751,8 @@ unsigned htab_shift_for_mem_size(unsigned long mem_size) static unsigned long __init htab_get_table_size(void) { - /* If hash size isn't already provided by the platform, we try to + /* + * If hash size isn't already provided by the platform, we try to * retrieve it from the device-tree. If it's not there neither, we * calculate it now based on the total RAM size */ @@ -755,12 +765,12 @@ static unsigned long __init htab_get_table_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -void resize_hpt_for_hotplug(unsigned long new_mem_size) +int resize_hpt_for_hotplug(unsigned long new_mem_size) { unsigned target_hpt_shift; if (!mmu_hash_ops.resize_hpt) - return; + return 0; target_hpt_shift = htab_shift_for_mem_size(new_mem_size); @@ -772,23 +782,25 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size) * reduce unless the target shift is at least 2 below the * current shift */ - if ((target_hpt_shift > ppc64_pft_size) - || (target_hpt_shift < (ppc64_pft_size - 1))) { - int rc; - - rc = mmu_hash_ops.resize_hpt(target_hpt_shift); - if (rc && (rc != -ENODEV)) - printk(KERN_WARNING - "Unable to resize hash page table to target order %d: %d\n", - target_hpt_shift, rc); - } + if (target_hpt_shift > ppc64_pft_size || + target_hpt_shift < ppc64_pft_size - 1) + return mmu_hash_ops.resize_hpt(target_hpt_shift); + + return 0; } int hash__create_section_mapping(unsigned long start, unsigned long end, int nid) { - int rc = htab_bolt_mapping(start, end, __pa(start), - pgprot_val(PAGE_KERNEL), mmu_linear_psize, - mmu_kernel_ssize); + int rc; + + if (end >= H_VMALLOC_START) { + pr_warn("Outside the supported range\n"); + return -1; + } + + rc = htab_bolt_mapping(start, end, __pa(start), + pgprot_val(PAGE_KERNEL), mmu_linear_psize, + mmu_kernel_ssize); if (rc < 0) { int rc2 = htab_remove_mapping(start, end, mmu_linear_psize, @@ -929,6 +941,11 @@ static void __init htab_initialize(void) DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", base, size, prot); + if ((base + size) >= H_VMALLOC_START) { + pr_warn("Outside the supported range\n"); + continue; + } + BUG_ON(htab_bolt_mapping(base, base + size, __pa(base), prot, mmu_linear_psize, mmu_kernel_ssize)); } @@ -968,6 +985,7 @@ void __init hash__early_init_devtree(void) htab_scan_page_sizes(); } +struct hash_mm_context init_hash_mm_context; void __init hash__early_init_mmu(void) { #ifndef CONFIG_PPC_64K_PAGES @@ -1013,11 +1031,11 @@ void __init hash__early_init_mmu(void) __pgd_val_bits = HASH_PGD_VAL_BITS; __kernel_virt_start = H_KERN_VIRT_START; - __kernel_virt_size = H_KERN_VIRT_SIZE; __vmalloc_start = H_VMALLOC_START; __vmalloc_end = H_VMALLOC_END; __kernel_io_start = H_KERN_IO_START; - vmemmap = (struct page *)H_VMEMMAP_BASE; + __kernel_io_end = H_KERN_IO_END; + vmemmap = (struct page *)H_VMEMMAP_START; ioremap_bot = IOREMAP_BASE; #ifdef CONFIG_PCI @@ -1035,12 +1053,16 @@ void __init hash__early_init_mmu(void) if (!mmu_hash_ops.hpte_insert) panic("hash__early_init_mmu: No MMU hash ops defined!\n"); - /* Initialize the MMU Hash table and create the linear mapping + /* + * Initialize the MMU Hash table and create the linear mapping * of memory. Has to be done before SLB initialization as this is * currently where the page size encoding is obtained. */ htab_initialize(); + init_mm.context.hash_context = &init_hash_mm_context; + mm_ctx_set_slb_addr_limit(&init_mm.context, SLB_ADDR_LIMIT_DEFAULT); + pr_info("Initializing hash mmu with SLB\n"); /* Initialize SLB management */ slb_initialize(); @@ -1147,10 +1169,13 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) */ static int subpage_protection(struct mm_struct *mm, unsigned long ea) { - struct subpage_prot_table *spt = &mm->context.spt; + struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context); u32 spp = 0; u32 **sbpm, *sbpp; + if (!spt) + return 0; + if (ea >= spt->maxaddr) return 0; if (ea < 0x100000000UL) { @@ -1214,7 +1239,8 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, } } -/* Result code is: +/* + * Result code is: * 0 - handled * 1 - normal page fault * -1 - critical hash insertion error @@ -1238,7 +1264,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, trace_hash_fault(ea, access, trap); /* Get region & vsid */ - switch (REGION_ID(ea)) { + switch (get_region_id(ea)) { case USER_REGION_ID: user_region = 1; if (! mm) { @@ -1252,15 +1278,19 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, break; case VMALLOC_REGION_ID: vsid = get_kernel_vsid(ea, mmu_kernel_ssize); - if (ea < VMALLOC_END) - psize = mmu_vmalloc_psize; - else - psize = mmu_io_psize; + psize = mmu_vmalloc_psize; + ssize = mmu_kernel_ssize; + break; + + case IO_REGION_ID: + vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + psize = mmu_io_psize; ssize = mmu_kernel_ssize; break; default: - /* Not a valid range - * Send the problem up to do_page_fault + /* + * Not a valid range + * Send the problem up to do_page_fault() */ rc = 1; goto bail; @@ -1285,7 +1315,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, flags |= HPTE_LOCAL_UPDATE; #ifndef CONFIG_PPC_64K_PAGES - /* If we use 4K pages and our psize is not 4K, then we might + /* + * If we use 4K pages and our psize is not 4K, then we might * be hitting a special driver mapping, and need to align the * address before we fetch the PTE. * @@ -1307,7 +1338,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, /* Add _PAGE_PRESENT to the required access perm */ access |= _PAGE_PRESENT; - /* Pre-check access permissions (will be re-checked atomically + /* + * Pre-check access permissions (will be re-checked atomically * in __hash_page_XX but this pre-check is a fast path */ if (!check_pte_access(access, pte_val(*ptep))) { @@ -1354,7 +1386,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, psize = MMU_PAGE_4K; } - /* If this PTE is non-cacheable and we have restrictions on + /* + * If this PTE is non-cacheable and we have restrictions on * using non cacheable large pages, then we switch to 4k */ if (mmu_ci_restrictions && psize == MMU_PAGE_64K && pte_ci(*ptep)) { @@ -1395,7 +1428,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, flags, ssize, spp); } - /* Dump some info in case of hash insertion failure, they should + /* + * Dump some info in case of hash insertion failure, they should * never happen so it is really useful to know if/when they do */ if (rc == -1) @@ -1421,7 +1455,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, unsigned long flags = 0; struct mm_struct *mm = current->mm; - if (REGION_ID(ea) == VMALLOC_REGION_ID) + if ((get_region_id(ea) == VMALLOC_REGION_ID) || + (get_region_id(ea) == IO_REGION_ID)) mm = &init_mm; if (dsisr & DSISR_NOHPTE) @@ -1437,8 +1472,9 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap, unsigned long access = _PAGE_PRESENT | _PAGE_READ; unsigned long flags = 0; struct mm_struct *mm = current->mm; + unsigned int region_id = get_region_id(ea); - if (REGION_ID(ea) == VMALLOC_REGION_ID) + if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID)) mm = &init_mm; if (dsisr & DSISR_NOHPTE) @@ -1455,7 +1491,7 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap, * 2) user space access kernel space. */ access |= _PAGE_PRIVILEGED; - if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID)) + if ((msr & MSR_PR) || (region_id == USER_REGION_ID)) access &= ~_PAGE_PRIVILEGED; if (trap == 0x400) @@ -1470,7 +1506,7 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea) int psize = get_slice_psize(mm, ea); /* We only prefault standard pages for now */ - if (unlikely(psize != mm->context.user_psize)) + if (unlikely(psize != mm_ctx_user_psize(&mm->context))) return false; /* @@ -1499,7 +1535,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, int rc, ssize, update_flags = 0; unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0); - BUG_ON(REGION_ID(ea) != USER_REGION_ID); + BUG_ON(get_region_id(ea) != USER_REGION_ID); if (!should_hash_preload(mm, ea)) return; @@ -1549,7 +1585,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Hash it in */ #ifdef CONFIG_PPC_64K_PAGES - if (mm->context.user_psize == MMU_PAGE_64K) + if (mm_ctx_user_psize(&mm->context) == MMU_PAGE_64K) rc = __hash_page_64K(ea, access, vsid, ptep, trap, update_flags, ssize); else @@ -1562,8 +1598,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, */ if (rc == -1) hash_failure_debug(ea, access, vsid, trap, ssize, - mm->context.user_psize, - mm->context.user_psize, + mm_ctx_user_psize(&mm->context), + mm_ctx_user_psize(&mm->context), pte_val(*ptep)); out_exit: local_irq_restore(flags); @@ -1634,7 +1670,8 @@ unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift, return gslot; } -/* WARNING: This is called from hash_low_64.S, if you change this prototype, +/* + * WARNING: This is called from hash_low_64.S, if you change this prototype, * do not forget to update the assembly call site ! */ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, @@ -1855,7 +1892,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { - /* We don't currently support the first MEMBLOCK not mapping 0 + /* + * We don't currently support the first MEMBLOCK not mapping 0 * physical on those processors */ BUG_ON(first_memblock_base != 0); @@ -1909,3 +1947,14 @@ static int __init hash64_debugfs(void) } machine_device_initcall(pseries, hash64_debugfs); #endif /* CONFIG_DEBUG_FS */ + +void __init print_system_hash_info(void) +{ + pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); + + if (htab_hash_mask) + pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); + pr_info("kernel vmalloc start = 0x%lx\n", KERN_VIRT_START); + pr_info("kernel IO start = 0x%lx\n", KERN_IO_START); + pr_info("kernel vmemmap start = 0x%lx\n", (unsigned long)vmemmap); +} diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/book3s64/iommu_api.c index 8330f135294f..8330f135294f 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/book3s64/mmu_context.c index f720c5cc0b5e..cb2b08635508 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -63,6 +63,13 @@ static int hash__init_new_context(struct mm_struct *mm) if (index < 0) return index; + mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context), + GFP_KERNEL); + if (!mm->context.hash_context) { + ida_free(&mmu_context_ida, index); + return -ENOMEM; + } + /* * The old code would re-promote on fork, we don't do that when using * slices as it could cause problem promoting slices that have been @@ -77,10 +84,26 @@ static int hash__init_new_context(struct mm_struct *mm) * We should not be calling init_new_context() on init_mm. Hence a * check against 0 is OK. */ - if (mm->context.id == 0) + if (mm->context.id == 0) { + memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context)); slice_init_new_context_exec(mm); + } else { + /* This is fork. Copy hash_context details from current->mm */ + memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context)); +#ifdef CONFIG_PPC_SUBPAGE_PROT + /* inherit subpage prot detalis if we have one. */ + if (current->mm->context.hash_context->spt) { + mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table), + GFP_KERNEL); + if (!mm->context.hash_context->spt) { + ida_free(&mmu_context_ida, index); + kfree(mm->context.hash_context); + return -ENOMEM; + } + } +#endif - subpage_prot_init_new_context(mm); + } pkey_mm_init(mm); return index; @@ -118,6 +141,7 @@ static int radix__init_new_context(struct mm_struct *mm) asm volatile("ptesync;isync" : : : "memory"); mm->context.npu_context = NULL; + mm->context.hash_context = NULL; return index; } @@ -162,6 +186,7 @@ static void destroy_contexts(mm_context_t *ctx) if (context_id) ida_free(&mmu_context_ida, context_id); } + kfree(ctx->hash_context); } static void pmd_frag_destroy(void *pmd_frag) diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/book3s64/pgtable.c index a4341aba0af4..16bda049187a 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -17,7 +17,7 @@ #include <asm/trace.h> #include <asm/powernv.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> #include <trace/events/thp.h> unsigned long __pmd_frag_nr; diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 587807763737..ae7fca40e5b3 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -7,6 +7,7 @@ #include <asm/mman.h> #include <asm/mmu_context.h> +#include <asm/mmu.h> #include <asm/setup.h> #include <linux/pkeys.h> #include <linux/of_device.h> diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c index cab06331c0c0..cab06331c0c0 100644 --- a/arch/powerpc/mm/hugetlbpage-radix.c +++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 154472a28c77..c9bcf428dd2b 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -29,6 +29,7 @@ #include <asm/powernv.h> #include <asm/sections.h> #include <asm/trace.h> +#include <asm/uaccess.h> #include <trace/events/thp.h> @@ -135,6 +136,10 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, */ BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE); +#ifdef CONFIG_PPC_64K_PAGES + BUILD_BUG_ON(RADIX_KERN_MAP_SIZE != (1UL << MAX_EA_BITS_PER_CONTEXT)); +#endif + if (unlikely(!slab_is_available())) return early_map_kernel_page(ea, pa, flags, map_page_size, nid, region_start, region_end); @@ -334,6 +339,12 @@ void __init radix_init_pgtable(void) * page tables will be allocated within the range. No * need or a node (which we don't have yet). */ + + if ((reg->base + reg->size) >= RADIX_VMALLOC_START) { + pr_warn("Outside the supported range\n"); + continue; + } + WARN_ON(create_physical_mapping(reg->base, reg->base + reg->size, -1)); @@ -531,8 +542,15 @@ static void radix_init_amor(void) mtspr(SPRN_AMOR, (3ul << 62)); } -static void radix_init_iamr(void) +#ifdef CONFIG_PPC_KUEP +void setup_kuep(bool disabled) { + if (disabled || !early_radix_enabled()) + return; + + if (smp_processor_id() == boot_cpuid) + pr_info("Activating Kernel Userspace Execution Prevention\n"); + /* * Radix always uses key0 of the IAMR to determine if an access is * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction @@ -540,6 +558,25 @@ static void radix_init_iamr(void) */ mtspr(SPRN_IAMR, (1ul << 62)); } +#endif + +#ifdef CONFIG_PPC_KUAP +void setup_kuap(bool disabled) +{ + if (disabled || !early_radix_enabled()) + return; + + if (smp_processor_id() == boot_cpuid) { + pr_info("Activating Kernel Userspace Access Prevention\n"); + cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP; + } + + /* Make sure userspace can't change the AMR */ + mtspr(SPRN_UAMOR, 0); + mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); + isync(); +} +#endif void __init radix__early_init_mmu(void) { @@ -574,11 +611,11 @@ void __init radix__early_init_mmu(void) __pgd_val_bits = RADIX_PGD_VAL_BITS; __kernel_virt_start = RADIX_KERN_VIRT_START; - __kernel_virt_size = RADIX_KERN_VIRT_SIZE; __vmalloc_start = RADIX_VMALLOC_START; __vmalloc_end = RADIX_VMALLOC_END; __kernel_io_start = RADIX_KERN_IO_START; - vmemmap = (struct page *)RADIX_VMEMMAP_BASE; + __kernel_io_end = RADIX_KERN_IO_END; + vmemmap = (struct page *)RADIX_VMEMMAP_START; ioremap_bot = IOREMAP_BASE; #ifdef CONFIG_PCI @@ -601,7 +638,6 @@ void __init radix__early_init_mmu(void) memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); - radix_init_iamr(); radix_init_pgtable(); /* Switch to the guard PID before turning on MMU */ radix__switch_mmu_context(NULL, &init_mm); @@ -623,7 +659,6 @@ void radix__early_init_mmu_secondary(void) __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); radix_init_amor(); } - radix_init_iamr(); radix__switch_mmu_context(NULL, &init_mm); if (cpu_has_feature(CPU_FTR_HVMODE)) @@ -646,7 +681,8 @@ void radix__mmu_cleanup_all(void) void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { - /* We don't currently support the first MEMBLOCK not mapping 0 + /* + * We don't currently support the first MEMBLOCK not mapping 0 * physical on those processors */ BUG_ON(first_memblock_base != 0); @@ -866,6 +902,11 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid) { + if (end >= RADIX_VMALLOC_START) { + pr_warn("Outside the supported range\n"); + return -1; + } + return create_physical_mapping(start, end, nid); } @@ -893,6 +934,11 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start, int nid = early_pfn_to_nid(phys >> PAGE_SHIFT); int ret; + if ((start + page_size) >= RADIX_VMEMMAP_END) { + pr_warn("Outside the supported range\n"); + return -1; + } + ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid); BUG_ON(ret); @@ -958,45 +1004,44 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) { - struct list_head *lh = (struct list_head *) pgtable; + struct list_head *lh = (struct list_head *) pgtable; - assert_spin_locked(pmd_lockptr(mm, pmdp)); + assert_spin_locked(pmd_lockptr(mm, pmdp)); - /* FIFO */ - if (!pmd_huge_pte(mm, pmdp)) - INIT_LIST_HEAD(lh); - else - list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); - pmd_huge_pte(mm, pmdp) = pgtable; + /* FIFO */ + if (!pmd_huge_pte(mm, pmdp)) + INIT_LIST_HEAD(lh); + else + list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; } pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { - pte_t *ptep; - pgtable_t pgtable; - struct list_head *lh; - - assert_spin_locked(pmd_lockptr(mm, pmdp)); - - /* FIFO */ - pgtable = pmd_huge_pte(mm, pmdp); - lh = (struct list_head *) pgtable; - if (list_empty(lh)) - pmd_huge_pte(mm, pmdp) = NULL; - else { - pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; - list_del(lh); - } - ptep = (pte_t *) pgtable; - *ptep = __pte(0); - ptep++; - *ptep = __pte(0); - return pgtable; -} + pte_t *ptep; + pgtable_t pgtable; + struct list_head *lh; + assert_spin_locked(pmd_lockptr(mm, pmdp)); + + /* FIFO */ + pgtable = pmd_huge_pte(mm, pmdp); + lh = (struct list_head *) pgtable; + if (list_empty(lh)) + pmd_huge_pte(mm, pmdp) = NULL; + else { + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; + list_del(lh); + } + ptep = (pte_t *) pgtable; + *ptep = __pte(0); + ptep++; + *ptep = __pte(0); + return pgtable; +} pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm, - unsigned long addr, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { pmd_t old_pmd; unsigned long old; diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 6a23b9ebd2a1..6a23b9ebd2a1 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/book3s64/slb.c index 5986df48359b..c22742218bd3 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -554,7 +554,8 @@ void slb_initialize(void) asm volatile("isync; slbia; isync":::"memory"); create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX); - /* For the boot cpu, we're running on the stack in init_thread_union, + /* + * For the boot cpu, we're running on the stack in init_thread_union, * which is in the first segment of the linear mapping, and also * get_paca()->kstack hasn't been initialized yet. * For secondary cpus, we need to bolt the kernel stack entry now. @@ -691,10 +692,10 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id) unsigned long flags; int ssize; - if (id == KERNEL_REGION_ID) { + if (id == LINEAR_MAP_REGION_ID) { /* We only support upto MAX_PHYSMEM_BITS */ - if ((ea & ~REGION_MASK) > (1UL << MAX_PHYSMEM_BITS)) + if ((ea & EA_MASK) > (1UL << MAX_PHYSMEM_BITS)) return -EFAULT; flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp; @@ -702,20 +703,25 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id) #ifdef CONFIG_SPARSEMEM_VMEMMAP } else if (id == VMEMMAP_REGION_ID) { - if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT)) + if (ea >= H_VMEMMAP_END) return -EFAULT; flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp; #endif } else if (id == VMALLOC_REGION_ID) { - if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT)) + if (ea >= H_VMALLOC_END) return -EFAULT; - if (ea < H_VMALLOC_END) - flags = local_paca->vmalloc_sllp; - else - flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp; + flags = local_paca->vmalloc_sllp; + + } else if (id == IO_REGION_ID) { + + if (ea >= H_KERN_IO_END) + return -EFAULT; + + flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp; + } else { return -EFAULT; } @@ -725,6 +731,7 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id) ssize = MMU_SEGSIZE_256M; context = get_kernel_context(ea); + return slb_insert_entry(ea, context, flags, ssize, true); } @@ -739,7 +746,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea) * consider this as bad access if we take a SLB miss * on an address above addr limit. */ - if (ea >= mm->context.slb_addr_limit) + if (ea >= mm_ctx_slb_addr_limit(&mm->context)) return -EFAULT; context = get_user_context(&mm->context, ea); @@ -761,7 +768,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea) long do_slb_fault(struct pt_regs *regs, unsigned long ea) { - unsigned long id = REGION_ID(ea); + unsigned long id = get_region_id(ea); /* IRQs are not reconciled here, so can't check irqs_disabled */ VM_WARN_ON(mfmsr() & MSR_EE); @@ -784,7 +791,7 @@ long do_slb_fault(struct pt_regs *regs, unsigned long ea) * first class kernel code. But for performance it's probably nicer * if they go via fast_exception_return too. */ - if (id >= KERNEL_REGION_ID) { + if (id >= LINEAR_MAP_REGION_ID) { long err; #ifdef CONFIG_DEBUG_VM /* Catch recursive kernel SLB faults. */ diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c index 5e4178790dee..473dd430e306 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/book3s64/subpage_prot.c @@ -25,10 +25,13 @@ */ void subpage_prot_free(struct mm_struct *mm) { - struct subpage_prot_table *spt = &mm->context.spt; + struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context); unsigned long i, j, addr; u32 **p; + if (!spt) + return; + for (i = 0; i < 4; ++i) { if (spt->low_prot[i]) { free_page((unsigned long)spt->low_prot[i]); @@ -48,13 +51,7 @@ void subpage_prot_free(struct mm_struct *mm) free_page((unsigned long)p); } spt->maxaddr = 0; -} - -void subpage_prot_init_new_context(struct mm_struct *mm) -{ - struct subpage_prot_table *spt = &mm->context.spt; - - memset(spt, 0, sizeof(*spt)); + kfree(spt); } static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, @@ -93,13 +90,18 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, static void subpage_prot_clear(unsigned long addr, unsigned long len) { struct mm_struct *mm = current->mm; - struct subpage_prot_table *spt = &mm->context.spt; + struct subpage_prot_table *spt; u32 **spm, *spp; unsigned long i; size_t nw; unsigned long next, limit; down_write(&mm->mmap_sem); + + spt = mm_ctx_subpage_prot(&mm->context); + if (!spt) + goto err_out; + limit = addr + len; if (limit > spt->maxaddr) limit = spt->maxaddr; @@ -127,6 +129,8 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len) /* now flush any existing HPTEs for the range */ hpte_flush_range(mm, addr, nw); } + +err_out: up_write(&mm->mmap_sem); } @@ -189,7 +193,7 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr, unsigned long, len, u32 __user *, map) { struct mm_struct *mm = current->mm; - struct subpage_prot_table *spt = &mm->context.spt; + struct subpage_prot_table *spt; u32 **spm, *spp; unsigned long i; size_t nw; @@ -218,6 +222,21 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr, return -EFAULT; down_write(&mm->mmap_sem); + + spt = mm_ctx_subpage_prot(&mm->context); + if (!spt) { + /* + * Allocate subpage prot table if not already done. + * Do this with mmap_sem held + */ + spt = kzalloc(sizeof(struct subpage_prot_table), GFP_KERNEL); + if (!spt) { + err = -ENOMEM; + goto out; + } + mm->context.hash_context->spt = spt; + } + subpage_mark_vma_nohuge(mm, addr, len); for (limit = addr + len; addr < limit; addr = next) { next = pmd_addr_end(addr, limit); diff --git a/arch/powerpc/mm/vphn.c b/arch/powerpc/mm/book3s64/vphn.c index f83044faac23..0ee7734afb50 100644 --- a/arch/powerpc/mm/vphn.c +++ b/arch/powerpc/mm/book3s64/vphn.c @@ -42,7 +42,8 @@ int vphn_unpack_associativity(const long *packed, __be32 *unpacked) u16 new = be16_to_cpup(field++); if (is_32bit) { - /* Let's concatenate the 16 bits of this field to the + /* + * Let's concatenate the 16 bits of this field to the * 15 lower bits of the previous field */ unpacked[++nr_assoc_doms] = @@ -56,7 +57,8 @@ int vphn_unpack_associativity(const long *packed, __be32 *unpacked) unpacked[++nr_assoc_doms] = cpu_to_be32(new & VPHN_FIELD_MASK); } else { - /* Data is in the lower 15 bits of this field + /* + * Data is in the lower 15 bits of this field * concatenated with the next 16 bit field */ last = new; diff --git a/arch/powerpc/mm/vphn.h b/arch/powerpc/mm/book3s64/vphn.h index f9ffdb3942fc..f0b93c2dd578 100644 --- a/arch/powerpc/mm/vphn.h +++ b/arch/powerpc/mm/book3s64/vphn.h @@ -2,8 +2,7 @@ #ifndef _ARCH_POWERPC_MM_VPHN_H_ #define _ARCH_POWERPC_MM_VPHN_H_ -/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. - */ +/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */ #define VPHN_REGISTER_COUNT 6 /* diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index c8da352e8686..f137286740cb 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -105,7 +105,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) u64 vsid, vsidkey; int psize, ssize; - switch (REGION_ID(ea)) { + switch (get_region_id(ea)) { case USER_REGION_ID: pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea); if (mm == NULL) @@ -117,16 +117,20 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) break; case VMALLOC_REGION_ID: pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea); - if (ea < VMALLOC_END) - psize = mmu_vmalloc_psize; - else - psize = mmu_io_psize; + psize = mmu_vmalloc_psize; ssize = mmu_kernel_ssize; vsid = get_kernel_vsid(ea, mmu_kernel_ssize); vsidkey = SLB_VSID_KERNEL; break; - case KERNEL_REGION_ID: - pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea); + case IO_REGION_ID: + pr_devel("%s: 0x%llx -- IO_REGION_ID\n", __func__, ea); + psize = mmu_io_psize; + ssize = mmu_kernel_ssize; + vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + vsidkey = SLB_VSID_KERNEL; + break; + case LINEAR_MAP_REGION_ID: + pr_devel("%s: 0x%llx -- LINEAR_MAP_REGION_ID\n", __func__, ea); psize = mmu_linear_psize; ssize = mmu_kernel_ssize; vsid = get_kernel_vsid(ea, mmu_kernel_ssize); diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index b5d2658c26af..2f6154b76328 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -36,7 +36,7 @@ #include <asm/tlbflush.h> #include <asm/dma.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> /* * This address range defaults to a value that is safe for all diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 3f1803672c9b..641891df2046 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -366,8 +366,10 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop) if (!drmem_info->lmbs) return; - for_each_drmem_lmb(lmb) + for_each_drmem_lmb(lmb) { read_drconf_v1_cell(lmb, &prop); + lmb_set_nid(lmb); + } } static void __init init_drmem_v2_lmbs(const __be32 *prop) @@ -412,6 +414,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop) lmb->aa_index = dr_cell.aa_index; lmb->flags = dr_cell.flags; + + lmb_set_nid(lmb); } } } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 887f11bcf330..b5d3578d9f65 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -44,6 +44,7 @@ #include <asm/mmu_context.h> #include <asm/siginfo.h> #include <asm/debug.h> +#include <asm/kup.h> static inline bool notify_page_fault(struct pt_regs *regs) { @@ -223,19 +224,46 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, } /* Is this a bad kernel fault ? */ -static bool bad_kernel_fault(bool is_exec, unsigned long error_code, - unsigned long address) +static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, + unsigned long address, bool is_write) { + int is_exec = TRAP(regs) == 0x400; + /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */ if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT | DSISR_PROTFAULT))) { - printk_ratelimited(KERN_CRIT "kernel tried to execute" - " exec-protected page (%lx) -" - "exploit attempt? (uid: %d)\n", - address, from_kuid(&init_user_ns, - current_uid())); + pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n", + address >= TASK_SIZE ? "exec-protected" : "user", + address, + from_kuid(&init_user_ns, current_uid())); + + // Kernel exec fault is always bad + return true; } - return is_exec || (address >= TASK_SIZE); + + if (!is_exec && address < TASK_SIZE && (error_code & DSISR_PROTFAULT) && + !search_exception_tables(regs->nip)) { + pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n", + address, + from_kuid(&init_user_ns, current_uid())); + } + + // Kernel fault on kernel address is bad + if (address >= TASK_SIZE) + return true; + + // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad + if (!search_exception_tables(regs->nip)) + return true; + + // Read/write fault in a valid region (the exception table search passed + // above), but blocked by KUAP is bad, it can never succeed. + if (bad_kuap_fault(regs, is_write)) + return true; + + // What's left? Kernel fault on user in well defined regions (extable + // matched), and allowed by KUAP in the faulting context. + return false; } static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, @@ -455,9 +483,10 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, /* * The kernel should never take an execute fault nor should it - * take a page fault to a kernel address. + * take a page fault to a kernel address or a page fault to a user + * address outside of dedicated places */ - if (unlikely(!is_user && bad_kernel_fault(is_exec, error_code, address))) + if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) return SIGSEGV; /* diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c index 82a0e37557a5..320c1672b2ae 100644 --- a/arch/powerpc/mm/highmem.c +++ b/arch/powerpc/mm/highmem.c @@ -43,9 +43,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(kmap_pte-idx))); -#endif + WARN_ON(IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !pte_none(*(kmap_pte - idx))); __set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot), 1); local_flush_tlb_page(NULL, vaddr); @@ -56,7 +54,6 @@ EXPORT_SYMBOL(kmap_atomic_prot); void __kunmap_atomic(void *kvaddr) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - int type __maybe_unused; if (vaddr < __fix_to_virt(FIX_KMAP_END)) { pagefault_enable(); @@ -64,14 +61,12 @@ void __kunmap_atomic(void *kvaddr) return; } - type = kmap_atomic_idx(); - -#ifdef CONFIG_DEBUG_HIGHMEM - { + if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM)) { + int type = kmap_atomic_idx(); unsigned int idx; idx = type + KM_TYPE_NR * smp_processor_id(); - BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); + WARN_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); /* * force other mappings to Oops if they'll try to access @@ -80,7 +75,6 @@ void __kunmap_atomic(void *kvaddr) pte_clear(&init_mm, vaddr, kmap_pte-idx); local_flush_tlb_page(NULL, vaddr); } -#endif kmap_atomic_idx_pop(); pagefault_enable(); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 9e732bb2c84a..c5c9ff2d7afc 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -26,20 +26,8 @@ #include <asm/hugetlb.h> #include <asm/pte-walk.h> - -#ifdef CONFIG_HUGETLB_PAGE - -#define PAGE_SHIFT_64K 16 -#define PAGE_SHIFT_512K 19 -#define PAGE_SHIFT_8M 23 -#define PAGE_SHIFT_16M 24 -#define PAGE_SHIFT_16G 34 - bool hugetlb_disabled = false; -unsigned int HPAGE_SHIFT; -EXPORT_SYMBOL(HPAGE_SHIFT); - #define hugepd_none(hpd) (hpd_val(hpd) == 0) #define PTE_T_ORDER (__builtin_ffs(sizeof(pte_t)) - __builtin_ffs(sizeof(void *))) @@ -98,19 +86,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, for (i = 0; i < num_hugepd; i++, hpdp++) { if (unlikely(!hugepd_none(*hpdp))) break; - else { -#ifdef CONFIG_PPC_BOOK3S_64 - *hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS | - (shift_to_mmu_psize(pshift) << 2)); -#elif defined(CONFIG_PPC_8xx) - *hpdp = __hugepd(__pa(new) | _PMD_USER | - (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M : - _PMD_PAGE_512K) | _PMD_PRESENT); -#else - /* We use the old format for PPC_FSL_BOOK3E */ - *hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift); -#endif - } + hugepd_populate(hpdp, new, pshift); } /* If we bailed from the for loop early, an error occurred, clean up */ if (i < num_hugepd) { @@ -250,7 +226,7 @@ int __init alloc_bootmem_huge_page(struct hstate *h) return __alloc_bootmem_huge_page(h); } -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) +#ifndef CONFIG_PPC_BOOK3S_64 #define HUGEPD_FREELIST_SIZE \ ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) @@ -542,23 +518,6 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, return (__boundary - 1 < end - 1) ? __boundary : end; } -int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift, - unsigned long end, int write, struct page **pages, int *nr) -{ - pte_t *ptep; - unsigned long sz = 1UL << hugepd_shift(hugepd); - unsigned long next; - - ptep = hugepte_offset(hugepd, addr, pdshift); - do { - next = hugepte_addr_end(addr, end, sz); - if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr)) - return 0; - } while (ptep++, addr = next, addr != end); - - return 1; -} - #ifdef CONFIG_PPC_MM_SLICES unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, @@ -578,24 +537,15 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) { -#ifdef CONFIG_PPC_MM_SLICES /* With radix we don't use slice, so derive it from vma*/ - if (!radix_enabled()) { + if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled()) { unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); return 1UL << mmu_psize_to_shift(psize); } -#endif return vma_kernel_pagesize(vma); } -static inline bool is_power_of_4(unsigned long x) -{ - if (is_power_of_2(x)) - return (__ilog2(x) % 2) ? false : true; - return false; -} - static int __init add_huge_page_size(unsigned long long size) { int shift = __ffs(size); @@ -603,37 +553,13 @@ static int __init add_huge_page_size(unsigned long long size) /* Check that it is a page size supported by the hardware and * that it fits within pagetable and slice limits. */ - if (size <= PAGE_SIZE) - return -EINVAL; -#if defined(CONFIG_PPC_FSL_BOOK3E) - if (!is_power_of_4(size)) + if (size <= PAGE_SIZE || !is_power_of_2(size)) return -EINVAL; -#elif !defined(CONFIG_PPC_8xx) - if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT)) - return -EINVAL; -#endif - if ((mmu_psize = shift_to_mmu_psize(shift)) < 0) + mmu_psize = check_and_get_huge_psize(size); + if (mmu_psize < 0) return -EINVAL; -#ifdef CONFIG_PPC_BOOK3S_64 - /* - * We need to make sure that for different page sizes reported by - * firmware we only add hugetlb support for page sizes that can be - * supported by linux page table layout. - * For now we have - * Radix: 2M and 1G - * Hash: 16M and 16G - */ - if (radix_enabled()) { - if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G) - return -EINVAL; - } else { - if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G) - return -EINVAL; - } -#endif - BUG_ON(mmu_psize_defs[mmu_psize].shift != shift); /* Return if huge page size has already been setup */ @@ -669,10 +595,10 @@ static int __init hugetlbpage_init(void) return 0; } -#if !defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_PPC_8xx) - if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE)) + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled() && + !mmu_has_feature(MMU_FTR_16M_PAGE)) return -ENODEV; -#endif + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { unsigned shift; unsigned pdshift; @@ -710,29 +636,13 @@ static int __init hugetlbpage_init(void) pgtable_cache_add(PTE_INDEX_SIZE); else if (pdshift > shift) pgtable_cache_add(pdshift - shift); -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) - else + else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || IS_ENABLED(CONFIG_PPC_8xx)) pgtable_cache_add(PTE_T_ORDER); -#endif } -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) - /* Default hpage size = 4M on FSL_BOOK3E and 512k on 8xx */ - if (mmu_psize_defs[MMU_PAGE_4M].shift) - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift; - else if (mmu_psize_defs[MMU_PAGE_512K].shift) - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_512K].shift; -#else - /* Set default large page size. Currently, we pick 16M or 1M - * depending on what is available - */ - if (mmu_psize_defs[MMU_PAGE_16M].shift) - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift; - else if (mmu_psize_defs[MMU_PAGE_1M].shift) - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; - else if (mmu_psize_defs[MMU_PAGE_2M].shift) - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift; -#endif + if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)) + hugetlbpage_init_default(); + return 0; } @@ -756,113 +666,8 @@ void flush_dcache_icache_hugepage(struct page *page) } } -#endif /* CONFIG_HUGETLB_PAGE */ - -/* - * We have 4 cases for pgds and pmds: - * (1) invalid (all zeroes) - * (2) pointer to next table, as normal; bottom 6 bits == 0 - * (3) leaf pte for huge page _PAGE_PTE set - * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table - * - * So long as we atomically load page table pointers we are safe against teardown, - * we can follow the address down to the the page and take a ref on it. - * This function need to be called with interrupts disabled. We use this variant - * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED - */ -pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, - bool *is_thp, unsigned *hpage_shift) -{ - pgd_t pgd, *pgdp; - pud_t pud, *pudp; - pmd_t pmd, *pmdp; - pte_t *ret_pte; - hugepd_t *hpdp = NULL; - unsigned pdshift = PGDIR_SHIFT; - - if (hpage_shift) - *hpage_shift = 0; - - if (is_thp) - *is_thp = false; - - pgdp = pgdir + pgd_index(ea); - pgd = READ_ONCE(*pgdp); - /* - * Always operate on the local stack value. This make sure the - * value don't get updated by a parallel THP split/collapse, - * page fault or a page unmap. The return pte_t * is still not - * stable. So should be checked there for above conditions. - */ - if (pgd_none(pgd)) - return NULL; - else if (pgd_huge(pgd)) { - ret_pte = (pte_t *) pgdp; - goto out; - } else if (is_hugepd(__hugepd(pgd_val(pgd)))) - hpdp = (hugepd_t *)&pgd; - else { - /* - * Even if we end up with an unmap, the pgtable will not - * be freed, because we do an rcu free and here we are - * irq disabled - */ - pdshift = PUD_SHIFT; - pudp = pud_offset(&pgd, ea); - pud = READ_ONCE(*pudp); - - if (pud_none(pud)) - return NULL; - else if (pud_huge(pud)) { - ret_pte = (pte_t *) pudp; - goto out; - } else if (is_hugepd(__hugepd(pud_val(pud)))) - hpdp = (hugepd_t *)&pud; - else { - pdshift = PMD_SHIFT; - pmdp = pmd_offset(&pud, ea); - pmd = READ_ONCE(*pmdp); - /* - * A hugepage collapse is captured by pmd_none, because - * it mark the pmd none and do a hpte invalidate. - */ - if (pmd_none(pmd)) - return NULL; - - if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) { - if (is_thp) - *is_thp = true; - ret_pte = (pte_t *) pmdp; - goto out; - } - /* - * pmd_large check below will handle the swap pmd pte - * we need to do both the check because they are config - * dependent. - */ - if (pmd_huge(pmd) || pmd_large(pmd)) { - ret_pte = (pte_t *) pmdp; - goto out; - } else if (is_hugepd(__hugepd(pmd_val(pmd)))) - hpdp = (hugepd_t *)&pmd; - else - return pte_offset_kernel(&pmd, ea); - } - } - if (!hpdp) - return NULL; - - ret_pte = hugepte_offset(*hpdp, ea, pdshift); - pdshift = hugepd_shift(*hpdp); -out: - if (hpage_shift) - *hpage_shift = pdshift; - return ret_pte; -} -EXPORT_SYMBOL_GPL(__find_linux_pte); - -int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) +static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) { unsigned long pte_end; struct page *head, *page; @@ -908,3 +713,20 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, return 1; } + +int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift, + unsigned long end, int write, struct page **pages, int *nr) +{ + pte_t *ptep; + unsigned long sz = 1UL << hugepd_shift(hugepd); + unsigned long next; + + ptep = hugepte_offset(hugepd, addr, pdshift); + do { + next = hugepte_addr_end(addr, end, sz); + if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr)) + return 0; + } while (ptep++, addr = next, addr != end); + + return 1; +} diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 1e6910eb70ed..3bcae9e5e954 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -24,6 +24,32 @@ #include <linux/string.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> +#include <asm/kup.h> + +static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP); +static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP); + +static int __init parse_nosmep(char *p) +{ + disable_kuep = true; + pr_warn("Disabling Kernel Userspace Execution Prevention\n"); + return 0; +} +early_param("nosmep", parse_nosmep); + +static int __init parse_nosmap(char *p) +{ + disable_kuap = true; + pr_warn("Disabling Kernel Userspace Access Protection\n"); + return 0; +} +early_param("nosmap", parse_nosmap); + +void __ref setup_kup(void) +{ + setup_kuep(disable_kuep); + setup_kuap(disable_kuap); +} #define CTOR(shift) static void ctor_##shift(void *addr) \ { \ diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 41a3513cadc9..c3121b6c8cbd 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -45,8 +45,10 @@ #include <asm/tlb.h> #include <asm/sections.h> #include <asm/hugetlb.h> +#include <asm/kup.h> +#include <asm/kasan.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> #if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL) /* The amount of lowmem must be within 0xF0000000 - KERNELBASE. */ @@ -178,6 +180,10 @@ void __init MMU_init(void) btext_unmap(); #endif + kasan_mmu_init(); + + setup_kup(); + /* Shortly after that, the entire linear mapping will be available */ memblock_set_current_limit(lowmem_end_addr); } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index a4c155af1597..45b02fa11cd8 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -66,7 +66,7 @@ #include <asm/iommu.h> #include <asm/vdso.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> phys_addr_t memstart_addr = ~0; EXPORT_SYMBOL_GPL(memstart_addr); diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile new file mode 100644 index 000000000000..6577897673dd --- /dev/null +++ b/arch/powerpc/mm/kasan/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +KASAN_SANITIZE := n + +obj-$(CONFIG_PPC32) += kasan_init_32.o diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c new file mode 100644 index 000000000000..0d62be3cba47 --- /dev/null +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define DISABLE_BRANCH_PROFILING + +#include <linux/kasan.h> +#include <linux/printk.h> +#include <linux/memblock.h> +#include <linux/sched/task.h> +#include <linux/vmalloc.h> +#include <asm/pgalloc.h> +#include <asm/code-patching.h> +#include <mm/mmu_decl.h> + +static void kasan_populate_pte(pte_t *ptep, pgprot_t prot) +{ + unsigned long va = (unsigned long)kasan_early_shadow_page; + phys_addr_t pa = __pa(kasan_early_shadow_page); + int i; + + for (i = 0; i < PTRS_PER_PTE; i++, ptep++) + __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); +} + +static int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) +{ + pmd_t *pmd; + unsigned long k_cur, k_next; + + pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start); + + for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) { + pte_t *new; + + k_next = pgd_addr_end(k_cur, k_end); + if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte) + continue; + + new = pte_alloc_one_kernel(&init_mm); + + if (!new) + return -ENOMEM; + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) + kasan_populate_pte(new, PAGE_READONLY); + else + kasan_populate_pte(new, PAGE_KERNEL_RO); + pmd_populate_kernel(&init_mm, pmd, new); + } + return 0; +} + +static void __ref *kasan_get_one_page(void) +{ + if (slab_is_available()) + return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + + return memblock_alloc(PAGE_SIZE, PAGE_SIZE); +} + +static int __ref kasan_init_region(void *start, size_t size) +{ + unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); + unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); + unsigned long k_cur; + int ret; + void *block = NULL; + + ret = kasan_init_shadow_page_tables(k_start, k_end); + if (ret) + return ret; + + if (!slab_is_available()) + block = memblock_alloc(k_end - k_start, PAGE_SIZE); + + for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE) { + pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); + void *va = block ? block + k_cur - k_start : kasan_get_one_page(); + pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); + + if (!va) + return -ENOMEM; + + __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0); + } + flush_tlb_kernel_range(k_start, k_end); + return 0; +} + +static void __init kasan_remap_early_shadow_ro(void) +{ + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) + kasan_populate_pte(kasan_early_shadow_pte, PAGE_READONLY); + else + kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL_RO); + + flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END); +} + +void __init kasan_mmu_init(void) +{ + int ret; + struct memblock_region *reg; + + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) { + ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); + + if (ret) + panic("kasan: kasan_init_shadow_page_tables() failed"); + } + + for_each_memblock(memory, reg) { + phys_addr_t base = reg->base; + phys_addr_t top = min(base + reg->size, total_lowmem); + + if (base >= top) + continue; + + ret = kasan_init_region(__va(base), top - base); + if (ret) + panic("kasan: kasan_init_region() failed"); + } +} + +void __init kasan_init(void) +{ + kasan_remap_early_shadow_ro(); + + clear_page(kasan_early_shadow_page); + + /* At this point kasan is fully initialized. Enable error messages */ + init_task.kasan_depth = 0; + pr_info("KASAN init done\n"); +} + +#ifdef CONFIG_MODULES +void *module_alloc(unsigned long size) +{ + void *base = vmalloc_exec(size); + + if (!base) + return NULL; + + if (!kasan_init_region(base, size)) + return base; + + vfree(base); + + return NULL; +} +#endif + +#ifdef CONFIG_PPC_BOOK3S_32 +u8 __initdata early_hash[256 << 10] __aligned(256 << 10) = {0}; + +static void __init kasan_early_hash_table(void) +{ + modify_instruction_site(&patch__hash_page_A0, 0xffff, __pa(early_hash) >> 16); + modify_instruction_site(&patch__flush_hash_A0, 0xffff, __pa(early_hash) >> 16); + + Hash = (struct hash_pte *)early_hash; +} +#else +static void __init kasan_early_hash_table(void) {} +#endif + +void __init kasan_early_init(void) +{ + unsigned long addr = KASAN_SHADOW_START; + unsigned long end = KASAN_SHADOW_END; + unsigned long next; + pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(addr), addr), addr); + + BUILD_BUG_ON(KASAN_SHADOW_START & ~PGDIR_MASK); + + kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL); + + do { + next = pgd_addr_end(addr, end); + pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte); + } while (pmd++, addr = next, addr != end); + + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) + kasan_early_hash_table(); +} diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index f6787f90e158..cd525d709072 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -54,7 +54,7 @@ #include <asm/swiotlb.h> #include <asm/rtas.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> #ifndef CPU_FTR_COHERENT_ICACHE #define CPU_FTR_COHERENT_ICACHE 0 /* XXX for now */ @@ -109,8 +109,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } -int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int __ref arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -131,8 +131,8 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap * } #ifdef CONFIG_MEMORY_HOTREMOVE -int __meminit arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +int __ref arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -161,7 +161,8 @@ int __meminit arch_remove_memory(int nid, u64 start, u64 size, */ vm_unmap_aliases(); - resize_hpt_for_hotplug(memblock_phys_mem_size()); + if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC) + pr_warn("Hash collision while resizing HPT\n"); return ret; } @@ -309,6 +310,10 @@ void __init mem_init(void) mem_init_print_info(NULL); #ifdef CONFIG_PPC32 pr_info("Kernel virtual memory layout:\n"); +#ifdef CONFIG_KASAN + pr_info(" * 0x%08lx..0x%08lx : kasan shadow mem\n", + KASAN_SHADOW_START, KASAN_SHADOW_END); +#endif pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP); #ifdef CONFIG_HIGHMEM pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n", diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index bb52320b7369..6b049d82b98a 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -98,7 +98,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, switch_mmu_context(prev, next, tsk); } -#ifdef CONFIG_PPC32 +#ifndef CONFIG_PPC_BOOK3S_64 void arch_exit_mmap(struct mm_struct *mm) { void *frag = pte_frag_get(&mm->context); diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 74ff61dabcb1..7bac0aa2026a 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -83,6 +83,8 @@ static inline void _tlbivax_bcast(unsigned long address, unsigned int pid, } #endif +static inline void print_system_hash_info(void) {} + #else /* CONFIG_PPC_MMU_NOHASH */ extern void hash_preload(struct mm_struct *mm, unsigned long ea, @@ -92,6 +94,8 @@ extern void hash_preload(struct mm_struct *mm, unsigned long ea, extern void _tlbie(unsigned long address); extern void _tlbia(void); +void print_system_hash_info(void); + #endif /* CONFIG_PPC_MMU_NOHASH */ #ifdef CONFIG_PPC32 @@ -104,8 +108,8 @@ extern int __map_without_bats; extern unsigned int rtas_data, rtas_size; struct hash_pte; -extern struct hash_pte *Hash, *Hash_end; -extern unsigned long Hash_size, Hash_mask; +extern struct hash_pte *Hash; +extern u8 early_hash[]; #endif /* CONFIG_PPC32 */ @@ -130,6 +134,7 @@ extern void wii_memory_fixups(void); */ #ifdef CONFIG_PPC32 extern void MMU_init_hw(void); +void MMU_init_hw_patch(void); unsigned long mmu_mapin_ram(unsigned long base, unsigned long top); #endif diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/nohash/40x.c index b9cf6f8764b0..460459b6f53e 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/nohash/40x.c @@ -49,7 +49,7 @@ #include <asm/machdep.h> #include <asm/setup.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> extern int __map_without_ltlbs; /* diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/nohash/44x.c index aad127acdbaa..c07983ebc02e 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/nohash/44x.c @@ -31,7 +31,7 @@ #include <asm/cacheflush.h> #include <asm/code-patching.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> /* Used by the 44x TLB replacement exception handler. * Just needed it declared someplace. diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/nohash/8xx.c index fe1f6443d57f..70d55b615b62 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -17,7 +17,7 @@ #include <asm/fixmap.h> #include <asm/code-patching.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> #define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT) @@ -213,3 +213,27 @@ void flush_instruction_cache(void) mtspr(SPRN_IC_CST, IDC_INVALL); isync(); } + +#ifdef CONFIG_PPC_KUEP +void __init setup_kuep(bool disabled) +{ + if (disabled) + return; + + pr_info("Activating Kernel Userspace Execution Prevention\n"); + + mtspr(SPRN_MI_AP, MI_APG_KUEP); +} +#endif + +#ifdef CONFIG_PPC_KUAP +void __init setup_kuap(bool disabled) +{ + pr_info("Activating Kernel Userspace Access Protection\n"); + + if (disabled) + pr_warn("KUAP cannot be disabled yet on 8xx when compiled in\n"); + + mtspr(SPRN_MD_AP, MD_APG_KUAP); +} +#endif diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile new file mode 100644 index 000000000000..33b6f6f29d3f --- /dev/null +++ b/arch/powerpc/mm/nohash/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 + +ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) + +obj-y += mmu_context.o tlb.o tlb_low.o +obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o +obj-$(CONFIG_40x) += 40x.o +obj-$(CONFIG_44x) += 44x.o +obj-$(CONFIG_PPC_8xx) += 8xx.o +obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke.o +ifdef CONFIG_HUGETLB_PAGE +obj-$(CONFIG_PPC_FSL_BOOK3E) += book3e_hugetlbpage.o +endif + +# Disable kcov instrumentation on sensitive code +# This is necessary for booting with kcov enabled on book3e machines +KCOV_INSTRUMENT_tlb.o := n +KCOV_INSTRUMENT_fsl_booke.o := n diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c index f84ec46cdb26..61915f4d3c7f 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c @@ -11,8 +11,9 @@ #include <asm/mmu.h> -#ifdef CONFIG_PPC_FSL_BOOK3E #ifdef CONFIG_PPC64 +#include <asm/paca.h> + static inline int tlb1_next(void) { struct paca_struct *paca = get_paca(); @@ -29,33 +30,6 @@ static inline int tlb1_next(void) tcd->esel_next = next; return this; } -#else -static inline int tlb1_next(void) -{ - int index, ncams; - - ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; - - index = this_cpu_read(next_tlbcam_idx); - - /* Just round-robin the entries and wrap when we hit the end */ - if (unlikely(index == ncams - 1)) - __this_cpu_write(next_tlbcam_idx, tlbcam_index); - else - __this_cpu_inc(next_tlbcam_idx); - - return index; -} -#endif /* !PPC64 */ -#endif /* FSL */ - -static inline int mmu_get_tsize(int psize) -{ - return mmu_psize_defs[psize].enc; -} - -#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_PPC64) -#include <asm/paca.h> static inline void book3e_tlb_lock(void) { @@ -98,6 +72,23 @@ static inline void book3e_tlb_unlock(void) paca->tcd_ptr->lock = 0; } #else +static inline int tlb1_next(void) +{ + int index, ncams; + + ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; + + index = this_cpu_read(next_tlbcam_idx); + + /* Just round-robin the entries and wrap when we hit the end */ + if (unlikely(index == ncams - 1)) + __this_cpu_write(next_tlbcam_idx, tlbcam_index); + else + __this_cpu_inc(next_tlbcam_idx); + + return index; +} + static inline void book3e_tlb_lock(void) { } @@ -139,10 +130,7 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, unsigned long psize, tsize, shift; unsigned long flags; struct mm_struct *mm; - -#ifdef CONFIG_PPC_FSL_BOOK3E int index; -#endif if (unlikely(is_kernel_addr(ea))) return; @@ -166,11 +154,9 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, return; } -#ifdef CONFIG_PPC_FSL_BOOK3E /* We have to use the CAM(TLB1) on FSL parts for hugepages */ index = tlb1_next(); mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1)); -#endif mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize); mas2 = ea & ~((1UL << shift) - 1); diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/nohash/book3e_pgtable.c index 1032ef7aaf62..75e9e2c35fe2 100644 --- a/arch/powerpc/mm/pgtable-book3e.c +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c @@ -15,7 +15,7 @@ #include <asm/tlb.h> #include <asm/dma.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> #ifdef CONFIG_SPARSEMEM_VMEMMAP /* @@ -55,7 +55,7 @@ void vmemmap_remove_mapping(unsigned long start, #endif #endif /* CONFIG_SPARSEMEM_VMEMMAP */ -static __ref void *early_alloc_pgtable(unsigned long size) +static void __init *early_alloc_pgtable(unsigned long size) { void *ptr; @@ -74,7 +74,7 @@ static __ref void *early_alloc_pgtable(unsigned long size) * map_kernel_page adds an entry to the ioremap page table * and adds an entry to the HPT, possibly bolting it */ -int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) +int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) { pgd_t *pgdp; pud_t *pudp; @@ -98,20 +98,17 @@ int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) #ifndef __PAGETABLE_PUD_FOLDED if (pgd_none(*pgdp)) { pudp = early_alloc_pgtable(PUD_TABLE_SIZE); - BUG_ON(pudp == NULL); pgd_populate(&init_mm, pgdp, pudp); } #endif /* !__PAGETABLE_PUD_FOLDED */ pudp = pud_offset(pgdp, ea); if (pud_none(*pudp)) { pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); - BUG_ON(pmdp == NULL); pud_populate(&init_mm, pudp, pmdp); } pmdp = pmd_offset(pudp, ea); if (!pmd_present(*pmdp)) { ptep = early_alloc_pgtable(PAGE_SIZE); - BUG_ON(ptep == NULL); pmd_populate_kernel(&init_mm, pmdp, ptep); } ptep = pte_offset_kernel(pmdp, ea); diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/nohash/fsl_booke.c index 210cbc1faf63..71a1a36751dd 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/nohash/fsl_booke.c @@ -54,7 +54,7 @@ #include <asm/setup.h> #include <asm/paca.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> unsigned int tlbcam_index; diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/nohash/mmu_context.c index 1945c5f19f5e..ae4505d5b4b8 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/nohash/mmu_context.c @@ -52,7 +52,7 @@ #include <asm/mmu_context.h> #include <asm/tlbflush.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> /* * The MPC8xx has only 16 contexts. We rotate through them on each task switch. diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/nohash/tlb.c index ac23dc1c6535..24f88efb05bf 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -46,7 +46,7 @@ #include <asm/hugetlb.h> #include <asm/paca.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> /* * This struct lists the sw-supported page sizes. The hardawre MMU may support @@ -433,11 +433,7 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) unsigned long rid = (address & rmask) | 0x1000000000000000ul; unsigned long vpte = address & ~rmask; -#ifdef CONFIG_PPC_64K_PAGES - vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful; -#else vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful; -#endif vpte |= rid; __flush_tlb_page(tlb->mm, vpte, tsize, 0); } @@ -625,21 +621,12 @@ static void early_init_this_mmu(void) case PPC_HTW_IBM: mas4 |= MAS4_INDD; -#ifdef CONFIG_PPC_64K_PAGES - mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT; - mmu_pte_psize = MMU_PAGE_256M; -#else mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT; mmu_pte_psize = MMU_PAGE_1M; -#endif break; case PPC_HTW_NONE: -#ifdef CONFIG_PPC_64K_PAGES - mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT; -#else mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT; -#endif mmu_pte_psize = mmu_virtual_psize; break; } @@ -800,5 +787,9 @@ void __init early_init_mmu(void) #ifdef CONFIG_PPC_47x early_init_mmu_47x(); #endif + +#ifdef CONFIG_PPC_MM_SLICES + mm_ctx_set_slb_addr_limit(&init_mm.context, SLB_ADDR_LIMIT_DEFAULT); +#endif } #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/nohash/tlb_low.S index e066a658acac..e066a658acac 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index 9ed90064f542..58959ce15415 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -24,11 +24,7 @@ #include <asm/kvm_booke_hv_asm.h> #include <asm/feature-fixups.h> -#ifdef CONFIG_PPC_64K_PAGES -#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE+1) -#else #define VPTE_PMD_SHIFT (PTE_INDEX_SIZE) -#endif #define VPTE_PUD_SHIFT (VPTE_PMD_SHIFT + PMD_INDEX_SIZE) #define VPTE_PGD_SHIFT (VPTE_PUD_SHIFT + PUD_INDEX_SIZE) #define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE) @@ -167,13 +163,11 @@ MMU_FTR_SECTION_ELSE ldx r14,r14,r15 /* grab pgd entry */ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) -#ifndef CONFIG_PPC_64K_PAGES rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 clrrdi r15,r15,3 cmpdi cr0,r14,0 bge tlb_miss_fault_bolted /* Bad pgd entry or hugepage; bail */ ldx r14,r14,r15 /* grab pud entry */ -#endif /* CONFIG_PPC_64K_PAGES */ rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3 clrrdi r15,r15,3 @@ -682,18 +676,7 @@ normal_tlb_miss: * order to handle the weird page table format used by linux */ ori r10,r15,0x1 -#ifdef CONFIG_PPC_64K_PAGES - /* For the top bits, 16 bytes per PTE */ - rldicl r14,r16,64-(PAGE_SHIFT-4),PAGE_SHIFT-4+4 - /* Now create the bottom bits as 0 in position 0x8000 and - * the rest calculated for 8 bytes per PTE - */ - rldicl r15,r16,64-(PAGE_SHIFT-3),64-15 - /* Insert the bottom bits in */ - rlwimi r14,r15,0,16,31 -#else rldicl r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4 -#endif sldi r15,r10,60 clrrdi r14,r14,3 or r10,r15,r14 @@ -732,11 +715,7 @@ finish_normal_tlb_miss: /* Check page size, if not standard, update MAS1 */ rldicl r11,r14,64-8,64-8 -#ifdef CONFIG_PPC_64K_PAGES - cmpldi cr0,r11,BOOK3E_PAGESZ_64K -#else cmpldi cr0,r11,BOOK3E_PAGESZ_4K -#endif beq- 1f mfspr r11,SPRN_MAS1 rlwimi r11,r14,31,21,24 @@ -857,14 +836,12 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) cmpdi cr0,r15,0 bge virt_page_table_tlb_miss_fault -#ifndef CONFIG_PPC_64K_PAGES /* Get to PUD entry */ rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 cmpdi cr0,r15,0 bge virt_page_table_tlb_miss_fault -#endif /* CONFIG_PPC_64K_PAGES */ /* Get to PMD entry */ rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3 @@ -1106,14 +1083,12 @@ htw_tlb_miss: cmpdi cr0,r15,0 bge htw_tlb_miss_fault -#ifndef CONFIG_PPC_64K_PAGES /* Get to PUD entry */ rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 cmpdi cr0,r15,0 bge htw_tlb_miss_fault -#endif /* CONFIG_PPC_64K_PAGES */ /* Get to PMD entry */ rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3 @@ -1132,9 +1107,7 @@ htw_tlb_miss: * 4K page we need to extract a bit from the virtual address and * insert it into the "PA52" bit of the RPN. */ -#ifndef CONFIG_PPC_64K_PAGES rlwimi r15,r16,32-9,20,20 -#endif /* Now we build the MAS: * * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG @@ -1144,11 +1117,7 @@ htw_tlb_miss: * MAS 2 : Use defaults * MAS 3+7 : Needs to be done */ -#ifdef CONFIG_PPC_64K_PAGES - ori r10,r15,(BOOK3E_PAGESZ_64K << MAS3_SPSIZE_SHIFT) -#else ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT) -#endif BEGIN_MMU_FTR_SECTION srdi r16,r10,32 diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index f976676004ad..57e64273cb33 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -32,7 +32,6 @@ #include <asm/sparsemem.h> #include <asm/prom.h> #include <asm/smp.h> -#include <asm/cputhreads.h> #include <asm/topology.h> #include <asm/firmware.h> #include <asm/paca.h> @@ -908,16 +907,22 @@ static int __init early_numa(char *p) } early_param("numa", early_numa); -static bool topology_updates_enabled = true; +/* + * The platform can inform us through one of several mechanisms + * (post-migration device tree updates, PRRN or VPHN) that the NUMA + * assignment of a resource has changed. This controls whether we act + * on that. Disabled by default. + */ +static bool topology_updates_enabled; static int __init early_topology_updates(char *p) { if (!p) return 0; - if (!strcmp(p, "off")) { - pr_info("Disabling topology updates\n"); - topology_updates_enabled = false; + if (!strcmp(p, "on")) { + pr_warn("Caution: enabling topology updates\n"); + topology_updates_enabled = true; } return 0; @@ -1063,7 +1068,7 @@ u64 memory_hotplug_max(void) /* Virtual Processor Home Node (VPHN) support */ #ifdef CONFIG_PPC_SPLPAR -#include "vphn.h" +#include "book3s64/vphn.h" struct topology_update_data { struct topology_update_data *next; @@ -1498,6 +1503,9 @@ int start_topology_update(void) { int rc = 0; + if (!topology_updates_enabled) + return 0; + if (firmware_has_feature(FW_FEATURE_PRRN)) { if (!prrn_enabled) { prrn_enabled = 1; @@ -1531,6 +1539,9 @@ int stop_topology_update(void) { int rc = 0; + if (!topology_updates_enabled) + return 0; + if (prrn_enabled) { prrn_enabled = 0; #ifdef CONFIG_SMP @@ -1588,11 +1599,13 @@ static ssize_t topology_write(struct file *file, const char __user *buf, kbuf[read_len] = '\0'; - if (!strncmp(kbuf, "on", 2)) + if (!strncmp(kbuf, "on", 2)) { + topology_updates_enabled = true; start_topology_update(); - else if (!strncmp(kbuf, "off", 3)) + } else if (!strncmp(kbuf, "off", 3)) { stop_topology_update(); - else + topology_updates_enabled = false; + } else return -EINVAL; return count; @@ -1607,9 +1620,7 @@ static const struct file_operations topology_ops = { static int topology_update_init(void) { - /* Do not poll for changes if disabled at boot */ - if (topology_updates_enabled) - start_topology_update(); + start_topology_update(); if (vphn_enabled) topology_schedule_update(); diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index d3d61d29b4f1..db4a6253df92 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -30,6 +30,7 @@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/tlb.h> +#include <asm/hugetlb.h> static inline int is_exec_fault(void) { @@ -299,3 +300,116 @@ unsigned long vmalloc_to_phys(void *va) return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va); } EXPORT_SYMBOL_GPL(vmalloc_to_phys); + +/* + * We have 4 cases for pgds and pmds: + * (1) invalid (all zeroes) + * (2) pointer to next table, as normal; bottom 6 bits == 0 + * (3) leaf pte for huge page _PAGE_PTE set + * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table + * + * So long as we atomically load page table pointers we are safe against teardown, + * we can follow the address down to the the page and take a ref on it. + * This function need to be called with interrupts disabled. We use this variant + * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED + */ +pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, + bool *is_thp, unsigned *hpage_shift) +{ + pgd_t pgd, *pgdp; + pud_t pud, *pudp; + pmd_t pmd, *pmdp; + pte_t *ret_pte; + hugepd_t *hpdp = NULL; + unsigned pdshift = PGDIR_SHIFT; + + if (hpage_shift) + *hpage_shift = 0; + + if (is_thp) + *is_thp = false; + + pgdp = pgdir + pgd_index(ea); + pgd = READ_ONCE(*pgdp); + /* + * Always operate on the local stack value. This make sure the + * value don't get updated by a parallel THP split/collapse, + * page fault or a page unmap. The return pte_t * is still not + * stable. So should be checked there for above conditions. + */ + if (pgd_none(pgd)) + return NULL; + + if (pgd_huge(pgd)) { + ret_pte = (pte_t *)pgdp; + goto out; + } + if (is_hugepd(__hugepd(pgd_val(pgd)))) { + hpdp = (hugepd_t *)&pgd; + goto out_huge; + } + + /* + * Even if we end up with an unmap, the pgtable will not + * be freed, because we do an rcu free and here we are + * irq disabled + */ + pdshift = PUD_SHIFT; + pudp = pud_offset(&pgd, ea); + pud = READ_ONCE(*pudp); + + if (pud_none(pud)) + return NULL; + + if (pud_huge(pud)) { + ret_pte = (pte_t *)pudp; + goto out; + } + if (is_hugepd(__hugepd(pud_val(pud)))) { + hpdp = (hugepd_t *)&pud; + goto out_huge; + } + pdshift = PMD_SHIFT; + pmdp = pmd_offset(&pud, ea); + pmd = READ_ONCE(*pmdp); + /* + * A hugepage collapse is captured by pmd_none, because + * it mark the pmd none and do a hpte invalidate. + */ + if (pmd_none(pmd)) + return NULL; + + if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) { + if (is_thp) + *is_thp = true; + ret_pte = (pte_t *)pmdp; + goto out; + } + /* + * pmd_large check below will handle the swap pmd pte + * we need to do both the check because they are config + * dependent. + */ + if (pmd_huge(pmd) || pmd_large(pmd)) { + ret_pte = (pte_t *)pmdp; + goto out; + } + if (is_hugepd(__hugepd(pmd_val(pmd)))) { + hpdp = (hugepd_t *)&pmd; + goto out_huge; + } + + return pte_offset_kernel(&pmd, ea); + +out_huge: + if (!hpdp) + return NULL; + + ret_pte = hugepte_offset(*hpdp, ea, pdshift); + pdshift = hugepd_shift(*hpdp); +out: + if (hpage_shift) + *hpage_shift = pdshift; + return ret_pte; +} +EXPORT_SYMBOL_GPL(__find_linux_pte); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 6e56a6240bfa..16ada373b32b 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -36,26 +36,13 @@ #include <asm/setup.h> #include <asm/sections.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> unsigned long ioremap_bot; EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ extern char etext[], _stext[], _sinittext[], _einittext[]; -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - if (!slab_is_available()) - return memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE); - - return (pte_t *)pte_fragment_alloc(mm, 1); -} - -pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - return (pgtable_t)pte_fragment_alloc(mm, 0); -} - void __iomem * ioremap(phys_addr_t addr, unsigned long size) { @@ -205,7 +192,29 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap); -int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) +static void __init *early_alloc_pgtable(unsigned long size) +{ + void *ptr = memblock_alloc(size, size); + + if (!ptr) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, size, size); + + return ptr; +} + +static pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va) +{ + if (pmd_none(*pmdp)) { + pte_t *ptep = early_alloc_pgtable(PTE_FRAG_SIZE); + + pmd_populate_kernel(&init_mm, pmdp, ptep); + } + return pte_offset_kernel(pmdp, va); +} + + +int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) { pmd_t *pd; pte_t *pg; @@ -214,7 +223,10 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) /* Use upper 10 bits of VA to index the first level map */ pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va); /* Use middle 10 bits of VA to index the second-level map */ - pg = pte_alloc_kernel(pd, va); + if (likely(slab_is_available())) + pg = pte_alloc_kernel(pd, va); + else + pg = early_pte_alloc_kernel(pd, va); if (pg != 0) { err = 0; /* The PTE should never be already set nor present in the @@ -384,6 +396,9 @@ void mark_rodata_ro(void) PFN_DOWN((unsigned long)__start_rodata); change_page_attr(page, numpages, PAGE_KERNEL_RO); + + // mark_initmem_nx() should have already run by now + ptdump_check_wx(); } #endif diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index fb1375c07e8c..d2d976ff8a0e 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -52,7 +52,7 @@ #include <asm/firmware.h> #include <asm/dma.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> #ifdef CONFIG_PPC_BOOK3S_64 @@ -90,14 +90,13 @@ unsigned long __pgd_val_bits; EXPORT_SYMBOL(__pgd_val_bits); unsigned long __kernel_virt_start; EXPORT_SYMBOL(__kernel_virt_start); -unsigned long __kernel_virt_size; -EXPORT_SYMBOL(__kernel_virt_size); unsigned long __vmalloc_start; EXPORT_SYMBOL(__vmalloc_start); unsigned long __vmalloc_end; EXPORT_SYMBOL(__vmalloc_end); unsigned long __kernel_io_start; EXPORT_SYMBOL(__kernel_io_start); +unsigned long __kernel_io_end; struct page *vmemmap; EXPORT_SYMBOL(vmemmap); unsigned long __pte_frag_nr; @@ -121,6 +120,11 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_ if (pgprot_val(prot) & H_PAGE_4K_PFN) return NULL; + if ((ea + size) >= (void *)IOREMAP_END) { + pr_warn("Outside the supported range\n"); + return NULL; + } + WARN_ON(pa & ~PAGE_MASK); WARN_ON(((unsigned long)ea) & ~PAGE_MASK); WARN_ON(size & ~PAGE_MASK); @@ -328,6 +332,9 @@ void mark_rodata_ro(void) radix__mark_rodata_ro(); else hash__mark_rodata_ro(); + + // mark_initmem_nx() should have already run by now + ptdump_check_wx(); } void mark_initmem_nx(void) diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index b430e4e08af6..b9bda0105841 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -500,7 +500,7 @@ static void populate_markers(void) address_markers[7].start_address = IOREMAP_BASE; address_markers[8].start_address = IOREMAP_END; #ifdef CONFIG_PPC_BOOK3S_64 - address_markers[9].start_address = H_VMEMMAP_BASE; + address_markers[9].start_address = H_VMEMMAP_START; #else address_markers[9].start_address = VMEMMAP_BASE; #endif diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 37138428ab55..646876d9da64 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -31,7 +31,7 @@ #include "ptdump.h" #ifdef CONFIG_PPC32 -#define KERN_VIRT_START 0 +#define KERN_VIRT_START PAGE_OFFSET #endif /* @@ -68,6 +68,8 @@ struct pg_state { unsigned long last_pa; unsigned int level; u64 current_flags; + bool check_wx; + unsigned long wx_pages; }; struct addr_marker { @@ -101,9 +103,25 @@ static struct addr_marker address_markers[] = { { 0, "Fixmap start" }, { 0, "Fixmap end" }, #endif +#ifdef CONFIG_KASAN + { 0, "kasan shadow mem start" }, + { 0, "kasan shadow mem end" }, +#endif { -1, NULL }, }; +#define pt_dump_seq_printf(m, fmt, args...) \ +({ \ + if (m) \ + seq_printf(m, fmt, ##args); \ +}) + +#define pt_dump_seq_putc(m, c) \ +({ \ + if (m) \ + seq_putc(m, c); \ +}) + static void dump_flag_info(struct pg_state *st, const struct flag_info *flag, u64 pte, int num) { @@ -121,19 +139,19 @@ static void dump_flag_info(struct pg_state *st, const struct flag_info val = pte & flag->val; if (flag->shift) val = val >> flag->shift; - seq_printf(st->seq, " %s:%llx", flag->set, val); + pt_dump_seq_printf(st->seq, " %s:%llx", flag->set, val); } else { if ((pte & flag->mask) == flag->val) s = flag->set; else s = flag->clear; if (s) - seq_printf(st->seq, " %s", s); + pt_dump_seq_printf(st->seq, " %s", s); } st->current_flags &= ~flag->mask; } if (st->current_flags != 0) - seq_printf(st->seq, " unknown flags:%llx", st->current_flags); + pt_dump_seq_printf(st->seq, " unknown flags:%llx", st->current_flags); } static void dump_addr(struct pg_state *st, unsigned long addr) @@ -148,12 +166,12 @@ static void dump_addr(struct pg_state *st, unsigned long addr) #define REG "0x%08lx" #endif - seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); + pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); if (st->start_pa == st->last_pa && st->start_address + PAGE_SIZE != addr) { - seq_printf(st->seq, "[" REG "]", st->start_pa); + pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa); delta = PAGE_SIZE >> 10; } else { - seq_printf(st->seq, " " REG " ", st->start_pa); + pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa); delta = (addr - st->start_address) >> 10; } /* Work out what appropriate unit to use */ @@ -161,10 +179,24 @@ static void dump_addr(struct pg_state *st, unsigned long addr) delta >>= 10; unit++; } - seq_printf(st->seq, "%9lu%c", delta, *unit); + pt_dump_seq_printf(st->seq, "%9lu%c", delta, *unit); } +static void note_prot_wx(struct pg_state *st, unsigned long addr) +{ + if (!st->check_wx) + return; + + if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X))) + return; + + WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n", + (void *)st->start_address, (void *)st->start_address); + + st->wx_pages += (addr - st->start_address) / PAGE_SIZE; +} + static void note_page(struct pg_state *st, unsigned long addr, unsigned int level, u64 val) { @@ -178,7 +210,7 @@ static void note_page(struct pg_state *st, unsigned long addr, st->start_address = addr; st->start_pa = pa; st->last_pa = pa; - seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); /* * Dump the section of virtual memory when: * - the PTE flags from one entry to the next differs. @@ -194,6 +226,7 @@ static void note_page(struct pg_state *st, unsigned long addr, /* Check the PTE flags */ if (st->current_flags) { + note_prot_wx(st, addr); dump_addr(st, addr); /* Dump all the flags */ @@ -202,7 +235,7 @@ static void note_page(struct pg_state *st, unsigned long addr, st->current_flags, pg_level[st->level].num); - seq_putc(st->seq, '\n'); + pt_dump_seq_putc(st->seq, '\n'); } /* @@ -211,7 +244,7 @@ static void note_page(struct pg_state *st, unsigned long addr, */ while (addr >= st->marker[1].start_address) { st->marker++; - seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); } st->start_address = addr; st->start_pa = pa; @@ -303,8 +336,9 @@ static void populate_markers(void) address_markers[i++].start_address = PHB_IO_END; address_markers[i++].start_address = IOREMAP_BASE; address_markers[i++].start_address = IOREMAP_END; + /* What is the ifdef about? */ #ifdef CONFIG_PPC_BOOK3S_64 - address_markers[i++].start_address = H_VMEMMAP_BASE; + address_markers[i++].start_address = H_VMEMMAP_START; #else address_markers[i++].start_address = VMEMMAP_BASE; #endif @@ -322,6 +356,10 @@ static void populate_markers(void) #endif address_markers[i++].start_address = FIXADDR_START; address_markers[i++].start_address = FIXADDR_TOP; +#ifdef CONFIG_KASAN + address_markers[i++].start_address = KASAN_SHADOW_START; + address_markers[i++].start_address = KASAN_SHADOW_END; +#endif #endif /* CONFIG_PPC64 */ } @@ -366,6 +404,30 @@ static void build_pgtable_complete_mask(void) pg_level[i].mask |= pg_level[i].flag[j].mask; } +#ifdef CONFIG_PPC_DEBUG_WX +void ptdump_check_wx(void) +{ + struct pg_state st = { + .seq = NULL, + .marker = address_markers, + .check_wx = true, + }; + + if (radix_enabled()) + st.start_address = PAGE_OFFSET; + else + st.start_address = KERN_VIRT_START; + + walk_pagetables(&st); + + if (st.wx_pages) + pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", + st.wx_pages); + else + pr_info("Checked W+X mappings: passed, no W+X pages found\n"); +} +#endif + static int ptdump_init(void) { struct dentry *debugfs_file; diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index aec91dbcdc0b..97fbf7b54422 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -101,7 +101,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr, { struct vm_area_struct *vma; - if ((mm->context.slb_addr_limit - len) < addr) + if ((mm_ctx_slb_addr_limit(&mm->context) - len) < addr) return 0; vma = find_vma(mm, addr); return (!vma || (addr + len) <= vm_start_gap(vma)); @@ -118,13 +118,11 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice) unsigned long start = slice << SLICE_HIGH_SHIFT; unsigned long end = start + (1ul << SLICE_HIGH_SHIFT); -#ifdef CONFIG_PPC64 /* Hack, so that each addresses is controlled by exactly one * of the high or low area bitmaps, the first high area starts * at 4GB, not 0 */ if (start == 0) - start = SLICE_LOW_TOP; -#endif + start = (unsigned long)SLICE_LOW_TOP; return !slice_area_is_free(mm, start, end - start); } @@ -150,40 +148,6 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret, __set_bit(i, ret->high_slices); } -#ifdef CONFIG_PPC_BOOK3S_64 -static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize) -{ -#ifdef CONFIG_PPC_64K_PAGES - if (psize == MMU_PAGE_64K) - return &mm->context.mask_64k; -#endif - if (psize == MMU_PAGE_4K) - return &mm->context.mask_4k; -#ifdef CONFIG_HUGETLB_PAGE - if (psize == MMU_PAGE_16M) - return &mm->context.mask_16m; - if (psize == MMU_PAGE_16G) - return &mm->context.mask_16g; -#endif - BUG(); -} -#elif defined(CONFIG_PPC_8xx) -static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize) -{ - if (psize == mmu_virtual_psize) - return &mm->context.mask_base_psize; -#ifdef CONFIG_HUGETLB_PAGE - if (psize == MMU_PAGE_512K) - return &mm->context.mask_512k; - if (psize == MMU_PAGE_8M) - return &mm->context.mask_8m; -#endif - BUG(); -} -#else -#error "Must define the slice masks for page sizes supported by the platform" -#endif - static bool slice_check_range_fits(struct mm_struct *mm, const struct slice_mask *available, unsigned long start, unsigned long len) @@ -246,14 +210,14 @@ static void slice_convert(struct mm_struct *mm, slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize); slice_print_mask(" mask", mask); - psize_mask = slice_mask_for_size(mm, psize); + psize_mask = slice_mask_for_size(&mm->context, psize); /* We need to use a spinlock here to protect against * concurrent 64k -> 4k demotion ... */ spin_lock_irqsave(&slice_convert_lock, flags); - lpsizes = mm->context.low_slices_psize; + lpsizes = mm_ctx_low_slices(&mm->context); for (i = 0; i < SLICE_NUM_LOW; i++) { if (!(mask->low_slices & (1u << i))) continue; @@ -263,7 +227,7 @@ static void slice_convert(struct mm_struct *mm, /* Update the slice_mask */ old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf; - old_mask = slice_mask_for_size(mm, old_psize); + old_mask = slice_mask_for_size(&mm->context, old_psize); old_mask->low_slices &= ~(1u << i); psize_mask->low_slices |= 1u << i; @@ -272,8 +236,8 @@ static void slice_convert(struct mm_struct *mm, (((unsigned long)psize) << (mask_index * 4)); } - hpsizes = mm->context.high_slices_psize; - for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) { + hpsizes = mm_ctx_high_slices(&mm->context); + for (i = 0; i < GET_HIGH_SLICE_INDEX(mm_ctx_slb_addr_limit(&mm->context)); i++) { if (!test_bit(i, mask->high_slices)) continue; @@ -282,7 +246,7 @@ static void slice_convert(struct mm_struct *mm, /* Update the slice_mask */ old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf; - old_mask = slice_mask_for_size(mm, old_psize); + old_mask = slice_mask_for_size(&mm->context, old_psize); __clear_bit(i, old_mask->high_slices); __set_bit(i, psize_mask->high_slices); @@ -292,8 +256,8 @@ static void slice_convert(struct mm_struct *mm, } slice_dbg(" lsps=%lx, hsps=%lx\n", - (unsigned long)mm->context.low_slices_psize, - (unsigned long)mm->context.high_slices_psize); + (unsigned long)mm_ctx_low_slices(&mm->context), + (unsigned long)mm_ctx_high_slices(&mm->context)); spin_unlock_irqrestore(&slice_convert_lock, flags); @@ -393,7 +357,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, * DEFAULT_MAP_WINDOW we should apply this. */ if (high_limit > DEFAULT_MAP_WINDOW) - addr += mm->context.slb_addr_limit - DEFAULT_MAP_WINDOW; + addr += mm_ctx_slb_addr_limit(&mm->context) - DEFAULT_MAP_WINDOW; while (addr > min_addr) { info.high_limit = addr; @@ -505,20 +469,20 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, return -ENOMEM; } - if (high_limit > mm->context.slb_addr_limit) { + if (high_limit > mm_ctx_slb_addr_limit(&mm->context)) { /* * Increasing the slb_addr_limit does not require * slice mask cache to be recalculated because it should * be already initialised beyond the old address limit. */ - mm->context.slb_addr_limit = high_limit; + mm_ctx_set_slb_addr_limit(&mm->context, high_limit); on_each_cpu(slice_flush_segments, mm, 1); } /* Sanity checks */ BUG_ON(mm->task_size == 0); - BUG_ON(mm->context.slb_addr_limit == 0); + BUG_ON(mm_ctx_slb_addr_limit(&mm->context) == 0); VM_BUG_ON(radix_enabled()); slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize); @@ -538,7 +502,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* First make up a "good" mask of slices that have the right size * already */ - maskp = slice_mask_for_size(mm, psize); + maskp = slice_mask_for_size(&mm->context, psize); /* * Here "good" means slices that are already the right page size, @@ -565,7 +529,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, * a pointer to good mask for the next code to use. */ if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) { - compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K); + compat_maskp = slice_mask_for_size(&mm->context, MMU_PAGE_4K); if (fixed) slice_or_mask(&good_mask, maskp, compat_maskp); else @@ -642,14 +606,13 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, newaddr = slice_find_area(mm, len, &potential_mask, psize, topdown, high_limit); -#ifdef CONFIG_PPC_64K_PAGES - if (newaddr == -ENOMEM && psize == MMU_PAGE_64K) { + if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && newaddr == -ENOMEM && + psize == MMU_PAGE_64K) { /* retry the search with 4k-page slices included */ slice_or_mask(&potential_mask, &potential_mask, compat_maskp); newaddr = slice_find_area(mm, len, &potential_mask, psize, topdown, high_limit); } -#endif if (newaddr == -ENOMEM) return -ENOMEM; @@ -696,7 +659,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long flags) { return slice_get_unmapped_area(addr, len, flags, - current->mm->context.user_psize, 0); + mm_ctx_user_psize(¤t->mm->context), 0); } unsigned long arch_get_unmapped_area_topdown(struct file *filp, @@ -706,7 +669,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, const unsigned long flags) { return slice_get_unmapped_area(addr0, len, flags, - current->mm->context.user_psize, 1); + mm_ctx_user_psize(¤t->mm->context), 1); } unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) @@ -717,10 +680,10 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) VM_BUG_ON(radix_enabled()); if (slice_addr_is_low(addr)) { - psizes = mm->context.low_slices_psize; + psizes = mm_ctx_low_slices(&mm->context); index = GET_LOW_SLICE_INDEX(addr); } else { - psizes = mm->context.high_slices_psize; + psizes = mm_ctx_high_slices(&mm->context); index = GET_HIGH_SLICE_INDEX(addr); } mask_index = index & 0x1; @@ -741,27 +704,22 @@ void slice_init_new_context_exec(struct mm_struct *mm) * case of fork it is just inherited from the mm being * duplicated. */ -#ifdef CONFIG_PPC64 - mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64; -#else - mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW; -#endif - - mm->context.user_psize = psize; + mm_ctx_set_slb_addr_limit(&mm->context, SLB_ADDR_LIMIT_DEFAULT); + mm_ctx_set_user_psize(&mm->context, psize); /* * Set all slice psizes to the default. */ - lpsizes = mm->context.low_slices_psize; + lpsizes = mm_ctx_low_slices(&mm->context); memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1); - hpsizes = mm->context.high_slices_psize; + hpsizes = mm_ctx_high_slices(&mm->context); memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1); /* * Slice mask cache starts zeroed, fill the default size cache. */ - mask = slice_mask_for_size(mm, psize); + mask = slice_mask_for_size(&mm->context, psize); mask->low_slices = ~0UL; if (SLICE_NUM_HIGH) bitmap_fill(mask->high_slices, SLICE_NUM_HIGH); @@ -777,7 +735,7 @@ void slice_setup_new_exec(void) if (!is_32bit_task()) return; - mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW; + mm_ctx_set_slb_addr_limit(&mm->context, DEFAULT_MAP_WINDOW); } #endif @@ -816,22 +774,21 @@ int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len) { const struct slice_mask *maskp; - unsigned int psize = mm->context.user_psize; + unsigned int psize = mm_ctx_user_psize(&mm->context); VM_BUG_ON(radix_enabled()); - maskp = slice_mask_for_size(mm, psize); -#ifdef CONFIG_PPC_64K_PAGES + maskp = slice_mask_for_size(&mm->context, psize); + /* We need to account for 4k slices too */ - if (psize == MMU_PAGE_64K) { + if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) { const struct slice_mask *compat_maskp; struct slice_mask available; - compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K); + compat_maskp = slice_mask_for_size(&mm->context, MMU_PAGE_4K); slice_or_mask(&available, maskp, compat_maskp); return !slice_check_range_fits(mm, &available, addr, len); } -#endif return !slice_check_range_fits(mm, maskp, addr, len); } |