diff options
Diffstat (limited to 'arch/powerpc/mm')
26 files changed, 351 insertions, 350 deletions
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index 82b1ff759e26..12d92518e898 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -229,7 +229,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, } #ifdef CONFIG_SMP -void mmu_init_secondary(int cpu) +void __init mmu_init_secondary(int cpu) { unsigned long addr; unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1); diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index f06f3577d8d1..cdf6a9960046 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -19,7 +19,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o -ifeq ($(CONFIG_PPC_BOOK3S_64),y) +ifdef CONFIG_PPC_BOOK3S_64 obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o endif @@ -31,7 +31,7 @@ obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o obj-$(CONFIG_PPC_SPLPAR) += vphn.o obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-y += hugetlbpage.o -ifeq ($(CONFIG_HUGETLB_PAGE),y) +ifdef CONFIG_HUGETLB_PAGE obj-$(CONFIG_PPC_BOOK3S_64) += hugetlbpage-hash64.o obj-$(CONFIG_PPC_RADIX_MMU) += hugetlbpage-radix.o obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 7d0945bd3a61..c8da352e8686 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -34,7 +34,7 @@ * to handle fortunately. */ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, - unsigned long dsisr, unsigned *flt) + unsigned long dsisr, vm_fault_t *flt) { struct vm_area_struct *vma; unsigned long is_write; diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c index 14cfb11b09d0..869294695048 100644 --- a/arch/powerpc/mm/dump_hashpagetable.c +++ b/arch/powerpc/mm/dump_hashpagetable.c @@ -19,7 +19,6 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/seq_file.h> -#include <asm/fixmap.h> #include <asm/pgtable.h> #include <linux/const.h> #include <asm/page.h> @@ -260,7 +259,7 @@ static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 * /* to check in the secondary hash table, we invert the hash */ if (!primary) hash = ~hash; - hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; /* see if we can find an entry in the hpte with this hash */ for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) { lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index b1ca7a0974e3..d51cf5f4e45e 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -42,7 +42,6 @@ #include <asm/pgtable.h> #include <asm/mmu.h> #include <asm/mmu_context.h> -#include <asm/tlbflush.h> #include <asm/siginfo.h> #include <asm/debug.h> @@ -156,7 +155,7 @@ static noinline int bad_access(struct pt_regs *regs, unsigned long address) } static int do_sigbus(struct pt_regs *regs, unsigned long address, - unsigned int fault) + vm_fault_t fault) { siginfo_t info; unsigned int lsb = 0; @@ -187,7 +186,8 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address, return 0; } -static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) +static int mm_fault_error(struct pt_regs *regs, unsigned long addr, + vm_fault_t fault) { /* * Kernel page fault interrupted by SIGKILL. We have no reason to @@ -415,7 +415,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, int is_exec = TRAP(regs) == 0x400; int is_user = user_mode(regs); int is_write = page_fault_is_write(error_code); - int fault, major = 0; + vm_fault_t fault, major = 0; bool must_retry = false; if (notify_page_fault(regs)) diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index d573d7d07f25..6fa6765a10eb 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -80,7 +80,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, hash = hpt_hash(vpn, shift, ssize); repeat: - hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; /* Insert into the hash table, primary slot */ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, @@ -89,7 +89,7 @@ repeat: * Primary is full, try the secondary */ if (unlikely(slot == -1)) { - hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP; slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, HPTE_V_SECONDARY, @@ -97,8 +97,8 @@ repeat: MMU_PAGE_4K, ssize); if (slot == -1) { if (mftb() & 0x1) - hpte_group = ((hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * + HPTES_PER_GROUP; mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! Should be try the group from which we removed ? diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index e601d95c3b20..3afa253d7f52 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -154,7 +154,7 @@ htab_insert_hpte: } hash = hpt_hash(vpn, shift, ssize); repeat: - hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; /* Insert into the hash table, primary slot */ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, @@ -165,7 +165,7 @@ repeat: if (unlikely(slot == -1)) { bool soft_invalid; - hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP; slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, HPTE_V_SECONDARY, MMU_PAGE_4K, MMU_PAGE_4K, @@ -193,8 +193,7 @@ repeat: * that we do not get the same soft-invalid slot. */ if (soft_invalid || (mftb() & 0x1)) - hpte_group = ((hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; mmu_hash_ops.hpte_remove(hpte_group); /* @@ -288,7 +287,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, hash = hpt_hash(vpn, shift, ssize); repeat: - hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; /* Insert into the hash table, primary slot */ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, @@ -298,7 +297,7 @@ repeat: * Primary is full, try the secondary */ if (unlikely(slot == -1)) { - hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP; slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, HPTE_V_SECONDARY, @@ -306,8 +305,8 @@ repeat: MMU_PAGE_64K, ssize); if (slot == -1) { if (mftb() & 0x1) - hpte_group = ((hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * + HPTES_PER_GROUP; mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! Should be try the group from which we removed ? diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index ffbd7c0bda96..26acf6c8c20c 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -27,6 +27,7 @@ #include <asm/thread_info.h> #include <asm/asm-offsets.h> #include <asm/export.h> +#include <asm/feature-fixups.h> #ifdef CONFIG_SMP .section .bss diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 1d049c78c82a..729f02df8290 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -23,13 +23,13 @@ #include <asm/mmu.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> -#include <asm/tlbflush.h> #include <asm/trace.h> #include <asm/tlb.h> #include <asm/cputable.h> #include <asm/udbg.h> #include <asm/kexec.h> #include <asm/ppc-opcode.h> +#include <asm/feature-fixups.h> #include <misc/cxl-base.h> @@ -423,9 +423,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)", vpn, want_v & HPTE_V_AVPN, slot, newpp); - hpte_v = be64_to_cpu(hptep->v); - if (cpu_has_feature(CPU_FTR_ARCH_300)) - hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); + hpte_v = hpte_get_old_v(hptep); /* * We need to invalidate the TLB always because hpte_remove doesn't do * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less @@ -439,9 +437,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, } else { native_lock_hpte(hptep); /* recheck with locks held */ - hpte_v = be64_to_cpu(hptep->v); - if (cpu_has_feature(CPU_FTR_ARCH_300)) - hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); + hpte_v = hpte_get_old_v(hptep); if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))) { ret = -1; @@ -481,11 +477,9 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize) /* Bolted mappings are only ever in the primary group */ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; for (i = 0; i < HPTES_PER_GROUP; i++) { - hptep = htab_address + slot; - hpte_v = be64_to_cpu(hptep->v); - if (cpu_has_feature(CPU_FTR_ARCH_300)) - hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); + hptep = htab_address + slot; + hpte_v = hpte_get_old_v(hptep); if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) /* HPTE matches */ return slot; @@ -574,11 +568,19 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot); want_v = hpte_encode_avpn(vpn, bpsize, ssize); - native_lock_hpte(hptep); - hpte_v = be64_to_cpu(hptep->v); - if (cpu_has_feature(CPU_FTR_ARCH_300)) - hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); + hpte_v = hpte_get_old_v(hptep); + if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) { + native_lock_hpte(hptep); + /* recheck with locks held */ + hpte_v = hpte_get_old_v(hptep); + + if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) + /* Invalidate the hpte. NOTE: this also unlocks it */ + hptep->v = 0; + else + native_unlock_hpte(hptep); + } /* * We need to invalidate the TLB always because hpte_remove doesn't do * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less @@ -586,13 +588,6 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, * (hpte_remove) because we assume the old translation is still * technically "valid". */ - if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) - native_unlock_hpte(hptep); - else - /* Invalidate the hpte. NOTE: this also unlocks it */ - hptep->v = 0; - - /* Invalidate the TLB */ tlbie(vpn, bpsize, apsize, ssize, local); local_irq_restore(flags); @@ -634,17 +629,23 @@ static void native_hugepage_invalidate(unsigned long vsid, hptep = htab_address + slot; want_v = hpte_encode_avpn(vpn, psize, ssize); - native_lock_hpte(hptep); - hpte_v = be64_to_cpu(hptep->v); - if (cpu_has_feature(CPU_FTR_ARCH_300)) - hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); + hpte_v = hpte_get_old_v(hptep); /* Even if we miss, we need to invalidate the TLB */ - if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) - native_unlock_hpte(hptep); - else - /* Invalidate the hpte. NOTE: this also unlocks it */ - hptep->v = 0; + if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) { + /* recheck with locks held */ + native_lock_hpte(hptep); + hpte_v = hpte_get_old_v(hptep); + + if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) { + /* + * Invalidate the hpte. NOTE: this also unlocks it + */ + + hptep->v = 0; + } else + native_unlock_hpte(hptep); + } /* * We need to do tlb invalidate for all the address, tlbie * instruction compares entry_VA in tlb with the VA specified @@ -812,16 +813,19 @@ static void native_flush_hash_range(unsigned long number, int local) slot += hidx & _PTEIDX_GROUP_IX; hptep = htab_address + slot; want_v = hpte_encode_avpn(vpn, psize, ssize); + hpte_v = hpte_get_old_v(hptep); + + if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) + continue; + /* lock and try again */ native_lock_hpte(hptep); - hpte_v = be64_to_cpu(hptep->v); - if (cpu_has_feature(CPU_FTR_ARCH_300)) - hpte_v = hpte_new_to_old_v(hpte_v, - be64_to_cpu(hptep->r)); - if (!HPTE_V_COMPARE(hpte_v, want_v) || - !(hpte_v & HPTE_V_VALID)) + hpte_v = hpte_get_old_v(hptep); + + if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) native_unlock_hpte(hptep); else hptep->v = 0; + } pte_iterate_hashed_end(); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 8318716e5075..f23a89d8e4ce 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -48,7 +48,6 @@ #include <linux/uaccess.h> #include <asm/machdep.h> #include <asm/prom.h> -#include <asm/tlbflush.h> #include <asm/io.h> #include <asm/eeh.h> #include <asm/tlb.h> @@ -808,31 +807,6 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end) } #endif /* CONFIG_MEMORY_HOTPLUG */ -static void update_hid_for_hash(void) -{ - unsigned long hid0; - unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */ - - asm volatile("ptesync": : :"memory"); - /* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */ - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(0), "i"(0), "i"(2), "r"(0) : "memory"); - asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory"); - trace_tlbie(0, 0, rb, 0, 2, 0, 0); - - /* - * now switch the HID - */ - hid0 = mfspr(SPRN_HID0); - hid0 &= ~HID0_POWER9_RADIX; - mtspr(SPRN_HID0, hid0); - asm volatile("isync": : :"memory"); - - /* Wait for it to happen */ - while ((mfspr(SPRN_HID0) & HID0_POWER9_RADIX)) - cpu_relax(); -} - static void __init hash_init_partition_table(phys_addr_t hash_table, unsigned long htab_size) { @@ -845,8 +819,6 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, htab_size = __ilog2(htab_size) - 18; mmu_partition_table_set_entry(0, hash_table | htab_size, 0); pr_info("Partition table %p\n", partition_tb); - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - update_hid_for_hash(); } static void __init htab_initialize(void) @@ -1077,9 +1049,6 @@ void hash__early_init_mmu_secondary(void) /* Initialize hash table for that CPU */ if (!firmware_has_feature(FW_FEATURE_LPAR)) { - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - update_hid_for_hash(); - if (!cpu_has_feature(CPU_FTR_ARCH_300)) mtspr(SPRN_SDR1, _SDR1); else @@ -1783,8 +1752,7 @@ long hpte_insert_repeating(unsigned long hash, unsigned long vpn, long slot; repeat: - hpte_group = ((hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; /* Insert into the hash table, primary slot */ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, vflags, @@ -1792,15 +1760,14 @@ repeat: /* Primary is full, try the secondary */ if (unlikely(slot == -1)) { - hpte_group = ((~hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP; slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, vflags | HPTE_V_SECONDARY, psize, psize, ssize); if (slot == -1) { if (mftb() & 0x1) - hpte_group = ((hash & htab_hash_mask) * - HPTES_PER_GROUP)&~0x7UL; + hpte_group = (hash & htab_hash_mask) * + HPTES_PER_GROUP; mmu_hash_ops.hpte_remove(hpte_group); goto repeat; diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c index 668e87d03f9e..82a0e37557a5 100644 --- a/arch/powerpc/mm/highmem.c +++ b/arch/powerpc/mm/highmem.c @@ -56,7 +56,7 @@ EXPORT_SYMBOL(kmap_atomic_prot); void __kunmap_atomic(void *kvaddr) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - int type; + int type __maybe_unused; if (vaddr < __fix_to_virt(FIX_KMAP_END)) { pagefault_enable(); diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index f20d16f849c5..01f213d2bcb9 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -128,7 +128,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, new_pmd |= H_PAGE_HASHPTE; repeat: - hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; /* Insert into the hash table, primary slot */ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, @@ -137,16 +137,15 @@ repeat: * Primary is full, try the secondary */ if (unlikely(slot == -1)) { - hpte_group = ((~hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP; slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, HPTE_V_SECONDARY, psize, lpsize, ssize); if (slot == -1) { if (mftb() & 0x1) - hpte_group = ((hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * + HPTES_PER_GROUP; mmu_hash_ops.hpte_remove(hpte_group); goto repeat; diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 7c5f479c5c00..e87f9ef9115b 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -118,15 +118,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, } /* - * These macros define how to determine which level of the page table holds - * the hpdp. - */ -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) -#define HUGEPD_PGD_SHIFT PGDIR_SHIFT -#define HUGEPD_PUD_SHIFT PUD_SHIFT -#endif - -/* * At this point we do the placement change only for BOOK3S 64. This would * possibly work on other subarchs. */ @@ -174,13 +165,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz } } #else - if (pshift >= HUGEPD_PGD_SHIFT) { + if (pshift >= PGDIR_SHIFT) { ptl = &mm->page_table_lock; hpdp = (hugepd_t *)pg; } else { pdshift = PUD_SHIFT; pu = pud_alloc(mm, pg, addr); - if (pshift >= HUGEPD_PUD_SHIFT) { + if (pshift >= PUD_SHIFT) { ptl = pud_lockptr(mm, pu); hpdp = (hugepd_t *)pu; } else { @@ -337,7 +328,8 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift >= pdshift) hugepd_free(tlb, hugepte); else - pgtable_free_tlb(tlb, hugepte, pdshift - shift); + pgtable_free_tlb(tlb, hugepte, + get_hugepd_cache_index(pdshift - shift)); } static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, @@ -620,15 +612,12 @@ static int __init add_huge_page_size(unsigned long long size) * firmware we only add hugetlb support for page sizes that can be * supported by linux page table layout. * For now we have - * Radix: 2M + * Radix: 2M and 1G * Hash: 16M and 16G */ if (radix_enabled()) { - if (mmu_psize != MMU_PAGE_2M) { - if (cpu_has_feature(CPU_FTR_POWER9_DD1) || - (mmu_psize != MMU_PAGE_1G)) - return -EINVAL; - } + if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G) + return -EINVAL; } else { if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G) return -EINVAL; @@ -694,9 +683,9 @@ static int __init hugetlbpage_init(void) else pdshift = PMD_SHIFT; #else - if (shift < HUGEPD_PUD_SHIFT) + if (shift < PUD_SHIFT) pdshift = PMD_SHIFT; - else if (shift < HUGEPD_PGD_SHIFT) + else if (shift < PGDIR_SHIFT) pdshift = PUD_SHIFT; else pdshift = PGDIR_SHIFT; diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index f3d4b4a0e561..4a892d894a0f 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -200,9 +200,9 @@ static void pte_frag_destroy(void *pte_frag) /* drop all the pending references */ count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT; /* We allow PTE_FRAG_NR fragments from a PTE page */ - if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) { + if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) { pgtable_page_dtor(page); - free_unref_page(page); + __free_page(page); } } @@ -215,13 +215,13 @@ static void pmd_frag_destroy(void *pmd_frag) /* drop all the pending references */ count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT; /* We allow PTE_FRAG_NR fragments from a PTE page */ - if (page_ref_sub_and_test(page, PMD_FRAG_NR - count)) { + if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) { pgtable_pmd_page_dtor(page); - free_unref_page(page); + __free_page(page); } } -static void destroy_pagetable_page(struct mm_struct *mm) +static void destroy_pagetable_cache(struct mm_struct *mm) { void *frag; @@ -244,13 +244,14 @@ void destroy_context(struct mm_struct *mm) WARN_ON(process_tb[mm->context.id].prtb0 != 0); else subpage_prot_free(mm); - destroy_pagetable_page(mm); destroy_contexts(&mm->context); mm->context.id = MMU_NO_CONTEXT; } void arch_exit_mmap(struct mm_struct *mm) { + destroy_pagetable_cache(mm); + if (radix_enabled()) { /* * Radix doesn't have a valid bit in the process table @@ -273,15 +274,7 @@ void arch_exit_mmap(struct mm_struct *mm) #ifdef CONFIG_PPC_RADIX_MMU void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) { - - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { - isync(); - mtspr(SPRN_PID, next->context.id); - isync(); - asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); - } else { - mtspr(SPRN_PID, next->context.id); - isync(); - } + mtspr(SPRN_PID, next->context.id); + isync(); } #endif diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c index aa5a7fd89461..921c1e33e941 100644 --- a/arch/powerpc/mm/mmu_context_hash32.c +++ b/arch/powerpc/mm/mmu_context_hash32.c @@ -27,7 +27,6 @@ #include <linux/export.h> #include <asm/mmu_context.h> -#include <asm/tlbflush.h> /* * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index abb43646927a..a4ca57612558 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -19,6 +19,7 @@ #include <linux/hugetlb.h> #include <linux/swap.h> #include <asm/mmu_context.h> +#include <asm/pte-walk.h> static DEFINE_MUTEX(mem_list_mutex); @@ -27,6 +28,7 @@ struct mm_iommu_table_group_mem_t { struct rcu_head rcu; unsigned long used; atomic64_t mapped; + unsigned int pageshift; u64 ua; /* userspace address */ u64 entries; /* number of entries in hpas[] */ u64 *hpas; /* vmalloc'ed */ @@ -125,6 +127,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, { struct mm_iommu_table_group_mem_t *mem; long i, j, ret = 0, locked_entries = 0; + unsigned int pageshift; + unsigned long flags; struct page *page = NULL; mutex_lock(&mem_list_mutex); @@ -159,6 +163,12 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, goto unlock_exit; } + /* + * For a starting point for a maximum page size calculation + * we use @ua and @entries natural alignment to allow IOMMU pages + * smaller than huge pages but still bigger than PAGE_SIZE. + */ + mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT)); mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0]))); if (!mem->hpas) { kfree(mem); @@ -199,6 +209,23 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, } } populate: + pageshift = PAGE_SHIFT; + if (PageCompound(page)) { + pte_t *pte; + struct page *head = compound_head(page); + unsigned int compshift = compound_order(head); + + local_irq_save(flags); /* disables as well */ + pte = find_linux_pte(mm->pgd, ua, NULL, &pageshift); + local_irq_restore(flags); + + /* Double check it is still the same pinned page */ + if (pte && pte_page(*pte) == head && + pageshift == compshift) + pageshift = max_t(unsigned int, pageshift, + PAGE_SHIFT); + } + mem->pageshift = min(mem->pageshift, pageshift); mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; } @@ -349,7 +376,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, EXPORT_SYMBOL_GPL(mm_iommu_find); long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa) + unsigned long ua, unsigned int pageshift, unsigned long *hpa) { const long entry = (ua - mem->ua) >> PAGE_SHIFT; u64 *va = &mem->hpas[entry]; @@ -357,6 +384,9 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, if (entry >= mem->entries) return -EFAULT; + if (pageshift > mem->pageshift) + return -EFAULT; + *hpa = *va | (ua & ~PAGE_MASK); return 0; @@ -364,7 +394,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa); long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa) + unsigned long ua, unsigned int pageshift, unsigned long *hpa) { const long entry = (ua - mem->ua) >> PAGE_SHIFT; void *va = &mem->hpas[entry]; @@ -373,6 +403,9 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, if (entry >= mem->entries) return -EFAULT; + if (pageshift > mem->pageshift) + return -EFAULT; + pa = (void *) vmalloc_to_phys(va); if (!pa) return -EFAULT; diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index c4c0a09a7775..e5d779eed181 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -19,7 +19,6 @@ * */ #include <linux/mm.h> -#include <asm/tlbflush.h> #include <asm/mmu.h> #ifdef CONFIG_PPC_MMU_NOHASH diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index c1f4ca45c93a..01d7c0f7c4f0 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -270,6 +270,8 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm) return NULL; } + atomic_set(&page->pt_frag_refcount, 1); + ret = page_address(page); /* * if we support only one fragment just return the @@ -285,7 +287,7 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm) * count. */ if (likely(!mm->context.pmd_frag)) { - set_page_count(page, PMD_FRAG_NR); + atomic_set(&page->pt_frag_refcount, PMD_FRAG_NR); mm->context.pmd_frag = ret + PMD_FRAG_SIZE; } spin_unlock(&mm->page_table_lock); @@ -308,9 +310,10 @@ void pmd_fragment_free(unsigned long *pmd) { struct page *page = virt_to_page(pmd); - if (put_page_testzero(page)) { + BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0); + if (atomic_dec_and_test(&page->pt_frag_refcount)) { pgtable_pmd_page_dtor(page); - free_unref_page(page); + __free_page(page); } } @@ -352,6 +355,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel) return NULL; } + atomic_set(&page->pt_frag_refcount, 1); ret = page_address(page); /* @@ -367,7 +371,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel) * count. */ if (likely(!mm->context.pte_frag)) { - set_page_count(page, PTE_FRAG_NR); + atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR); mm->context.pte_frag = ret + PTE_FRAG_SIZE; } spin_unlock(&mm->page_table_lock); @@ -390,10 +394,11 @@ void pte_fragment_free(unsigned long *table, int kernel) { struct page *page = virt_to_page(table); - if (put_page_testzero(page)) { + BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0); + if (atomic_dec_and_test(&page->pt_frag_refcount)) { if (!kernel) pgtable_page_dtor(page); - free_unref_page(page); + __free_page(page); } } @@ -409,6 +414,18 @@ static inline void pgtable_free(void *table, int index) case PUD_INDEX: kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), table); break; +#if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE) + /* 16M hugepd directory at pud level */ + case HTLB_16M_INDEX: + BUILD_BUG_ON(H_16M_CACHE_INDEX <= 0); + kmem_cache_free(PGT_CACHE(H_16M_CACHE_INDEX), table); + break; + /* 16G hugepd directory at the pgd level */ + case HTLB_16G_INDEX: + BUILD_BUG_ON(H_16G_CACHE_INDEX <= 0); + kmem_cache_free(PGT_CACHE(H_16G_CACHE_INDEX), table); + break; +#endif /* We don't free pgd table via RCU callback */ default: BUG(); @@ -438,3 +455,25 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) return pgtable_free(table, index); } #endif + +#ifdef CONFIG_PROC_FS +atomic_long_t direct_pages_count[MMU_PAGE_COUNT]; + +void arch_report_meminfo(struct seq_file *m) +{ + /* + * Hash maps the memory with one size mmu_linear_psize. + * So don't bother to print these on hash + */ + if (!radix_enabled()) + return; + seq_printf(m, "DirectMap4k: %8lu kB\n", + atomic_long_read(&direct_pages_count[MMU_PAGE_4K]) << 2); + seq_printf(m, "DirectMap64k: %8lu kB\n", + atomic_long_read(&direct_pages_count[MMU_PAGE_64K]) << 6); + seq_printf(m, "DirectMap2M: %8lu kB\n", + atomic_long_read(&direct_pages_count[MMU_PAGE_2M]) << 11); + seq_printf(m, "DirectMap1G: %8lu kB\n", + atomic_long_read(&direct_pages_count[MMU_PAGE_1G]) << 20); +} +#endif /* CONFIG_PROC_FS */ diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 96f68c5aa1f5..7be99fd9af15 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -226,16 +226,6 @@ void radix__mark_rodata_ro(void) { unsigned long start, end; - /* - * mark_rodata_ro() will mark itself as !writable at some point. - * Due to DD1 workaround in radix__pte_update(), we'll end up with - * an invalid pte and the system will crash quite severly. - */ - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { - pr_warn("Warning: Unable to mark rodata read only on P9 DD1\n"); - return; - } - start = (unsigned long)_stext; end = (unsigned long)__init_begin; @@ -277,6 +267,7 @@ static int __meminit create_physical_mapping(unsigned long start, #else int split_text_mapping = 0; #endif + int psize; start = _ALIGN_UP(start, PAGE_SIZE); for (addr = start; addr < end; addr += mapping_size) { @@ -290,13 +281,17 @@ static int __meminit create_physical_mapping(unsigned long start, retry: if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE && mmu_psize_defs[MMU_PAGE_1G].shift && - PUD_SIZE <= max_mapping_size) + PUD_SIZE <= max_mapping_size) { mapping_size = PUD_SIZE; - else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE && - mmu_psize_defs[MMU_PAGE_2M].shift) + psize = MMU_PAGE_1G; + } else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE && + mmu_psize_defs[MMU_PAGE_2M].shift) { mapping_size = PMD_SIZE; - else + psize = MMU_PAGE_2M; + } else { mapping_size = PAGE_SIZE; + psize = mmu_virtual_psize; + } if (split_text_mapping && (mapping_size == PUD_SIZE) && (addr <= __pa_symbol(__init_begin)) && @@ -307,8 +302,10 @@ retry: if (split_text_mapping && (mapping_size == PMD_SIZE) && (addr <= __pa_symbol(__init_begin)) && - (addr + mapping_size) >= __pa_symbol(_stext)) + (addr + mapping_size) >= __pa_symbol(_stext)) { mapping_size = PAGE_SIZE; + psize = mmu_virtual_psize; + } if (mapping_size != previous_size) { print_mapping(start, addr, previous_size); @@ -326,6 +323,8 @@ retry: rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end); if (rc) return rc; + + update_page_count(psize, 1); } print_mapping(start, addr, mapping_size); @@ -533,35 +532,6 @@ found: return; } -static void update_hid_for_radix(void) -{ - unsigned long hid0; - unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */ - - asm volatile("ptesync": : :"memory"); - /* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */ - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(1), "i"(0), "i"(2), "r"(0) : "memory"); - /* prs = 1, ric = 2, rs = 0, r = 1 is = 3 */ - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory"); - asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory"); - trace_tlbie(0, 0, rb, 0, 2, 0, 1); - trace_tlbie(0, 0, rb, 0, 2, 1, 1); - - /* - * now switch the HID - */ - hid0 = mfspr(SPRN_HID0); - hid0 |= HID0_POWER9_RADIX; - mtspr(SPRN_HID0, hid0); - asm volatile("isync": : :"memory"); - - /* Wait for it to happen */ - while (!(mfspr(SPRN_HID0) & HID0_POWER9_RADIX)) - cpu_relax(); -} - static void radix_init_amor(void) { /* @@ -576,22 +546,12 @@ static void radix_init_amor(void) static void radix_init_iamr(void) { - unsigned long iamr; - - /* - * The IAMR should set to 0 on DD1. - */ - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - iamr = 0; - else - iamr = (1ul << 62); - /* * Radix always uses key0 of the IAMR to determine if an access is * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction * fetch. */ - mtspr(SPRN_IAMR, iamr); + mtspr(SPRN_IAMR, (1ul << 62)); } void __init radix__early_init_mmu(void) @@ -644,8 +604,6 @@ void __init radix__early_init_mmu(void) if (!firmware_has_feature(FW_FEATURE_LPAR)) { radix_init_native(); - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - update_hid_for_radix(); lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); radix_init_partition_table(); @@ -671,10 +629,6 @@ void radix__early_init_mmu_secondary(void) * update partition table control register and UPRT */ if (!firmware_has_feature(FW_FEATURE_LPAR)) { - - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - update_hid_for_radix(); - lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); @@ -1095,8 +1049,7 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, * To avoid NMMU hang while relaxing access, we need mark * the pte invalid in between. */ - if (cpu_has_feature(CPU_FTR_POWER9_DD1) || - atomic_read(&mm->context.copros) > 0) { + if (atomic_read(&mm->context.copros) > 0) { unsigned long old_pte, new_pte; old_pte = __radix_pte_update(ptep, ~0, 0); diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index e6f500fabf5e..333b1f80c435 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -14,9 +14,12 @@ DEFINE_STATIC_KEY_TRUE(pkey_disabled); bool pkey_execute_disable_supported; int pkeys_total; /* Total pkeys as per device tree */ bool pkeys_devtree_defined; /* pkey property exported by device tree */ -u32 initial_allocation_mask; /* Bits set for reserved keys */ -u64 pkey_amr_uamor_mask; /* Bits in AMR/UMOR not to be touched */ +u32 initial_allocation_mask; /* Bits set for the initially allocated keys */ +u32 reserved_allocation_mask; /* Bits set for reserved keys */ +u64 pkey_amr_mask; /* Bits in AMR not to be touched */ u64 pkey_iamr_mask; /* Bits in AMR not to be touched */ +u64 pkey_uamor_mask; /* Bits in UMOR not to be touched */ +int execute_only_key = 2; #define AMR_BITS_PER_PKEY 2 #define AMR_RD_BIT 0x1UL @@ -91,7 +94,7 @@ int pkey_initialize(void) * arch-neutral code. */ pkeys_total = min_t(int, pkeys_total, - (ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)); + ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)+1)); if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total) static_branch_enable(&pkey_disabled); @@ -119,20 +122,39 @@ int pkey_initialize(void) #else os_reserved = 0; #endif - initial_allocation_mask = ~0x0; - pkey_amr_uamor_mask = ~0x0ul; + /* Bits are in LE format. */ + reserved_allocation_mask = (0x1 << 1) | (0x1 << execute_only_key); + + /* register mask is in BE format */ + pkey_amr_mask = ~0x0ul; + pkey_amr_mask &= ~(0x3ul << pkeyshift(0)); + pkey_iamr_mask = ~0x0ul; - /* - * key 0, 1 are reserved. - * key 0 is the default key, which allows read/write/execute. - * key 1 is recommended not to be used. PowerISA(3.0) page 1015, - * programming note. - */ - for (i = 2; i < (pkeys_total - os_reserved); i++) { - initial_allocation_mask &= ~(0x1 << i); - pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i)); - pkey_iamr_mask &= ~(0x1ul << pkeyshift(i)); + pkey_iamr_mask &= ~(0x3ul << pkeyshift(0)); + pkey_iamr_mask &= ~(0x3ul << pkeyshift(execute_only_key)); + + pkey_uamor_mask = ~0x0ul; + pkey_uamor_mask &= ~(0x3ul << pkeyshift(0)); + pkey_uamor_mask &= ~(0x3ul << pkeyshift(execute_only_key)); + + /* mark the rest of the keys as reserved and hence unavailable */ + for (i = (pkeys_total - os_reserved); i < pkeys_total; i++) { + reserved_allocation_mask |= (0x1 << i); + pkey_uamor_mask &= ~(0x3ul << pkeyshift(i)); + } + initial_allocation_mask = reserved_allocation_mask | (0x1 << 0); + + if (unlikely((pkeys_total - os_reserved) <= execute_only_key)) { + /* + * Insufficient number of keys to support + * execute only key. Mark it unavailable. + * Any AMR, UAMOR, IAMR bit set for + * this key is irrelevant since this key + * can never be allocated. + */ + execute_only_key = -1; } + return 0; } @@ -143,8 +165,7 @@ void pkey_mm_init(struct mm_struct *mm) if (static_branch_likely(&pkey_disabled)) return; mm_pkey_allocation_map(mm) = initial_allocation_mask; - /* -1 means unallocated or invalid */ - mm->context.execute_only_pkey = -1; + mm->context.execute_only_pkey = execute_only_key; } static inline u64 read_amr(void) @@ -213,33 +234,6 @@ static inline void init_iamr(int pkey, u8 init_bits) write_iamr(old_iamr | new_iamr_bits); } -static void pkey_status_change(int pkey, bool enable) -{ - u64 old_uamor; - - /* Reset the AMR and IAMR bits for this key */ - init_amr(pkey, 0x0); - init_iamr(pkey, 0x0); - - /* Enable/disable key */ - old_uamor = read_uamor(); - if (enable) - old_uamor |= (0x3ul << pkeyshift(pkey)); - else - old_uamor &= ~(0x3ul << pkeyshift(pkey)); - write_uamor(old_uamor); -} - -void __arch_activate_pkey(int pkey) -{ - pkey_status_change(pkey, true); -} - -void __arch_deactivate_pkey(int pkey) -{ - pkey_status_change(pkey, false); -} - /* * Set the access rights in AMR IAMR and UAMOR registers for @pkey to that * specified in @init_val. @@ -289,9 +283,6 @@ void thread_pkey_regs_restore(struct thread_struct *new_thread, if (static_branch_likely(&pkey_disabled)) return; - /* - * TODO: Just set UAMOR to zero if @new_thread hasn't used any keys yet. - */ if (old_thread->amr != new_thread->amr) write_amr(new_thread->amr); if (old_thread->iamr != new_thread->iamr) @@ -305,9 +296,13 @@ void thread_pkey_regs_init(struct thread_struct *thread) if (static_branch_likely(&pkey_disabled)) return; - thread->amr = read_amr() & pkey_amr_uamor_mask; - thread->iamr = read_iamr() & pkey_iamr_mask; - thread->uamor = read_uamor() & pkey_amr_uamor_mask; + thread->amr = pkey_amr_mask; + thread->iamr = pkey_iamr_mask; + thread->uamor = pkey_uamor_mask; + + write_uamor(pkey_uamor_mask); + write_amr(pkey_amr_mask); + write_iamr(pkey_iamr_mask); } static inline bool pkey_allows_readwrite(int pkey) @@ -322,48 +317,7 @@ static inline bool pkey_allows_readwrite(int pkey) int __execute_only_pkey(struct mm_struct *mm) { - bool need_to_set_mm_pkey = false; - int execute_only_pkey = mm->context.execute_only_pkey; - int ret; - - /* Do we need to assign a pkey for mm's execute-only maps? */ - if (execute_only_pkey == -1) { - /* Go allocate one to use, which might fail */ - execute_only_pkey = mm_pkey_alloc(mm); - if (execute_only_pkey < 0) - return -1; - need_to_set_mm_pkey = true; - } - - /* - * We do not want to go through the relatively costly dance to set AMR - * if we do not need to. Check it first and assume that if the - * execute-only pkey is readwrite-disabled than we do not have to set it - * ourselves. - */ - if (!need_to_set_mm_pkey && !pkey_allows_readwrite(execute_only_pkey)) - return execute_only_pkey; - - /* - * Set up AMR so that it denies access for everything other than - * execution. - */ - ret = __arch_set_user_pkey_access(current, execute_only_pkey, - PKEY_DISABLE_ACCESS | - PKEY_DISABLE_WRITE); - /* - * If the AMR-set operation failed somehow, just return 0 and - * effectively disable execute-only support. - */ - if (ret) { - mm_pkey_free(mm, execute_only_pkey); - return -1; - } - - /* We got one, store it and use it from here on out */ - if (need_to_set_mm_pkey) - mm->context.execute_only_pkey = execute_only_pkey; - return execute_only_pkey; + return mm->context.execute_only_pkey; } static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) @@ -407,9 +361,6 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute) int pkey_shift; u64 amr; - if (!pkey) - return true; - if (!is_pkey_enabled(pkey)) return true; diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index cb796724a6fc..0b095fa54049 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -90,6 +90,45 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize, : "memory" ); } +/* + * Insert bolted entries into SLB (which may not be empty, so don't clear + * slb_cache_ptr). + */ +void __slb_restore_bolted_realmode(void) +{ + struct slb_shadow *p = get_slb_shadow(); + enum slb_index index; + + /* No isync needed because realmode. */ + for (index = 0; index < SLB_NUM_BOLTED; index++) { + asm volatile("slbmte %0,%1" : + : "r" (be64_to_cpu(p->save_area[index].vsid)), + "r" (be64_to_cpu(p->save_area[index].esid))); + } +} + +/* + * Insert the bolted entries into an empty SLB. + * This is not the same as rebolt because the bolted segments are not + * changed, just loaded from the shadow area. + */ +void slb_restore_bolted_realmode(void) +{ + __slb_restore_bolted_realmode(); + get_paca()->slb_cache_ptr = 0; +} + +/* + * This flushes all SLB entries including 0, so it must be realmode. + */ +void slb_flush_all_realmode(void) +{ + /* + * This flushes all SLB entries including 0, so it must be realmode. + */ + asm volatile("slbmte %0,%0; slbia" : : "r" (0)); +} + static void __slb_flush_and_rebolt(void) { /* If you change this make sure you change SLB_NUM_BOLTED diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index a83fbd2a4a24..4ac5057ad439 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -22,6 +22,7 @@ #include <asm/mmu.h> #include <asm/pgtable.h> #include <asm/firmware.h> +#include <asm/feature-fixups.h> /* * This macro generates asm code to compute the VSID scramble diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index 75cb646a79c3..3327551c8b47 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -17,7 +17,6 @@ #include <asm/pgtable.h> #include <linux/uaccess.h> -#include <asm/tlbflush.h> /* * Free all pages allocated for subpage protection maps and pointers. @@ -186,9 +185,6 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, * in a 2-bit field won't allow writes to a page that is otherwise * write-protected. */ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wpragmas" -#pragma GCC diagnostic ignored "-Wattribute-alias" SYSCALL_DEFINE3(subpage_prot, unsigned long, addr, unsigned long, len, u32 __user *, map) { @@ -272,4 +268,3 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr, up_write(&mm->mmap_sem); return err; } -#pragma GCC diagnostic pop diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 67a6e86d3e7e..fef3e1eb3a19 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -689,22 +689,17 @@ EXPORT_SYMBOL(radix__flush_tlb_kernel_range); static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2; -void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) +static inline void __radix__flush_tlb_range(struct mm_struct *mm, + unsigned long start, unsigned long end, + bool flush_all_sizes) { - struct mm_struct *mm = vma->vm_mm; unsigned long pid; unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift; unsigned long page_size = 1UL << page_shift; unsigned long nr_pages = (end - start) >> page_shift; bool local, full; -#ifdef CONFIG_HUGETLB_PAGE - if (is_vm_hugetlb_page(vma)) - return radix__flush_hugetlb_tlb_range(vma, start, end); -#endif - pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) return; @@ -738,37 +733,64 @@ is_local: _tlbie_pid(pid, RIC_FLUSH_TLB); } } else { - bool hflush = false; + bool hflush = flush_all_sizes; + bool gflush = flush_all_sizes; unsigned long hstart, hend; + unsigned long gstart, gend; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT; - hend = end >> HPAGE_PMD_SHIFT; - if (hstart < hend) { - hstart <<= HPAGE_PMD_SHIFT; - hend <<= HPAGE_PMD_SHIFT; + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) hflush = true; + + if (hflush) { + hstart = (start + PMD_SIZE - 1) & PMD_MASK; + hend = end & PMD_MASK; + if (hstart == hend) + hflush = false; + } + + if (gflush) { + gstart = (start + PUD_SIZE - 1) & PUD_MASK; + gend = end & PUD_MASK; + if (gstart == gend) + gflush = false; } -#endif asm volatile("ptesync": : :"memory"); if (local) { __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize); if (hflush) __tlbiel_va_range(hstart, hend, pid, - HPAGE_PMD_SIZE, MMU_PAGE_2M); + PMD_SIZE, MMU_PAGE_2M); + if (gflush) + __tlbiel_va_range(gstart, gend, pid, + PUD_SIZE, MMU_PAGE_1G); asm volatile("ptesync": : :"memory"); } else { __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize); if (hflush) __tlbie_va_range(hstart, hend, pid, - HPAGE_PMD_SIZE, MMU_PAGE_2M); + PMD_SIZE, MMU_PAGE_2M); + if (gflush) + __tlbie_va_range(gstart, gend, pid, + PUD_SIZE, MMU_PAGE_1G); fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } } preempt_enable(); } + +void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) + +{ +#ifdef CONFIG_HUGETLB_PAGE + if (is_vm_hugetlb_page(vma)) + return radix__flush_hugetlb_tlb_range(vma, start, end); +#endif + + __radix__flush_tlb_range(vma->vm_mm, start, end, false); +} EXPORT_SYMBOL(radix__flush_tlb_range); static int radix_get_mmu_psize(int page_size) @@ -837,6 +859,8 @@ void radix__tlb_flush(struct mmu_gather *tlb) int psize = 0; struct mm_struct *mm = tlb->mm; int page_size = tlb->page_size; + unsigned long start = tlb->start; + unsigned long end = tlb->end; /* * if page size is not something we understand, do a full mm flush @@ -847,15 +871,45 @@ void radix__tlb_flush(struct mmu_gather *tlb) */ if (tlb->fullmm) { __flush_all_mm(mm, true); +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) + } else if (mm_tlb_flush_nested(mm)) { + /* + * If there is a concurrent invalidation that is clearing ptes, + * then it's possible this invalidation will miss one of those + * cleared ptes and miss flushing the TLB. If this invalidate + * returns before the other one flushes TLBs, that can result + * in it returning while there are still valid TLBs inside the + * range to be invalidated. + * + * See mm/memory.c:tlb_finish_mmu() for more details. + * + * The solution to this is ensure the entire range is always + * flushed here. The problem for powerpc is that the flushes + * are page size specific, so this "forced flush" would not + * do the right thing if there are a mix of page sizes in + * the range to be invalidated. So use __flush_tlb_range + * which invalidates all possible page sizes in the range. + * + * PWC flush probably is not be required because the core code + * shouldn't free page tables in this path, but accounting + * for the possibility makes us a bit more robust. + * + * need_flush_all is an uncommon case because page table + * teardown should be done with exclusive locks held (but + * after locks are dropped another invalidate could come + * in), it could be optimized further if necessary. + */ + if (!tlb->need_flush_all) + __radix__flush_tlb_range(mm, start, end, true); + else + radix__flush_all_mm(mm); +#endif } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) { if (!tlb->need_flush_all) radix__flush_tlb_mm(mm); else radix__flush_all_mm(mm); } else { - unsigned long start = tlb->start; - unsigned long end = tlb->end; - if (!tlb->need_flush_all) radix__flush_tlb_range_psize(mm, start, end, psize); else @@ -994,24 +1048,6 @@ void radix__flush_tlb_all(void) asm volatile("eieio; tlbsync; ptesync": : :"memory"); } -void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, - unsigned long address) -{ - /* - * We track page size in pte only for DD1, So we can - * call this only on DD1. - */ - if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) { - VM_WARN_ON(1); - return; - } - - if (old_pte & R_PAGE_LARGE) - radix__flush_tlb_page_psize(mm, address, MMU_PAGE_2M); - else - radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize); -} - #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE extern void radix_kvm_prefetch_workaround(struct mm_struct *mm) { @@ -1043,6 +1079,8 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm) for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) { if (sib == cpu) continue; + if (!cpu_possible(sib)) + continue; if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu) flush = true; } diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index eb82d787d99a..7fd20c52a8ec 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -22,6 +22,7 @@ #include <asm/ppc-opcode.h> #include <asm/kvm_asm.h> #include <asm/kvm_booke_hv_asm.h> +#include <asm/feature-fixups.h> #ifdef CONFIG_PPC_64K_PAGES #define VPTE_PMD_SHIFT (PTE_INDEX_SIZE+1) diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index 048b8e9f4492..e066a658acac 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -34,6 +34,8 @@ #include <asm/asm-offsets.h> #include <asm/processor.h> #include <asm/bug.h> +#include <asm/asm-compat.h> +#include <asm/feature-fixups.h> #if defined(CONFIG_40x) |