summary | refs | log | tree | commit | diff
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- arch/powerpc/mm/44x_mmu.c 2
-rw-r--r-- arch/powerpc/mm/Makefile 4
-rw-r--r-- arch/powerpc/mm/copro_fault.c 2
-rw-r--r-- arch/powerpc/mm/dump_hashpagetable.c 3
-rw-r--r-- arch/powerpc/mm/fault.c 8
-rw-r--r-- arch/powerpc/mm/hash64_4k.c 8
-rw-r--r-- arch/powerpc/mm/hash64_64k.c 15
-rw-r--r-- arch/powerpc/mm/hash_low_32.S 1
-rw-r--r-- arch/powerpc/mm/hash_native_64.c 78
-rw-r--r-- arch/powerpc/mm/hash_utils_64.c 41
-rw-r--r-- arch/powerpc/mm/highmem.c 2
-rw-r--r-- arch/powerpc/mm/hugepage-hash64.c 9
-rw-r--r-- arch/powerpc/mm/hugetlbpage.c 29
-rw-r--r-- arch/powerpc/mm/mmu_context_book3s64.c 25
-rw-r--r-- arch/powerpc/mm/mmu_context_hash32.c 1
-rw-r--r-- arch/powerpc/mm/mmu_context_iommu.c 37
-rw-r--r-- arch/powerpc/mm/mmu_decl.h 1
-rw-r--r-- arch/powerpc/mm/pgtable-book3s64.c 51
-rw-r--r-- arch/powerpc/mm/pgtable-radix.c 79
-rw-r--r-- arch/powerpc/mm/pkeys.c 141
-rw-r--r-- arch/powerpc/mm/slb.c 39
-rw-r--r-- arch/powerpc/mm/slb_low.S 1
-rw-r--r-- arch/powerpc/mm/subpage-prot.c 5
-rw-r--r-- arch/powerpc/mm/tlb-radix.c 116
-rw-r--r-- arch/powerpc/mm/tlb_low_64e.S 1
-rw-r--r-- arch/powerpc/mm/tlb_nohash_low.S 2
26 files changed, 351 insertions, 350 deletions
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index 82b1ff759e26..12d92518e898 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -229,7 +229,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
}
#ifdef CONFIG_SMP
-void mmu_init_secondary(int cpu)
+void __init mmu_init_secondary(int cpu)
{
unsigned long addr;
unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1);
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index f06f3577d8d1..cdf6a9960046 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -19,7 +19,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o
obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o
obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o
-ifeq ($(CONFIG_PPC_BOOK3S_64),y)
+ifdef CONFIG_PPC_BOOK3S_64
obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o
endif
@@ -31,7 +31,7 @@ obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
obj-$(CONFIG_PPC_SPLPAR) += vphn.o
obj-$(CONFIG_PPC_MM_SLICES) += slice.o
obj-y += hugetlbpage.o
-ifeq ($(CONFIG_HUGETLB_PAGE),y)
+ifdef CONFIG_HUGETLB_PAGE
obj-$(CONFIG_PPC_BOOK3S_64) += hugetlbpage-hash64.o
obj-$(CONFIG_PPC_RADIX_MMU) += hugetlbpage-radix.o
obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index 7d0945bd3a61..c8da352e8686 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -34,7 +34,7 @@
* to handle fortunately.
*/
int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
- unsigned long dsisr, unsigned *flt)
+ unsigned long dsisr, vm_fault_t *flt)
{
struct vm_area_struct *vma;
unsigned long is_write;
diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c
index 14cfb11b09d0..869294695048 100644
--- a/arch/powerpc/mm/dump_hashpagetable.c
+++ b/arch/powerpc/mm/dump_hashpagetable.c
@@ -19,7 +19,6 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
-#include <asm/fixmap.h>
#include <asm/pgtable.h>
#include <linux/const.h>
#include <asm/page.h>
@@ -260,7 +259,7 @@ static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *
/* to check in the secondary hash table, we invert the hash */
if (!primary)
hash = ~hash;
- hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
/* see if we can find an entry in the hpte with this hash */
for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) {
lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index b1ca7a0974e3..d51cf5f4e45e 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -42,7 +42,6 @@
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
-#include <asm/tlbflush.h>
#include <asm/siginfo.h>
#include <asm/debug.h>
@@ -156,7 +155,7 @@ static noinline int bad_access(struct pt_regs *regs, unsigned long address)
}
static int do_sigbus(struct pt_regs *regs, unsigned long address,
- unsigned int fault)
+ vm_fault_t fault)
{
siginfo_t info;
unsigned int lsb = 0;
@@ -187,7 +186,8 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address,
return 0;
}
-static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
+static int mm_fault_error(struct pt_regs *regs, unsigned long addr,
+ vm_fault_t fault)
{
/*
* Kernel page fault interrupted by SIGKILL. We have no reason to
@@ -415,7 +415,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
int is_exec = TRAP(regs) == 0x400;
int is_user = user_mode(regs);
int is_write = page_fault_is_write(error_code);
- int fault, major = 0;
+ vm_fault_t fault, major = 0;
bool must_retry = false;
if (notify_page_fault(regs))
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index d573d7d07f25..6fa6765a10eb 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -80,7 +80,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
hash = hpt_hash(vpn, shift, ssize);
repeat:
- hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
/* Insert into the hash table, primary slot */
slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
@@ -89,7 +89,7 @@ repeat:
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
- hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+ hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
rflags,
HPTE_V_SECONDARY,
@@ -97,8 +97,8 @@ repeat:
MMU_PAGE_4K, ssize);
if (slot == -1) {
if (mftb() & 0x1)
- hpte_group = ((hash & htab_hash_mask) *
- HPTES_PER_GROUP) & ~0x7UL;
+ hpte_group = (hash & htab_hash_mask) *
+ HPTES_PER_GROUP;
mmu_hash_ops.hpte_remove(hpte_group);
/*
* FIXME!! Should be try the group from which we removed ?
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index e601d95c3b20..3afa253d7f52 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -154,7 +154,7 @@ htab_insert_hpte:
}
hash = hpt_hash(vpn, shift, ssize);
repeat:
- hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
/* Insert into the hash table, primary slot */
slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
@@ -165,7 +165,7 @@ repeat:
if (unlikely(slot == -1)) {
bool soft_invalid;
- hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+ hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
rflags, HPTE_V_SECONDARY,
MMU_PAGE_4K, MMU_PAGE_4K,
@@ -193,8 +193,7 @@ repeat:
* that we do not get the same soft-invalid slot.
*/
if (soft_invalid || (mftb() & 0x1))
- hpte_group = ((hash & htab_hash_mask) *
- HPTES_PER_GROUP) & ~0x7UL;
+ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
mmu_hash_ops.hpte_remove(hpte_group);
/*
@@ -288,7 +287,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
hash = hpt_hash(vpn, shift, ssize);
repeat:
- hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
/* Insert into the hash table, primary slot */
slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
@@ -298,7 +297,7 @@ repeat:
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
- hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+ hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
rflags,
HPTE_V_SECONDARY,
@@ -306,8 +305,8 @@ repeat:
MMU_PAGE_64K, ssize);
if (slot == -1) {
if (mftb() & 0x1)
- hpte_group = ((hash & htab_hash_mask) *
- HPTES_PER_GROUP) & ~0x7UL;
+ hpte_group = (hash & htab_hash_mask) *
+ HPTES_PER_GROUP;
mmu_hash_ops.hpte_remove(hpte_group);
/*
* FIXME!! Should be try the group from which we removed ?
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index ffbd7c0bda96..26acf6c8c20c 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -27,6 +27,7 @@
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
+#include <asm/feature-fixups.h>
#ifdef CONFIG_SMP
.section .bss
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 1d049c78c82a..729f02df8290 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -23,13 +23,13 @@
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>
+#include <asm/feature-fixups.h>
#include <misc/cxl-base.h>
@@ -423,9 +423,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
vpn, want_v & HPTE_V_AVPN, slot, newpp);
- hpte_v = be64_to_cpu(hptep->v);
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
+ hpte_v = hpte_get_old_v(hptep);
/*
* We need to invalidate the TLB always because hpte_remove doesn't do
* a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
@@ -439,9 +437,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
} else {
native_lock_hpte(hptep);
/* recheck with locks held */
- hpte_v = be64_to_cpu(hptep->v);
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
+ hpte_v = hpte_get_old_v(hptep);
if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) ||
!(hpte_v & HPTE_V_VALID))) {
ret = -1;
@@ -481,11 +477,9 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
/* Bolted mappings are only ever in the primary group */
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
for (i = 0; i < HPTES_PER_GROUP; i++) {
- hptep = htab_address + slot;
- hpte_v = be64_to_cpu(hptep->v);
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
+ hptep = htab_address + slot;
+ hpte_v = hpte_get_old_v(hptep);
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
/* HPTE matches */
return slot;
@@ -574,11 +568,19 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);
want_v = hpte_encode_avpn(vpn, bpsize, ssize);
- native_lock_hpte(hptep);
- hpte_v = be64_to_cpu(hptep->v);
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
+ hpte_v = hpte_get_old_v(hptep);
+ if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
+ native_lock_hpte(hptep);
+ /* recheck with locks held */
+ hpte_v = hpte_get_old_v(hptep);
+
+ if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
+ /* Invalidate the hpte. NOTE: this also unlocks it */
+ hptep->v = 0;
+ else
+ native_unlock_hpte(hptep);
+ }
/*
* We need to invalidate the TLB always because hpte_remove doesn't do
* a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
@@ -586,13 +588,6 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
* (hpte_remove) because we assume the old translation is still
* technically "valid".
*/
- if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
- native_unlock_hpte(hptep);
- else
- /* Invalidate the hpte. NOTE: this also unlocks it */
- hptep->v = 0;
-
- /* Invalidate the TLB */
tlbie(vpn, bpsize, apsize, ssize, local);
local_irq_restore(flags);
@@ -634,17 +629,23 @@ static void native_hugepage_invalidate(unsigned long vsid,
hptep = htab_address + slot;
want_v = hpte_encode_avpn(vpn, psize, ssize);
- native_lock_hpte(hptep);
- hpte_v = be64_to_cpu(hptep->v);
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
+ hpte_v = hpte_get_old_v(hptep);
/* Even if we miss, we need to invalidate the TLB */
- if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
- native_unlock_hpte(hptep);
- else
- /* Invalidate the hpte. NOTE: this also unlocks it */
- hptep->v = 0;
+ if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
+ /* recheck with locks held */
+ native_lock_hpte(hptep);
+ hpte_v = hpte_get_old_v(hptep);
+
+ if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
+ /*
+ * Invalidate the hpte. NOTE: this also unlocks it
+ */
+
+ hptep->v = 0;
+ } else
+ native_unlock_hpte(hptep);
+ }
/*
* We need to do tlb invalidate for all the address, tlbie
* instruction compares entry_VA in tlb with the VA specified
@@ -812,16 +813,19 @@ static void native_flush_hash_range(unsigned long number, int local)
slot += hidx & _PTEIDX_GROUP_IX;
hptep = htab_address + slot;
want_v = hpte_encode_avpn(vpn, psize, ssize);
+ hpte_v = hpte_get_old_v(hptep);
+
+ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
+ continue;
+ /* lock and try again */
native_lock_hpte(hptep);
- hpte_v = be64_to_cpu(hptep->v);
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- hpte_v = hpte_new_to_old_v(hpte_v,
- be64_to_cpu(hptep->r));
- if (!HPTE_V_COMPARE(hpte_v, want_v) ||
- !(hpte_v & HPTE_V_VALID))
+ hpte_v = hpte_get_old_v(hptep);
+
+ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep);
else
hptep->v = 0;
+
} pte_iterate_hashed_end();
}
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 8318716e5075..f23a89d8e4ce 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -48,7 +48,6 @@
#include <linux/uaccess.h>
#include <asm/machdep.h>
#include <asm/prom.h>
-#include <asm/tlbflush.h>
#include <asm/io.h>
#include <asm/eeh.h>
#include <asm/tlb.h>
@@ -808,31 +807,6 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end)
}
#endif /* CONFIG_MEMORY_HOTPLUG */
-static void update_hid_for_hash(void)
-{
- unsigned long hid0;
- unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */
-
- asm volatile("ptesync": : :"memory");
- /* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */
- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(0), "i"(0), "i"(2), "r"(0) : "memory");
- asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
- trace_tlbie(0, 0, rb, 0, 2, 0, 0);
-
- /*
- * now switch the HID
- */
- hid0 = mfspr(SPRN_HID0);
- hid0 &= ~HID0_POWER9_RADIX;
- mtspr(SPRN_HID0, hid0);
- asm volatile("isync": : :"memory");
-
- /* Wait for it to happen */
- while ((mfspr(SPRN_HID0) & HID0_POWER9_RADIX))
- cpu_relax();
-}
-
static void __init hash_init_partition_table(phys_addr_t hash_table,
unsigned long htab_size)
{
@@ -845,8 +819,6 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
htab_size = __ilog2(htab_size) - 18;
mmu_partition_table_set_entry(0, hash_table | htab_size, 0);
pr_info("Partition table %p\n", partition_tb);
- if (cpu_has_feature(CPU_FTR_POWER9_DD1))
- update_hid_for_hash();
}
static void __init htab_initialize(void)
@@ -1077,9 +1049,6 @@ void hash__early_init_mmu_secondary(void)
/* Initialize hash table for that CPU */
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
- if (cpu_has_feature(CPU_FTR_POWER9_DD1))
- update_hid_for_hash();
-
if (!cpu_has_feature(CPU_FTR_ARCH_300))
mtspr(SPRN_SDR1, _SDR1);
else
@@ -1783,8 +1752,7 @@ long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
long slot;
repeat:
- hpte_group = ((hash & htab_hash_mask) *
- HPTES_PER_GROUP) & ~0x7UL;
+ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
/* Insert into the hash table, primary slot */
slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, vflags,
@@ -1792,15 +1760,14 @@ repeat:
/* Primary is full, try the secondary */
if (unlikely(slot == -1)) {
- hpte_group = ((~hash & htab_hash_mask) *
- HPTES_PER_GROUP) & ~0x7UL;
+ hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags,
vflags | HPTE_V_SECONDARY,
psize, psize, ssize);
if (slot == -1) {
if (mftb() & 0x1)
- hpte_group = ((hash & htab_hash_mask) *
- HPTES_PER_GROUP)&~0x7UL;
+ hpte_group = (hash & htab_hash_mask) *
+ HPTES_PER_GROUP;
mmu_hash_ops.hpte_remove(hpte_group);
goto repeat;
diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c
index 668e87d03f9e..82a0e37557a5 100644
--- a/arch/powerpc/mm/highmem.c
+++ b/arch/powerpc/mm/highmem.c
@@ -56,7 +56,7 @@ EXPORT_SYMBOL(kmap_atomic_prot);
void __kunmap_atomic(void *kvaddr)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
- int type;
+ int type __maybe_unused;
if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
pagefault_enable();
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index f20d16f849c5..01f213d2bcb9 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -128,7 +128,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
new_pmd |= H_PAGE_HASHPTE;
repeat:
- hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
/* Insert into the hash table, primary slot */
slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
@@ -137,16 +137,15 @@ repeat:
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
- hpte_group = ((~hash & htab_hash_mask) *
- HPTES_PER_GROUP) & ~0x7UL;
+ hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
rflags,
HPTE_V_SECONDARY,
psize, lpsize, ssize);
if (slot == -1) {
if (mftb() & 0x1)
- hpte_group = ((hash & htab_hash_mask) *
- HPTES_PER_GROUP) & ~0x7UL;
+ hpte_group = (hash & htab_hash_mask) *
+ HPTES_PER_GROUP;
mmu_hash_ops.hpte_remove(hpte_group);
goto repeat;
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7c5f479c5c00..e87f9ef9115b 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -118,15 +118,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
}
/*
- * These macros define how to determine which level of the page table holds
- * the hpdp.
- */
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
-#define HUGEPD_PGD_SHIFT PGDIR_SHIFT
-#define HUGEPD_PUD_SHIFT PUD_SHIFT
-#endif
-
-/*
* At this point we do the placement change only for BOOK3S 64. This would
* possibly work on other subarchs.
*/
@@ -174,13 +165,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
}
}
#else
- if (pshift >= HUGEPD_PGD_SHIFT) {
+ if (pshift >= PGDIR_SHIFT) {
ptl = &mm->page_table_lock;
hpdp = (hugepd_t *)pg;
} else {
pdshift = PUD_SHIFT;
pu = pud_alloc(mm, pg, addr);
- if (pshift >= HUGEPD_PUD_SHIFT) {
+ if (pshift >= PUD_SHIFT) {
ptl = pud_lockptr(mm, pu);
hpdp = (hugepd_t *)pu;
} else {
@@ -337,7 +328,8 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
if (shift >= pdshift)
hugepd_free(tlb, hugepte);
else
- pgtable_free_tlb(tlb, hugepte, pdshift - shift);
+ pgtable_free_tlb(tlb, hugepte,
+ get_hugepd_cache_index(pdshift - shift));
}
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
@@ -620,15 +612,12 @@ static int __init add_huge_page_size(unsigned long long size)
* firmware we only add hugetlb support for page sizes that can be
* supported by linux page table layout.
* For now we have
- * Radix: 2M
+ * Radix: 2M and 1G
* Hash: 16M and 16G
*/
if (radix_enabled()) {
- if (mmu_psize != MMU_PAGE_2M) {
- if (cpu_has_feature(CPU_FTR_POWER9_DD1) ||
- (mmu_psize != MMU_PAGE_1G))
- return -EINVAL;
- }
+ if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G)
+ return -EINVAL;
} else {
if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
return -EINVAL;
@@ -694,9 +683,9 @@ static int __init hugetlbpage_init(void)
else
pdshift = PMD_SHIFT;
#else
- if (shift < HUGEPD_PUD_SHIFT)
+ if (shift < PUD_SHIFT)
pdshift = PMD_SHIFT;
- else if (shift < HUGEPD_PGD_SHIFT)
+ else if (shift < PGDIR_SHIFT)
pdshift = PUD_SHIFT;
else
pdshift = PGDIR_SHIFT;
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index f3d4b4a0e561..4a892d894a0f 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -200,9 +200,9 @@ static void pte_frag_destroy(void *pte_frag)
/* drop all the pending references */
count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
- if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) {
+ if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
pgtable_page_dtor(page);
- free_unref_page(page);
+ __free_page(page);
}
}
@@ -215,13 +215,13 @@ static void pmd_frag_destroy(void *pmd_frag)
/* drop all the pending references */
count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
- if (page_ref_sub_and_test(page, PMD_FRAG_NR - count)) {
+ if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
pgtable_pmd_page_dtor(page);
- free_unref_page(page);
+ __free_page(page);
}
}
-static void destroy_pagetable_page(struct mm_struct *mm)
+static void destroy_pagetable_cache(struct mm_struct *mm)
{
void *frag;
@@ -244,13 +244,14 @@ void destroy_context(struct mm_struct *mm)
WARN_ON(process_tb[mm->context.id].prtb0 != 0);
else
subpage_prot_free(mm);
- destroy_pagetable_page(mm);
destroy_contexts(&mm->context);
mm->context.id = MMU_NO_CONTEXT;
}
void arch_exit_mmap(struct mm_struct *mm)
{
+ destroy_pagetable_cache(mm);
+
if (radix_enabled()) {
/*
* Radix doesn't have a valid bit in the process table
@@ -273,15 +274,7 @@ void arch_exit_mmap(struct mm_struct *mm)
#ifdef CONFIG_PPC_RADIX_MMU
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
-
- if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
- isync();
- mtspr(SPRN_PID, next->context.id);
- isync();
- asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
- } else {
- mtspr(SPRN_PID, next->context.id);
- isync();
- }
+ mtspr(SPRN_PID, next->context.id);
+ isync();
}
#endif
diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c
index aa5a7fd89461..921c1e33e941 100644
--- a/arch/powerpc/mm/mmu_context_hash32.c
+++ b/arch/powerpc/mm/mmu_context_hash32.c
@@ -27,7 +27,6 @@
#include <linux/export.h>
#include <asm/mmu_context.h>
-#include <asm/tlbflush.h>
/*
* On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index abb43646927a..a4ca57612558 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -19,6 +19,7 @@
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <asm/mmu_context.h>
+#include <asm/pte-walk.h>
static DEFINE_MUTEX(mem_list_mutex);
@@ -27,6 +28,7 @@ struct mm_iommu_table_group_mem_t {
struct rcu_head rcu;
unsigned long used;
atomic64_t mapped;
+ unsigned int pageshift;
u64 ua; /* userspace address */
u64 entries; /* number of entries in hpas[] */
u64 *hpas; /* vmalloc'ed */
@@ -125,6 +127,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
{
struct mm_iommu_table_group_mem_t *mem;
long i, j, ret = 0, locked_entries = 0;
+ unsigned int pageshift;
+ unsigned long flags;
struct page *page = NULL;
mutex_lock(&mem_list_mutex);
@@ -159,6 +163,12 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
goto unlock_exit;
}
+ /*
+ * For a starting point for a maximum page size calculation
+ * we use @ua and @entries natural alignment to allow IOMMU pages
+ * smaller than huge pages but still bigger than PAGE_SIZE.
+ */
+ mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
if (!mem->hpas) {
kfree(mem);
@@ -199,6 +209,23 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
}
}
populate:
+ pageshift = PAGE_SHIFT;
+ if (PageCompound(page)) {
+ pte_t *pte;
+ struct page *head = compound_head(page);
+ unsigned int compshift = compound_order(head);
+
+ local_irq_save(flags); /* disables as well */
+ pte = find_linux_pte(mm->pgd, ua, NULL, &pageshift);
+ local_irq_restore(flags);
+
+ /* Double check it is still the same pinned page */
+ if (pte && pte_page(*pte) == head &&
+ pageshift == compshift)
+ pageshift = max_t(unsigned int, pageshift,
+ PAGE_SHIFT);
+ }
+ mem->pageshift = min(mem->pageshift, pageshift);
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
}
@@ -349,7 +376,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
EXPORT_SYMBOL_GPL(mm_iommu_find);
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
- unsigned long ua, unsigned long *hpa)
+ unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
u64 *va = &mem->hpas[entry];
@@ -357,6 +384,9 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
if (entry >= mem->entries)
return -EFAULT;
+ if (pageshift > mem->pageshift)
+ return -EFAULT;
+
*hpa = *va | (ua & ~PAGE_MASK);
return 0;
@@ -364,7 +394,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
- unsigned long ua, unsigned long *hpa)
+ unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
void *va = &mem->hpas[entry];
@@ -373,6 +403,9 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
if (entry >= mem->entries)
return -EFAULT;
+ if (pageshift > mem->pageshift)
+ return -EFAULT;
+
pa = (void *) vmalloc_to_phys(va);
if (!pa)
return -EFAULT;
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index c4c0a09a7775..e5d779eed181 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -19,7 +19,6 @@
*
*/
#include <linux/mm.h>
-#include <asm/tlbflush.h>
#include <asm/mmu.h>
#ifdef CONFIG_PPC_MMU_NOHASH
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index c1f4ca45c93a..01d7c0f7c4f0 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -270,6 +270,8 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
return NULL;
}
+ atomic_set(&page->pt_frag_refcount, 1);
+
ret = page_address(page);
/*
* if we support only one fragment just return the
@@ -285,7 +287,7 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
* count.
*/
if (likely(!mm->context.pmd_frag)) {
- set_page_count(page, PMD_FRAG_NR);
+ atomic_set(&page->pt_frag_refcount, PMD_FRAG_NR);
mm->context.pmd_frag = ret + PMD_FRAG_SIZE;
}
spin_unlock(&mm->page_table_lock);
@@ -308,9 +310,10 @@ void pmd_fragment_free(unsigned long *pmd)
{
struct page *page = virt_to_page(pmd);
- if (put_page_testzero(page)) {
+ BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
+ if (atomic_dec_and_test(&page->pt_frag_refcount)) {
pgtable_pmd_page_dtor(page);
- free_unref_page(page);
+ __free_page(page);
}
}
@@ -352,6 +355,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
return NULL;
}
+ atomic_set(&page->pt_frag_refcount, 1);
ret = page_address(page);
/*
@@ -367,7 +371,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
* count.
*/
if (likely(!mm->context.pte_frag)) {
- set_page_count(page, PTE_FRAG_NR);
+ atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR);
mm->context.pte_frag = ret + PTE_FRAG_SIZE;
}
spin_unlock(&mm->page_table_lock);
@@ -390,10 +394,11 @@ void pte_fragment_free(unsigned long *table, int kernel)
{
struct page *page = virt_to_page(table);
- if (put_page_testzero(page)) {
+ BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
+ if (atomic_dec_and_test(&page->pt_frag_refcount)) {
if (!kernel)
pgtable_page_dtor(page);
- free_unref_page(page);
+ __free_page(page);
}
}
@@ -409,6 +414,18 @@ static inline void pgtable_free(void *table, int index)
case PUD_INDEX:
kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), table);
break;
+#if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE)
+ /* 16M hugepd directory at pud level */
+ case HTLB_16M_INDEX:
+ BUILD_BUG_ON(H_16M_CACHE_INDEX <= 0);
+ kmem_cache_free(PGT_CACHE(H_16M_CACHE_INDEX), table);
+ break;
+ /* 16G hugepd directory at the pgd level */
+ case HTLB_16G_INDEX:
+ BUILD_BUG_ON(H_16G_CACHE_INDEX <= 0);
+ kmem_cache_free(PGT_CACHE(H_16G_CACHE_INDEX), table);
+ break;
+#endif
/* We don't free pgd table via RCU callback */
default:
BUG();
@@ -438,3 +455,25 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
return pgtable_free(table, index);
}
#endif
+
+#ifdef CONFIG_PROC_FS
+atomic_long_t direct_pages_count[MMU_PAGE_COUNT];
+
+void arch_report_meminfo(struct seq_file *m)
+{
+ /*
+ * Hash maps the memory with one size mmu_linear_psize.
+ * So don't bother to print these on hash
+ */
+ if (!radix_enabled())
+ return;
+ seq_printf(m, "DirectMap4k: %8lu kB\n",
+ atomic_long_read(&direct_pages_count[MMU_PAGE_4K]) << 2);
+ seq_printf(m, "DirectMap64k: %8lu kB\n",
+ atomic_long_read(&direct_pages_count[MMU_PAGE_64K]) << 6);
+ seq_printf(m, "DirectMap2M: %8lu kB\n",
+ atomic_long_read(&direct_pages_count[MMU_PAGE_2M]) << 11);
+ seq_printf(m, "DirectMap1G: %8lu kB\n",
+ atomic_long_read(&direct_pages_count[MMU_PAGE_1G]) << 20);
+}
+#endif /* CONFIG_PROC_FS */
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 96f68c5aa1f5..7be99fd9af15 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -226,16 +226,6 @@ void radix__mark_rodata_ro(void)
{
unsigned long start, end;
- /*
- * mark_rodata_ro() will mark itself as !writable at some point.
- * Due to DD1 workaround in radix__pte_update(), we'll end up with
- * an invalid pte and the system will crash quite severly.
- */
- if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
- pr_warn("Warning: Unable to mark rodata read only on P9 DD1\n");
- return;
- }
-
start = (unsigned long)_stext;
end = (unsigned long)__init_begin;
@@ -277,6 +267,7 @@ static int __meminit create_physical_mapping(unsigned long start,
#else
int split_text_mapping = 0;
#endif
+ int psize;
start = _ALIGN_UP(start, PAGE_SIZE);
for (addr = start; addr < end; addr += mapping_size) {
@@ -290,13 +281,17 @@ static int __meminit create_physical_mapping(unsigned long start,
retry:
if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
mmu_psize_defs[MMU_PAGE_1G].shift &&
- PUD_SIZE <= max_mapping_size)
+ PUD_SIZE <= max_mapping_size) {
mapping_size = PUD_SIZE;
- else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
- mmu_psize_defs[MMU_PAGE_2M].shift)
+ psize = MMU_PAGE_1G;
+ } else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
+ mmu_psize_defs[MMU_PAGE_2M].shift) {
mapping_size = PMD_SIZE;
- else
+ psize = MMU_PAGE_2M;
+ } else {
mapping_size = PAGE_SIZE;
+ psize = mmu_virtual_psize;
+ }
if (split_text_mapping && (mapping_size == PUD_SIZE) &&
(addr <= __pa_symbol(__init_begin)) &&
@@ -307,8 +302,10 @@ retry:
if (split_text_mapping && (mapping_size == PMD_SIZE) &&
(addr <= __pa_symbol(__init_begin)) &&
- (addr + mapping_size) >= __pa_symbol(_stext))
+ (addr + mapping_size) >= __pa_symbol(_stext)) {
mapping_size = PAGE_SIZE;
+ psize = mmu_virtual_psize;
+ }
if (mapping_size != previous_size) {
print_mapping(start, addr, previous_size);
@@ -326,6 +323,8 @@ retry:
rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
if (rc)
return rc;
+
+ update_page_count(psize, 1);
}
print_mapping(start, addr, mapping_size);
@@ -533,35 +532,6 @@ found:
return;
}
-static void update_hid_for_radix(void)
-{
- unsigned long hid0;
- unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */
-
- asm volatile("ptesync": : :"memory");
- /* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */
- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(1), "i"(0), "i"(2), "r"(0) : "memory");
- /* prs = 1, ric = 2, rs = 0, r = 1 is = 3 */
- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory");
- asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
- trace_tlbie(0, 0, rb, 0, 2, 0, 1);
- trace_tlbie(0, 0, rb, 0, 2, 1, 1);
-
- /*
- * now switch the HID
- */
- hid0 = mfspr(SPRN_HID0);
- hid0 |= HID0_POWER9_RADIX;
- mtspr(SPRN_HID0, hid0);
- asm volatile("isync": : :"memory");
-
- /* Wait for it to happen */
- while (!(mfspr(SPRN_HID0) & HID0_POWER9_RADIX))
- cpu_relax();
-}
-
static void radix_init_amor(void)
{
/*
@@ -576,22 +546,12 @@ static void radix_init_amor(void)
static void radix_init_iamr(void)
{
- unsigned long iamr;
-
- /*
- * The IAMR should set to 0 on DD1.
- */
- if (cpu_has_feature(CPU_FTR_POWER9_DD1))
- iamr = 0;
- else
- iamr = (1ul << 62);
-
/*
* Radix always uses key0 of the IAMR to determine if an access is
* allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
* fetch.
*/
- mtspr(SPRN_IAMR, iamr);
+ mtspr(SPRN_IAMR, (1ul << 62));
}
void __init radix__early_init_mmu(void)
@@ -644,8 +604,6 @@ void __init radix__early_init_mmu(void)
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
radix_init_native();
- if (cpu_has_feature(CPU_FTR_POWER9_DD1))
- update_hid_for_radix();
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
radix_init_partition_table();
@@ -671,10 +629,6 @@ void radix__early_init_mmu_secondary(void)
* update partition table control register and UPRT
*/
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
-
- if (cpu_has_feature(CPU_FTR_POWER9_DD1))
- update_hid_for_radix();
-
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
@@ -1095,8 +1049,7 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
* To avoid NMMU hang while relaxing access, we need mark
* the pte invalid in between.
*/
- if (cpu_has_feature(CPU_FTR_POWER9_DD1) ||
- atomic_read(&mm->context.copros) > 0) {
+ if (atomic_read(&mm->context.copros) > 0) {
unsigned long old_pte, new_pte;
old_pte = __radix_pte_update(ptep, ~0, 0);
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
index e6f500fabf5e..333b1f80c435 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/pkeys.c
@@ -14,9 +14,12 @@ DEFINE_STATIC_KEY_TRUE(pkey_disabled);
bool pkey_execute_disable_supported;
int pkeys_total; /* Total pkeys as per device tree */
bool pkeys_devtree_defined; /* pkey property exported by device tree */
-u32 initial_allocation_mask; /* Bits set for reserved keys */
-u64 pkey_amr_uamor_mask; /* Bits in AMR/UMOR not to be touched */
+u32 initial_allocation_mask; /* Bits set for the initially allocated keys */
+u32 reserved_allocation_mask; /* Bits set for reserved keys */
+u64 pkey_amr_mask; /* Bits in AMR not to be touched */
u64 pkey_iamr_mask; /* Bits in AMR not to be touched */
+u64 pkey_uamor_mask; /* Bits in UAMOR not to be touched */
+int execute_only_key = 2;
#define AMR_BITS_PER_PKEY 2
#define AMR_RD_BIT 0x1UL
@@ -91,7 +94,7 @@ int pkey_initialize(void)
* arch-neutral code.
*/
pkeys_total = min_t(int, pkeys_total,
- (ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT));
+ ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)+1));
if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total)
static_branch_enable(&pkey_disabled);
@@ -119,20 +122,39 @@ int pkey_initialize(void)
#else
os_reserved = 0;
#endif
- initial_allocation_mask = ~0x0;
- pkey_amr_uamor_mask = ~0x0ul;
+ /* Bits are in LE format. */
+ reserved_allocation_mask = (0x1 << 1) | (0x1 << execute_only_key);
+
+ /* register mask is in BE format */
+ pkey_amr_mask = ~0x0ul;
+ pkey_amr_mask &= ~(0x3ul << pkeyshift(0));
+
pkey_iamr_mask = ~0x0ul;
- /*
- * key 0, 1 are reserved.
- * key 0 is the default key, which allows read/write/execute.
- * key 1 is recommended not to be used. PowerISA(3.0) page 1015,
- * programming note.
- */
- for (i = 2; i < (pkeys_total - os_reserved); i++) {
- initial_allocation_mask &= ~(0x1 << i);
- pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i));
- pkey_iamr_mask &= ~(0x1ul << pkeyshift(i));
+ pkey_iamr_mask &= ~(0x3ul << pkeyshift(0));
+ pkey_iamr_mask &= ~(0x3ul << pkeyshift(execute_only_key));
+
+ pkey_uamor_mask = ~0x0ul;
+ pkey_uamor_mask &= ~(0x3ul << pkeyshift(0));
+ pkey_uamor_mask &= ~(0x3ul << pkeyshift(execute_only_key));
+
+ /* mark the rest of the keys as reserved and hence unavailable */
+ for (i = (pkeys_total - os_reserved); i < pkeys_total; i++) {
+ reserved_allocation_mask |= (0x1 << i);
+ pkey_uamor_mask &= ~(0x3ul << pkeyshift(i));
+ }
+ initial_allocation_mask = reserved_allocation_mask | (0x1 << 0);
+
+ if (unlikely((pkeys_total - os_reserved) <= execute_only_key)) {
+ /*
+ * Insufficient number of keys to support
+ * execute only key. Mark it unavailable.
+ * Any AMR, UAMOR, IAMR bit set for
+ * this key is irrelevant since this key
+ * can never be allocated.
+ */
+ execute_only_key = -1;
}
+
return 0;
}
@@ -143,8 +165,7 @@ void pkey_mm_init(struct mm_struct *mm)
if (static_branch_likely(&pkey_disabled))
return;
mm_pkey_allocation_map(mm) = initial_allocation_mask;
- /* -1 means unallocated or invalid */
- mm->context.execute_only_pkey = -1;
+ mm->context.execute_only_pkey = execute_only_key;
}
static inline u64 read_amr(void)
@@ -213,33 +234,6 @@ static inline void init_iamr(int pkey, u8 init_bits)
write_iamr(old_iamr | new_iamr_bits);
}
-static void pkey_status_change(int pkey, bool enable)
-{
- u64 old_uamor;
-
- /* Reset the AMR and IAMR bits for this key */
- init_amr(pkey, 0x0);
- init_iamr(pkey, 0x0);
-
- /* Enable/disable key */
- old_uamor = read_uamor();
- if (enable)
- old_uamor |= (0x3ul << pkeyshift(pkey));
- else
- old_uamor &= ~(0x3ul << pkeyshift(pkey));
- write_uamor(old_uamor);
-}
-
-void __arch_activate_pkey(int pkey)
-{
- pkey_status_change(pkey, true);
-}
-
-void __arch_deactivate_pkey(int pkey)
-{
- pkey_status_change(pkey, false);
-}
-
/*
* Set the access rights in AMR IAMR and UAMOR registers for @pkey to that
* specified in @init_val.
@@ -289,9 +283,6 @@ void thread_pkey_regs_restore(struct thread_struct *new_thread,
if (static_branch_likely(&pkey_disabled))
return;
- /*
- * TODO: Just set UAMOR to zero if @new_thread hasn't used any keys yet.
- */
if (old_thread->amr != new_thread->amr)
write_amr(new_thread->amr);
if (old_thread->iamr != new_thread->iamr)
@@ -305,9 +296,13 @@ void thread_pkey_regs_init(struct thread_struct *thread)
if (static_branch_likely(&pkey_disabled))
return;
- thread->amr = read_amr() & pkey_amr_uamor_mask;
- thread->iamr = read_iamr() & pkey_iamr_mask;
- thread->uamor = read_uamor() & pkey_amr_uamor_mask;
+ thread->amr = pkey_amr_mask;
+ thread->iamr = pkey_iamr_mask;
+ thread->uamor = pkey_uamor_mask;
+
+ write_uamor(pkey_uamor_mask);
+ write_amr(pkey_amr_mask);
+ write_iamr(pkey_iamr_mask);
}
static inline bool pkey_allows_readwrite(int pkey)
@@ -322,48 +317,7 @@ static inline bool pkey_allows_readwrite(int pkey)
int __execute_only_pkey(struct mm_struct *mm)
{
- bool need_to_set_mm_pkey = false;
- int execute_only_pkey = mm->context.execute_only_pkey;
- int ret;
-
- /* Do we need to assign a pkey for mm's execute-only maps? */
- if (execute_only_pkey == -1) {
- /* Go allocate one to use, which might fail */
- execute_only_pkey = mm_pkey_alloc(mm);
- if (execute_only_pkey < 0)
- return -1;
- need_to_set_mm_pkey = true;
- }
-
- /*
- * We do not want to go through the relatively costly dance to set AMR
- * if we do not need to. Check it first and assume that if the
- * execute-only pkey is readwrite-disabled than we do not have to set it
- * ourselves.
- */
- if (!need_to_set_mm_pkey && !pkey_allows_readwrite(execute_only_pkey))
- return execute_only_pkey;
-
- /*
- * Set up AMR so that it denies access for everything other than
- * execution.
- */
- ret = __arch_set_user_pkey_access(current, execute_only_pkey,
- PKEY_DISABLE_ACCESS |
- PKEY_DISABLE_WRITE);
- /*
- * If the AMR-set operation failed somehow, just return 0 and
- * effectively disable execute-only support.
- */
- if (ret) {
- mm_pkey_free(mm, execute_only_pkey);
- return -1;
- }
-
- /* We got one, store it and use it from here on out */
- if (need_to_set_mm_pkey)
- mm->context.execute_only_pkey = execute_only_pkey;
- return execute_only_pkey;
+ return mm->context.execute_only_pkey;
}
static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
@@ -407,9 +361,6 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute)
int pkey_shift;
u64 amr;
- if (!pkey)
- return true;
-
if (!is_pkey_enabled(pkey))
return true;
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index cb796724a6fc..0b095fa54049 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -90,6 +90,45 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize,
: "memory" );
}
+/*
+ * Insert bolted entries into SLB (which may not be empty, so don't clear
+ * slb_cache_ptr).
+ */
+void __slb_restore_bolted_realmode(void)
+{
+ struct slb_shadow *p = get_slb_shadow();
+ enum slb_index index;
+
+ /* No isync needed because realmode. */
+ for (index = 0; index < SLB_NUM_BOLTED; index++) {
+ asm volatile("slbmte %0,%1" :
+ : "r" (be64_to_cpu(p->save_area[index].vsid)),
+ "r" (be64_to_cpu(p->save_area[index].esid)));
+ }
+}
+
+/*
+ * Insert the bolted entries into an empty SLB.
+ * This is not the same as rebolt because the bolted segments are not
+ * changed, just loaded from the shadow area.
+ */
+void slb_restore_bolted_realmode(void)
+{
+ __slb_restore_bolted_realmode();
+ get_paca()->slb_cache_ptr = 0;
+}
+
+/*
+ * This flushes all SLB entries including 0, so it must be realmode.
+ */
+void slb_flush_all_realmode(void)
+{
+ /*
+ * slbmte with both operands zero overwrites entry 0; slbia then invalidates the others.
+ */
+ asm volatile("slbmte %0,%0; slbia" : : "r" (0));
+}
+
static void __slb_flush_and_rebolt(void)
{
/* If you change this make sure you change SLB_NUM_BOLTED
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index a83fbd2a4a24..4ac5057ad439 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -22,6 +22,7 @@
#include <asm/mmu.h>
#include <asm/pgtable.h>
#include <asm/firmware.h>
+#include <asm/feature-fixups.h>
/*
* This macro generates asm code to compute the VSID scramble
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index 75cb646a79c3..3327551c8b47 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -17,7 +17,6 @@
#include <asm/pgtable.h>
#include <linux/uaccess.h>
-#include <asm/tlbflush.h>
/*
* Free all pages allocated for subpage protection maps and pointers.
@@ -186,9 +185,6 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
* in a 2-bit field won't allow writes to a page that is otherwise
* write-protected.
*/
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wpragmas"
-#pragma GCC diagnostic ignored "-Wattribute-alias"
SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
unsigned long, len, u32 __user *, map)
{
@@ -272,4 +268,3 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
up_write(&mm->mmap_sem);
return err;
}
-#pragma GCC diagnostic pop
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 67a6e86d3e7e..fef3e1eb3a19 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -689,22 +689,17 @@ EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
-void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
+static inline void __radix__flush_tlb_range(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ bool flush_all_sizes)
{
- struct mm_struct *mm = vma->vm_mm;
unsigned long pid;
unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
unsigned long page_size = 1UL << page_shift;
unsigned long nr_pages = (end - start) >> page_shift;
bool local, full;
-#ifdef CONFIG_HUGETLB_PAGE
- if (is_vm_hugetlb_page(vma))
- return radix__flush_hugetlb_tlb_range(vma, start, end);
-#endif
-
pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
return;
@@ -738,37 +733,64 @@ is_local:
_tlbie_pid(pid, RIC_FLUSH_TLB);
}
} else {
- bool hflush = false;
+ bool hflush = flush_all_sizes;
+ bool gflush = flush_all_sizes;
unsigned long hstart, hend;
+ unsigned long gstart, gend;
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
- hend = end >> HPAGE_PMD_SHIFT;
- if (hstart < hend) {
- hstart <<= HPAGE_PMD_SHIFT;
- hend <<= HPAGE_PMD_SHIFT;
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
hflush = true;
+
+ if (hflush) {
+ hstart = (start + PMD_SIZE - 1) & PMD_MASK;
+ hend = end & PMD_MASK;
+ if (hstart == hend)
+ hflush = false;
+ }
+
+ if (gflush) {
+ gstart = (start + PUD_SIZE - 1) & PUD_MASK;
+ gend = end & PUD_MASK;
+ if (gstart == gend)
+ gflush = false;
}
-#endif
asm volatile("ptesync": : :"memory");
if (local) {
__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbiel_va_range(hstart, hend, pid,
- HPAGE_PMD_SIZE, MMU_PAGE_2M);
+ PMD_SIZE, MMU_PAGE_2M);
+ if (gflush)
+ __tlbiel_va_range(gstart, gend, pid,
+ PUD_SIZE, MMU_PAGE_1G);
asm volatile("ptesync": : :"memory");
} else {
__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbie_va_range(hstart, hend, pid,
- HPAGE_PMD_SIZE, MMU_PAGE_2M);
+ PMD_SIZE, MMU_PAGE_2M);
+ if (gflush)
+ __tlbie_va_range(gstart, gend, pid,
+ PUD_SIZE, MMU_PAGE_1G);
fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
}
preempt_enable();
}
+
+void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+
+{
+#ifdef CONFIG_HUGETLB_PAGE
+ if (is_vm_hugetlb_page(vma))
+ return radix__flush_hugetlb_tlb_range(vma, start, end);
+#endif
+
+ __radix__flush_tlb_range(vma->vm_mm, start, end, false);
+}
EXPORT_SYMBOL(radix__flush_tlb_range);
static int radix_get_mmu_psize(int page_size)
@@ -837,6 +859,8 @@ void radix__tlb_flush(struct mmu_gather *tlb)
int psize = 0;
struct mm_struct *mm = tlb->mm;
int page_size = tlb->page_size;
+ unsigned long start = tlb->start;
+ unsigned long end = tlb->end;
/*
* if page size is not something we understand, do a full mm flush
@@ -847,15 +871,45 @@ void radix__tlb_flush(struct mmu_gather *tlb)
*/
if (tlb->fullmm) {
__flush_all_mm(mm, true);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
+ } else if (mm_tlb_flush_nested(mm)) {
+ /*
+ * If there is a concurrent invalidation that is clearing ptes,
+ * then it's possible this invalidation will miss one of those
+ * cleared ptes and miss flushing the TLB. If this invalidate
+ * returns before the other one flushes TLBs, that can result
+ * in it returning while there are still valid TLBs inside the
+ * range to be invalidated.
+ *
+ * See mm/memory.c:tlb_finish_mmu() for more details.
+ *
+ * The solution to this is ensure the entire range is always
+ * flushed here. The problem for powerpc is that the flushes
+ * are page size specific, so this "forced flush" would not
+ * do the right thing if there are a mix of page sizes in
+ * the range to be invalidated. So use __radix__flush_tlb_range
+ * which invalidates all possible page sizes in the range.
+ *
+ * A PWC flush is probably not required because the core code
+ * shouldn't free page tables in this path, but accounting
+ * for the possibility makes us a bit more robust.
+ *
+ * need_flush_all is an uncommon case because page table
+ * teardown should be done with exclusive locks held (but
+ * after locks are dropped another invalidate could come
+ * in), it could be optimized further if necessary.
+ */
+ if (!tlb->need_flush_all)
+ __radix__flush_tlb_range(mm, start, end, true);
+ else
+ radix__flush_all_mm(mm);
+#endif
} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
if (!tlb->need_flush_all)
radix__flush_tlb_mm(mm);
else
radix__flush_all_mm(mm);
} else {
- unsigned long start = tlb->start;
- unsigned long end = tlb->end;
-
if (!tlb->need_flush_all)
radix__flush_tlb_range_psize(mm, start, end, psize);
else
@@ -994,24 +1048,6 @@ void radix__flush_tlb_all(void)
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
- unsigned long address)
-{
- /*
- * We track page size in pte only for DD1, So we can
- * call this only on DD1.
- */
- if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) {
- VM_WARN_ON(1);
- return;
- }
-
- if (old_pte & R_PAGE_LARGE)
- radix__flush_tlb_page_psize(mm, address, MMU_PAGE_2M);
- else
- radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize);
-}
-
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
@@ -1043,6 +1079,8 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
if (sib == cpu)
continue;
+ if (!cpu_possible(sib))
+ continue;
if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
flush = true;
}
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index eb82d787d99a..7fd20c52a8ec 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -22,6 +22,7 @@
#include <asm/ppc-opcode.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_booke_hv_asm.h>
+#include <asm/feature-fixups.h>
#ifdef CONFIG_PPC_64K_PAGES
#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE+1)
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S
index 048b8e9f4492..e066a658acac 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/tlb_nohash_low.S
@@ -34,6 +34,8 @@
#include <asm/asm-offsets.h>
#include <asm/processor.h>
#include <asm/bug.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
#if defined(CONFIG_40x)