summaryrefslogtreecommitdiff
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/40x_mmu.c2
-rw-r--r--arch/powerpc/mm/fsl_booke_mmu.c7
-rw-r--r--arch/powerpc/mm/hash_low_64.S9
-rw-r--r--arch/powerpc/mm/hash_utils_64.c79
-rw-r--r--arch/powerpc/mm/hugetlbpage-hash64.c40
-rw-r--r--arch/powerpc/mm/init_32.c16
-rw-r--r--arch/powerpc/mm/init_64.c2
-rw-r--r--arch/powerpc/mm/mem.c78
-rw-r--r--arch/powerpc/mm/numa.c182
-rw-r--r--arch/powerpc/mm/pgtable.c1
-rw-r--r--arch/powerpc/mm/pgtable_32.c6
-rw-r--r--arch/powerpc/mm/pgtable_64.c4
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c4
-rw-r--r--arch/powerpc/mm/stab.c4
-rw-r--r--arch/powerpc/mm/tlb_hash32.c15
-rw-r--r--arch/powerpc/mm/tlb_nohash.c133
16 files changed, 361 insertions, 221 deletions
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c
index 65abfcfaaa9e..1dc2fa5ce1bd 100644
--- a/arch/powerpc/mm/40x_mmu.c
+++ b/arch/powerpc/mm/40x_mmu.c
@@ -135,7 +135,7 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
/* If the size of RAM is not an exact power of two, we may not
* have covered RAM in its entirety with 16 and 4 MiB
* pages. Consequently, restrict the top end of RAM currently
- * allocable so that calls to the LMB to allocate PTEs for "tail"
+ * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail"
* coverage with normal-sized pages (or other reasons) do not
* attempt to allocate outside the allowed range.
*/
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index cdc7526e9c93..4b66a1ece6d8 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -104,9 +104,10 @@ unsigned long p_mapped_by_tlbcam(phys_addr_t pa)
}
/*
- * Set up one of the I/D BAT (block address translation) register pairs.
- * The parameters are not checked; in particular size must be a power
- * of 4 between 4k and 256M.
+ * Set up a variable-size TLB entry (tlbcam). The parameters are not checked;
+ * in particular size must be a power of 4 between 4k and 256M (or 1G, for cpus
+ * that support extended page sizes). Note that while some cpus support a
+ * page size of 4G, we don't allow its use here.
*/
static void settlbcam(int index, unsigned long virt, phys_addr_t phys,
unsigned long size, unsigned long flags, unsigned int pid)
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index a719f53921a5..3079f6b44cf5 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -68,9 +68,6 @@ _GLOBAL(__hash_page_4K)
std r8,STK_PARM(r8)(r1)
std r9,STK_PARM(r9)(r1)
- /* Add _PAGE_PRESENT to access */
- ori r4,r4,_PAGE_PRESENT
-
/* Save non-volatile registers.
* r31 will hold "old PTE"
* r30 is "new PTE"
@@ -347,9 +344,6 @@ _GLOBAL(__hash_page_4K)
std r8,STK_PARM(r8)(r1)
std r9,STK_PARM(r9)(r1)
- /* Add _PAGE_PRESENT to access */
- ori r4,r4,_PAGE_PRESENT
-
/* Save non-volatile registers.
* r31 will hold "old PTE"
* r30 is "new PTE"
@@ -687,9 +681,6 @@ _GLOBAL(__hash_page_64K)
std r8,STK_PARM(r8)(r1)
std r9,STK_PARM(r9)(r1)
- /* Add _PAGE_PRESENT to access */
- ori r4,r4,_PAGE_PRESENT
-
/* Save non-volatile registers.
* r31 will hold "old PTE"
* r30 is "new PTE"
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 3ecdcec0a39e..09dffe6efa46 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -31,7 +31,7 @@
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/signal.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
@@ -384,8 +384,8 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
printk(KERN_INFO "Huge page(16GB) memory: "
"addr = 0x%lX size = 0x%lX pages = %d\n",
phys_addr, block_size, expected_pages);
- if (phys_addr + (16 * GB) <= lmb_end_of_DRAM()) {
- lmb_reserve(phys_addr, block_size * expected_pages);
+ if (phys_addr + (16 * GB) <= memblock_end_of_DRAM()) {
+ memblock_reserve(phys_addr, block_size * expected_pages);
add_gpage(phys_addr, block_size, expected_pages);
}
return 0;
@@ -458,7 +458,7 @@ static void __init htab_init_page_sizes(void)
* and we have at least 1G of RAM at boot
*/
if (mmu_psize_defs[MMU_PAGE_16M].shift &&
- lmb_phys_mem_size() >= 0x40000000)
+ memblock_phys_mem_size() >= 0x40000000)
mmu_vmemmap_psize = MMU_PAGE_16M;
else if (mmu_psize_defs[MMU_PAGE_64K].shift)
mmu_vmemmap_psize = MMU_PAGE_64K;
@@ -520,7 +520,7 @@ static unsigned long __init htab_get_table_size(void)
return 1UL << ppc64_pft_size;
/* round mem_size up to next power of 2 */
- mem_size = lmb_phys_mem_size();
+ mem_size = memblock_phys_mem_size();
rnd_mem_size = 1UL << __ilog2(mem_size);
if (rnd_mem_size < mem_size)
rnd_mem_size <<= 1;
@@ -627,7 +627,7 @@ static void __init htab_initialize(void)
else
limit = 0;
- table = lmb_alloc_base(htab_size_bytes, htab_size_bytes, limit);
+ table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit);
DBG("Hash table allocated at %lx, size: %lx\n", table,
htab_size_bytes);
@@ -647,9 +647,9 @@ static void __init htab_initialize(void)
prot = pgprot_val(PAGE_KERNEL);
#ifdef CONFIG_DEBUG_PAGEALLOC
- linear_map_hash_count = lmb_end_of_DRAM() >> PAGE_SHIFT;
- linear_map_hash_slots = __va(lmb_alloc_base(linear_map_hash_count,
- 1, lmb.rmo_size));
+ linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
+ linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count,
+ 1, memblock.rmo_size));
memset(linear_map_hash_slots, 0, linear_map_hash_count);
#endif /* CONFIG_DEBUG_PAGEALLOC */
@@ -659,16 +659,16 @@ static void __init htab_initialize(void)
*/
/* create bolted the linear mapping in the hash table */
- for (i=0; i < lmb.memory.cnt; i++) {
- base = (unsigned long)__va(lmb.memory.region[i].base);
- size = lmb.memory.region[i].size;
+ for (i=0; i < memblock.memory.cnt; i++) {
+ base = (unsigned long)__va(memblock.memory.region[i].base);
+ size = memblock.memory.region[i].size;
DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
base, size, prot);
#ifdef CONFIG_U3_DART
/* Do not map the DART space. Fortunately, it will be aligned
- * in such a way that it will not cross two lmb regions and
+ * in such a way that it will not cross two memblock regions and
* will fit within a single 16Mb page.
* The DART space is assumed to be a full 16Mb region even if
* we only use 2Mb of that space. We will use more of it later
@@ -871,6 +871,18 @@ static inline int subpage_protection(struct mm_struct *mm, unsigned long ea)
}
#endif
+void hash_failure_debug(unsigned long ea, unsigned long access,
+ unsigned long vsid, unsigned long trap,
+ int ssize, int psize, unsigned long pte)
+{
+ if (!printk_ratelimit())
+ return;
+ pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n",
+ ea, access, current->comm);
+ pr_info(" trap=0x%lx vsid=0x%lx ssize=%d psize=%d pte=0x%lx\n",
+ trap, vsid, ssize, psize, pte);
+}
+
/* Result code is:
* 0 - handled
* 1 - normal page fault
@@ -955,6 +967,17 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
return 1;
}
+ /* Add _PAGE_PRESENT to the required access perm */
+ access |= _PAGE_PRESENT;
+
+ /* Pre-check access permissions (will be re-checked atomically
+ * in __hash_page_XX but this pre-check is a fast path
+ */
+ if (access & ~pte_val(*ptep)) {
+ DBG_LOW(" no access !\n");
+ return 1;
+ }
+
#ifdef CONFIG_HUGETLB_PAGE
if (hugeshift)
return __hash_page_huge(ea, access, vsid, ptep, trap, local,
@@ -967,14 +990,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
pte_val(*(ptep + PTRS_PER_PTE)));
#endif
- /* Pre-check access permissions (will be re-checked atomically
- * in __hash_page_XX but this pre-check is a fast path
- */
- if (access & ~pte_val(*ptep)) {
- DBG_LOW(" no access !\n");
- return 1;
- }
-
/* Do actual hashing */
#ifdef CONFIG_PPC_64K_PAGES
/* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
@@ -1033,6 +1048,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
local, ssize, spp);
}
+ /* Dump some info in case of hash insertion failure, they should
+ * never happen so it is really useful to know if/when they do
+ */
+ if (rc == -1)
+ hash_failure_debug(ea, access, vsid, trap, ssize, psize,
+ pte_val(*ptep));
#ifndef CONFIG_PPC_64K_PAGES
DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
#else
@@ -1051,8 +1072,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
void *pgdir;
pte_t *ptep;
unsigned long flags;
- int local = 0;
- int ssize;
+ int rc, ssize, local = 0;
BUG_ON(REGION_ID(ea) != USER_REGION_ID);
@@ -1098,11 +1118,18 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
/* Hash it in */
#ifdef CONFIG_PPC_HAS_HASH_64K
if (mm->context.user_psize == MMU_PAGE_64K)
- __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
+ rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
else
#endif /* CONFIG_PPC_HAS_HASH_64K */
- __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize,
- subpage_protection(pgdir, ea));
+ rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize,
+ subpage_protection(pgdir, ea));
+
+ /* Dump some info in case of hash insertion failure, they should
+ * never happen so it is really useful to know if/when they do
+ */
+ if (rc == -1)
+ hash_failure_debug(ea, access, vsid, trap, ssize,
+ mm->context.user_psize, pte_val(*ptep));
local_irq_restore(flags);
}
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 199539882f92..cc5c273086cf 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -21,21 +21,13 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
unsigned long old_pte, new_pte;
unsigned long va, rflags, pa, sz;
long slot;
- int err = 1;
BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
/* Search the Linux page table for a match with va */
va = hpt_va(ea, vsid, ssize);
- /*
- * Check the user's access rights to the page. If access should be
- * prevented then send the problem up to do_page_fault.
- */
- if (unlikely(access & ~pte_val(*ptep)))
- goto out;
- /*
- * At this point, we have a pte (old_pte) which can be used to build
+ /* At this point, we have a pte (old_pte) which can be used to build
* or update an HPTE. There are 2 cases:
*
* 1. There is a valid (present) pte with no associated HPTE (this is
@@ -49,9 +41,17 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
do {
old_pte = pte_val(*ptep);
- if (old_pte & _PAGE_BUSY)
- goto out;
+ /* If PTE busy, retry the access */
+ if (unlikely(old_pte & _PAGE_BUSY))
+ return 0;
+ /* If PTE permissions don't match, take page fault */
+ if (unlikely(access & ~old_pte))
+ return 1;
+ /* Try to lock the PTE, add ACCESSED and DIRTY if it was
+ * a write access */
new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
+ if (access & _PAGE_RW)
+ new_pte |= _PAGE_DIRTY;
} while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
old_pte, new_pte));
@@ -121,8 +121,16 @@ repeat:
}
}
- if (unlikely(slot == -2))
- panic("hash_huge_page: pte_insert failed\n");
+ /*
+ * Hypervisor failure. Restore old pte and return -1
+ * similar to __hash_page_*
+ */
+ if (unlikely(slot == -2)) {
+ *ptep = __pte(old_pte);
+ hash_failure_debug(ea, access, vsid, trap, ssize,
+ mmu_psize, old_pte);
+ return -1;
+ }
new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX);
}
@@ -131,9 +139,5 @@ repeat:
* No need to use ldarx/stdcx here
*/
*ptep = __pte(new_pte & ~_PAGE_BUSY);
-
- err = 0;
-
- out:
- return err;
+ return 0;
}
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 767333005eb4..6a6975dc2654 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -30,7 +30,7 @@
#include <linux/highmem.h>
#include <linux/initrd.h>
#include <linux/pagemap.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
#include <linux/gfp.h>
#include <asm/pgalloc.h>
@@ -136,17 +136,17 @@ void __init MMU_init(void)
/* parse args from command line */
MMU_setup();
- if (lmb.memory.cnt > 1) {
+ if (memblock.memory.cnt > 1) {
#ifndef CONFIG_WII
- lmb.memory.cnt = 1;
- lmb_analyze();
+ memblock.memory.cnt = 1;
+ memblock_analyze();
printk(KERN_WARNING "Only using first contiguous memory region");
#else
wii_memory_fixups();
#endif
}
- total_lowmem = total_memory = lmb_end_of_DRAM() - memstart_addr;
+ total_lowmem = total_memory = memblock_end_of_DRAM() - memstart_addr;
lowmem_end_addr = memstart_addr + total_lowmem;
#ifdef CONFIG_FSL_BOOKE
@@ -161,8 +161,8 @@ void __init MMU_init(void)
lowmem_end_addr = memstart_addr + total_lowmem;
#ifndef CONFIG_HIGHMEM
total_memory = total_lowmem;
- lmb_enforce_memory_limit(lowmem_end_addr);
- lmb_analyze();
+ memblock_enforce_memory_limit(lowmem_end_addr);
+ memblock_analyze();
#endif /* CONFIG_HIGHMEM */
}
@@ -200,7 +200,7 @@ void __init *early_get_page(void)
if (init_bootmem_done) {
p = alloc_bootmem_pages(PAGE_SIZE);
} else {
- p = __va(lmb_alloc_base(PAGE_SIZE, PAGE_SIZE,
+ p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE,
__initial_memory_limit_addr));
}
return p;
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index e267f223fdff..71f1415e2472 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -40,7 +40,7 @@
#include <linux/nodemask.h>
#include <linux/module.h>
#include <linux/poison.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 0f594d774bf7..1a84a8d00005 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -32,7 +32,7 @@
#include <linux/initrd.h>
#include <linux/pagemap.h>
#include <linux/suspend.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
#include <linux/hugetlb.h>
#include <asm/pgalloc.h>
@@ -83,13 +83,13 @@ int page_is_ram(unsigned long pfn)
#else
unsigned long paddr = (pfn << PAGE_SHIFT);
int i;
- for (i=0; i < lmb.memory.cnt; i++) {
+ for (i=0; i < memblock.memory.cnt; i++) {
unsigned long base;
- base = lmb.memory.region[i].base;
+ base = memblock.memory.region[i].base;
if ((paddr >= base) &&
- (paddr < (base + lmb.memory.region[i].size))) {
+ (paddr < (base + memblock.memory.region[i].size))) {
return 1;
}
}
@@ -142,14 +142,14 @@ int arch_add_memory(int nid, u64 start, u64 size)
/*
* walk_memory_resource() needs to make sure there is no holes in a given
* memory range. PPC64 does not maintain the memory layout in /proc/iomem.
- * Instead it maintains it in lmb.memory structures. Walk through the
+ * Instead it maintains it in memblock.memory structures. Walk through the
* memory regions, find holes and callback for contiguous regions.
*/
int
walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
void *arg, int (*func)(unsigned long, unsigned long, void *))
{
- struct lmb_property res;
+ struct memblock_property res;
unsigned long pfn, len;
u64 end;
int ret = -1;
@@ -158,7 +158,7 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
res.size = (u64) nr_pages << PAGE_SHIFT;
end = res.base + res.size - 1;
- while ((res.base < end) && (lmb_find(&res) >= 0)) {
+ while ((res.base < end) && (memblock_find(&res) >= 0)) {
pfn = (unsigned long)(res.base >> PAGE_SHIFT);
len = (unsigned long)(res.size >> PAGE_SHIFT);
ret = (*func)(pfn, len, arg);
@@ -184,8 +184,8 @@ void __init do_init_bootmem(void)
unsigned long total_pages;
int boot_mapsize;
- max_low_pfn = max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
- total_pages = (lmb_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT;
+ max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
+ total_pages = (memblock_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT;
#ifdef CONFIG_HIGHMEM
total_pages = total_lowmem >> PAGE_SHIFT;
max_low_pfn = lowmem_end_addr >> PAGE_SHIFT;
@@ -198,16 +198,16 @@ void __init do_init_bootmem(void)
*/
bootmap_pages = bootmem_bootmap_pages(total_pages);
- start = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE);
+ start = memblock_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE);
min_low_pfn = MEMORY_START >> PAGE_SHIFT;
boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn);
/* Add active regions with valid PFNs */
- for (i = 0; i < lmb.memory.cnt; i++) {
+ for (i = 0; i < memblock.memory.cnt; i++) {
unsigned long start_pfn, end_pfn;
- start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
- end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
+ start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT;
+ end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i);
add_active_range(0, start_pfn, end_pfn);
}
@@ -218,17 +218,17 @@ void __init do_init_bootmem(void)
free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT);
/* reserve the sections we're already using */
- for (i = 0; i < lmb.reserved.cnt; i++) {
- unsigned long addr = lmb.reserved.region[i].base +
- lmb_size_bytes(&lmb.reserved, i) - 1;
+ for (i = 0; i < memblock.reserved.cnt; i++) {
+ unsigned long addr = memblock.reserved.region[i].base +
+ memblock_size_bytes(&memblock.reserved, i) - 1;
if (addr < lowmem_end_addr)
- reserve_bootmem(lmb.reserved.region[i].base,
- lmb_size_bytes(&lmb.reserved, i),
+ reserve_bootmem(memblock.reserved.region[i].base,
+ memblock_size_bytes(&memblock.reserved, i),
BOOTMEM_DEFAULT);
- else if (lmb.reserved.region[i].base < lowmem_end_addr) {
+ else if (memblock.reserved.region[i].base < lowmem_end_addr) {
unsigned long adjusted_size = lowmem_end_addr -
- lmb.reserved.region[i].base;
- reserve_bootmem(lmb.reserved.region[i].base,
+ memblock.reserved.region[i].base;
+ reserve_bootmem(memblock.reserved.region[i].base,
adjusted_size, BOOTMEM_DEFAULT);
}
}
@@ -236,9 +236,9 @@ void __init do_init_bootmem(void)
free_bootmem_with_active_regions(0, max_pfn);
/* reserve the sections we're already using */
- for (i = 0; i < lmb.reserved.cnt; i++)
- reserve_bootmem(lmb.reserved.region[i].base,
- lmb_size_bytes(&lmb.reserved, i),
+ for (i = 0; i < memblock.reserved.cnt; i++)
+ reserve_bootmem(memblock.reserved.region[i].base,
+ memblock_size_bytes(&memblock.reserved, i),
BOOTMEM_DEFAULT);
#endif
@@ -251,20 +251,20 @@ void __init do_init_bootmem(void)
/* mark pages that don't exist as nosave */
static int __init mark_nonram_nosave(void)
{
- unsigned long lmb_next_region_start_pfn,
- lmb_region_max_pfn;
+ unsigned long memblock_next_region_start_pfn,
+ memblock_region_max_pfn;
int i;
- for (i = 0; i < lmb.memory.cnt - 1; i++) {
- lmb_region_max_pfn =
- (lmb.memory.region[i].base >> PAGE_SHIFT) +
- (lmb.memory.region[i].size >> PAGE_SHIFT);
- lmb_next_region_start_pfn =
- lmb.memory.region[i+1].base >> PAGE_SHIFT;
+ for (i = 0; i < memblock.memory.cnt - 1; i++) {
+ memblock_region_max_pfn =
+ (memblock.memory.region[i].base >> PAGE_SHIFT) +
+ (memblock.memory.region[i].size >> PAGE_SHIFT);
+ memblock_next_region_start_pfn =
+ memblock.memory.region[i+1].base >> PAGE_SHIFT;
- if (lmb_region_max_pfn < lmb_next_region_start_pfn)
- register_nosave_region(lmb_region_max_pfn,
- lmb_next_region_start_pfn);
+ if (memblock_region_max_pfn < memblock_next_region_start_pfn)
+ register_nosave_region(memblock_region_max_pfn,
+ memblock_next_region_start_pfn);
}
return 0;
@@ -275,8 +275,8 @@ static int __init mark_nonram_nosave(void)
*/
void __init paging_init(void)
{
- unsigned long total_ram = lmb_phys_mem_size();
- phys_addr_t top_of_ram = lmb_end_of_DRAM();
+ unsigned long total_ram = memblock_phys_mem_size();
+ phys_addr_t top_of_ram = memblock_end_of_DRAM();
unsigned long max_zone_pfns[MAX_NR_ZONES];
#ifdef CONFIG_PPC32
@@ -327,7 +327,7 @@ void __init mem_init(void)
swiotlb_init(1);
#endif
- num_physpages = lmb.memory.size >> PAGE_SHIFT;
+ num_physpages = memblock.memory.size >> PAGE_SHIFT;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
#ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -364,7 +364,7 @@ void __init mem_init(void)
highmem_mapnr = lowmem_end_addr >> PAGE_SHIFT;
for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) {
struct page *page = pfn_to_page(pfn);
- if (lmb_is_reserved(pfn << PAGE_SHIFT))
+ if (memblock_is_reserved(pfn << PAGE_SHIFT))
continue;
ClearPageReserved(page);
init_page_count(page);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 80d110635d24..002878ccf90b 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -17,7 +17,7 @@
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
#include <linux/of.h>
#include <linux/pfn.h>
#include <asm/sparsemem.h>
@@ -42,6 +42,12 @@ EXPORT_SYMBOL(node_data);
static int min_common_depth;
static int n_mem_addr_cells, n_mem_size_cells;
+static int form1_affinity;
+
+#define MAX_DISTANCE_REF_POINTS 4
+static int distance_ref_points_depth;
+static const unsigned int *distance_ref_points;
+static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
/*
* Allocate node_to_cpumask_map based on number of available nodes
@@ -204,6 +210,39 @@ static const u32 *of_get_usable_memory(struct device_node *memory)
return prop;
}
+int __node_distance(int a, int b)
+{
+ int i;
+ int distance = LOCAL_DISTANCE;
+
+ if (!form1_affinity)
+ return distance;
+
+ for (i = 0; i < distance_ref_points_depth; i++) {
+ if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
+ break;
+
+ /* Double the distance for each NUMA level */
+ distance *= 2;
+ }
+
+ return distance;
+}
+
+static void initialize_distance_lookup_table(int nid,
+ const unsigned int *associativity)
+{
+ int i;
+
+ if (!form1_affinity)
+ return;
+
+ for (i = 0; i < distance_ref_points_depth; i++) {
+ distance_lookup_table[nid][i] =
+ associativity[distance_ref_points[i]];
+ }
+}
+
/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
* info is found.
*/
@@ -225,6 +264,10 @@ static int of_node_to_nid_single(struct device_node *device)
/* POWER4 LPAR uses 0xffff as invalid node */
if (nid == 0xffff || nid >= MAX_NUMNODES)
nid = -1;
+
+ if (nid > 0 && tmp[0] >= distance_ref_points_depth)
+ initialize_distance_lookup_table(nid, tmp);
+
out:
return nid;
}
@@ -251,26 +294,10 @@ int of_node_to_nid(struct device_node *device)
}
EXPORT_SYMBOL_GPL(of_node_to_nid);
-/*
- * In theory, the "ibm,associativity" property may contain multiple
- * associativity lists because a resource may be multiply connected
- * into the machine. This resource then has different associativity
- * characteristics relative to its multiple connections. We ignore
- * this for now. We also assume that all cpu and memory sets have
- * their distances represented at a common level. This won't be
- * true for hierarchical NUMA.
- *
- * In any case the ibm,associativity-reference-points should give
- * the correct depth for a normal NUMA system.
- *
- * - Dave Hansen <haveblue@us.ibm.com>
- */
static int __init find_min_common_depth(void)
{
- int depth, index;
- const unsigned int *ref_points;
+ int depth;
struct device_node *rtas_root;
- unsigned int len;
struct device_node *chosen;
const char *vec5;
@@ -280,18 +307,28 @@ static int __init find_min_common_depth(void)
return -1;
/*
- * this property is 2 32-bit integers, each representing a level of
- * depth in the associativity nodes. The first is for an SMP
- * configuration (should be all 0's) and the second is for a normal
- * NUMA configuration.
+ * This property is a set of 32-bit integers, each representing
+ * an index into the ibm,associativity nodes.
+ *
+ * With form 0 affinity the first integer is for an SMP configuration
+ * (should be all 0's) and the second is for a normal NUMA
+ * configuration. We have only one level of NUMA.
+ *
+ * With form 1 affinity the first integer is the most significant
+ * NUMA boundary and the following are progressively less significant
+ * boundaries. There can be more than one level of NUMA.
*/
- index = 1;
- ref_points = of_get_property(rtas_root,
- "ibm,associativity-reference-points", &len);
+ distance_ref_points = of_get_property(rtas_root,
+ "ibm,associativity-reference-points",
+ &distance_ref_points_depth);
+
+ if (!distance_ref_points) {
+ dbg("NUMA: ibm,associativity-reference-points not found.\n");
+ goto err;
+ }
+
+ distance_ref_points_depth /= sizeof(int);
- /*
- * For form 1 affinity information we want the first field
- */
#define VEC5_AFFINITY_BYTE 5
#define VEC5_AFFINITY 0x80
chosen = of_find_node_by_path("/chosen");
@@ -299,19 +336,38 @@ static int __init find_min_common_depth(void)
vec5 = of_get_property(chosen, "ibm,architecture-vec-5", NULL);
if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & VEC5_AFFINITY)) {
dbg("Using form 1 affinity\n");
- index = 0;
+ form1_affinity = 1;
}
}
- if ((len >= 2 * sizeof(unsigned int)) && ref_points) {
- depth = ref_points[index];
+ if (form1_affinity) {
+ depth = distance_ref_points[0];
} else {
- dbg("NUMA: ibm,associativity-reference-points not found.\n");
- depth = -1;
+ if (distance_ref_points_depth < 2) {
+ printk(KERN_WARNING "NUMA: "
+ "short ibm,associativity-reference-points\n");
+ goto err;
+ }
+
+ depth = distance_ref_points[1];
}
- of_node_put(rtas_root);
+ /*
+ * Warn and cap if the hardware supports more than
+ * MAX_DISTANCE_REF_POINTS domains.
+ */
+ if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) {
+ printk(KERN_WARNING "NUMA: distance array capped at "
+ "%d entries\n", MAX_DISTANCE_REF_POINTS);
+ distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
+ }
+
+ of_node_put(rtas_root);
return depth;
+
+err:
+ of_node_put(rtas_root);
+ return -1;
}
static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
@@ -351,7 +407,7 @@ struct of_drconf_cell {
#define DRCONF_MEM_RESERVED 0x00000080
/*
- * Read the next lmb list entry from the ibm,dynamic-memory property
+ * Read the next memblock list entry from the ibm,dynamic-memory property
* and return the information in the provided of_drconf_cell structure.
*/
static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp)
@@ -372,8 +428,8 @@ static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp)
/*
* Retreive and validate the ibm,dynamic-memory property of the device tree.
*
- * The layout of the ibm,dynamic-memory property is a number N of lmb
- * list entries followed by N lmb list entries. Each lmb list entry
+ * The layout of the ibm,dynamic-memory property is a number N of memblock
+ * list entries followed by N memblock list entries. Each memblock list entry
* contains information as layed out in the of_drconf_cell struct above.
*/
static int of_get_drconf_memory(struct device_node *memory, const u32 **dm)
@@ -540,19 +596,19 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start,
unsigned long size)
{
/*
- * We use lmb_end_of_DRAM() in here instead of memory_limit because
+ * We use memblock_end_of_DRAM() in here instead of memory_limit because
* we've already adjusted it for the limit and it takes care of
* having memory holes below the limit. Also, in the case of
* iommu_is_off, memory_limit is not set but is implicitly enforced.
*/
- if (start + size <= lmb_end_of_DRAM())
+ if (start + size <= memblock_end_of_DRAM())
return size;
- if (start >= lmb_end_of_DRAM())
+ if (start >= memblock_end_of_DRAM())
return 0;
- return lmb_end_of_DRAM() - start;
+ return memblock_end_of_DRAM() - start;
}
/*
@@ -731,7 +787,7 @@ new_range:
}
/*
- * Now do the same thing for each LMB listed in the ibm,dynamic-memory
+ * Now do the same thing for each MEMBLOCK listed in the ibm,dynamic-memory
* property in the ibm,dynamic-reconfiguration-memory node.
*/
memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
@@ -743,8 +799,8 @@ new_range:
static void __init setup_nonnuma(void)
{
- unsigned long top_of_ram = lmb_end_of_DRAM();
- unsigned long total_ram = lmb_phys_mem_size();
+ unsigned long top_of_ram = memblock_end_of_DRAM();
+ unsigned long total_ram = memblock_phys_mem_size();
unsigned long start_pfn, end_pfn;
unsigned int i, nid = 0;
@@ -753,9 +809,9 @@ static void __init setup_nonnuma(void)
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
(top_of_ram - total_ram) >> 20);
- for (i = 0; i < lmb.memory.cnt; ++i) {
- start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
- end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
+ for (i = 0; i < memblock.memory.cnt; ++i) {
+ start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT;
+ end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i);
fake_numa_create_new_node(end_pfn, &nid);
add_active_range(nid, start_pfn, end_pfn);
@@ -813,7 +869,7 @@ static void __init dump_numa_memory_topology(void)
count = 0;
- for (i = 0; i < lmb_end_of_DRAM();
+ for (i = 0; i < memblock_end_of_DRAM();
i += (1 << SECTION_SIZE_BITS)) {
if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
if (count == 0)
@@ -833,7 +889,7 @@ static void __init dump_numa_memory_topology(void)
}
/*
- * Allocate some memory, satisfying the lmb or bootmem allocator where
+ * Allocate some memory, satisfying the memblock or bootmem allocator where
* required. nid is the preferred node and end is the physical address of
* the highest address in the node.
*
@@ -847,11 +903,11 @@ static void __init *careful_zallocation(int nid, unsigned long size,
int new_nid;
unsigned long ret_paddr;
- ret_paddr = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
+ ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT);
/* retry over all memory */
if (!ret_paddr)
- ret_paddr = __lmb_alloc_base(size, align, lmb_end_of_DRAM());
+ ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM());
if (!ret_paddr)
panic("numa.c: cannot allocate %lu bytes for node %d",
@@ -861,14 +917,14 @@ static void __init *careful_zallocation(int nid, unsigned long size,
/*
* We initialize the nodes in numeric order: 0, 1, 2...
- * and hand over control from the LMB allocator to the
+ * and hand over control from the MEMBLOCK allocator to the
* bootmem allocator. If this function is called for
* node 5, then we know that all nodes <5 are using the
- * bootmem allocator instead of the LMB allocator.
+ * bootmem allocator instead of the MEMBLOCK allocator.
*
* So, check the nid from which this allocation came
* and double check to see if we need to use bootmem
- * instead of the LMB. We don't free the LMB memory
+ * instead of the MEMBLOCK. We don't free the MEMBLOCK memory
* since it would be useless.
*/
new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
@@ -893,9 +949,9 @@ static void mark_reserved_regions_for_nid(int nid)
struct pglist_data *node = NODE_DATA(nid);
int i;
- for (i = 0; i < lmb.reserved.cnt; i++) {
- unsigned long physbase = lmb.reserved.region[i].base;
- unsigned long size = lmb.reserved.region[i].size;
+ for (i = 0; i < memblock.reserved.cnt; i++) {
+ unsigned long physbase = memblock.reserved.region[i].base;
+ unsigned long size = memblock.reserved.region[i].size;
unsigned long start_pfn = physbase >> PAGE_SHIFT;
unsigned long end_pfn = PFN_UP(physbase + size);
struct node_active_region node_ar;
@@ -903,7 +959,7 @@ static void mark_reserved_regions_for_nid(int nid)
node->node_spanned_pages;
/*
- * Check to make sure that this lmb.reserved area is
+ * Check to make sure that this memblock.reserved area is
* within the bounds of the node that we care about.
* Checking the nid of the start and end points is not
* sufficient because the reserved area could span the
@@ -961,7 +1017,7 @@ void __init do_init_bootmem(void)
int nid;
min_low_pfn = 0;
- max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
+ max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
max_pfn = max_low_pfn;
if (parse_numa_properties())
@@ -1038,7 +1094,7 @@ void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
- max_zone_pfns[ZONE_DMA] = lmb_end_of_DRAM() >> PAGE_SHIFT;
+ max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT;
free_area_init_nodes(max_zone_pfns);
}
@@ -1113,7 +1169,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory,
/*
* Find the node associated with a hot added memory section for memory
* represented in the device tree as a node (i.e. memory@XXXX) for
- * each lmb.
+ * each memblock.
*/
int hot_add_node_scn_to_nid(unsigned long scn_addr)
{
@@ -1154,8 +1210,8 @@ int hot_add_node_scn_to_nid(unsigned long scn_addr)
/*
* Find the node associated with a hot added memory section. Section
- * corresponds to a SPARSEMEM section, not an LMB. It is assumed that
- * sections are fully contained within a single LMB.
+ * corresponds to a SPARSEMEM section, not an MEMBLOCK. It is assumed that
+ * sections are fully contained within a single MEMBLOCK.
*/
int hot_add_scn_to_nid(unsigned long scn_addr)
{
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index ebc2f38eb381..2c7e801ab20b 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -92,7 +92,6 @@ static void pte_free_rcu_callback(struct rcu_head *head)
static void pte_free_submit(struct pte_freelist_batch *batch)
{
- INIT_RCU_HEAD(&batch->rcu);
call_rcu(&batch->rcu, pte_free_rcu_callback);
}
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 34347b2e7e31..a87ead0138b4 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -26,7 +26,7 @@
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/highmem.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
#include <linux/slab.h>
#include <asm/pgtable.h>
@@ -198,7 +198,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
* mem_init() sets high_memory so only do the check after that.
*/
if (mem_init_done && (p < virt_to_phys(high_memory)) &&
- !(__allow_ioremap_reserved && lmb_is_region_reserved(p, size))) {
+ !(__allow_ioremap_reserved && memblock_is_region_reserved(p, size))) {
printk("__ioremap(): phys addr 0x%llx is RAM lr %p\n",
(unsigned long long)p, __builtin_return_address(0));
return NULL;
@@ -331,7 +331,7 @@ void __init mapin_ram(void)
s = mmu_mapin_ram(top);
__mapin_ram_chunk(s, top);
- top = lmb_end_of_DRAM();
+ top = memblock_end_of_DRAM();
s = wii_mmu_mapin_mem2(top);
__mapin_ram_chunk(s, top);
}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index d050fc8d9714..21d6dfab7942 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -34,7 +34,7 @@
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/bootmem.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
#include <linux/slab.h>
#include <asm/pgalloc.h>
@@ -67,7 +67,7 @@ static void *early_alloc_pgtable(unsigned long size)
if (init_bootmem_done)
pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS));
else
- pt = __va(lmb_alloc_base(size, size,
+ pt = __va(memblock_alloc_base(size, size,
__pa(MAX_DMA_ADDRESS)));
memset(pt, 0, size);
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index f11c2cdcb0fe..f8a01829d64f 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -26,7 +26,7 @@
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/highmem.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
#include <asm/prom.h>
#include <asm/mmu.h>
@@ -223,7 +223,7 @@ void __init MMU_init_hw(void)
* Find some memory for the hash table.
*/
if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
- Hash = __va(lmb_alloc_base(Hash_size, Hash_size,
+ Hash = __va(memblock_alloc_base(Hash_size, Hash_size,
__initial_memory_limit_addr));
cacheable_memzero(Hash, Hash_size);
_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 687fddaa24c5..446a01842a73 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -12,7 +12,7 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/lmb.h>
+#include <linux/memblock.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
@@ -252,7 +252,7 @@ void __init stabs_alloc(void)
if (cpu == 0)
continue; /* stab for CPU 0 is statically allocated */
- newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
+ newstab = memblock_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
1<<SID_SHIFT);
newstab = (unsigned long)__va(newstab);
diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c
index 8aaa8b7eb324..690566b66e8e 100644
--- a/arch/powerpc/mm/tlb_hash32.c
+++ b/arch/powerpc/mm/tlb_hash32.c
@@ -89,17 +89,6 @@ void tlb_flush(struct mmu_gather *tlb)
* -- Cort
*/
-/*
- * 750 SMP is a Bad Idea because the 750 doesn't broadcast all
- * the cache operations on the bus. Hence we need to use an IPI
- * to get the other CPU(s) to invalidate their TLBs.
- */
-#ifdef CONFIG_SMP_750
-#define FINISH_FLUSH smp_send_tlb_invalidate(0)
-#else
-#define FINISH_FLUSH do { } while (0)
-#endif
-
static void flush_range(struct mm_struct *mm, unsigned long start,
unsigned long end)
{
@@ -138,7 +127,6 @@ static void flush_range(struct mm_struct *mm, unsigned long start,
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
flush_range(&init_mm, start, end);
- FINISH_FLUSH;
}
EXPORT_SYMBOL(flush_tlb_kernel_range);
@@ -162,7 +150,6 @@ void flush_tlb_mm(struct mm_struct *mm)
*/
for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
flush_range(mp->vm_mm, mp->vm_start, mp->vm_end);
- FINISH_FLUSH;
}
EXPORT_SYMBOL(flush_tlb_mm);
@@ -179,7 +166,6 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr);
if (!pmd_none(*pmd))
flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1);
- FINISH_FLUSH;
}
EXPORT_SYMBOL(flush_tlb_page);
@@ -192,6 +178,5 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
flush_range(vma->vm_mm, start, end);
- FINISH_FLUSH;
}
EXPORT_SYMBOL(flush_tlb_range);
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index e81d5d67f834..fe391e942521 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -34,7 +34,7 @@
#include <linux/pagemap.h>
#include <linux/preempt.h>
#include <linux/spinlock.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
@@ -46,6 +46,7 @@
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
[MMU_PAGE_4K] = {
.shift = 12,
+ .ind = 20,
.enc = BOOK3E_PAGESZ_4K,
},
[MMU_PAGE_16K] = {
@@ -54,6 +55,7 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
},
[MMU_PAGE_64K] = {
.shift = 16,
+ .ind = 28,
.enc = BOOK3E_PAGESZ_64K,
},
[MMU_PAGE_1M] = {
@@ -62,6 +64,7 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
},
[MMU_PAGE_16M] = {
.shift = 24,
+ .ind = 36,
.enc = BOOK3E_PAGESZ_16M,
},
[MMU_PAGE_256M] = {
@@ -344,16 +347,108 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
}
}
-/*
- * Early initialization of the MMU TLB code
- */
-static void __early_init_mmu(int boot_cpu)
+static void setup_page_sizes(void)
+{
+ unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG);
+ unsigned int tlb0ps = mfspr(SPRN_TLB0PS);
+ unsigned int eptcfg = mfspr(SPRN_EPTCFG);
+ int i, psize;
+
+ /* Look for supported direct sizes */
+ for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+ struct mmu_psize_def *def = &mmu_psize_defs[psize];
+
+ if (tlb0ps & (1U << (def->shift - 10)))
+ def->flags |= MMU_PAGE_SIZE_DIRECT;
+ }
+
+ /* Indirect page sizes supported ? */
+ if ((tlb0cfg & TLBnCFG_IND) == 0)
+ goto no_indirect;
+
+ /* Now, we only deal with one IND page size for each
+ * direct size. Hopefully all implementations today are
+ * unambiguous, but we might want to be careful in the
+ * future.
+ */
+ for (i = 0; i < 3; i++) {
+ unsigned int ps, sps;
+
+ sps = eptcfg & 0x1f;
+ eptcfg >>= 5;
+ ps = eptcfg & 0x1f;
+ eptcfg >>= 5;
+ if (!ps || !sps)
+ continue;
+ for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+ struct mmu_psize_def *def = &mmu_psize_defs[psize];
+
+ if (ps == (def->shift - 10))
+ def->flags |= MMU_PAGE_SIZE_INDIRECT;
+ if (sps == (def->shift - 10))
+ def->ind = ps + 10;
+ }
+ }
+ no_indirect:
+
+ /* Cleanup array and print summary */
+ pr_info("MMU: Supported page sizes\n");
+ for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+ struct mmu_psize_def *def = &mmu_psize_defs[psize];
+ const char *__page_type_names[] = {
+ "unsupported",
+ "direct",
+ "indirect",
+ "direct & indirect"
+ };
+ if (def->flags == 0) {
+ def->shift = 0;
+ continue;
+ }
+ pr_info(" %8ld KB as %s\n", 1ul << (def->shift - 10),
+ __page_type_names[def->flags & 0x3]);
+ }
+}
+
+static void setup_mmu_htw(void)
{
extern unsigned int interrupt_base_book3e;
extern unsigned int exc_data_tlb_miss_htw_book3e;
extern unsigned int exc_instruction_tlb_miss_htw_book3e;
unsigned int *ibase = &interrupt_base_book3e;
+
+ /* Check if HW tablewalk is present, and if yes, enable it by:
+ *
+ * - patching the TLB miss handlers to branch to the
+ * one dedicates to it
+ *
+ * - setting the global book3e_htw_enabled
+ */
+ unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG);
+
+ if ((tlb0cfg & TLBnCFG_IND) &&
+ (tlb0cfg & TLBnCFG_PT)) {
+ /* Our exceptions vectors start with a NOP and -then- a branch
+ * to deal with single stepping from userspace which stops on
+ * the second instruction. Thus we need to patch the second
+ * instruction of the exception, not the first one
+ */
+ patch_branch(ibase + (0x1c0 / 4) + 1,
+ (unsigned long)&exc_data_tlb_miss_htw_book3e, 0);
+ patch_branch(ibase + (0x1e0 / 4) + 1,
+ (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0);
+ book3e_htw_enabled = 1;
+ }
+ pr_info("MMU: Book3E Page Tables %s\n",
+ book3e_htw_enabled ? "Enabled" : "Disabled");
+}
+
+/*
+ * Early initialization of the MMU TLB code
+ */
+static void __early_init_mmu(int boot_cpu)
+{
unsigned int mas4;
/* XXX This will have to be decided at runtime, but right
@@ -370,35 +465,17 @@ static void __early_init_mmu(int boot_cpu)
*/
mmu_vmemmap_psize = MMU_PAGE_16M;
- /* Check if HW tablewalk is present, and if yes, enable it by:
- *
- * - patching the TLB miss handlers to branch to the
- * one dedicates to it
- *
- * - setting the global book3e_htw_enabled
- *
- * - Set MAS4:INDD and default page size
- */
-
/* XXX This code only checks for TLB 0 capabilities and doesn't
* check what page size combos are supported by the HW. It
* also doesn't handle the case where a separate array holds
* the IND entries from the array loaded by the PT.
*/
if (boot_cpu) {
- unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG);
+ /* Look for supported page sizes */
+ setup_page_sizes();
- /* Check if HW loader is supported */
- if ((tlb0cfg & TLBnCFG_IND) &&
- (tlb0cfg & TLBnCFG_PT)) {
- patch_branch(ibase + (0x1c0 / 4),
- (unsigned long)&exc_data_tlb_miss_htw_book3e, 0);
- patch_branch(ibase + (0x1e0 / 4),
- (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0);
- book3e_htw_enabled = 1;
- }
- pr_info("MMU: Book3E Page Tables %s\n",
- book3e_htw_enabled ? "Enabled" : "Disabled");
+ /* Look for HW tablewalk support */
+ setup_mmu_htw();
}
/* Set MAS4 based on page table setting */
@@ -426,7 +503,7 @@ static void __early_init_mmu(int boot_cpu)
/* Set the global containing the top of the linear mapping
* for use by the TLB miss code
*/
- linear_map_top = lmb_end_of_DRAM();
+ linear_map_top = memblock_end_of_DRAM();
/* A sync won't hurt us after mucking around with
* the MMU configuration