Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/4xx_mmu.c        |   4
-rw-r--r--  arch/powerpc/mm/fault.c          |   7
-rw-r--r--  arch/powerpc/mm/fsl_booke_mmu.c  |   2
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c  |  13
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c    | 147
-rw-r--r--  arch/powerpc/mm/imalloc.c        |   5
-rw-r--r--  arch/powerpc/mm/init_32.c        |   5
-rw-r--r--  arch/powerpc/mm/init_64.c        |   3
-rw-r--r--  arch/powerpc/mm/lmb.c            |  33
-rw-r--r--  arch/powerpc/mm/mem.c            |  23
-rw-r--r--  arch/powerpc/mm/mmu_decl.h       |  14
-rw-r--r--  arch/powerpc/mm/numa.c           | 146
-rw-r--r--  arch/powerpc/mm/pgtable_64.c     |   5
-rw-r--r--  arch/powerpc/mm/slb.c            |  16
-rw-r--r--  arch/powerpc/mm/slb_low.S        |   6
-rw-r--r--  arch/powerpc/mm/stab.c           |  23
-rw-r--r--  arch/powerpc/mm/tlb_32.c         |   6
-rw-r--r--  arch/powerpc/mm/tlb_64.c         |   6
18 files changed, 333 insertions, 131 deletions
diff --git a/arch/powerpc/mm/4xx_mmu.c b/arch/powerpc/mm/4xx_mmu.c
index b7bcbc232f39..4d006aa1a0d1 100644
--- a/arch/powerpc/mm/4xx_mmu.c
+++ b/arch/powerpc/mm/4xx_mmu.c
@@ -110,13 +110,11 @@ unsigned long __init mmu_mapin_ram(void)
pmd_t *pmdp;
unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE;
- spin_lock(&init_mm.page_table_lock);
pmdp = pmd_offset(pgd_offset_k(v), v);
pmd_val(*pmdp++) = val;
pmd_val(*pmdp++) = val;
pmd_val(*pmdp++) = val;
pmd_val(*pmdp++) = val;
- spin_unlock(&init_mm.page_table_lock);
v += LARGE_PAGE_SIZE_16M;
p += LARGE_PAGE_SIZE_16M;
@@ -127,10 +125,8 @@ unsigned long __init mmu_mapin_ram(void)
pmd_t *pmdp;
unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE;
- spin_lock(&init_mm.page_table_lock);
pmdp = pmd_offset(pgd_offset_k(v), v);
pmd_val(*pmdp) = val;
- spin_unlock(&init_mm.page_table_lock);
v += LARGE_PAGE_SIZE_4M;
p += LARGE_PAGE_SIZE_4M;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 93d4fbfdb724..a4815d316722 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -81,7 +81,8 @@ static int store_updates_sp(struct pt_regs *regs)
}
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-static void do_dabr(struct pt_regs *regs, unsigned long error_code)
+static void do_dabr(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code)
{
siginfo_t info;
@@ -99,7 +100,7 @@ static void do_dabr(struct pt_regs *regs, unsigned long error_code)
info.si_signo = SIGTRAP;
info.si_errno = 0;
info.si_code = TRAP_HWBKPT;
- info.si_addr = (void __user *)regs->nip;
+ info.si_addr = (void __user *)address;
force_sig_info(SIGTRAP, &info, current);
}
#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
@@ -159,7 +160,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
if (error_code & DSISR_DABRMATCH) {
/* DABR match */
- do_dabr(regs, error_code);
+ do_dabr(regs, address, error_code);
return 0;
}
#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index af9ca0eb6d55..5d581bb3aa12 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -1,5 +1,5 @@
/*
- * Modifications by Kumar Gala (kumar.gala@freescale.com) to support
+ * Modifications by Kumar Gala (galak@kernel.crashing.org) to support
* E500 Book E processors.
*
* Copyright 2004 Freescale Semiconductor, Inc
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 706e8a63ced9..149351a84b94 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -368,7 +368,7 @@ static unsigned long __init htab_get_table_size(void)
unsigned long mem_size, rnd_mem_size, pteg_count;
/* If hash size isn't already provided by the platform, we try to
- * retreive it from the device-tree. If it's not there neither, we
+ * retrieve it from the device-tree. If it's not there neither, we
* calculate it now based on the total RAM size
*/
if (ppc64_pft_size == 0)
@@ -456,7 +456,7 @@ void __init htab_initialize(void)
/* create bolted the linear mapping in the hash table */
for (i=0; i < lmb.memory.cnt; i++) {
- base = lmb.memory.region[i].base + KERNELBASE;
+ base = (unsigned long)__va(lmb.memory.region[i].base);
size = lmb.memory.region[i].size;
DBG("creating mapping for region: %lx : %lx\n", base, size);
@@ -498,8 +498,8 @@ void __init htab_initialize(void)
* for either 4K or 16MB pages.
*/
if (tce_alloc_start) {
- tce_alloc_start += KERNELBASE;
- tce_alloc_end += KERNELBASE;
+ tce_alloc_start = (unsigned long)__va(tce_alloc_start);
+ tce_alloc_end = (unsigned long)__va(tce_alloc_end);
if (base + size >= tce_alloc_start)
tce_alloc_start = base + size + 1;
@@ -514,7 +514,7 @@ void __init htab_initialize(void)
#undef KB
#undef MB
-void __init htab_initialize_secondary(void)
+void htab_initialize_secondary(void)
{
if (!platform_is_lpar())
mtspr(SPRN_SDR1, _SDR1);
@@ -601,7 +601,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
/* Handle hugepage regions */
if (unlikely(in_hugepage_area(mm->context, ea))) {
DBG_LOW(" -> huge page !\n");
- return hash_huge_page(mm, access, ea, vsid, local);
+ return hash_huge_page(mm, access, ea, vsid, local, trap);
}
/* Get PTE and page size from page tables */
@@ -644,6 +644,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
DBG_LOW(" -> rc=%d\n", rc);
return rc;
}
+EXPORT_SYMBOL_GPL(hash_page);
void hash_preload(struct mm_struct *mm, unsigned long ea,
unsigned long access, unsigned long trap)
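
For reference, the KERNELBASE-to-__va() conversions in the hunks above follow the ppc64 linear-map rule virt = phys + PAGE_OFFSET. A minimal standalone sketch of that arithmetic (the PAGE_OFFSET value and the LMB base below are assumptions for illustration, not taken from the kernel headers):

#include <stdio.h>

/* assumed ppc64 linear-map base; __va(p) is effectively p + PAGE_OFFSET */
#define EXAMPLE_PAGE_OFFSET 0xc000000000000000UL

static unsigned long example_va(unsigned long phys)
{
	return phys + EXAMPLE_PAGE_OFFSET;
}

int main(void)
{
	unsigned long lmb_base = 0x2000000UL;	/* hypothetical LMB region base */

	printf("phys 0x%lx maps to virt 0x%lx\n", lmb_base, example_va(lmb_base));
	return 0;
}
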
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 426c269e552e..b51bb28c054b 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -148,43 +148,63 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
return 0;
}
+struct slb_flush_info {
+ struct mm_struct *mm;
+ u16 newareas;
+};
+
static void flush_low_segments(void *parm)
{
- u16 areas = (unsigned long) parm;
+ struct slb_flush_info *fi = parm;
unsigned long i;
- asm volatile("isync" : : : "memory");
+ BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS);
+
+ if (current->active_mm != fi->mm)
+ return;
- BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS);
+ /* Only need to do anything if this CPU is working in the same
+ * mm as the one which has changed */
+ /* update the paca copy of the context struct */
+ get_paca()->context = current->active_mm->context;
+
+ asm volatile("isync" : : : "memory");
for (i = 0; i < NUM_LOW_AREAS; i++) {
- if (! (areas & (1U << i)))
+ if (! (fi->newareas & (1U << i)))
continue;
asm volatile("slbie %0"
: : "r" ((i << SID_SHIFT) | SLBIE_C));
}
-
asm volatile("isync" : : : "memory");
}
static void flush_high_segments(void *parm)
{
- u16 areas = (unsigned long) parm;
+ struct slb_flush_info *fi = parm;
unsigned long i, j;
- asm volatile("isync" : : : "memory");
- BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS);
+ BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS);
+
+ if (current->active_mm != fi->mm)
+ return;
+
+ /* Only need to do anything if this CPU is working in the same
+ * mm as the one which has changed */
+
+ /* update the paca copy of the context struct */
+ get_paca()->context = current->active_mm->context;
+ asm volatile("isync" : : : "memory");
for (i = 0; i < NUM_HIGH_AREAS; i++) {
- if (! (areas & (1U << i)))
+ if (! (fi->newareas & (1U << i)))
continue;
for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
asm volatile("slbie %0"
:: "r" (((i << HTLB_AREA_SHIFT)
- + (j << SID_SHIFT)) | SLBIE_C));
+ + (j << SID_SHIFT)) | SLBIE_C));
}
-
asm volatile("isync" : : : "memory");
}
@@ -229,6 +249,7 @@ static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
{
unsigned long i;
+ struct slb_flush_info fi;
BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);
@@ -244,19 +265,20 @@ static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
mm->context.low_htlb_areas |= newareas;
- /* update the paca copy of the context struct */
- get_paca()->context = mm->context;
-
/* the context change must make it to memory before the flush,
* so that further SLB misses do the right thing. */
mb();
- on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1);
+
+ fi.mm = mm;
+ fi.newareas = newareas;
+ on_each_cpu(flush_low_segments, &fi, 0, 1);
return 0;
}
static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
{
+ struct slb_flush_info fi;
unsigned long i;
BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
@@ -280,22 +302,25 @@ static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
/* the context change must make it to memory before the flush,
* so that further SLB misses do the right thing. */
mb();
- on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1);
+
+ fi.mm = mm;
+ fi.newareas = newareas;
+ on_each_cpu(flush_high_segments, &fi, 0, 1);
return 0;
}
int prepare_hugepage_range(unsigned long addr, unsigned long len)
{
- int err;
+ int err = 0;
if ( (addr+len) < addr )
return -EINVAL;
- if ((addr + len) < 0x100000000UL)
+ if (addr < 0x100000000UL)
err = open_low_hpage_areas(current->mm,
LOW_ESID_MASK(addr, len));
- else
+ if ((addr + len) > 0x100000000UL)
err = open_high_hpage_areas(current->mm,
HTLB_AREA_MASK(addr, len));
if (err) {
@@ -524,6 +549,17 @@ fail:
return addr;
}
+static int htlb_check_hinted_area(unsigned long addr, unsigned long len)
+{
+ struct vm_area_struct *vma;
+
+ vma = find_vma(current->mm, addr);
+ if (!vma || ((addr + len) <= vma->vm_start))
+ return 0;
+
+ return -ENOMEM;
+}
+
static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
{
unsigned long addr = 0;
@@ -593,15 +629,28 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (!cpu_has_feature(CPU_FTR_16M_PAGE))
return -EINVAL;
+ /* Paranoia, caller should have dealt with this */
+ BUG_ON((addr + len) < addr);
+
if (test_thread_flag(TIF_32BIT)) {
+ /* Paranoia, caller should have dealt with this */
+ BUG_ON((addr + len) > 0x100000000UL);
+
curareas = current->mm->context.low_htlb_areas;
- /* First see if we can do the mapping in the existing
- * low areas */
+ /* First see if we can use the hint address */
+ if (addr && (htlb_check_hinted_area(addr, len) == 0)) {
+ areamask = LOW_ESID_MASK(addr, len);
+ if (open_low_hpage_areas(current->mm, areamask) == 0)
+ return addr;
+ }
+
+ /* Next see if we can map in the existing low areas */
addr = htlb_get_low_area(len, curareas);
if (addr != -ENOMEM)
return addr;
+ /* Finally go looking for areas to open */
lastshift = 0;
for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
! lastshift; areamask >>=1) {
@@ -616,12 +665,22 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
} else {
curareas = current->mm->context.high_htlb_areas;
- /* First see if we can do the mapping in the existing
- * high areas */
+ /* First see if we can use the hint address */
+ /* We discourage 64-bit processes from doing hugepage
+ * mappings below 4GB (must use MAP_FIXED) */
+ if ((addr >= 0x100000000UL)
+ && (htlb_check_hinted_area(addr, len) == 0)) {
+ areamask = HTLB_AREA_MASK(addr, len);
+ if (open_high_hpage_areas(current->mm, areamask) == 0)
+ return addr;
+ }
+
+ /* Next see if we can map in the existing high areas */
addr = htlb_get_high_area(len, curareas);
if (addr != -ENOMEM)
return addr;
+ /* Finally go looking for areas to open */
lastshift = 0;
for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
! lastshift; areamask >>=1) {
@@ -639,8 +698,36 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
return -ENOMEM;
}
+/*
+ * Called by asm hashtable.S for doing lazy icache flush
+ */
+static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
+ pte_t pte, int trap)
+{
+ struct page *page;
+ int i;
+
+ if (!pfn_valid(pte_pfn(pte)))
+ return rflags;
+
+ page = pte_page(pte);
+
+ /* page is dirty */
+ if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
+ if (trap == 0x400) {
+ for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++)
+ __flush_dcache_icache(page_address(page+i));
+ set_bit(PG_arch_1, &page->flags);
+ } else {
+ rflags |= HPTE_R_N;
+ }
+ }
+ return rflags;
+}
+
int hash_huge_page(struct mm_struct *mm, unsigned long access,
- unsigned long ea, unsigned long vsid, int local)
+ unsigned long ea, unsigned long vsid, int local,
+ unsigned long trap)
{
pte_t *ptep;
unsigned long old_pte, new_pte;
@@ -691,6 +778,11 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
rflags = 0x2 | (!(new_pte & _PAGE_RW));
/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
+ if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+ /* No CPU has hugepages but lacks no execute, so we
+ * don't need to worry about that case */
+ rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte),
+ trap);
/* Check if pte already has an hpte (case 2) */
if (unlikely(old_pte & _PAGE_HASHPTE)) {
@@ -703,7 +795,8 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += (old_pte & _PAGE_F_GIX) >> 12;
- if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
+ if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize,
+ local) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
@@ -754,9 +847,7 @@ repeat:
}
/*
- * No need to use ldarx/stdcx here because all who
- * might be updating the pte will hold the
- * page_table_lock
+ * No need to use ldarx/stdcx here
*/
*ptep = __pte(new_pte & ~_PAGE_BUSY);
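
The flush_low_segments()/flush_high_segments() hunks above switch from packing a bitmask into the on_each_cpu() argument pointer to passing a struct slb_flush_info, so each CPU can skip the SLB flush when it is not running the affected mm. A condensed, non-compilable sketch of that pattern, using the kernel symbols already shown in the diff (the helper names here are hypothetical):

struct flush_info {
	struct mm_struct *mm;		/* mm whose hugepage areas changed */
	u16 newareas;			/* bitmask of newly opened areas */
};

static void flush_handler(void *parm)
{
	struct flush_info *fi = parm;

	/* only flush if this CPU is currently running the changed mm */
	if (current->active_mm != fi->mm)
		return;

	/* pick up the updated context, then invalidate the affected SLBs */
	get_paca()->context = current->active_mm->context;
	/* ... isync; slbie each segment set in fi->newareas; isync ... */
}

static void flush_on_all_cpus(struct mm_struct *mm, u16 newareas)
{
	struct flush_info fi = { .mm = mm, .newareas = newareas };

	mb();	/* context update must reach memory before the flush IPIs */
	on_each_cpu(flush_handler, &fi, 0, 1);	/* wait=1: fi lives on the stack */
}

Passing a stack struct (and waiting for completion) also removes the cast dance of smuggling the bitmask through the void * argument.
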
diff --git a/arch/powerpc/mm/imalloc.c b/arch/powerpc/mm/imalloc.c
index f4ca29cf5364..8b0c132bc163 100644
--- a/arch/powerpc/mm/imalloc.c
+++ b/arch/powerpc/mm/imalloc.c
@@ -14,9 +14,10 @@
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/semaphore.h>
-#include <asm/imalloc.h>
#include <asm/cacheflush.h>
+#include "mmu_decl.h"
+
static DECLARE_MUTEX(imlist_sem);
struct vm_struct * imlist = NULL;
@@ -106,6 +107,7 @@ static int im_region_status(unsigned long v_addr, unsigned long size,
if (v_addr < (unsigned long) tmp->addr + tmp->size)
break;
+ *vm = NULL;
if (tmp) {
if (im_region_overlaps(v_addr, size, tmp))
return IM_REGION_OVERLAP;
@@ -126,7 +128,6 @@ static int im_region_status(unsigned long v_addr, unsigned long size,
}
}
- *vm = NULL;
return IM_REGION_UNUSED;
}
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 7d4b8b5f0606..7d0d75c11848 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -188,6 +188,11 @@ void __init MMU_init(void)
if (ppc_md.progress)
ppc_md.progress("MMU:exit", 0x211);
+
+ /* From now on, btext is no longer BAT mapped if it was at all */
+#ifdef CONFIG_BOOTX_TEXT
+ btext_unmap();
+#endif
}
/* This is only called until mem_init is done. */
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 1134f70f231d..81cfb0c2ec58 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -64,7 +64,8 @@
#include <asm/iommu.h>
#include <asm/abs_addr.h>
#include <asm/vdso.h>
-#include <asm/imalloc.h>
+
+#include "mmu_decl.h"
#ifdef DEBUG
#define DBG(fmt...) printk(fmt)
diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c
index 9b5aa6808eb8..9584608fd768 100644
--- a/arch/powerpc/mm/lmb.c
+++ b/arch/powerpc/mm/lmb.c
@@ -22,35 +22,38 @@
#include "mmu_decl.h" /* for __max_low_memory */
#endif
-struct lmb lmb;
-
#undef DEBUG
+#ifdef DEBUG
+#include <asm/udbg.h>
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+struct lmb lmb;
+
void lmb_dump_all(void)
{
#ifdef DEBUG
unsigned long i;
- udbg_printf("lmb_dump_all:\n");
- udbg_printf(" memory.cnt = 0x%lx\n",
- lmb.memory.cnt);
- udbg_printf(" memory.size = 0x%lx\n",
- lmb.memory.size);
+ DBG("lmb_dump_all:\n");
+ DBG(" memory.cnt = 0x%lx\n", lmb.memory.cnt);
+ DBG(" memory.size = 0x%lx\n", lmb.memory.size);
for (i=0; i < lmb.memory.cnt ;i++) {
- udbg_printf(" memory.region[0x%x].base = 0x%lx\n",
+ DBG(" memory.region[0x%x].base = 0x%lx\n",
i, lmb.memory.region[i].base);
- udbg_printf(" .size = 0x%lx\n",
+ DBG(" .size = 0x%lx\n",
lmb.memory.region[i].size);
}
- udbg_printf("\n reserved.cnt = 0x%lx\n",
- lmb.reserved.cnt);
- udbg_printf(" reserved.size = 0x%lx\n",
- lmb.reserved.size);
+ DBG("\n reserved.cnt = 0x%lx\n", lmb.reserved.cnt);
+ DBG(" reserved.size = 0x%lx\n", lmb.reserved.size);
for (i=0; i < lmb.reserved.cnt ;i++) {
- udbg_printf(" reserved.region[0x%x].base = 0x%lx\n",
+ DBG(" reserved.region[0x%x].base = 0x%lx\n",
i, lmb.reserved.region[i].base);
- udbg_printf(" .size = 0x%lx\n",
+ DBG(" .size = 0x%lx\n",
lmb.reserved.region[i].size);
}
#endif /* DEBUG */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index e2c95fcb8055..15aac0d78dfa 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -114,19 +114,18 @@ void online_page(struct page *page)
num_physpages++;
}
-/*
- * This works only for the non-NUMA case. Later, we'll need a lookup
- * to convert from real physical addresses to nid, that doesn't use
- * pfn_to_nid().
- */
int __devinit add_memory(u64 start, u64 size)
{
- struct pglist_data *pgdata = NODE_DATA(0);
+ struct pglist_data *pgdata;
struct zone *zone;
+ int nid;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- start += KERNELBASE;
+ nid = hot_add_scn_to_nid(start);
+ pgdata = NODE_DATA(nid);
+
+ start = __va(start);
create_section_mapping(start, start + size);
/* this should work for most non-highmem platforms */
@@ -200,6 +199,8 @@ void show_mem(void)
unsigned long flags;
pgdat_resize_lock(pgdat, &flags);
for (i = 0; i < pgdat->node_spanned_pages; i++) {
+ if (!pfn_valid(pgdat->node_start_pfn + i))
+ continue;
page = pgdat_page_nr(pgdat, i);
total++;
if (PageHighMem(page))
@@ -336,7 +337,7 @@ void __init mem_init(void)
struct page *page;
unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize;
- num_physpages = max_pfn; /* RAM is assumed contiguous */
+ num_physpages = lmb.memory.size >> PAGE_SHIFT;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
#ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -348,11 +349,13 @@ void __init mem_init(void)
}
}
#else
- max_mapnr = num_physpages;
+ max_mapnr = max_pfn;
totalram_pages += free_all_bootmem();
#endif
for_each_pgdat(pgdat) {
for (i = 0; i < pgdat->node_spanned_pages; i++) {
+ if (!pfn_valid(pgdat->node_start_pfn + i))
+ continue;
page = pgdat_page_nr(pgdat, i);
if (PageReserved(page))
reservedpages++;
@@ -491,7 +494,7 @@ EXPORT_SYMBOL(flush_icache_user_range);
* We use it to preload an HPTE into the hash table corresponding to
* the updated linux PTE.
*
- * This must always be called with the mm->page_table_lock held
+ * This must always be called with the pte lock held.
*/
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t pte)
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index a4d7a327c0e5..bea2d21ac6f7 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -33,7 +33,6 @@ extern void invalidate_tlbcam_entry(int index);
extern int __map_without_bats;
extern unsigned long ioremap_base;
-extern unsigned long ioremap_bot;
extern unsigned int rtas_data, rtas_size;
extern PTE *Hash, *Hash_end;
@@ -42,6 +41,7 @@ extern unsigned long Hash_size, Hash_mask;
extern unsigned int num_tlbcam_entries;
#endif
+extern unsigned long ioremap_bot;
extern unsigned long __max_low_memory;
extern unsigned long __initial_memory_limit;
extern unsigned long total_memory;
@@ -84,4 +84,16 @@ static inline void flush_HPTE(unsigned context, unsigned long va,
else
_tlbie(va);
}
+#else /* CONFIG_PPC64 */
+/* imalloc region types */
+#define IM_REGION_UNUSED 0x1
+#define IM_REGION_SUBSET 0x2
+#define IM_REGION_EXISTS 0x4
+#define IM_REGION_OVERLAP 0x8
+#define IM_REGION_SUPERSET 0x10
+
+extern struct vm_struct * im_get_free_area(unsigned long size);
+extern struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size,
+ int region_type);
+extern void im_free(void *addr);
#endif
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index bd2cf1336885..2863a912bcd0 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -37,6 +37,7 @@ EXPORT_SYMBOL(node_data);
static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
static int min_common_depth;
+static int n_mem_addr_cells, n_mem_size_cells;
/*
* We need somewhere to store start/end/node for each region until we have
@@ -125,7 +126,7 @@ void __init get_region(unsigned int nid, unsigned long *start_pfn,
/* We didnt find a matching region, return start/end as 0 */
if (*start_pfn == -1UL)
- start_pfn = 0;
+ *start_pfn = 0;
}
static inline void map_cpu_to_node(int cpu, int node)
@@ -254,32 +255,20 @@ static int __init find_min_common_depth(void)
return depth;
}
-static int __init get_mem_addr_cells(void)
+static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
{
struct device_node *memory = NULL;
- int rc;
memory = of_find_node_by_type(memory, "memory");
if (!memory)
- return 0; /* it won't matter */
+ panic("numa.c: No memory nodes found!");
- rc = prom_n_addr_cells(memory);
- return rc;
+ *n_addr_cells = prom_n_addr_cells(memory);
+ *n_size_cells = prom_n_size_cells(memory);
+ of_node_put(memory);
}
-static int __init get_mem_size_cells(void)
-{
- struct device_node *memory = NULL;
- int rc;
-
- memory = of_find_node_by_type(memory, "memory");
- if (!memory)
- return 0; /* it won't matter */
- rc = prom_n_size_cells(memory);
- return rc;
-}
-
-static unsigned long __init read_n_cells(int n, unsigned int **buf)
+static unsigned long __devinit read_n_cells(int n, unsigned int **buf)
{
unsigned long result = 0;
@@ -386,7 +375,6 @@ static int __init parse_numa_properties(void)
{
struct device_node *cpu = NULL;
struct device_node *memory = NULL;
- int addr_cells, size_cells;
int max_domain;
unsigned long i;
@@ -425,8 +413,7 @@ static int __init parse_numa_properties(void)
}
}
- addr_cells = get_mem_addr_cells();
- size_cells = get_mem_size_cells();
+ get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
memory = NULL;
while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
unsigned long start;
@@ -436,15 +423,21 @@ static int __init parse_numa_properties(void)
unsigned int *memcell_buf;
unsigned int len;
- memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
+ memcell_buf = (unsigned int *)get_property(memory,
+ "linux,usable-memory", &len);
+ if (!memcell_buf || len <= 0)
+ memcell_buf =
+ (unsigned int *)get_property(memory, "reg",
+ &len);
if (!memcell_buf || len <= 0)
continue;
- ranges = memory->n_addrs;
+ /* ranges in cell */
+ ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
new_range:
/* these are order-sensitive, and modify the buffer pointer */
- start = read_n_cells(addr_cells, &memcell_buf);
- size = read_n_cells(size_cells, &memcell_buf);
+ start = read_n_cells(n_mem_addr_cells, &memcell_buf);
+ size = read_n_cells(n_mem_size_cells, &memcell_buf);
numa_domain = of_node_numa_domain(memory);
@@ -483,6 +476,7 @@ static void __init setup_nonnuma(void)
{
unsigned long top_of_ram = lmb_end_of_DRAM();
unsigned long total_ram = lmb_phys_mem_size();
+ unsigned int i;
printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
top_of_ram, total_ram);
@@ -490,11 +484,47 @@ static void __init setup_nonnuma(void)
(top_of_ram - total_ram) >> 20);
map_cpu_to_node(boot_cpuid, 0);
- add_region(0, 0, lmb_end_of_DRAM() >> PAGE_SHIFT);
+ for (i = 0; i < lmb.memory.cnt; ++i)
+ add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT,
+ lmb_size_pages(&lmb.memory, i));
node_set_online(0);
}
-static void __init dump_numa_topology(void)
+void __init dump_numa_cpu_topology(void)
+{
+ unsigned int node;
+ unsigned int cpu, count;
+
+ if (min_common_depth == -1 || !numa_enabled)
+ return;
+
+ for_each_online_node(node) {
+ printk(KERN_INFO "Node %d CPUs:", node);
+
+ count = 0;
+ /*
+ * If we used a CPU iterator here we would miss printing
+ * the holes in the cpumap.
+ */
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
+ if (count == 0)
+ printk(" %u", cpu);
+ ++count;
+ } else {
+ if (count > 1)
+ printk("-%u", cpu - 1);
+ count = 0;
+ }
+ }
+
+ if (count > 1)
+ printk("-%u", NR_CPUS - 1);
+ printk("\n");
+ }
+}
+
+static void __init dump_numa_memory_topology(void)
{
unsigned int node;
unsigned int count;
@@ -526,7 +556,6 @@ static void __init dump_numa_topology(void)
printk("-0x%lx", i);
printk("\n");
}
- return;
}
/*
@@ -588,7 +617,7 @@ void __init do_init_bootmem(void)
if (parse_numa_properties())
setup_nonnuma();
else
- dump_numa_topology();
+ dump_numa_memory_topology();
register_cpu_notifier(&ppc64_numa_nb);
@@ -727,3 +756,60 @@ static int __init early_numa(char *p)
return 0;
}
early_param("numa", early_numa);
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Find the node associated with a hot added memory section. Section
+ * corresponds to a SPARSEMEM section, not an LMB. It is assumed that
+ * sections are fully contained within a single LMB.
+ */
+int hot_add_scn_to_nid(unsigned long scn_addr)
+{
+ struct device_node *memory = NULL;
+ nodemask_t nodes;
+ int numa_domain = 0;
+
+ if (!numa_enabled || (min_common_depth < 0))
+ return numa_domain;
+
+ while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
+ unsigned long start, size;
+ int ranges;
+ unsigned int *memcell_buf;
+ unsigned int len;
+
+ memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
+ if (!memcell_buf || len <= 0)
+ continue;
+
+ /* ranges in cell */
+ ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
+ha_new_range:
+ start = read_n_cells(n_mem_addr_cells, &memcell_buf);
+ size = read_n_cells(n_mem_size_cells, &memcell_buf);
+ numa_domain = of_node_numa_domain(memory);
+
+ /* Domains not present at boot default to 0 */
+ if (!node_online(numa_domain))
+ numa_domain = any_online_node(NODE_MASK_ALL);
+
+ if ((scn_addr >= start) && (scn_addr < (start + size))) {
+ of_node_put(memory);
+ goto got_numa_domain;
+ }
+
+ if (--ranges) /* process all ranges in cell */
+ goto ha_new_range;
+ }
+ BUG(); /* section address should be found above */
+
+ /* Temporary code to ensure that returned node is not empty */
+got_numa_domain:
+ nodes_setall(nodes);
+ while (NODE_DATA(numa_domain)->node_spanned_pages == 0) {
+ node_clear(numa_domain, nodes);
+ numa_domain = any_online_node(nodes);
+ }
+ return numa_domain;
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
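
Both parse_numa_properties() and hot_add_scn_to_nid() above rely on the same cell arithmetic: a memory node's "reg" (or "linux,usable-memory") property is an array of 32-bit cells, each (base, size) range consuming n_mem_addr_cells + n_mem_size_cells of them, so (len >> 2) / (n_mem_addr_cells + n_mem_size_cells) is the range count. A standalone sketch of that decoding, assuming a 64-bit host and host-order cell values purely for illustration:

#include <stdio.h>

/* concatenate n 32-bit cells into one value, advancing the cursor */
static unsigned long read_n_cells(int n, const unsigned int **buf)
{
	unsigned long result = 0;

	while (n--) {
		result = (result << 32) | **buf;
		(*buf)++;
	}
	return result;
}

int main(void)
{
	/* one range: base 0x100000000, size 0x40000000, encoded as 2+2 cells */
	const unsigned int prop[] = { 0x1, 0x0, 0x0, 0x40000000 };
	const unsigned int *p = prop;
	int len = sizeof(prop);			/* as get_property() would report */
	int n_mem_addr_cells = 2, n_mem_size_cells = 2;
	int ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);

	while (ranges--) {
		unsigned long base = read_n_cells(n_mem_addr_cells, &p);
		unsigned long size = read_n_cells(n_mem_size_cells, &p);

		printf("range: base=0x%lx size=0x%lx\n", base, size);
	}
	return 0;
}
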
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index c7f7bb6f30b3..7b278d83739e 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -64,7 +64,8 @@
#include <asm/iommu.h>
#include <asm/abs_addr.h>
#include <asm/vdso.h>
-#include <asm/imalloc.h>
+
+#include "mmu_decl.h"
unsigned long ioremap_bot = IMALLOC_BASE;
static unsigned long phbs_io_bot = PHBS_IO_BASE;
@@ -173,7 +174,7 @@ void __iomem * __ioremap(unsigned long addr, unsigned long size,
pa = addr & PAGE_MASK;
size = PAGE_ALIGN(addr + size) - pa;
- if (size == 0)
+ if ((size == 0) || (pa == 0))
return NULL;
if (mem_init_done) {
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 60e852f2f8e5..ffc8ed4de62d 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -75,7 +75,7 @@ static void slb_flush_and_rebolt(void)
vflags = SLB_VSID_KERNEL | virtual_llp;
ksp_esid_data = mk_esid_data(get_paca()->kstack, 2);
- if ((ksp_esid_data & ESID_MASK) == KERNELBASE)
+ if ((ksp_esid_data & ESID_MASK) == PAGE_OFFSET)
ksp_esid_data &= ~SLB_ESID_V;
/* We need to do this all in asm, so we're sure we don't touch
@@ -87,8 +87,8 @@ static void slb_flush_and_rebolt(void)
/* Slot 2 - kernel stack */
"slbmte %2,%3\n"
"isync"
- :: "r"(mk_vsid_data(VMALLOCBASE, vflags)),
- "r"(mk_esid_data(VMALLOCBASE, 1)),
+ :: "r"(mk_vsid_data(VMALLOC_START, vflags)),
+ "r"(mk_esid_data(VMALLOC_START, 1)),
"r"(mk_vsid_data(ksp_esid_data, lflags)),
"r"(ksp_esid_data)
: "memory");
@@ -134,14 +134,14 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
else
unmapped_base = TASK_UNMAPPED_BASE_USER64;
- if (pc >= KERNELBASE)
+ if (is_kernel_addr(pc))
return;
slb_allocate(pc);
if (GET_ESID(pc) == GET_ESID(stack))
return;
- if (stack >= KERNELBASE)
+ if (is_kernel_addr(stack))
return;
slb_allocate(stack);
@@ -149,7 +149,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
|| (GET_ESID(stack) == GET_ESID(unmapped_base)))
return;
- if (unmapped_base >= KERNELBASE)
+ if (is_kernel_addr(unmapped_base))
return;
slb_allocate(unmapped_base);
}
@@ -213,10 +213,10 @@ void slb_initialize(void)
asm volatile("isync":::"memory");
asm volatile("slbmte %0,%0"::"r" (0) : "memory");
asm volatile("isync; slbia; isync":::"memory");
- create_slbe(KERNELBASE, lflags, 0);
+ create_slbe(PAGE_OFFSET, lflags, 0);
/* VMALLOC space has 4K pages always for now */
- create_slbe(VMALLOCBASE, vflags, 1);
+ create_slbe(VMALLOC_START, vflags, 1);
/* We don't bolt the stack for the time being - we're in boot,
* so the stack is in the bolted segment. By the time it goes
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 950ffc5848c7..d1acee38f163 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -37,9 +37,9 @@ _GLOBAL(slb_allocate_realmode)
srdi r9,r3,60 /* get region */
srdi r10,r3,28 /* get esid */
- cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */
+ cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */
- /* r3 = address, r10 = esid, cr7 = <>KERNELBASE */
+ /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */
blt cr7,0f /* user or kernel? */
/* kernel address: proto-VSID = ESID */
@@ -166,7 +166,7 @@ _GLOBAL(slb_allocate_user)
/*
* Finish loading of an SLB entry and return
*
- * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <>KERNELBASE
+ * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET
*/
slb_finish_load:
ASM_VSID_SCRAMBLE(r10,r9)
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index cfbb4e1f966b..82e4951826bc 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -40,7 +40,7 @@ static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid)
unsigned long entry, group, old_esid, castout_entry, i;
unsigned int global_entry;
struct stab_entry *ste, *castout_ste;
- unsigned long kernel_segment = (esid << SID_SHIFT) >= KERNELBASE;
+ unsigned long kernel_segment = (esid << SID_SHIFT) >= PAGE_OFFSET;
vsid_data = vsid << STE_VSID_SHIFT;
esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V;
@@ -83,7 +83,7 @@ static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid)
}
/* Dont cast out the first kernel segment */
- if ((castout_ste->esid_data & ESID_MASK) != KERNELBASE)
+ if ((castout_ste->esid_data & ESID_MASK) != PAGE_OFFSET)
break;
castout_entry = (castout_entry + 1) & 0xf;
@@ -122,7 +122,7 @@ static int __ste_allocate(unsigned long ea, struct mm_struct *mm)
unsigned long offset;
/* Kernel or user address? */
- if (ea >= KERNELBASE) {
+ if (is_kernel_addr(ea)) {
vsid = get_kernel_vsid(ea);
} else {
if ((ea >= TASK_SIZE_USER64) || (! mm))
@@ -133,7 +133,7 @@ static int __ste_allocate(unsigned long ea, struct mm_struct *mm)
stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid);
- if (ea < KERNELBASE) {
+ if (!is_kernel_addr(ea)) {
offset = __get_cpu_var(stab_cache_ptr);
if (offset < NR_STAB_CACHE_ENTRIES)
__get_cpu_var(stab_cache[offset++]) = stab_entry;
@@ -190,7 +190,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
entry++, ste++) {
unsigned long ea;
ea = ste->esid_data & ESID_MASK;
- if (ea < KERNELBASE) {
+ if (!is_kernel_addr(ea)) {
ste->esid_data = 0;
}
}
@@ -251,7 +251,7 @@ void stabs_alloc(void)
panic("Unable to allocate segment table for CPU %d.\n",
cpu);
- newstab += KERNELBASE;
+ newstab = (unsigned long)__va(newstab);
memset((void *)newstab, 0, HW_PAGE_SIZE);
@@ -270,11 +270,11 @@ void stabs_alloc(void)
*/
void stab_initialize(unsigned long stab)
{
- unsigned long vsid = get_kernel_vsid(KERNELBASE);
+ unsigned long vsid = get_kernel_vsid(PAGE_OFFSET);
unsigned long stabreal;
asm volatile("isync; slbia; isync":::"memory");
- make_ste(stab, GET_ESID(KERNELBASE), vsid);
+ make_ste(stab, GET_ESID(PAGE_OFFSET), vsid);
/* Order update */
asm volatile("sync":::"memory");
@@ -288,11 +288,6 @@ void stab_initialize(unsigned long stab)
return;
}
#endif /* CONFIG_PPC_ISERIES */
-#ifdef CONFIG_PPC_PSERIES
- if (platform_is_lpar()) {
- plpar_hcall_norets(H_SET_ASR, stabreal);
- return;
- }
-#endif
+
mtspr(SPRN_ASR, stabreal);
}
diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_32.c
index 6c3dc3c44c86..ad580f3742e5 100644
--- a/arch/powerpc/mm/tlb_32.c
+++ b/arch/powerpc/mm/tlb_32.c
@@ -149,6 +149,12 @@ void flush_tlb_mm(struct mm_struct *mm)
return;
}
+ /*
+ * It is safe to go down the mm's list of vmas when called
+ * from dup_mmap, holding mmap_sem. It would also be safe from
+ * unmap_region or exit_mmap, but not from vmtruncate on SMP -
+ * but it seems dup_mmap is the only SMP case which gets here.
+ */
for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
flush_range(mp->vm_mm, mp->vm_start, mp->vm_end);
FINISH_FLUSH;
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index 53e31b834ace..bb3afb6e6317 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -95,7 +95,7 @@ static void pte_free_submit(struct pte_freelist_batch *batch)
void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
{
- /* This is safe as we are holding page_table_lock */
+ /* This is safe since tlb_gather_mmu has disabled preemption */
cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
@@ -168,7 +168,7 @@ void hpte_update(struct mm_struct *mm, unsigned long addr,
batch->mm = mm;
batch->psize = psize;
}
- if (addr < KERNELBASE) {
+ if (!is_kernel_addr(addr)) {
vsid = get_vsid(mm->context.id, addr);
WARN_ON(vsid == 0);
} else
@@ -206,7 +206,7 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
void pte_free_finish(void)
{
- /* This is safe as we are holding page_table_lock */
+ /* This is safe since tlb_gather_mmu has disabled preemption */
struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
if (*batchp == NULL)