Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/4xx_mmu.c       |   4
-rw-r--r-- | arch/powerpc/mm/fault.c         |   7
-rw-r--r-- | arch/powerpc/mm/fsl_booke_mmu.c |   2
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c |  13
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c   | 147
-rw-r--r-- | arch/powerpc/mm/imalloc.c       |   5
-rw-r--r-- | arch/powerpc/mm/init_32.c       |   5
-rw-r--r-- | arch/powerpc/mm/init_64.c       |   3
-rw-r--r-- | arch/powerpc/mm/lmb.c           |  33
-rw-r--r-- | arch/powerpc/mm/mem.c           |  23
-rw-r--r-- | arch/powerpc/mm/mmu_decl.h      |  14
-rw-r--r-- | arch/powerpc/mm/numa.c          | 146
-rw-r--r-- | arch/powerpc/mm/pgtable_64.c    |   5
-rw-r--r-- | arch/powerpc/mm/slb.c           |  16
-rw-r--r-- | arch/powerpc/mm/slb_low.S       |   6
-rw-r--r-- | arch/powerpc/mm/stab.c          |  23
-rw-r--r-- | arch/powerpc/mm/tlb_32.c        |   6
-rw-r--r-- | arch/powerpc/mm/tlb_64.c        |   6
18 files changed, 333 insertions, 131 deletions
diff --git a/arch/powerpc/mm/4xx_mmu.c b/arch/powerpc/mm/4xx_mmu.c index b7bcbc232f39..4d006aa1a0d1 100644 --- a/arch/powerpc/mm/4xx_mmu.c +++ b/arch/powerpc/mm/4xx_mmu.c @@ -110,13 +110,11 @@ unsigned long __init mmu_mapin_ram(void) pmd_t *pmdp; unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE; - spin_lock(&init_mm.page_table_lock); pmdp = pmd_offset(pgd_offset_k(v), v); pmd_val(*pmdp++) = val; pmd_val(*pmdp++) = val; pmd_val(*pmdp++) = val; pmd_val(*pmdp++) = val; - spin_unlock(&init_mm.page_table_lock); v += LARGE_PAGE_SIZE_16M; p += LARGE_PAGE_SIZE_16M; @@ -127,10 +125,8 @@ unsigned long __init mmu_mapin_ram(void) pmd_t *pmdp; unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE; - spin_lock(&init_mm.page_table_lock); pmdp = pmd_offset(pgd_offset_k(v), v); pmd_val(*pmdp) = val; - spin_unlock(&init_mm.page_table_lock); v += LARGE_PAGE_SIZE_4M; p += LARGE_PAGE_SIZE_4M; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 93d4fbfdb724..a4815d316722 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -81,7 +81,8 @@ static int store_updates_sp(struct pt_regs *regs) } #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) -static void do_dabr(struct pt_regs *regs, unsigned long error_code) +static void do_dabr(struct pt_regs *regs, unsigned long address, + unsigned long error_code) { siginfo_t info; @@ -99,7 +100,7 @@ static void do_dabr(struct pt_regs *regs, unsigned long error_code) info.si_signo = SIGTRAP; info.si_errno = 0; info.si_code = TRAP_HWBKPT; - info.si_addr = (void __user *)regs->nip; + info.si_addr = (void __user *)address; force_sig_info(SIGTRAP, &info, current); } #endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ @@ -159,7 +160,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) if (error_code & DSISR_DABRMATCH) { /* DABR match */ - do_dabr(regs, error_code); + do_dabr(regs, address, error_code); return 0; } #endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index af9ca0eb6d55..5d581bb3aa12 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -1,5 +1,5 @@ /* - * Modifications by Kumar Gala (kumar.gala@freescale.com) to support + * Modifications by Kumar Gala (galak@kernel.crashing.org) to support * E500 Book E processors. * * Copyright 2004 Freescale Semiconductor, Inc diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 706e8a63ced9..149351a84b94 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -368,7 +368,7 @@ static unsigned long __init htab_get_table_size(void) unsigned long mem_size, rnd_mem_size, pteg_count; /* If hash size isn't already provided by the platform, we try to - * retreive it from the device-tree. If it's not there neither, we + * retrieve it from the device-tree. If it's not there neither, we * calculate it now based on the total RAM size */ if (ppc64_pft_size == 0) @@ -456,7 +456,7 @@ void __init htab_initialize(void) /* create bolted the linear mapping in the hash table */ for (i=0; i < lmb.memory.cnt; i++) { - base = lmb.memory.region[i].base + KERNELBASE; + base = (unsigned long)__va(lmb.memory.region[i].base); size = lmb.memory.region[i].size; DBG("creating mapping for region: %lx : %lx\n", base, size); @@ -498,8 +498,8 @@ void __init htab_initialize(void) * for either 4K or 16MB pages. 
*/ if (tce_alloc_start) { - tce_alloc_start += KERNELBASE; - tce_alloc_end += KERNELBASE; + tce_alloc_start = (unsigned long)__va(tce_alloc_start); + tce_alloc_end = (unsigned long)__va(tce_alloc_end); if (base + size >= tce_alloc_start) tce_alloc_start = base + size + 1; @@ -514,7 +514,7 @@ void __init htab_initialize(void) #undef KB #undef MB -void __init htab_initialize_secondary(void) +void htab_initialize_secondary(void) { if (!platform_is_lpar()) mtspr(SPRN_SDR1, _SDR1); @@ -601,7 +601,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) /* Handle hugepage regions */ if (unlikely(in_hugepage_area(mm->context, ea))) { DBG_LOW(" -> huge page !\n"); - return hash_huge_page(mm, access, ea, vsid, local); + return hash_huge_page(mm, access, ea, vsid, local, trap); } /* Get PTE and page size from page tables */ @@ -644,6 +644,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) DBG_LOW(" -> rc=%d\n", rc); return rc; } +EXPORT_SYMBOL_GPL(hash_page); void hash_preload(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 426c269e552e..b51bb28c054b 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -148,43 +148,63 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len) return 0; } +struct slb_flush_info { + struct mm_struct *mm; + u16 newareas; +}; + static void flush_low_segments(void *parm) { - u16 areas = (unsigned long) parm; + struct slb_flush_info *fi = parm; unsigned long i; - asm volatile("isync" : : : "memory"); + BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS); + + if (current->active_mm != fi->mm) + return; - BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS); + /* Only need to do anything if this CPU is working in the same + * mm as the one which has changed */ + /* update the paca copy of the context struct */ + get_paca()->context = current->active_mm->context; + + asm volatile("isync" : : : "memory"); for (i = 0; i < NUM_LOW_AREAS; i++) { - if (! (areas & (1U << i))) + if (! (fi->newareas & (1U << i))) continue; asm volatile("slbie %0" : : "r" ((i << SID_SHIFT) | SLBIE_C)); } - asm volatile("isync" : : : "memory"); } static void flush_high_segments(void *parm) { - u16 areas = (unsigned long) parm; + struct slb_flush_info *fi = parm; unsigned long i, j; - asm volatile("isync" : : : "memory"); - BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS); + BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS); + + if (current->active_mm != fi->mm) + return; + + /* Only need to do anything if this CPU is working in the same + * mm as the one which has changed */ + + /* update the paca copy of the context struct */ + get_paca()->context = current->active_mm->context; + asm volatile("isync" : : : "memory"); for (i = 0; i < NUM_HIGH_AREAS; i++) { - if (! (areas & (1U << i))) + if (! 
(fi->newareas & (1U << i))) continue; for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) asm volatile("slbie %0" :: "r" (((i << HTLB_AREA_SHIFT) - + (j << SID_SHIFT)) | SLBIE_C)); + + (j << SID_SHIFT)) | SLBIE_C)); } - asm volatile("isync" : : : "memory"); } @@ -229,6 +249,7 @@ static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) { unsigned long i; + struct slb_flush_info fi; BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); @@ -244,19 +265,20 @@ static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) mm->context.low_htlb_areas |= newareas; - /* update the paca copy of the context struct */ - get_paca()->context = mm->context; - /* the context change must make it to memory before the flush, * so that further SLB misses do the right thing. */ mb(); - on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1); + + fi.mm = mm; + fi.newareas = newareas; + on_each_cpu(flush_low_segments, &fi, 0, 1); return 0; } static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) { + struct slb_flush_info fi; unsigned long i; BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); @@ -280,22 +302,25 @@ static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) /* the context change must make it to memory before the flush, * so that further SLB misses do the right thing. */ mb(); - on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1); + + fi.mm = mm; + fi.newareas = newareas; + on_each_cpu(flush_high_segments, &fi, 0, 1); return 0; } int prepare_hugepage_range(unsigned long addr, unsigned long len) { - int err; + int err = 0; if ( (addr+len) < addr ) return -EINVAL; - if ((addr + len) < 0x100000000UL) + if (addr < 0x100000000UL) err = open_low_hpage_areas(current->mm, LOW_ESID_MASK(addr, len)); - else + if ((addr + len) > 0x100000000UL) err = open_high_hpage_areas(current->mm, HTLB_AREA_MASK(addr, len)); if (err) { @@ -524,6 +549,17 @@ fail: return addr; } +static int htlb_check_hinted_area(unsigned long addr, unsigned long len) +{ + struct vm_area_struct *vma; + + vma = find_vma(current->mm, addr); + if (!vma || ((addr + len) <= vma->vm_start)) + return 0; + + return -ENOMEM; +} + static unsigned long htlb_get_low_area(unsigned long len, u16 segmask) { unsigned long addr = 0; @@ -593,15 +629,28 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, if (!cpu_has_feature(CPU_FTR_16M_PAGE)) return -EINVAL; + /* Paranoia, caller should have dealt with this */ + BUG_ON((addr + len) < addr); + if (test_thread_flag(TIF_32BIT)) { + /* Paranoia, caller should have dealt with this */ + BUG_ON((addr + len) > 0x100000000UL); + curareas = current->mm->context.low_htlb_areas; - /* First see if we can do the mapping in the existing - * low areas */ + /* First see if we can use the hint address */ + if (addr && (htlb_check_hinted_area(addr, len) == 0)) { + areamask = LOW_ESID_MASK(addr, len); + if (open_low_hpage_areas(current->mm, areamask) == 0) + return addr; + } + + /* Next see if we can map in the existing low areas */ addr = htlb_get_low_area(len, curareas); if (addr != -ENOMEM) return addr; + /* Finally go looking for areas to open */ lastshift = 0; for (areamask = LOW_ESID_MASK(0x100000000UL-len, len); ! 
lastshift; areamask >>=1) { @@ -616,12 +665,22 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, } else { curareas = current->mm->context.high_htlb_areas; - /* First see if we can do the mapping in the existing - * high areas */ + /* First see if we can use the hint address */ + /* We discourage 64-bit processes from doing hugepage + * mappings below 4GB (must use MAP_FIXED) */ + if ((addr >= 0x100000000UL) + && (htlb_check_hinted_area(addr, len) == 0)) { + areamask = HTLB_AREA_MASK(addr, len); + if (open_high_hpage_areas(current->mm, areamask) == 0) + return addr; + } + + /* Next see if we can map in the existing high areas */ addr = htlb_get_high_area(len, curareas); if (addr != -ENOMEM) return addr; + /* Finally go looking for areas to open */ lastshift = 0; for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len); ! lastshift; areamask >>=1) { @@ -639,8 +698,36 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return -ENOMEM; } +/* + * Called by asm hashtable.S for doing lazy icache flush + */ +static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, + pte_t pte, int trap) +{ + struct page *page; + int i; + + if (!pfn_valid(pte_pfn(pte))) + return rflags; + + page = pte_page(pte); + + /* page is dirty */ + if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { + if (trap == 0x400) { + for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) + __flush_dcache_icache(page_address(page+i)); + set_bit(PG_arch_1, &page->flags); + } else { + rflags |= HPTE_R_N; + } + } + return rflags; +} + int hash_huge_page(struct mm_struct *mm, unsigned long access, - unsigned long ea, unsigned long vsid, int local) + unsigned long ea, unsigned long vsid, int local, + unsigned long trap) { pte_t *ptep; unsigned long old_pte, new_pte; @@ -691,6 +778,11 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, rflags = 0x2 | (!(new_pte & _PAGE_RW)); /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ rflags |= ((new_pte & _PAGE_EXEC) ? 
0 : HPTE_R_N); + if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + /* No CPU has hugepages but lacks no execute, so we + * don't need to worry about that case */ + rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte), + trap); /* Check if pte already has an hpte (case 2) */ if (unlikely(old_pte & _PAGE_HASHPTE)) { @@ -703,7 +795,8 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & _PAGE_F_GIX) >> 12; - if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1) + if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize, + local) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } @@ -754,9 +847,7 @@ repeat: } /* - * No need to use ldarx/stdcx here because all who - * might be updating the pte will hold the - * page_table_lock + * No need to use ldarx/stdcx here */ *ptep = __pte(new_pte & ~_PAGE_BUSY); diff --git a/arch/powerpc/mm/imalloc.c b/arch/powerpc/mm/imalloc.c index f4ca29cf5364..8b0c132bc163 100644 --- a/arch/powerpc/mm/imalloc.c +++ b/arch/powerpc/mm/imalloc.c @@ -14,9 +14,10 @@ #include <asm/pgalloc.h> #include <asm/pgtable.h> #include <asm/semaphore.h> -#include <asm/imalloc.h> #include <asm/cacheflush.h> +#include "mmu_decl.h" + static DECLARE_MUTEX(imlist_sem); struct vm_struct * imlist = NULL; @@ -106,6 +107,7 @@ static int im_region_status(unsigned long v_addr, unsigned long size, if (v_addr < (unsigned long) tmp->addr + tmp->size) break; + *vm = NULL; if (tmp) { if (im_region_overlaps(v_addr, size, tmp)) return IM_REGION_OVERLAP; @@ -126,7 +128,6 @@ static int im_region_status(unsigned long v_addr, unsigned long size, } } - *vm = NULL; return IM_REGION_UNUSED; } diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 7d4b8b5f0606..7d0d75c11848 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -188,6 +188,11 @@ void __init MMU_init(void) if (ppc_md.progress) ppc_md.progress("MMU:exit", 0x211); + + /* From now on, btext is no longer BAT mapped if it was at all */ +#ifdef CONFIG_BOOTX_TEXT + btext_unmap(); +#endif } /* This is only called until mem_init is done. */ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 1134f70f231d..81cfb0c2ec58 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -64,7 +64,8 @@ #include <asm/iommu.h> #include <asm/abs_addr.h> #include <asm/vdso.h> -#include <asm/imalloc.h> + +#include "mmu_decl.h" #ifdef DEBUG #define DBG(fmt...) printk(fmt) diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c index 9b5aa6808eb8..9584608fd768 100644 --- a/arch/powerpc/mm/lmb.c +++ b/arch/powerpc/mm/lmb.c @@ -22,35 +22,38 @@ #include "mmu_decl.h" /* for __max_low_memory */ #endif -struct lmb lmb; - #undef DEBUG +#ifdef DEBUG +#include <asm/udbg.h> +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) 
+#endif + +struct lmb lmb; + void lmb_dump_all(void) { #ifdef DEBUG unsigned long i; - udbg_printf("lmb_dump_all:\n"); - udbg_printf(" memory.cnt = 0x%lx\n", - lmb.memory.cnt); - udbg_printf(" memory.size = 0x%lx\n", - lmb.memory.size); + DBG("lmb_dump_all:\n"); + DBG(" memory.cnt = 0x%lx\n", lmb.memory.cnt); + DBG(" memory.size = 0x%lx\n", lmb.memory.size); for (i=0; i < lmb.memory.cnt ;i++) { - udbg_printf(" memory.region[0x%x].base = 0x%lx\n", + DBG(" memory.region[0x%x].base = 0x%lx\n", i, lmb.memory.region[i].base); - udbg_printf(" .size = 0x%lx\n", + DBG(" .size = 0x%lx\n", lmb.memory.region[i].size); } - udbg_printf("\n reserved.cnt = 0x%lx\n", - lmb.reserved.cnt); - udbg_printf(" reserved.size = 0x%lx\n", - lmb.reserved.size); + DBG("\n reserved.cnt = 0x%lx\n", lmb.reserved.cnt); + DBG(" reserved.size = 0x%lx\n", lmb.reserved.size); for (i=0; i < lmb.reserved.cnt ;i++) { - udbg_printf(" reserved.region[0x%x].base = 0x%lx\n", + DBG(" reserved.region[0x%x].base = 0x%lx\n", i, lmb.reserved.region[i].base); - udbg_printf(" .size = 0x%lx\n", + DBG(" .size = 0x%lx\n", lmb.reserved.region[i].size); } #endif /* DEBUG */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index e2c95fcb8055..15aac0d78dfa 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -114,19 +114,18 @@ void online_page(struct page *page) num_physpages++; } -/* - * This works only for the non-NUMA case. Later, we'll need a lookup - * to convert from real physical addresses to nid, that doesn't use - * pfn_to_nid(). - */ int __devinit add_memory(u64 start, u64 size) { - struct pglist_data *pgdata = NODE_DATA(0); + struct pglist_data *pgdata; struct zone *zone; + int nid; unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - start += KERNELBASE; + nid = hot_add_scn_to_nid(start); + pgdata = NODE_DATA(nid); + + start = __va(start); create_section_mapping(start, start + size); /* this should work for most non-highmem platforms */ @@ -200,6 +199,8 @@ void show_mem(void) unsigned long flags; pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; i++) { + if (!pfn_valid(pgdat->node_start_pfn + i)) + continue; page = pgdat_page_nr(pgdat, i); total++; if (PageHighMem(page)) @@ -336,7 +337,7 @@ void __init mem_init(void) struct page *page; unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; - num_physpages = max_pfn; /* RAM is assumed contiguous */ + num_physpages = lmb.memory.size >> PAGE_SHIFT; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); #ifdef CONFIG_NEED_MULTIPLE_NODES @@ -348,11 +349,13 @@ void __init mem_init(void) } } #else - max_mapnr = num_physpages; + max_mapnr = max_pfn; totalram_pages += free_all_bootmem(); #endif for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; i++) { + if (!pfn_valid(pgdat->node_start_pfn + i)) + continue; page = pgdat_page_nr(pgdat, i); if (PageReserved(page)) reservedpages++; @@ -491,7 +494,7 @@ EXPORT_SYMBOL(flush_icache_user_range); * We use it to preload an HPTE into the hash table corresponding to * the updated linux PTE. * - * This must always be called with the mm->page_table_lock held + * This must always be called with the pte lock held. 
*/ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte) diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index a4d7a327c0e5..bea2d21ac6f7 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -33,7 +33,6 @@ extern void invalidate_tlbcam_entry(int index); extern int __map_without_bats; extern unsigned long ioremap_base; -extern unsigned long ioremap_bot; extern unsigned int rtas_data, rtas_size; extern PTE *Hash, *Hash_end; @@ -42,6 +41,7 @@ extern unsigned long Hash_size, Hash_mask; extern unsigned int num_tlbcam_entries; #endif +extern unsigned long ioremap_bot; extern unsigned long __max_low_memory; extern unsigned long __initial_memory_limit; extern unsigned long total_memory; @@ -84,4 +84,16 @@ static inline void flush_HPTE(unsigned context, unsigned long va, else _tlbie(va); } +#else /* CONFIG_PPC64 */ +/* imalloc region types */ +#define IM_REGION_UNUSED 0x1 +#define IM_REGION_SUBSET 0x2 +#define IM_REGION_EXISTS 0x4 +#define IM_REGION_OVERLAP 0x8 +#define IM_REGION_SUPERSET 0x10 + +extern struct vm_struct * im_get_free_area(unsigned long size); +extern struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size, + int region_type); +extern void im_free(void *addr); #endif diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index bd2cf1336885..2863a912bcd0 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -37,6 +37,7 @@ EXPORT_SYMBOL(node_data); static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES]; static int min_common_depth; +static int n_mem_addr_cells, n_mem_size_cells; /* * We need somewhere to store start/end/node for each region until we have @@ -125,7 +126,7 @@ void __init get_region(unsigned int nid, unsigned long *start_pfn, /* We didnt find a matching region, return start/end as 0 */ if (*start_pfn == -1UL) - start_pfn = 0; + *start_pfn = 0; } static inline void map_cpu_to_node(int cpu, int node) @@ -254,32 +255,20 @@ static int __init find_min_common_depth(void) return depth; } -static int __init get_mem_addr_cells(void) +static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells) { struct device_node *memory = NULL; - int rc; memory = of_find_node_by_type(memory, "memory"); if (!memory) - return 0; /* it won't matter */ + panic("numa.c: No memory nodes found!"); - rc = prom_n_addr_cells(memory); - return rc; + *n_addr_cells = prom_n_addr_cells(memory); + *n_size_cells = prom_n_size_cells(memory); + of_node_put(memory); } -static int __init get_mem_size_cells(void) -{ - struct device_node *memory = NULL; - int rc; - - memory = of_find_node_by_type(memory, "memory"); - if (!memory) - return 0; /* it won't matter */ - rc = prom_n_size_cells(memory); - return rc; -} - -static unsigned long __init read_n_cells(int n, unsigned int **buf) +static unsigned long __devinit read_n_cells(int n, unsigned int **buf) { unsigned long result = 0; @@ -386,7 +375,6 @@ static int __init parse_numa_properties(void) { struct device_node *cpu = NULL; struct device_node *memory = NULL; - int addr_cells, size_cells; int max_domain; unsigned long i; @@ -425,8 +413,7 @@ static int __init parse_numa_properties(void) } } - addr_cells = get_mem_addr_cells(); - size_cells = get_mem_size_cells(); + get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); memory = NULL; while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { unsigned long start; @@ -436,15 +423,21 @@ static int __init parse_numa_properties(void) unsigned int *memcell_buf; unsigned int len; - 
memcell_buf = (unsigned int *)get_property(memory, "reg", &len); + memcell_buf = (unsigned int *)get_property(memory, + "linux,usable-memory", &len); + if (!memcell_buf || len <= 0) + memcell_buf = + (unsigned int *)get_property(memory, "reg", + &len); if (!memcell_buf || len <= 0) continue; - ranges = memory->n_addrs; + /* ranges in cell */ + ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); new_range: /* these are order-sensitive, and modify the buffer pointer */ - start = read_n_cells(addr_cells, &memcell_buf); - size = read_n_cells(size_cells, &memcell_buf); + start = read_n_cells(n_mem_addr_cells, &memcell_buf); + size = read_n_cells(n_mem_size_cells, &memcell_buf); numa_domain = of_node_numa_domain(memory); @@ -483,6 +476,7 @@ static void __init setup_nonnuma(void) { unsigned long top_of_ram = lmb_end_of_DRAM(); unsigned long total_ram = lmb_phys_mem_size(); + unsigned int i; printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", top_of_ram, total_ram); @@ -490,11 +484,47 @@ static void __init setup_nonnuma(void) (top_of_ram - total_ram) >> 20); map_cpu_to_node(boot_cpuid, 0); - add_region(0, 0, lmb_end_of_DRAM() >> PAGE_SHIFT); + for (i = 0; i < lmb.memory.cnt; ++i) + add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT, + lmb_size_pages(&lmb.memory, i)); node_set_online(0); } -static void __init dump_numa_topology(void) +void __init dump_numa_cpu_topology(void) +{ + unsigned int node; + unsigned int cpu, count; + + if (min_common_depth == -1 || !numa_enabled) + return; + + for_each_online_node(node) { + printk(KERN_INFO "Node %d CPUs:", node); + + count = 0; + /* + * If we used a CPU iterator here we would miss printing + * the holes in the cpumap. + */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) { + if (count == 0) + printk(" %u", cpu); + ++count; + } else { + if (count > 1) + printk("-%u", cpu - 1); + count = 0; + } + } + + if (count > 1) + printk("-%u", NR_CPUS - 1); + printk("\n"); + } +} + +static void __init dump_numa_memory_topology(void) { unsigned int node; unsigned int count; @@ -526,7 +556,6 @@ static void __init dump_numa_topology(void) printk("-0x%lx", i); printk("\n"); } - return; } /* @@ -588,7 +617,7 @@ void __init do_init_bootmem(void) if (parse_numa_properties()) setup_nonnuma(); else - dump_numa_topology(); + dump_numa_memory_topology(); register_cpu_notifier(&ppc64_numa_nb); @@ -727,3 +756,60 @@ static int __init early_numa(char *p) return 0; } early_param("numa", early_numa); + +#ifdef CONFIG_MEMORY_HOTPLUG +/* + * Find the node associated with a hot added memory section. Section + * corresponds to a SPARSEMEM section, not an LMB. It is assumed that + * sections are fully contained within a single LMB. 
+ */ +int hot_add_scn_to_nid(unsigned long scn_addr) +{ + struct device_node *memory = NULL; + nodemask_t nodes; + int numa_domain = 0; + + if (!numa_enabled || (min_common_depth < 0)) + return numa_domain; + + while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { + unsigned long start, size; + int ranges; + unsigned int *memcell_buf; + unsigned int len; + + memcell_buf = (unsigned int *)get_property(memory, "reg", &len); + if (!memcell_buf || len <= 0) + continue; + + /* ranges in cell */ + ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); +ha_new_range: + start = read_n_cells(n_mem_addr_cells, &memcell_buf); + size = read_n_cells(n_mem_size_cells, &memcell_buf); + numa_domain = of_node_numa_domain(memory); + + /* Domains not present at boot default to 0 */ + if (!node_online(numa_domain)) + numa_domain = any_online_node(NODE_MASK_ALL); + + if ((scn_addr >= start) && (scn_addr < (start + size))) { + of_node_put(memory); + goto got_numa_domain; + } + + if (--ranges) /* process all ranges in cell */ + goto ha_new_range; + } + BUG(); /* section address should be found above */ + + /* Temporary code to ensure that returned node is not empty */ +got_numa_domain: + nodes_setall(nodes); + while (NODE_DATA(numa_domain)->node_spanned_pages == 0) { + node_clear(numa_domain, nodes); + numa_domain = any_online_node(nodes); + } + return numa_domain; +} +#endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index c7f7bb6f30b3..7b278d83739e 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -64,7 +64,8 @@ #include <asm/iommu.h> #include <asm/abs_addr.h> #include <asm/vdso.h> -#include <asm/imalloc.h> + +#include "mmu_decl.h" unsigned long ioremap_bot = IMALLOC_BASE; static unsigned long phbs_io_bot = PHBS_IO_BASE; @@ -173,7 +174,7 @@ void __iomem * __ioremap(unsigned long addr, unsigned long size, pa = addr & PAGE_MASK; size = PAGE_ALIGN(addr + size) - pa; - if (size == 0) + if ((size == 0) || (pa == 0)) return NULL; if (mem_init_done) { diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 60e852f2f8e5..ffc8ed4de62d 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -75,7 +75,7 @@ static void slb_flush_and_rebolt(void) vflags = SLB_VSID_KERNEL | virtual_llp; ksp_esid_data = mk_esid_data(get_paca()->kstack, 2); - if ((ksp_esid_data & ESID_MASK) == KERNELBASE) + if ((ksp_esid_data & ESID_MASK) == PAGE_OFFSET) ksp_esid_data &= ~SLB_ESID_V; /* We need to do this all in asm, so we're sure we don't touch @@ -87,8 +87,8 @@ static void slb_flush_and_rebolt(void) /* Slot 2 - kernel stack */ "slbmte %2,%3\n" "isync" - :: "r"(mk_vsid_data(VMALLOCBASE, vflags)), - "r"(mk_esid_data(VMALLOCBASE, 1)), + :: "r"(mk_vsid_data(VMALLOC_START, vflags)), + "r"(mk_esid_data(VMALLOC_START, 1)), "r"(mk_vsid_data(ksp_esid_data, lflags)), "r"(ksp_esid_data) : "memory"); @@ -134,14 +134,14 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) else unmapped_base = TASK_UNMAPPED_BASE_USER64; - if (pc >= KERNELBASE) + if (is_kernel_addr(pc)) return; slb_allocate(pc); if (GET_ESID(pc) == GET_ESID(stack)) return; - if (stack >= KERNELBASE) + if (is_kernel_addr(stack)) return; slb_allocate(stack); @@ -149,7 +149,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) || (GET_ESID(stack) == GET_ESID(unmapped_base))) return; - if (unmapped_base >= KERNELBASE) + if (is_kernel_addr(unmapped_base)) return; slb_allocate(unmapped_base); } @@ -213,10 +213,10 @@ void slb_initialize(void) 
asm volatile("isync":::"memory"); asm volatile("slbmte %0,%0"::"r" (0) : "memory"); asm volatile("isync; slbia; isync":::"memory"); - create_slbe(KERNELBASE, lflags, 0); + create_slbe(PAGE_OFFSET, lflags, 0); /* VMALLOC space has 4K pages always for now */ - create_slbe(VMALLOCBASE, vflags, 1); + create_slbe(VMALLOC_START, vflags, 1); /* We don't bolt the stack for the time being - we're in boot, * so the stack is in the bolted segment. By the time it goes diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 950ffc5848c7..d1acee38f163 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -37,9 +37,9 @@ _GLOBAL(slb_allocate_realmode) srdi r9,r3,60 /* get region */ srdi r10,r3,28 /* get esid */ - cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */ + cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */ - /* r3 = address, r10 = esid, cr7 = <>KERNELBASE */ + /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */ blt cr7,0f /* user or kernel? */ /* kernel address: proto-VSID = ESID */ @@ -166,7 +166,7 @@ _GLOBAL(slb_allocate_user) /* * Finish loading of an SLB entry and return * - * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <>KERNELBASE + * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET */ slb_finish_load: ASM_VSID_SCRAMBLE(r10,r9) diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index cfbb4e1f966b..82e4951826bc 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -40,7 +40,7 @@ static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid) unsigned long entry, group, old_esid, castout_entry, i; unsigned int global_entry; struct stab_entry *ste, *castout_ste; - unsigned long kernel_segment = (esid << SID_SHIFT) >= KERNELBASE; + unsigned long kernel_segment = (esid << SID_SHIFT) >= PAGE_OFFSET; vsid_data = vsid << STE_VSID_SHIFT; esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V; @@ -83,7 +83,7 @@ static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid) } /* Dont cast out the first kernel segment */ - if ((castout_ste->esid_data & ESID_MASK) != KERNELBASE) + if ((castout_ste->esid_data & ESID_MASK) != PAGE_OFFSET) break; castout_entry = (castout_entry + 1) & 0xf; @@ -122,7 +122,7 @@ static int __ste_allocate(unsigned long ea, struct mm_struct *mm) unsigned long offset; /* Kernel or user address? */ - if (ea >= KERNELBASE) { + if (is_kernel_addr(ea)) { vsid = get_kernel_vsid(ea); } else { if ((ea >= TASK_SIZE_USER64) || (! 
mm)) @@ -133,7 +133,7 @@ static int __ste_allocate(unsigned long ea, struct mm_struct *mm) stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid); - if (ea < KERNELBASE) { + if (!is_kernel_addr(ea)) { offset = __get_cpu_var(stab_cache_ptr); if (offset < NR_STAB_CACHE_ENTRIES) __get_cpu_var(stab_cache[offset++]) = stab_entry; @@ -190,7 +190,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) entry++, ste++) { unsigned long ea; ea = ste->esid_data & ESID_MASK; - if (ea < KERNELBASE) { + if (!is_kernel_addr(ea)) { ste->esid_data = 0; } } @@ -251,7 +251,7 @@ void stabs_alloc(void) panic("Unable to allocate segment table for CPU %d.\n", cpu); - newstab += KERNELBASE; + newstab = (unsigned long)__va(newstab); memset((void *)newstab, 0, HW_PAGE_SIZE); @@ -270,11 +270,11 @@ void stabs_alloc(void) */ void stab_initialize(unsigned long stab) { - unsigned long vsid = get_kernel_vsid(KERNELBASE); + unsigned long vsid = get_kernel_vsid(PAGE_OFFSET); unsigned long stabreal; asm volatile("isync; slbia; isync":::"memory"); - make_ste(stab, GET_ESID(KERNELBASE), vsid); + make_ste(stab, GET_ESID(PAGE_OFFSET), vsid); /* Order update */ asm volatile("sync":::"memory"); @@ -288,11 +288,6 @@ void stab_initialize(unsigned long stab) return; } #endif /* CONFIG_PPC_ISERIES */ -#ifdef CONFIG_PPC_PSERIES - if (platform_is_lpar()) { - plpar_hcall_norets(H_SET_ASR, stabreal); - return; - } -#endif + mtspr(SPRN_ASR, stabreal); } diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_32.c index 6c3dc3c44c86..ad580f3742e5 100644 --- a/arch/powerpc/mm/tlb_32.c +++ b/arch/powerpc/mm/tlb_32.c @@ -149,6 +149,12 @@ void flush_tlb_mm(struct mm_struct *mm) return; } + /* + * It is safe to go down the mm's list of vmas when called + * from dup_mmap, holding mmap_sem. It would also be safe from + * unmap_region or exit_mmap, but not from vmtruncate on SMP - + * but it seems dup_mmap is the only SMP case which gets here. + */ for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); FINISH_FLUSH; diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c index 53e31b834ace..bb3afb6e6317 100644 --- a/arch/powerpc/mm/tlb_64.c +++ b/arch/powerpc/mm/tlb_64.c @@ -95,7 +95,7 @@ static void pte_free_submit(struct pte_freelist_batch *batch) void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) { - /* This is safe as we are holding page_table_lock */ + /* This is safe since tlb_gather_mmu has disabled preemption */ cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); @@ -168,7 +168,7 @@ void hpte_update(struct mm_struct *mm, unsigned long addr, batch->mm = mm; batch->psize = psize; } - if (addr < KERNELBASE) { + if (!is_kernel_addr(addr)) { vsid = get_vsid(mm->context.id, addr); WARN_ON(vsid == 0); } else @@ -206,7 +206,7 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch) void pte_free_finish(void) { - /* This is safe as we are holding page_table_lock */ + /* This is safe since tlb_gather_mmu has disabled preemption */ struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); if (*batchp == NULL) |
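A recurring theme above (hash_utils_64.c, mem.c, stab.c) is replacing open-coded "+ KERNELBASE" arithmetic with __va(), alongside the KERNELBASE to PAGE_OFFSET conversions in slb.c, slb_low.S and stab.c. As a rough model only (the real definitions live in the powerpc headers and vary by kernel version), the linear-mapping helpers amount to a constant offset:

    /* Rough sketch of the linear-map translation assumed by the hunks
     * above: physical memory is mapped starting at PAGE_OFFSET, so
     * virtual <-> physical conversion is a fixed offset. */
    #define __va(paddr)  ((void *)((unsigned long)(paddr) + PAGE_OFFSET))
    #define __pa(vaddr)  ((unsigned long)(vaddr) - PAGE_OFFSET)

Writing __va(x) instead of x + KERNELBASE keeps the code correct if the kernel image base and the start of the linear mapping ever differ, which is the distinction the PAGE_OFFSET conversions in this diff prepare for.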
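The hugetlbpage.c hunks stop casting the new-areas bitmask into the void * argument of on_each_cpu() and instead pass a small slb_flush_info descriptor, so the IPI handler can also check which mm was changed and skip unrelated CPUs. A minimal sketch of that pattern with a simplified, hypothetical descriptor and handler (not the kernel's actual hugepage code), assuming the 2.6.15-era on_each_cpu(func, info, retry, wait) signature used in the diff:

    #include <linux/types.h>
    #include <linux/smp.h>
    #include <linux/sched.h>

    struct flush_info {                 /* stand-in for slb_flush_info */
            struct mm_struct *mm;       /* mm whose segments changed */
            u16 newareas;               /* bitmask of areas to flush */
    };

    static void flush_segments_ipi(void *parm)
    {
            struct flush_info *fi = parm;

            /* Only CPUs currently running the affected mm need to act */
            if (current->active_mm != fi->mm)
                    return;

            /* ... slbie the segments named by fi->newareas ... */
    }

    static void flush_on_all_cpus(struct mm_struct *mm, u16 newareas)
    {
            struct flush_info fi = { .mm = mm, .newareas = newareas };

            /* retry = 0, wait = 1: keeping fi on the stack is safe because
             * on_each_cpu() does not return until every handler has run */
            on_each_cpu(flush_segments_ipi, &fi, 0, 1);
    }

The wait argument is what makes the on-stack descriptor safe; with wait == 0 the structure would have to outlive the call some other way.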
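The numa.c changes read memory ranges straight out of the device-tree "reg" (or "linux,usable-memory") property, where each range is n_mem_addr_cells 32-bit cells of address followed by n_mem_size_cells cells of size. A sketch of how a read_n_cells()-style helper can assemble a 64-bit value from 32-bit cells (illustrative; the in-tree helper may differ in detail):

    /* Assemble one value from 'n' 32-bit device-tree cells and advance
     * the caller's cursor past them. */
    static unsigned long read_n_cells(int n, unsigned int **buf)
    {
            unsigned long result = 0;

            while (n--) {
                    result = (result << 32) | **buf;
                    (*buf)++;
            }
            return result;
    }

With two address cells and two size cells, each (start, size) pair occupies four cells, which is why the diff computes the range count as (len >> 2) / (n_mem_addr_cells + n_mem_size_cells): len is in bytes, so len >> 2 is the number of cells.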