diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-09 23:56:01 +0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-09 23:56:01 +0400 |
commit | aabded9c3aab5160ae2ca3dd1fa0fa37f3d510e4 (patch) | |
tree | 8544d546735bcb975b8dec296eb9b6dc6531fb2a /arch/powerpc/mm | |
parent | 9a9136e270af14da506f66bcafcc506b86a86498 (diff) | |
parent | f1a1eb299a8422c3e8d41753095bec44b2493398 (diff) | |
download | linux-aabded9c3aab5160ae2ca3dd1fa0fa37f3d510e4.tar.xz |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
* 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc:
[POWERPC] Further fixes for the removal of 4level-fixup hack from ppc32
[POWERPC] EEH: log all PCI-X and PCI-E AER registers
[POWERPC] EEH: capture and log pci state on error
[POWERPC] EEH: Split up long error msg
[POWERPC] EEH: log error only after driver notification.
[POWERPC] fsl_soc: Make mac_addr const in fs_enet_of_init().
[POWERPC] Don't use SLAB/SLUB for PTE pages
[POWERPC] Spufs support for 64K LS mappings on 4K kernels
[POWERPC] Add ability to 4K kernel to hash in 64K pages
[POWERPC] Introduce address space "slices"
[POWERPC] Small fixes & cleanups in segment page size demotion
[POWERPC] iSeries: Make HVC_ISERIES the default
[POWERPC] iSeries: suppress build warning in lparmap.c
[POWERPC] Mark pages that don't exist as nosave
[POWERPC] swsusp: Introduce register_nosave_region_late
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/Makefile | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_low_64.S | 5 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 142 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 548 | ||||
-rw-r--r-- | arch/powerpc/mm/init_64.c | 17 | ||||
-rw-r--r-- | arch/powerpc/mm/mem.c | 25 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context_64.c | 10 | ||||
-rw-r--r-- | arch/powerpc/mm/ppc_mmu_32.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/slb.c | 11 | ||||
-rw-r--r-- | arch/powerpc/mm/slb_low.S | 52 | ||||
-rw-r--r-- | arch/powerpc/mm/slice.c | 633 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_32.c | 4 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_64.c | 12 |
13 files changed, 817 insertions, 645 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 38a81967ca07..4f839c6a9768 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -18,4 +18,5 @@ obj-$(CONFIG_40x) += 4xx_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o +obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index e64ce3eec36e..4762ff7c14df 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -615,6 +615,9 @@ htab_pte_insert_failure: li r3,-1 b htab_bail +#endif /* CONFIG_PPC_64K_PAGES */ + +#ifdef CONFIG_PPC_HAS_HASH_64K /***************************************************************************** * * @@ -870,7 +873,7 @@ ht64_pte_insert_failure: b ht64_bail -#endif /* CONFIG_PPC_64K_PAGES */ +#endif /* CONFIG_PPC_HAS_HASH_64K */ /***************************************************************************** diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 9b226fa7006f..028ba4ed03d2 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -51,6 +51,7 @@ #include <asm/cputable.h> #include <asm/abs_addr.h> #include <asm/sections.h> +#include <asm/spu.h> #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -419,7 +420,7 @@ static void __init htab_finish_init(void) extern unsigned int *htab_call_hpte_remove; extern unsigned int *htab_call_hpte_updatepp; -#ifdef CONFIG_PPC_64K_PAGES +#ifdef CONFIG_PPC_HAS_HASH_64K extern unsigned int *ht64_call_hpte_insert1; extern unsigned int *ht64_call_hpte_insert2; extern unsigned int *ht64_call_hpte_remove; @@ -596,22 +597,23 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) * Demote a segment to using 4k pages. * For now this makes the whole process use 4k pages. */ -void demote_segment_4k(struct mm_struct *mm, unsigned long addr) -{ #ifdef CONFIG_PPC_64K_PAGES +static void demote_segment_4k(struct mm_struct *mm, unsigned long addr) +{ if (mm->context.user_psize == MMU_PAGE_4K) return; +#ifdef CONFIG_PPC_MM_SLICES + slice_set_user_psize(mm, MMU_PAGE_4K); +#else /* CONFIG_PPC_MM_SLICES */ mm->context.user_psize = MMU_PAGE_4K; mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[MMU_PAGE_4K].sllp; - get_paca()->context = mm->context; - slb_flush_and_rebolt(); +#endif /* CONFIG_PPC_MM_SLICES */ + #ifdef CONFIG_SPE_BASE spu_flush_all_slbs(mm); #endif -#endif } - -EXPORT_SYMBOL_GPL(demote_segment_4k); +#endif /* CONFIG_PPC_64K_PAGES */ /* Result code is: * 0 - handled @@ -646,7 +648,11 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) return 1; } vsid = get_vsid(mm->context.id, ea); +#ifdef CONFIG_PPC_MM_SLICES + psize = get_slice_psize(mm, ea); +#else psize = mm->context.user_psize; +#endif break; case VMALLOC_REGION_ID: mm = &init_mm; @@ -674,11 +680,22 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) if (user_region && cpus_equal(mm->cpu_vm_mask, tmp)) local = 1; +#ifdef CONFIG_HUGETLB_PAGE /* Handle hugepage regions */ - if (unlikely(in_hugepage_area(mm->context, ea))) { + if (HPAGE_SHIFT && psize == mmu_huge_psize) { DBG_LOW(" -> huge page !\n"); return hash_huge_page(mm, access, ea, vsid, local, trap); } +#endif /* CONFIG_HUGETLB_PAGE */ + +#ifndef CONFIG_PPC_64K_PAGES + /* If we use 4K pages and our psize is not 4K, then we are hitting + * a special driver mapping, we need to align the address before + * we fetch the PTE + */ + if (psize != MMU_PAGE_4K) + ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1); +#endif /* CONFIG_PPC_64K_PAGES */ /* Get PTE and page size from page tables */ ptep = find_linux_pte(pgdir, ea); @@ -702,54 +719,56 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) } /* Do actual hashing */ -#ifndef CONFIG_PPC_64K_PAGES - rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); -#else +#ifdef CONFIG_PPC_64K_PAGES /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */ if (pte_val(*ptep) & _PAGE_4K_PFN) { demote_segment_4k(mm, ea); psize = MMU_PAGE_4K; } - if (mmu_ci_restrictions) { - /* If this PTE is non-cacheable, switch to 4k */ - if (psize == MMU_PAGE_64K && - (pte_val(*ptep) & _PAGE_NO_CACHE)) { - if (user_region) { - demote_segment_4k(mm, ea); - psize = MMU_PAGE_4K; - } else if (ea < VMALLOC_END) { - /* - * some driver did a non-cacheable mapping - * in vmalloc space, so switch vmalloc - * to 4k pages - */ - printk(KERN_ALERT "Reducing vmalloc segment " - "to 4kB pages because of " - "non-cacheable mapping\n"); - psize = mmu_vmalloc_psize = MMU_PAGE_4K; - } + /* If this PTE is non-cacheable and we have restrictions on + * using non cacheable large pages, then we switch to 4k + */ + if (mmu_ci_restrictions && psize == MMU_PAGE_64K && + (pte_val(*ptep) & _PAGE_NO_CACHE)) { + if (user_region) { + demote_segment_4k(mm, ea); + psize = MMU_PAGE_4K; + } else if (ea < VMALLOC_END) { + /* + * some driver did a non-cacheable mapping + * in vmalloc space, so switch vmalloc + * to 4k pages + */ + printk(KERN_ALERT "Reducing vmalloc segment " + "to 4kB pages because of " + "non-cacheable mapping\n"); + psize = mmu_vmalloc_psize = MMU_PAGE_4K; #ifdef CONFIG_SPE_BASE spu_flush_all_slbs(mm); #endif } - if (user_region) { - if (psize != get_paca()->context.user_psize) { - get_paca()->context = mm->context; - slb_flush_and_rebolt(); - } - } else if (get_paca()->vmalloc_sllp != - mmu_psize_defs[mmu_vmalloc_psize].sllp) { - get_paca()->vmalloc_sllp = - mmu_psize_defs[mmu_vmalloc_psize].sllp; + } + if (user_region) { + if (psize != get_paca()->context.user_psize) { + get_paca()->context.user_psize = + mm->context.user_psize; slb_flush_and_rebolt(); } + } else if (get_paca()->vmalloc_sllp != + mmu_psize_defs[mmu_vmalloc_psize].sllp) { + get_paca()->vmalloc_sllp = + mmu_psize_defs[mmu_vmalloc_psize].sllp; + slb_flush_and_rebolt(); } +#endif /* CONFIG_PPC_64K_PAGES */ + +#ifdef CONFIG_PPC_HAS_HASH_64K if (psize == MMU_PAGE_64K) rc = __hash_page_64K(ea, access, vsid, ptep, trap, local); else +#endif /* CONFIG_PPC_HAS_HASH_64K */ rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); -#endif /* CONFIG_PPC_64K_PAGES */ #ifndef CONFIG_PPC_64K_PAGES DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep)); @@ -772,42 +791,55 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, unsigned long flags; int local = 0; - /* We don't want huge pages prefaulted for now - */ - if (unlikely(in_hugepage_area(mm->context, ea))) + BUG_ON(REGION_ID(ea) != USER_REGION_ID); + +#ifdef CONFIG_PPC_MM_SLICES + /* We only prefault standard pages for now */ + if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize)); return; +#endif DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx," " trap=%lx\n", mm, mm->pgd, ea, access, trap); - /* Get PTE, VSID, access mask */ + /* Get Linux PTE if available */ pgdir = mm->pgd; if (pgdir == NULL) return; ptep = find_linux_pte(pgdir, ea); if (!ptep) return; + +#ifdef CONFIG_PPC_64K_PAGES + /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on + * a 64K kernel), then we don't preload, hash_page() will take + * care of it once we actually try to access the page. + * That way we don't have to duplicate all of the logic for segment + * page size demotion here + */ + if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE)) + return; +#endif /* CONFIG_PPC_64K_PAGES */ + + /* Get VSID */ vsid = get_vsid(mm->context.id, ea); - /* Hash it in */ + /* Hash doesn't like irqs */ local_irq_save(flags); + + /* Is that local to this CPU ? */ mask = cpumask_of_cpu(smp_processor_id()); if (cpus_equal(mm->cpu_vm_mask, mask)) local = 1; -#ifndef CONFIG_PPC_64K_PAGES - __hash_page_4K(ea, access, vsid, ptep, trap, local); -#else - if (mmu_ci_restrictions) { - /* If this PTE is non-cacheable, switch to 4k */ - if (mm->context.user_psize == MMU_PAGE_64K && - (pte_val(*ptep) & _PAGE_NO_CACHE)) - demote_segment_4k(mm, ea); - } + + /* Hash it in */ +#ifdef CONFIG_PPC_HAS_HASH_64K if (mm->context.user_psize == MMU_PAGE_64K) __hash_page_64K(ea, access, vsid, ptep, trap, local); else - __hash_page_4K(ea, access, vsid, ptep, trap, local); #endif /* CONFIG_PPC_64K_PAGES */ + __hash_page_4K(ea, access, vsid, ptep, trap, local); + local_irq_restore(flags); } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index fb959264c104..92a1b16fb7e3 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -91,7 +91,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pgd_t *pg; pud_t *pu; - BUG_ON(! in_hugepage_area(mm->context, addr)); + BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); addr &= HPAGE_MASK; @@ -119,7 +119,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) pud_t *pu; hugepd_t *hpdp = NULL; - BUG_ON(! in_hugepage_area(mm->context, addr)); + BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); addr &= HPAGE_MASK; @@ -302,7 +302,7 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb, start = addr; pgd = pgd_offset((*tlb)->mm, addr); do { - BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr)); + BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize); next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; @@ -331,203 +331,13 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, return __pte(old); } -struct slb_flush_info { - struct mm_struct *mm; - u16 newareas; -}; - -static void flush_low_segments(void *parm) -{ - struct slb_flush_info *fi = parm; - unsigned long i; - - BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS); - - if (current->active_mm != fi->mm) - return; - - /* Only need to do anything if this CPU is working in the same - * mm as the one which has changed */ - - /* update the paca copy of the context struct */ - get_paca()->context = current->active_mm->context; - - asm volatile("isync" : : : "memory"); - for (i = 0; i < NUM_LOW_AREAS; i++) { - if (! (fi->newareas & (1U << i))) - continue; - asm volatile("slbie %0" - : : "r" ((i << SID_SHIFT) | SLBIE_C)); - } - asm volatile("isync" : : : "memory"); -} - -static void flush_high_segments(void *parm) -{ - struct slb_flush_info *fi = parm; - unsigned long i, j; - - - BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS); - - if (current->active_mm != fi->mm) - return; - - /* Only need to do anything if this CPU is working in the same - * mm as the one which has changed */ - - /* update the paca copy of the context struct */ - get_paca()->context = current->active_mm->context; - - asm volatile("isync" : : : "memory"); - for (i = 0; i < NUM_HIGH_AREAS; i++) { - if (! (fi->newareas & (1U << i))) - continue; - for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) - asm volatile("slbie %0" - :: "r" (((i << HTLB_AREA_SHIFT) - + (j << SID_SHIFT)) | SLBIE_C)); - } - asm volatile("isync" : : : "memory"); -} - -static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) -{ - unsigned long start = area << SID_SHIFT; - unsigned long end = (area+1) << SID_SHIFT; - struct vm_area_struct *vma; - - BUG_ON(area >= NUM_LOW_AREAS); - - /* Check no VMAs are in the region */ - vma = find_vma(mm, start); - if (vma && (vma->vm_start < end)) - return -EBUSY; - - return 0; -} - -static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) -{ - unsigned long start = area << HTLB_AREA_SHIFT; - unsigned long end = (area+1) << HTLB_AREA_SHIFT; - struct vm_area_struct *vma; - - BUG_ON(area >= NUM_HIGH_AREAS); - - /* Hack, so that each addresses is controlled by exactly one - * of the high or low area bitmaps, the first high area starts - * at 4GB, not 0 */ - if (start == 0) - start = 0x100000000UL; - - /* Check no VMAs are in the region */ - vma = find_vma(mm, start); - if (vma && (vma->vm_start < end)) - return -EBUSY; - - return 0; -} - -static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) -{ - unsigned long i; - struct slb_flush_info fi; - - BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); - BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); - - newareas &= ~(mm->context.low_htlb_areas); - if (! newareas) - return 0; /* The segments we want are already open */ - - for (i = 0; i < NUM_LOW_AREAS; i++) - if ((1 << i) & newareas) - if (prepare_low_area_for_htlb(mm, i) != 0) - return -EBUSY; - - mm->context.low_htlb_areas |= newareas; - - /* the context change must make it to memory before the flush, - * so that further SLB misses do the right thing. */ - mb(); - - fi.mm = mm; - fi.newareas = newareas; - on_each_cpu(flush_low_segments, &fi, 0, 1); - - return 0; -} - -static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) -{ - struct slb_flush_info fi; - unsigned long i; - - BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); - BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) - != NUM_HIGH_AREAS); - - newareas &= ~(mm->context.high_htlb_areas); - if (! newareas) - return 0; /* The areas we want are already open */ - - for (i = 0; i < NUM_HIGH_AREAS; i++) - if ((1 << i) & newareas) - if (prepare_high_area_for_htlb(mm, i) != 0) - return -EBUSY; - - mm->context.high_htlb_areas |= newareas; - - /* the context change must make it to memory before the flush, - * so that further SLB misses do the right thing. */ - mb(); - - fi.mm = mm; - fi.newareas = newareas; - on_each_cpu(flush_high_segments, &fi, 0, 1); - - return 0; -} - -int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff) -{ - int err = 0; - - if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT)) - return -EINVAL; - if (len & ~HPAGE_MASK) - return -EINVAL; - if (addr & ~HPAGE_MASK) - return -EINVAL; - - if (addr < 0x100000000UL) - err = open_low_hpage_areas(current->mm, - LOW_ESID_MASK(addr, len)); - if ((addr + len) > 0x100000000UL) - err = open_high_hpage_areas(current->mm, - HTLB_AREA_MASK(addr, len)); -#ifdef CONFIG_SPE_BASE - spu_flush_all_slbs(current->mm); -#endif - if (err) { - printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" - " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n", - addr, len, - LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len)); - return err; - } - - return 0; -} - struct page * follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { pte_t *ptep; struct page *page; - if (! in_hugepage_area(mm->context, address)) + if (get_slice_psize(mm, address) != mmu_huge_psize) return ERR_PTR(-EINVAL); ptep = huge_pte_offset(mm, address); @@ -551,359 +361,13 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, return NULL; } -/* Because we have an exclusive hugepage region which lies within the - * normal user address space, we have to take special measures to make - * non-huge mmap()s evade the hugepage reserved regions. */ -unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long start_addr; - - if (len > TASK_SIZE) - return -ENOMEM; - - /* handle fixed mapping: prevent overlap with huge pages */ - if (flags & MAP_FIXED) { - if (is_hugepage_only_range(mm, addr, len)) - return -EINVAL; - return addr; - } - - if (addr) { - addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); - if (((TASK_SIZE - len) >= addr) - && (!vma || (addr+len) <= vma->vm_start) - && !is_hugepage_only_range(mm, addr,len)) - return addr; - } - if (len > mm->cached_hole_size) { - start_addr = addr = mm->free_area_cache; - } else { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - } - -full_search: - vma = find_vma(mm, addr); - while (TASK_SIZE - len >= addr) { - BUG_ON(vma && (addr >= vma->vm_end)); - - if (touches_hugepage_low_range(mm, addr, len)) { - addr = ALIGN(addr+1, 1<<SID_SHIFT); - vma = find_vma(mm, addr); - continue; - } - if (touches_hugepage_high_range(mm, addr, len)) { - addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); - vma = find_vma(mm, addr); - continue; - } - if (!vma || addr + len <= vma->vm_start) { - /* - * Remember the place where we stopped the search: - */ - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - addr = vma->vm_end; - vma = vma->vm_next; - } - - /* Make sure we didn't miss any holes */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; -} - -/* - * This mmap-allocator allocates new areas top-down from below the - * stack's low limit (the base): - * - * Because we have an exclusive hugepage region which lies within the - * normal user address space, we have to take special measures to make - * non-huge mmap()s evade the hugepage reserved regions. - */ -unsigned long -arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, - const unsigned long len, const unsigned long pgoff, - const unsigned long flags) -{ - struct vm_area_struct *vma, *prev_vma; - struct mm_struct *mm = current->mm; - unsigned long base = mm->mmap_base, addr = addr0; - unsigned long largest_hole = mm->cached_hole_size; - int first_time = 1; - - /* requested length too big for entire address space */ - if (len > TASK_SIZE) - return -ENOMEM; - - /* handle fixed mapping: prevent overlap with huge pages */ - if (flags & MAP_FIXED) { - if (is_hugepage_only_range(mm, addr, len)) - return -EINVAL; - return addr; - } - - /* dont allow allocations above current base */ - if (mm->free_area_cache > base) - mm->free_area_cache = base; - - /* requesting a specific address */ - if (addr) { - addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start) - && !is_hugepage_only_range(mm, addr,len)) - return addr; - } - - if (len <= largest_hole) { - largest_hole = 0; - mm->free_area_cache = base; - } -try_again: - /* make sure it can fit in the remaining address space */ - if (mm->free_area_cache < len) - goto fail; - - /* either no address requested or cant fit in requested address hole */ - addr = (mm->free_area_cache - len) & PAGE_MASK; - do { -hugepage_recheck: - if (touches_hugepage_low_range(mm, addr, len)) { - addr = (addr & ((~0) << SID_SHIFT)) - len; - goto hugepage_recheck; - } else if (touches_hugepage_high_range(mm, addr, len)) { - addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len; - goto hugepage_recheck; - } - - /* - * Lookup failure means no vma is above this address, - * i.e. return with success: - */ - if (!(vma = find_vma_prev(mm, addr, &prev_vma))) - return addr; - - /* - * new region fits between prev_vma->vm_end and - * vma->vm_start, use it: - */ - if (addr+len <= vma->vm_start && - (!prev_vma || (addr >= prev_vma->vm_end))) { - /* remember the address as a hint for next time */ - mm->cached_hole_size = largest_hole; - return (mm->free_area_cache = addr); - } else { - /* pull free_area_cache down to the first hole */ - if (mm->free_area_cache == vma->vm_end) { - mm->free_area_cache = vma->vm_start; - mm->cached_hole_size = largest_hole; - } - } - - /* remember the largest hole we saw so far */ - if (addr + largest_hole < vma->vm_start) - largest_hole = vma->vm_start - addr; - - /* try just below the current vma->vm_start */ - addr = vma->vm_start-len; - } while (len <= vma->vm_start); - -fail: - /* - * if hint left us with no space for the requested - * mapping then try again: - */ - if (first_time) { - mm->free_area_cache = base; - largest_hole = 0; - first_time = 0; - goto try_again; - } - /* - * A failed mmap() very likely causes application failure, - * so fall back to the bottom-up function here. This scenario - * can happen with large stack limits and large mmap() - * allocations. - */ - mm->free_area_cache = TASK_UNMAPPED_BASE; - mm->cached_hole_size = ~0UL; - addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); - /* - * Restore the topdown base: - */ - mm->free_area_cache = base; - mm->cached_hole_size = ~0UL; - - return addr; -} - -static int htlb_check_hinted_area(unsigned long addr, unsigned long len) -{ - struct vm_area_struct *vma; - - vma = find_vma(current->mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || ((addr + len) <= vma->vm_start))) - return 0; - - return -ENOMEM; -} - -static unsigned long htlb_get_low_area(unsigned long len, u16 segmask) -{ - unsigned long addr = 0; - struct vm_area_struct *vma; - - vma = find_vma(current->mm, addr); - while (addr + len <= 0x100000000UL) { - BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ - - if (! __within_hugepage_low_range(addr, len, segmask)) { - addr = ALIGN(addr+1, 1<<SID_SHIFT); - vma = find_vma(current->mm, addr); - continue; - } - - if (!vma || (addr + len) <= vma->vm_start) - return addr; - addr = ALIGN(vma->vm_end, HPAGE_SIZE); - /* Depending on segmask this might not be a confirmed - * hugepage region, so the ALIGN could have skipped - * some VMAs */ - vma = find_vma(current->mm, addr); - } - - return -ENOMEM; -} - -static unsigned long htlb_get_high_area(unsigned long len, u16 areamask) -{ - unsigned long addr = 0x100000000UL; - struct vm_area_struct *vma; - - vma = find_vma(current->mm, addr); - while (addr + len <= TASK_SIZE_USER64) { - BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ - - if (! __within_hugepage_high_range(addr, len, areamask)) { - addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); - vma = find_vma(current->mm, addr); - continue; - } - - if (!vma || (addr + len) <= vma->vm_start) - return addr; - addr = ALIGN(vma->vm_end, HPAGE_SIZE); - /* Depending on segmask this might not be a confirmed - * hugepage region, so the ALIGN could have skipped - * some VMAs */ - vma = find_vma(current->mm, addr); - } - - return -ENOMEM; -} unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - int lastshift; - u16 areamask, curareas; - - if (HPAGE_SHIFT == 0) - return -EINVAL; - if (len & ~HPAGE_MASK) - return -EINVAL; - if (len > TASK_SIZE) - return -ENOMEM; - - if (!cpu_has_feature(CPU_FTR_16M_PAGE)) - return -EINVAL; - - /* Paranoia, caller should have dealt with this */ - BUG_ON((addr + len) < addr); - - /* Handle MAP_FIXED */ - if (flags & MAP_FIXED) { - if (prepare_hugepage_range(addr, len, pgoff)) - return -EINVAL; - return addr; - } - - if (test_thread_flag(TIF_32BIT)) { - curareas = current->mm->context.low_htlb_areas; - - /* First see if we can use the hint address */ - if (addr && (htlb_check_hinted_area(addr, len) == 0)) { - areamask = LOW_ESID_MASK(addr, len); - if (open_low_hpage_areas(current->mm, areamask) == 0) - return addr; - } - - /* Next see if we can map in the existing low areas */ - addr = htlb_get_low_area(len, curareas); - if (addr != -ENOMEM) - return addr; - - /* Finally go looking for areas to open */ - lastshift = 0; - for (areamask = LOW_ESID_MASK(0x100000000UL-len, len); - ! lastshift; areamask >>=1) { - if (areamask & 1) - lastshift = 1; - - addr = htlb_get_low_area(len, curareas | areamask); - if ((addr != -ENOMEM) - && open_low_hpage_areas(current->mm, areamask) == 0) - return addr; - } - } else { - curareas = current->mm->context.high_htlb_areas; - - /* First see if we can use the hint address */ - /* We discourage 64-bit processes from doing hugepage - * mappings below 4GB (must use MAP_FIXED) */ - if ((addr >= 0x100000000UL) - && (htlb_check_hinted_area(addr, len) == 0)) { - areamask = HTLB_AREA_MASK(addr, len); - if (open_high_hpage_areas(current->mm, areamask) == 0) - return addr; - } - - /* Next see if we can map in the existing high areas */ - addr = htlb_get_high_area(len, curareas); - if (addr != -ENOMEM) - return addr; - - /* Finally go looking for areas to open */ - lastshift = 0; - for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len); - ! lastshift; areamask >>=1) { - if (areamask & 1) - lastshift = 1; - - addr = htlb_get_high_area(len, curareas | areamask); - if ((addr != -ENOMEM) - && open_high_hpage_areas(current->mm, areamask) == 0) - return addr; - } - } - printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" - " enough areas\n"); - return -ENOMEM; + return slice_get_unmapped_area(addr, len, flags, + mmu_huge_psize, 1, 0); } /* diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index fe1fe852181a..7312a265545f 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -146,21 +146,16 @@ static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags) memset(addr, 0, kmem_cache_size(cache)); } -#ifdef CONFIG_PPC_64K_PAGES -static const unsigned int pgtable_cache_size[3] = { - PTE_TABLE_SIZE, PMD_TABLE_SIZE, PGD_TABLE_SIZE -}; -static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { - "pte_pmd_cache", "pmd_cache", "pgd_cache", -}; -#else static const unsigned int pgtable_cache_size[2] = { - PTE_TABLE_SIZE, PMD_TABLE_SIZE + PGD_TABLE_SIZE, PMD_TABLE_SIZE }; static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { - "pgd_pte_cache", "pud_pmd_cache", -}; +#ifdef CONFIG_PPC_64K_PAGES + "pgd_cache", "pmd_cache", +#else + "pgd_cache", "pud_pmd_cache", #endif /* CONFIG_PPC_64K_PAGES */ +}; #ifdef CONFIG_HUGETLB_PAGE /* Hugepages need one extra cache, initialized in hugetlbpage.c. We diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1a6e08f3298f..246eeea40ece 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -31,6 +31,7 @@ #include <linux/highmem.h> #include <linux/initrd.h> #include <linux/pagemap.h> +#include <linux/suspend.h> #include <asm/pgalloc.h> #include <asm/prom.h> @@ -276,6 +277,28 @@ void __init do_init_bootmem(void) init_bootmem_done = 1; } +/* mark pages that don't exist as nosave */ +static int __init mark_nonram_nosave(void) +{ + unsigned long lmb_next_region_start_pfn, + lmb_region_max_pfn; + int i; + + for (i = 0; i < lmb.memory.cnt - 1; i++) { + lmb_region_max_pfn = + (lmb.memory.region[i].base >> PAGE_SHIFT) + + (lmb.memory.region[i].size >> PAGE_SHIFT); + lmb_next_region_start_pfn = + lmb.memory.region[i+1].base >> PAGE_SHIFT; + + if (lmb_region_max_pfn < lmb_next_region_start_pfn) + register_nosave_region(lmb_region_max_pfn, + lmb_next_region_start_pfn); + } + + return 0; +} + /* * paging_init() sets up the page tables - in fact we've already done this. */ @@ -307,6 +330,8 @@ void __init paging_init(void) max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; #endif free_area_init_nodes(max_zone_pfns); + + mark_nonram_nosave(); } #endif /* ! CONFIG_NEED_MULTIPLE_NODES */ diff --git a/arch/powerpc/mm/mmu_context_64.c b/arch/powerpc/mm/mmu_context_64.c index 90a06ac02d5e..7a78cdc0515a 100644 --- a/arch/powerpc/mm/mmu_context_64.c +++ b/arch/powerpc/mm/mmu_context_64.c @@ -28,6 +28,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { int index; int err; + int new_context = (mm->context.id == 0); again: if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) @@ -50,9 +51,18 @@ again: } mm->context.id = index; +#ifdef CONFIG_PPC_MM_SLICES + /* The old code would re-promote on fork, we don't do that + * when using slices as it could cause problem promoting slices + * that have been forced down to 4K + */ + if (new_context) + slice_set_user_psize(mm, mmu_virtual_psize); +#else mm->context.user_psize = mmu_virtual_psize; mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[mmu_virtual_psize].sllp; +#endif return 0; } diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 05066674a7a0..ec1421a20aaa 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -185,7 +185,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, if (Hash == 0) return; - pmd = pmd_offset(pgd_offset(mm, ea), ea); + pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea); if (!pmd_none(*pmd)) add_hash_page(mm->context.id, ea, pmd_val(*pmd)); } diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 224e960650a0..304375a73574 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -198,12 +198,6 @@ void slb_initialize(void) static int slb_encoding_inited; extern unsigned int *slb_miss_kernel_load_linear; extern unsigned int *slb_miss_kernel_load_io; -#ifdef CONFIG_HUGETLB_PAGE - extern unsigned int *slb_miss_user_load_huge; - unsigned long huge_llp; - - huge_llp = mmu_psize_defs[mmu_huge_psize].sllp; -#endif /* Prepare our SLB miss handler based on our page size */ linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; @@ -220,11 +214,6 @@ void slb_initialize(void) DBG("SLB: linear LLP = %04x\n", linear_llp); DBG("SLB: io LLP = %04x\n", io_llp); -#ifdef CONFIG_HUGETLB_PAGE - patch_slb_encoding(slb_miss_user_load_huge, - SLB_VSID_USER | huge_llp); - DBG("SLB: huge LLP = %04x\n", huge_llp); -#endif } get_paca()->stab_rr = SLB_NUM_BOLTED; diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index b10e4707d7c1..cd1a93d4948c 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -82,31 +82,45 @@ _GLOBAL(slb_miss_kernel_load_io) srdi. r9,r10,USER_ESID_BITS bne- 8f /* invalid ea bits set */ - /* Figure out if the segment contains huge pages */ -#ifdef CONFIG_HUGETLB_PAGE -BEGIN_FTR_SECTION - b 1f -END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE) + + /* when using slices, we extract the psize off the slice bitmaps + * and then we need to get the sllp encoding off the mmu_psize_defs + * array. + * + * XXX This is a bit inefficient especially for the normal case, + * so we should try to implement a fast path for the standard page + * size using the old sllp value so we avoid the array. We cannot + * really do dynamic patching unfortunately as processes might flip + * between 4k and 64k standard page size + */ +#ifdef CONFIG_PPC_MM_SLICES cmpldi r10,16 - lhz r9,PACALOWHTLBAREAS(r13) - mr r11,r10 + /* Get the slice index * 4 in r11 and matching slice size mask in r9 */ + ld r9,PACALOWSLICESPSIZE(r13) + sldi r11,r10,2 blt 5f + ld r9,PACAHIGHSLICEPSIZE(r13) + srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2) + andi. r11,r11,0x3c - lhz r9,PACAHIGHHTLBAREAS(r13) - srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT) - -5: srd r9,r9,r11 - andi. r9,r9,1 - beq 1f -_GLOBAL(slb_miss_user_load_huge) - li r11,0 - b 2f -1: -#endif /* CONFIG_HUGETLB_PAGE */ +5: /* Extract the psize and multiply to get an array offset */ + srd r9,r9,r11 + andi. r9,r9,0xf + mulli r9,r9,MMUPSIZEDEFSIZE + /* Now get to the array and obtain the sllp + */ + ld r11,PACATOC(r13) + ld r11,mmu_psize_defs@got(r11) + add r11,r11,r9 + ld r11,MMUPSIZESLLP(r11) + ori r11,r11,SLB_VSID_USER +#else + /* paca context sllp already contains the SLB_VSID_USER bits */ lhz r11,PACACONTEXTSLLP(r13) -2: +#endif /* CONFIG_PPC_MM_SLICES */ + ld r9,PACACONTEXTID(r13) rldimi r10,r9,USER_ESID_BITS,0 b slb_finish_load diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c new file mode 100644 index 000000000000..f833dba2a028 --- /dev/null +++ b/arch/powerpc/mm/slice.c @@ -0,0 +1,633 @@ +/* + * address space "slices" (meta-segments) support + * + * Copyright (C) 2007 Benjamin Herrenschmidt, IBM Corporation. + * + * Based on hugetlb implementation + * + * Copyright (C) 2003 David Gibson, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/err.h> +#include <linux/spinlock.h> +#include <linux/module.h> +#include <asm/mman.h> +#include <asm/mmu.h> +#include <asm/spu.h> + +static spinlock_t slice_convert_lock = SPIN_LOCK_UNLOCKED; + + +#ifdef DEBUG +int _slice_debug = 1; + +static void slice_print_mask(const char *label, struct slice_mask mask) +{ + char *p, buf[16 + 3 + 16 + 1]; + int i; + + if (!_slice_debug) + return; + p = buf; + for (i = 0; i < SLICE_NUM_LOW; i++) + *(p++) = (mask.low_slices & (1 << i)) ? '1' : '0'; + *(p++) = ' '; + *(p++) = '-'; + *(p++) = ' '; + for (i = 0; i < SLICE_NUM_HIGH; i++) + *(p++) = (mask.high_slices & (1 << i)) ? '1' : '0'; + *(p++) = 0; + + printk(KERN_DEBUG "%s:%s\n", label, buf); +} + +#define slice_dbg(fmt...) do { if (_slice_debug) pr_debug(fmt); } while(0) + +#else + +static void slice_print_mask(const char *label, struct slice_mask mask) {} +#define slice_dbg(fmt...) + +#endif + +static struct slice_mask slice_range_to_mask(unsigned long start, + unsigned long len) +{ + unsigned long end = start + len - 1; + struct slice_mask ret = { 0, 0 }; + + if (start < SLICE_LOW_TOP) { + unsigned long mend = min(end, SLICE_LOW_TOP); + unsigned long mstart = min(start, SLICE_LOW_TOP); + + ret.low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) + - (1u << GET_LOW_SLICE_INDEX(mstart)); + } + + if ((start + len) > SLICE_LOW_TOP) + ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1)) + - (1u << GET_HIGH_SLICE_INDEX(start)); + + return ret; +} + +static int slice_area_is_free(struct mm_struct *mm, unsigned long addr, + unsigned long len) +{ + struct vm_area_struct *vma; + + if ((mm->task_size - len) < addr) + return 0; + vma = find_vma(mm, addr); + return (!vma || (addr + len) <= vma->vm_start); +} + +static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice) +{ + return !slice_area_is_free(mm, slice << SLICE_LOW_SHIFT, + 1ul << SLICE_LOW_SHIFT); +} + +static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice) +{ + unsigned long start = slice << SLICE_HIGH_SHIFT; + unsigned long end = start + (1ul << SLICE_HIGH_SHIFT); + + /* Hack, so that each addresses is controlled by exactly one + * of the high or low area bitmaps, the first high area starts + * at 4GB, not 0 */ + if (start == 0) + start = SLICE_LOW_TOP; + + return !slice_area_is_free(mm, start, end - start); +} + +static struct slice_mask slice_mask_for_free(struct mm_struct *mm) +{ + struct slice_mask ret = { 0, 0 }; + unsigned long i; + + for (i = 0; i < SLICE_NUM_LOW; i++) + if (!slice_low_has_vma(mm, i)) + ret.low_slices |= 1u << i; + + if (mm->task_size <= SLICE_LOW_TOP) + return ret; + + for (i = 0; i < SLICE_NUM_HIGH; i++) + if (!slice_high_has_vma(mm, i)) + ret.high_slices |= 1u << i; + + return ret; +} + +static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize) +{ + struct slice_mask ret = { 0, 0 }; + unsigned long i; + u64 psizes; + + psizes = mm->context.low_slices_psize; + for (i = 0; i < SLICE_NUM_LOW; i++) + if (((psizes >> (i * 4)) & 0xf) == psize) + ret.low_slices |= 1u << i; + + psizes = mm->context.high_slices_psize; + for (i = 0; i < SLICE_NUM_HIGH; i++) + if (((psizes >> (i * 4)) & 0xf) == psize) + ret.high_slices |= 1u << i; + + return ret; +} + +static int slice_check_fit(struct slice_mask mask, struct slice_mask available) +{ + return (mask.low_slices & available.low_slices) == mask.low_slices && + (mask.high_slices & available.high_slices) == mask.high_slices; +} + +static void slice_flush_segments(void *parm) +{ + struct mm_struct *mm = parm; + unsigned long flags; + + if (mm != current->active_mm) + return; + + /* update the paca copy of the context struct */ + get_paca()->context = current->active_mm->context; + + local_irq_save(flags); + slb_flush_and_rebolt(); + local_irq_restore(flags); +} + +static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize) +{ + /* Write the new slice psize bits */ + u64 lpsizes, hpsizes; + unsigned long i, flags; + + slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize); + slice_print_mask(" mask", mask); + + /* We need to use a spinlock here to protect against + * concurrent 64k -> 4k demotion ... + */ + spin_lock_irqsave(&slice_convert_lock, flags); + + lpsizes = mm->context.low_slices_psize; + for (i = 0; i < SLICE_NUM_LOW; i++) + if (mask.low_slices & (1u << i)) + lpsizes = (lpsizes & ~(0xful << (i * 4))) | + (((unsigned long)psize) << (i * 4)); + + hpsizes = mm->context.high_slices_psize; + for (i = 0; i < SLICE_NUM_HIGH; i++) + if (mask.high_slices & (1u << i)) + hpsizes = (hpsizes & ~(0xful << (i * 4))) | + (((unsigned long)psize) << (i * 4)); + + mm->context.low_slices_psize = lpsizes; + mm->context.high_slices_psize = hpsizes; + + slice_dbg(" lsps=%lx, hsps=%lx\n", + mm->context.low_slices_psize, + mm->context.high_slices_psize); + + spin_unlock_irqrestore(&slice_convert_lock, flags); + mb(); + + /* XXX this is sub-optimal but will do for now */ + on_each_cpu(slice_flush_segments, mm, 0, 1); +#ifdef CONFIG_SPU_BASE + spu_flush_all_slbs(mm); +#endif +} + +static unsigned long slice_find_area_bottomup(struct mm_struct *mm, + unsigned long len, + struct slice_mask available, + int psize, int use_cache) +{ + struct vm_area_struct *vma; + unsigned long start_addr, addr; + struct slice_mask mask; + int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); + + if (use_cache) { + if (len <= mm->cached_hole_size) { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + } else + start_addr = addr = mm->free_area_cache; + } else + start_addr = addr = TASK_UNMAPPED_BASE; + +full_search: + for (;;) { + addr = _ALIGN_UP(addr, 1ul << pshift); + if ((TASK_SIZE - len) < addr) + break; + vma = find_vma(mm, addr); + BUG_ON(vma && (addr >= vma->vm_end)); + + mask = slice_range_to_mask(addr, len); + if (!slice_check_fit(mask, available)) { + if (addr < SLICE_LOW_TOP) + addr = _ALIGN_UP(addr + 1, 1ul << SLICE_LOW_SHIFT); + else + addr = _ALIGN_UP(addr + 1, 1ul << SLICE_HIGH_SHIFT); + continue; + } + if (!vma || addr + len <= vma->vm_start) { + /* + * Remember the place where we stopped the search: + */ + if (use_cache) + mm->free_area_cache = addr + len; + return addr; + } + if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + addr = vma->vm_end; + } + + /* Make sure we didn't miss any holes */ + if (use_cache && start_addr != TASK_UNMAPPED_BASE) { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + goto full_search; + } + return -ENOMEM; +} + +static unsigned long slice_find_area_topdown(struct mm_struct *mm, + unsigned long len, + struct slice_mask available, + int psize, int use_cache) +{ + struct vm_area_struct *vma; + unsigned long addr; + struct slice_mask mask; + int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); + + /* check if free_area_cache is useful for us */ + if (use_cache) { + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = mm->mmap_base; + } + + /* either no address requested or can't fit in requested + * address hole + */ + addr = mm->free_area_cache; + + /* make sure it can fit in the remaining address space */ + if (addr > len) { + addr = _ALIGN_DOWN(addr - len, 1ul << pshift); + mask = slice_range_to_mask(addr, len); + if (slice_check_fit(mask, available) && + slice_area_is_free(mm, addr, len)) + /* remember the address as a hint for + * next time + */ + return (mm->free_area_cache = addr); + } + } + + addr = mm->mmap_base; + while (addr > len) { + /* Go down by chunk size */ + addr = _ALIGN_DOWN(addr - len, 1ul << pshift); + + /* Check for hit with different page size */ + mask = slice_range_to_mask(addr, len); + if (!slice_check_fit(mask, available)) { + if (addr < SLICE_LOW_TOP) + addr = _ALIGN_DOWN(addr, 1ul << SLICE_LOW_SHIFT); + else if (addr < (1ul << SLICE_HIGH_SHIFT)) + addr = SLICE_LOW_TOP; + else + addr = _ALIGN_DOWN(addr, 1ul << SLICE_HIGH_SHIFT); + continue; + } + + /* + * Lookup failure means no vma is above this address, + * else if new region fits below vma->vm_start, + * return with success: + */ + vma = find_vma(mm, addr); + if (!vma || (addr + len) <= vma->vm_start) { + /* remember the address as a hint for next time */ + if (use_cache) + mm->free_area_cache = addr; + return addr; + } + + /* remember the largest hole we saw so far */ + if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = vma->vm_start; + } + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + addr = slice_find_area_bottomup(mm, len, available, psize, 0); + + /* + * Restore the topdown base: + */ + if (use_cache) { + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + } + + return addr; +} + + +static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, + struct slice_mask mask, int psize, + int topdown, int use_cache) +{ + if (topdown) + return slice_find_area_topdown(mm, len, mask, psize, use_cache); + else + return slice_find_area_bottomup(mm, len, mask, psize, use_cache); +} + +unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, + unsigned long flags, unsigned int psize, + int topdown, int use_cache) +{ + struct slice_mask mask; + struct slice_mask good_mask; + struct slice_mask potential_mask = {0,0} /* silence stupid warning */; + int pmask_set = 0; + int fixed = (flags & MAP_FIXED); + int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); + struct mm_struct *mm = current->mm; + + /* Sanity checks */ + BUG_ON(mm->task_size == 0); + + slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize); + slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d, use_cache=%d\n", + addr, len, flags, topdown, use_cache); + + if (len > mm->task_size) + return -ENOMEM; + if (fixed && (addr & ((1ul << pshift) - 1))) + return -EINVAL; + if (fixed && addr > (mm->task_size - len)) + return -EINVAL; + + /* If hint, make sure it matches our alignment restrictions */ + if (!fixed && addr) { + addr = _ALIGN_UP(addr, 1ul << pshift); + slice_dbg(" aligned addr=%lx\n", addr); + } + + /* First makeup a "good" mask of slices that have the right size + * already + */ + good_mask = slice_mask_for_size(mm, psize); + slice_print_mask(" good_mask", good_mask); + + /* First check hint if it's valid or if we have MAP_FIXED */ + if ((addr != 0 || fixed) && (mm->task_size - len) >= addr) { + + /* Don't bother with hint if it overlaps a VMA */ + if (!fixed && !slice_area_is_free(mm, addr, len)) + goto search; + + /* Build a mask for the requested range */ + mask = slice_range_to_mask(addr, len); + slice_print_mask(" mask", mask); + + /* Check if we fit in the good mask. If we do, we just return, + * nothing else to do + */ + if (slice_check_fit(mask, good_mask)) { + slice_dbg(" fits good !\n"); + return addr; + } + + /* We don't fit in the good mask, check what other slices are + * empty and thus can be converted + */ + potential_mask = slice_mask_for_free(mm); + potential_mask.low_slices |= good_mask.low_slices; + potential_mask.high_slices |= good_mask.high_slices; + pmask_set = 1; + slice_print_mask(" potential", potential_mask); + if (slice_check_fit(mask, potential_mask)) { + slice_dbg(" fits potential !\n"); + goto convert; + } + } + + /* If we have MAP_FIXED and failed the above step, then error out */ + if (fixed) + return -EBUSY; + + search: + slice_dbg(" search...\n"); + + /* Now let's see if we can find something in the existing slices + * for that size + */ + addr = slice_find_area(mm, len, good_mask, psize, topdown, use_cache); + if (addr != -ENOMEM) { + /* Found within the good mask, we don't have to setup, + * we thus return directly + */ + slice_dbg(" found area at 0x%lx\n", addr); + return addr; + } + + /* Won't fit, check what can be converted */ + if (!pmask_set) { + potential_mask = slice_mask_for_free(mm); + potential_mask.low_slices |= good_mask.low_slices; + potential_mask.high_slices |= good_mask.high_slices; + pmask_set = 1; + slice_print_mask(" potential", potential_mask); + } + + /* Now let's see if we can find something in the existing slices + * for that size + */ + addr = slice_find_area(mm, len, potential_mask, psize, topdown, + use_cache); + if (addr == -ENOMEM) + return -ENOMEM; + + mask = slice_range_to_mask(addr, len); + slice_dbg(" found potential area at 0x%lx\n", addr); + slice_print_mask(" mask", mask); + + convert: + slice_convert(mm, mask, psize); + return addr; + +} +EXPORT_SYMBOL_GPL(slice_get_unmapped_area); + +unsigned long arch_get_unmapped_area(struct file *filp, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags) +{ + return slice_get_unmapped_area(addr, len, flags, + current->mm->context.user_psize, + 0, 1); +} + +unsigned long arch_get_unmapped_area_topdown(struct file *filp, + const unsigned long addr0, + const unsigned long len, + const unsigned long pgoff, + const unsigned long flags) +{ + return slice_get_unmapped_area(addr0, len, flags, + current->mm->context.user_psize, + 1, 1); +} + +unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) +{ + u64 psizes; + int index; + + if (addr < SLICE_LOW_TOP) { + psizes = mm->context.low_slices_psize; + index = GET_LOW_SLICE_INDEX(addr); + } else { + psizes = mm->context.high_slices_psize; + index = GET_HIGH_SLICE_INDEX(addr); + } + + return (psizes >> (index * 4)) & 0xf; +} +EXPORT_SYMBOL_GPL(get_slice_psize); + +/* + * This is called by hash_page when it needs to do a lazy conversion of + * an address space from real 64K pages to combo 4K pages (typically + * when hitting a non cacheable mapping on a processor or hypervisor + * that won't allow them for 64K pages). + * + * This is also called in init_new_context() to change back the user + * psize from whatever the parent context had it set to + * + * This function will only change the content of the {low,high)_slice_psize + * masks, it will not flush SLBs as this shall be handled lazily by the + * caller. + */ +void slice_set_user_psize(struct mm_struct *mm, unsigned int psize) +{ + unsigned long flags, lpsizes, hpsizes; + unsigned int old_psize; + int i; + + slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize); + + spin_lock_irqsave(&slice_convert_lock, flags); + + old_psize = mm->context.user_psize; + slice_dbg(" old_psize=%d\n", old_psize); + if (old_psize == psize) + goto bail; + + mm->context.user_psize = psize; + wmb(); + + lpsizes = mm->context.low_slices_psize; + for (i = 0; i < SLICE_NUM_LOW; i++) + if (((lpsizes >> (i * 4)) & 0xf) == old_psize) + lpsizes = (lpsizes & ~(0xful << (i * 4))) | + (((unsigned long)psize) << (i * 4)); + + hpsizes = mm->context.high_slices_psize; + for (i = 0; i < SLICE_NUM_HIGH; i++) + if (((hpsizes >> (i * 4)) & 0xf) == old_psize) + hpsizes = (hpsizes & ~(0xful << (i * 4))) | + (((unsigned long)psize) << (i * 4)); + + mm->context.low_slices_psize = lpsizes; + mm->context.high_slices_psize = hpsizes; + + slice_dbg(" lsps=%lx, hsps=%lx\n", + mm->context.low_slices_psize, + mm->context.high_slices_psize); + + bail: + spin_unlock_irqrestore(&slice_convert_lock, flags); +} + +/* + * is_hugepage_only_range() is used by generic code to verify wether + * a normal mmap mapping (non hugetlbfs) is valid on a given area. + * + * until the generic code provides a more generic hook and/or starts + * calling arch get_unmapped_area for MAP_FIXED (which our implementation + * here knows how to deal with), we hijack it to keep standard mappings + * away from us. + * + * because of that generic code limitation, MAP_FIXED mapping cannot + * "convert" back a slice with no VMAs to the standard page size, only + * get_unmapped_area() can. It would be possible to fix it here but I + * prefer working on fixing the generic code instead. + * + * WARNING: This will not work if hugetlbfs isn't enabled since the + * generic code will redefine that function as 0 in that. This is ok + * for now as we only use slices with hugetlbfs enabled. This should + * be fixed as the generic code gets fixed. + */ +int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, + unsigned long len) +{ + struct slice_mask mask, available; + + mask = slice_range_to_mask(addr, len); + available = slice_mask_for_size(mm, mm->context.user_psize); + +#if 0 /* too verbose */ + slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n", + mm, addr, len); + slice_print_mask(" mask", mask); + slice_print_mask(" available", available); +#endif + return !slice_check_fit(mask, available); +} + diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_32.c index 925ff70be8ba..6a69417cbc0e 100644 --- a/arch/powerpc/mm/tlb_32.c +++ b/arch/powerpc/mm/tlb_32.c @@ -111,7 +111,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start, if (start >= end) return; end = (end - 1) | ~PAGE_MASK; - pmd = pmd_offset(pgd_offset(mm, start), start); + pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start); for (;;) { pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; if (pmd_end > end) @@ -169,7 +169,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) return; } mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; - pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr); + pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr); if (!pmd_none(*pmd)) flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); FINISH_FLUSH; diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c index fd8d08c325eb..2bfc4d7e1aa2 100644 --- a/arch/powerpc/mm/tlb_64.c +++ b/arch/powerpc/mm/tlb_64.c @@ -143,16 +143,22 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, */ addr &= PAGE_MASK; - /* Get page size (maybe move back to caller) */ + /* Get page size (maybe move back to caller). + * + * NOTE: when using special 64K mappings in 4K environment like + * for SPEs, we obtain the page size from the slice, which thus + * must still exist (and thus the VMA not reused) at the time + * of this call + */ if (huge) { #ifdef CONFIG_HUGETLB_PAGE psize = mmu_huge_psize; #else BUG(); - psize = pte_pagesize_index(pte); /* shutup gcc */ + psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ #endif } else - psize = pte_pagesize_index(pte); + psize = pte_pagesize_index(mm, addr, pte); /* Build full vaddr */ if (!is_kernel_addr(addr)) { |