diff options
author | Michal Marek <mmarek@suse.cz> | 2010-08-04 15:59:13 +0400 |
---|---|---|
committer | Michal Marek <mmarek@suse.cz> | 2010-08-04 15:59:13 +0400 |
commit | 772320e84588dcbe1600ffb83e5f328f2209ac2a (patch) | |
tree | a7de21b79340aeaa17c58126f6b801b82c77b53a /arch/powerpc/mm | |
parent | 1ce53adf13a54375d2a5c7cdbe341b2558389615 (diff) | |
parent | 9fe6206f400646a2322096b56c59891d530e8d51 (diff) | |
download | linux-772320e84588dcbe1600ffb83e5f328f2209ac2a.tar.xz |
Merge commit 'v2.6.35' into kbuild/kbuild
Conflicts:
arch/powerpc/Makefile
Diffstat (limited to 'arch/powerpc/mm')
28 files changed, 671 insertions, 274 deletions
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c index 08dfa8e6d86f..1dc2fa5ce1bd 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/40x_mmu.c @@ -84,8 +84,8 @@ void __init MMU_init_hw(void) * vectors and the kernel live in real-mode. */ - mtspr(SPRN_DCCR, 0xF0000000); /* 512 MB of data space at 0x0. */ - mtspr(SPRN_ICCR, 0xF0000000); /* 512 MB of instr. space at 0x0. */ + mtspr(SPRN_DCCR, 0xFFFF0000); /* 2GByte of data space at 0x0. */ + mtspr(SPRN_ICCR, 0xFFFF0000); /* 2GByte of instr. space at 0x0. */ } #define LARGE_PAGE_SIZE_16M (1<<24) @@ -135,7 +135,7 @@ unsigned long __init mmu_mapin_ram(unsigned long top) /* If the size of RAM is not an exact power of two, we may not * have covered RAM in its entirety with 16 and 4 MiB * pages. Consequently, restrict the top end of RAM currently - * allocable so that calls to the LMB to allocate PTEs for "tail" + * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail" * coverage with normal-sized pages (or other reasons) do not * attempt to allocate outside the allowed range. */ diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index 3986264b0993..d8c6efb32bc6 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -38,7 +38,9 @@ unsigned int tlb_44x_index; /* = 0 */ unsigned int tlb_44x_hwater = PPC44x_TLB_SIZE - 1 - PPC44x_EARLY_TLBS; int icache_44x_need_flush; -static void __init ppc44x_update_tlb_hwater(void) +unsigned long tlb_47x_boltmap[1024/8]; + +static void __cpuinit ppc44x_update_tlb_hwater(void) { extern unsigned int tlb_44x_patch_hwater_D[]; extern unsigned int tlb_44x_patch_hwater_I[]; @@ -59,7 +61,7 @@ static void __init ppc44x_update_tlb_hwater(void) } /* - * "Pins" a 256MB TLB entry in AS0 for kernel lowmem + * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 44x type MMU */ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) { @@ -67,12 +69,18 @@ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) ppc44x_update_tlb_hwater(); + mtspr(SPRN_MMUCR, 0); + __asm__ __volatile__( "tlbwe %2,%3,%4\n" "tlbwe %1,%3,%5\n" "tlbwe %0,%3,%6\n" : +#ifdef CONFIG_PPC47x + : "r" (PPC47x_TLB2_S_RWX), +#else : "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G), +#endif "r" (phys), "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M), "r" (entry), @@ -81,8 +89,93 @@ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) "i" (PPC44x_TLB_ATTRIB)); } +static int __init ppc47x_find_free_bolted(void) +{ + unsigned int mmube0 = mfspr(SPRN_MMUBE0); + unsigned int mmube1 = mfspr(SPRN_MMUBE1); + + if (!(mmube0 & MMUBE0_VBE0)) + return 0; + if (!(mmube0 & MMUBE0_VBE1)) + return 1; + if (!(mmube0 & MMUBE0_VBE2)) + return 2; + if (!(mmube1 & MMUBE1_VBE3)) + return 3; + if (!(mmube1 & MMUBE1_VBE4)) + return 4; + if (!(mmube1 & MMUBE1_VBE5)) + return 5; + return -1; +} + +static void __init ppc47x_update_boltmap(void) +{ + unsigned int mmube0 = mfspr(SPRN_MMUBE0); + unsigned int mmube1 = mfspr(SPRN_MMUBE1); + + if (mmube0 & MMUBE0_VBE0) + __set_bit((mmube0 >> MMUBE0_IBE0_SHIFT) & 0xff, + tlb_47x_boltmap); + if (mmube0 & MMUBE0_VBE1) + __set_bit((mmube0 >> MMUBE0_IBE1_SHIFT) & 0xff, + tlb_47x_boltmap); + if (mmube0 & MMUBE0_VBE2) + __set_bit((mmube0 >> MMUBE0_IBE2_SHIFT) & 0xff, + tlb_47x_boltmap); + if (mmube1 & MMUBE1_VBE3) + __set_bit((mmube1 >> MMUBE1_IBE3_SHIFT) & 0xff, + tlb_47x_boltmap); + if (mmube1 & MMUBE1_VBE4) + __set_bit((mmube1 >> MMUBE1_IBE4_SHIFT) & 0xff, + tlb_47x_boltmap); + if (mmube1 & MMUBE1_VBE5) + __set_bit((mmube1 >> MMUBE1_IBE5_SHIFT) & 0xff, + tlb_47x_boltmap); +} + +/* + * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU + */ +static void __cpuinit ppc47x_pin_tlb(unsigned int virt, unsigned int phys) +{ + unsigned int rA; + int bolted; + + /* Base rA is HW way select, way 0, bolted bit set */ + rA = 0x88000000; + + /* Look for a bolted entry slot */ + bolted = ppc47x_find_free_bolted(); + BUG_ON(bolted < 0); + + /* Insert bolted slot number */ + rA |= bolted << 24; + + pr_debug("256M TLB entry for 0x%08x->0x%08x in bolt slot %d\n", + virt, phys, bolted); + + mtspr(SPRN_MMUCR, 0); + + __asm__ __volatile__( + "tlbwe %2,%3,0\n" + "tlbwe %1,%3,1\n" + "tlbwe %0,%3,2\n" + : + : "r" (PPC47x_TLB2_SW | PPC47x_TLB2_SR | + PPC47x_TLB2_SX +#ifdef CONFIG_SMP + | PPC47x_TLB2_M +#endif + ), + "r" (phys), + "r" (virt | PPC47x_TLB0_VALID | PPC47x_TLB0_256M), + "r" (rA)); +} + void __init MMU_init_hw(void) { + /* This is not useful on 47x but won't hurt either */ ppc44x_update_tlb_hwater(); flush_instruction_cache(); @@ -95,8 +188,51 @@ unsigned long __init mmu_mapin_ram(unsigned long top) /* Pin in enough TLBs to cover any lowmem not covered by the * initial 256M mapping established in head_44x.S */ for (addr = PPC_PIN_SIZE; addr < lowmem_end_addr; - addr += PPC_PIN_SIZE) - ppc44x_pin_tlb(addr + PAGE_OFFSET, addr); + addr += PPC_PIN_SIZE) { + if (mmu_has_feature(MMU_FTR_TYPE_47x)) + ppc47x_pin_tlb(addr + PAGE_OFFSET, addr); + else + ppc44x_pin_tlb(addr + PAGE_OFFSET, addr); + } + if (mmu_has_feature(MMU_FTR_TYPE_47x)) { + ppc47x_update_boltmap(); +#ifdef DEBUG + { + int i; + + printk(KERN_DEBUG "bolted entries: "); + for (i = 0; i < 255; i++) { + if (test_bit(i, tlb_47x_boltmap)) + printk("%d ", i); + } + printk("\n"); + } +#endif /* DEBUG */ + } return total_lowmem; } + +#ifdef CONFIG_SMP +void __cpuinit mmu_init_secondary(int cpu) +{ + unsigned long addr; + + /* Pin in enough TLBs to cover any lowmem not covered by the + * initial 256M mapping established in head_44x.S + * + * WARNING: This is called with only the first 256M of the + * linear mapping in the TLB and we can't take faults yet + * so beware of what this code uses. It runs off a temporary + * stack. current (r2) isn't initialized, smp_processor_id() + * will not work, current thread info isn't accessible, ... + */ + for (addr = PPC_PIN_SIZE; addr < lowmem_end_addr; + addr += PPC_PIN_SIZE) { + if (mmu_has_feature(MMU_FTR_TYPE_47x)) + ppc47x_pin_tlb(addr + PAGE_OFFSET, addr); + else + ppc44x_pin_tlb(addr + PAGE_OFFSET, addr); + } +} +#endif /* CONFIG_SMP */ diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index 36692f5c9a76..757c0bed9a91 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -23,6 +23,7 @@ */ #include <linux/sched.h> +#include <linux/slab.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/string.h> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 26fb6b990b0a..1bd712c33ce2 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -151,13 +151,14 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, if (!user_mode(regs) && (address >= TASK_SIZE)) return SIGSEGV; -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) +#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \ + defined(CONFIG_PPC_BOOK3S_64)) if (error_code & DSISR_DABRMATCH) { /* DABR match */ do_dabr(regs, address, error_code); return 0; } -#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ +#endif if (in_atomic() || mm == NULL) { if (!user_mode(regs)) @@ -307,7 +308,6 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - survive: ret = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0); if (unlikely(ret & VM_FAULT_ERROR)) { if (ret & VM_FAULT_OOM) @@ -359,15 +359,10 @@ bad_area_nosemaphore: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_global_init(current)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } - printk("VM: killing process %s\n", current->comm); - if (user_mode(regs)) - do_group_exit(SIGKILL); - return SIGKILL; + if (!user_mode(regs)) + return SIGKILL; + pagefault_out_of_memory(); + return 0; do_sigbus: up_read(&mm->mmap_sem); diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index c5394728bf2e..cdc7526e9c93 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -2,7 +2,7 @@ * Modifications by Kumar Gala (galak@kernel.crashing.org) to support * E500 Book E processors. * - * Copyright 2004 Freescale Semiconductor, Inc + * Copyright 2004,2010 Freescale Semiconductor, Inc. * * This file contains the routines for initializing the MMU * on the 4xx series of chips. @@ -56,19 +56,13 @@ unsigned int tlbcam_index; -#define NUM_TLBCAMS (64) #if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS) #error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS" #endif -struct tlbcam { - u32 MAS0; - u32 MAS1; - unsigned long MAS2; - u32 MAS3; - u32 MAS7; -} TLBCAM[NUM_TLBCAMS]; +#define NUM_TLBCAMS (64) +struct tlbcam TLBCAM[NUM_TLBCAMS]; struct tlbcamrange { unsigned long start; @@ -109,19 +103,6 @@ unsigned long p_mapped_by_tlbcam(phys_addr_t pa) return 0; } -void loadcam_entry(int idx) -{ - mtspr(SPRN_MAS0, TLBCAM[idx].MAS0); - mtspr(SPRN_MAS1, TLBCAM[idx].MAS1); - mtspr(SPRN_MAS2, TLBCAM[idx].MAS2); - mtspr(SPRN_MAS3, TLBCAM[idx].MAS3); - - if (cur_cpu_spec->cpu_features & MMU_FTR_BIG_PHYS) - mtspr(SPRN_MAS7, TLBCAM[idx].MAS7); - - asm volatile("isync;tlbwe;isync" : : : "memory"); -} - /* * Set up one of the I/D BAT (block address translation) register pairs. * The parameters are not checked; in particular size must be a power @@ -152,18 +133,13 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys, TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SX | MAS3_SR; TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0); - if (cur_cpu_spec->cpu_features & MMU_FTR_BIG_PHYS) + if (mmu_has_feature(MMU_FTR_BIG_PHYS)) TLBCAM[index].MAS7 = (u64)phys >> 32; -#ifndef CONFIG_KGDB /* want user access for breakpoints */ if (flags & _PAGE_USER) { TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); } -#else - TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; - TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); -#endif tlbcam_addrs[index].start = virt; tlbcam_addrs[index].limit = virt + size - 1; diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index a719f53921a5..3079f6b44cf5 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -68,9 +68,6 @@ _GLOBAL(__hash_page_4K) std r8,STK_PARM(r8)(r1) std r9,STK_PARM(r9)(r1) - /* Add _PAGE_PRESENT to access */ - ori r4,r4,_PAGE_PRESENT - /* Save non-volatile registers. * r31 will hold "old PTE" * r30 is "new PTE" @@ -347,9 +344,6 @@ _GLOBAL(__hash_page_4K) std r8,STK_PARM(r8)(r1) std r9,STK_PARM(r9)(r1) - /* Add _PAGE_PRESENT to access */ - ori r4,r4,_PAGE_PRESENT - /* Save non-volatile registers. * r31 will hold "old PTE" * r30 is "new PTE" @@ -687,9 +681,6 @@ _GLOBAL(__hash_page_64K) std r8,STK_PARM(r8)(r1) std r9,STK_PARM(r9)(r1) - /* Add _PAGE_PRESENT to access */ - ori r4,r4,_PAGE_PRESENT - /* Save non-volatile registers. * r31 will hold "old PTE" * r30 is "new PTE" diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 056d23a1b105..784a400e0781 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -37,7 +37,7 @@ #define HPTE_LOCK_BIT 3 -static DEFINE_SPINLOCK(native_tlbie_lock); +static DEFINE_RAW_SPINLOCK(native_tlbie_lock); static inline void __tlbie(unsigned long va, int psize, int ssize) { @@ -104,7 +104,7 @@ static inline void tlbie(unsigned long va, int psize, int ssize, int local) if (use_local) use_local = mmu_psize_defs[psize].tlbiel; if (lock_tlbie && !use_local) - spin_lock(&native_tlbie_lock); + raw_spin_lock(&native_tlbie_lock); asm volatile("ptesync": : :"memory"); if (use_local) { __tlbiel(va, psize, ssize); @@ -114,7 +114,7 @@ static inline void tlbie(unsigned long va, int psize, int ssize, int local) asm volatile("eieio; tlbsync; ptesync": : :"memory"); } if (lock_tlbie && !use_local) - spin_unlock(&native_tlbie_lock); + raw_spin_unlock(&native_tlbie_lock); } static inline void native_lock_hpte(struct hash_pte *hptep) @@ -122,7 +122,7 @@ static inline void native_lock_hpte(struct hash_pte *hptep) unsigned long *word = &hptep->v; while (1) { - if (!test_and_set_bit(HPTE_LOCK_BIT, word)) + if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word)) break; while(test_bit(HPTE_LOCK_BIT, word)) cpu_relax(); @@ -133,8 +133,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep) { unsigned long *word = &hptep->v; - asm volatile("lwsync":::"memory"); - clear_bit(HPTE_LOCK_BIT, word); + clear_bit_unlock(HPTE_LOCK_BIT, word); } static long native_hpte_insert(unsigned long hpte_group, unsigned long va, @@ -434,7 +433,7 @@ static void native_hpte_clear(void) /* we take the tlbie lock and hold it. Some hardware will * deadlock if we try to tlbie from two processors at once. */ - spin_lock(&native_tlbie_lock); + raw_spin_lock(&native_tlbie_lock); slots = pteg_count * HPTES_PER_GROUP; @@ -458,7 +457,7 @@ static void native_hpte_clear(void) } asm volatile("eieio; tlbsync; ptesync":::"memory"); - spin_unlock(&native_tlbie_lock); + raw_spin_unlock(&native_tlbie_lock); local_irq_restore(flags); } @@ -521,7 +520,7 @@ static void native_flush_hash_range(unsigned long number, int local) int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) - spin_lock(&native_tlbie_lock); + raw_spin_lock(&native_tlbie_lock); asm volatile("ptesync":::"memory"); for (i = 0; i < number; i++) { @@ -536,7 +535,7 @@ static void native_flush_hash_range(unsigned long number, int local) asm volatile("eieio; tlbsync; ptesync":::"memory"); if (lock_tlbie) - spin_unlock(&native_tlbie_lock); + raw_spin_unlock(&native_tlbie_lock); } local_irq_restore(flags); diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 3ecdcec0a39e..09dffe6efa46 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -31,7 +31,7 @@ #include <linux/cache.h> #include <linux/init.h> #include <linux/signal.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <asm/processor.h> #include <asm/pgtable.h> @@ -384,8 +384,8 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node, printk(KERN_INFO "Huge page(16GB) memory: " "addr = 0x%lX size = 0x%lX pages = %d\n", phys_addr, block_size, expected_pages); - if (phys_addr + (16 * GB) <= lmb_end_of_DRAM()) { - lmb_reserve(phys_addr, block_size * expected_pages); + if (phys_addr + (16 * GB) <= memblock_end_of_DRAM()) { + memblock_reserve(phys_addr, block_size * expected_pages); add_gpage(phys_addr, block_size, expected_pages); } return 0; @@ -458,7 +458,7 @@ static void __init htab_init_page_sizes(void) * and we have at least 1G of RAM at boot */ if (mmu_psize_defs[MMU_PAGE_16M].shift && - lmb_phys_mem_size() >= 0x40000000) + memblock_phys_mem_size() >= 0x40000000) mmu_vmemmap_psize = MMU_PAGE_16M; else if (mmu_psize_defs[MMU_PAGE_64K].shift) mmu_vmemmap_psize = MMU_PAGE_64K; @@ -520,7 +520,7 @@ static unsigned long __init htab_get_table_size(void) return 1UL << ppc64_pft_size; /* round mem_size up to next power of 2 */ - mem_size = lmb_phys_mem_size(); + mem_size = memblock_phys_mem_size(); rnd_mem_size = 1UL << __ilog2(mem_size); if (rnd_mem_size < mem_size) rnd_mem_size <<= 1; @@ -627,7 +627,7 @@ static void __init htab_initialize(void) else limit = 0; - table = lmb_alloc_base(htab_size_bytes, htab_size_bytes, limit); + table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit); DBG("Hash table allocated at %lx, size: %lx\n", table, htab_size_bytes); @@ -647,9 +647,9 @@ static void __init htab_initialize(void) prot = pgprot_val(PAGE_KERNEL); #ifdef CONFIG_DEBUG_PAGEALLOC - linear_map_hash_count = lmb_end_of_DRAM() >> PAGE_SHIFT; - linear_map_hash_slots = __va(lmb_alloc_base(linear_map_hash_count, - 1, lmb.rmo_size)); + linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; + linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count, + 1, memblock.rmo_size)); memset(linear_map_hash_slots, 0, linear_map_hash_count); #endif /* CONFIG_DEBUG_PAGEALLOC */ @@ -659,16 +659,16 @@ static void __init htab_initialize(void) */ /* create bolted the linear mapping in the hash table */ - for (i=0; i < lmb.memory.cnt; i++) { - base = (unsigned long)__va(lmb.memory.region[i].base); - size = lmb.memory.region[i].size; + for (i=0; i < memblock.memory.cnt; i++) { + base = (unsigned long)__va(memblock.memory.region[i].base); + size = memblock.memory.region[i].size; DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", base, size, prot); #ifdef CONFIG_U3_DART /* Do not map the DART space. Fortunately, it will be aligned - * in such a way that it will not cross two lmb regions and + * in such a way that it will not cross two memblock regions and * will fit within a single 16Mb page. * The DART space is assumed to be a full 16Mb region even if * we only use 2Mb of that space. We will use more of it later @@ -871,6 +871,18 @@ static inline int subpage_protection(struct mm_struct *mm, unsigned long ea) } #endif +void hash_failure_debug(unsigned long ea, unsigned long access, + unsigned long vsid, unsigned long trap, + int ssize, int psize, unsigned long pte) +{ + if (!printk_ratelimit()) + return; + pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n", + ea, access, current->comm); + pr_info(" trap=0x%lx vsid=0x%lx ssize=%d psize=%d pte=0x%lx\n", + trap, vsid, ssize, psize, pte); +} + /* Result code is: * 0 - handled * 1 - normal page fault @@ -955,6 +967,17 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) return 1; } + /* Add _PAGE_PRESENT to the required access perm */ + access |= _PAGE_PRESENT; + + /* Pre-check access permissions (will be re-checked atomically + * in __hash_page_XX but this pre-check is a fast path + */ + if (access & ~pte_val(*ptep)) { + DBG_LOW(" no access !\n"); + return 1; + } + #ifdef CONFIG_HUGETLB_PAGE if (hugeshift) return __hash_page_huge(ea, access, vsid, ptep, trap, local, @@ -967,14 +990,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep), pte_val(*(ptep + PTRS_PER_PTE))); #endif - /* Pre-check access permissions (will be re-checked atomically - * in __hash_page_XX but this pre-check is a fast path - */ - if (access & ~pte_val(*ptep)) { - DBG_LOW(" no access !\n"); - return 1; - } - /* Do actual hashing */ #ifdef CONFIG_PPC_64K_PAGES /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */ @@ -1033,6 +1048,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) local, ssize, spp); } + /* Dump some info in case of hash insertion failure, they should + * never happen so it is really useful to know if/when they do + */ + if (rc == -1) + hash_failure_debug(ea, access, vsid, trap, ssize, psize, + pte_val(*ptep)); #ifndef CONFIG_PPC_64K_PAGES DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep)); #else @@ -1051,8 +1072,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, void *pgdir; pte_t *ptep; unsigned long flags; - int local = 0; - int ssize; + int rc, ssize, local = 0; BUG_ON(REGION_ID(ea) != USER_REGION_ID); @@ -1098,11 +1118,18 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Hash it in */ #ifdef CONFIG_PPC_HAS_HASH_64K if (mm->context.user_psize == MMU_PAGE_64K) - __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); + rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ - __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, - subpage_protection(pgdir, ea)); + rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, + subpage_protection(pgdir, ea)); + + /* Dump some info in case of hash insertion failure, they should + * never happen so it is really useful to know if/when they do + */ + if (rc == -1) + hash_failure_debug(ea, access, vsid, trap, ssize, + mm->context.user_psize, pte_val(*ptep)); local_irq_restore(flags); } diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 199539882f92..cc5c273086cf 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -21,21 +21,13 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, unsigned long old_pte, new_pte; unsigned long va, rflags, pa, sz; long slot; - int err = 1; BUG_ON(shift != mmu_psize_defs[mmu_psize].shift); /* Search the Linux page table for a match with va */ va = hpt_va(ea, vsid, ssize); - /* - * Check the user's access rights to the page. If access should be - * prevented then send the problem up to do_page_fault. - */ - if (unlikely(access & ~pte_val(*ptep))) - goto out; - /* - * At this point, we have a pte (old_pte) which can be used to build + /* At this point, we have a pte (old_pte) which can be used to build * or update an HPTE. There are 2 cases: * * 1. There is a valid (present) pte with no associated HPTE (this is @@ -49,9 +41,17 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, do { old_pte = pte_val(*ptep); - if (old_pte & _PAGE_BUSY) - goto out; + /* If PTE busy, retry the access */ + if (unlikely(old_pte & _PAGE_BUSY)) + return 0; + /* If PTE permissions don't match, take page fault */ + if (unlikely(access & ~old_pte)) + return 1; + /* Try to lock the PTE, add ACCESSED and DIRTY if it was + * a write access */ new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; + if (access & _PAGE_RW) + new_pte |= _PAGE_DIRTY; } while(old_pte != __cmpxchg_u64((unsigned long *)ptep, old_pte, new_pte)); @@ -121,8 +121,16 @@ repeat: } } - if (unlikely(slot == -2)) - panic("hash_huge_page: pte_insert failed\n"); + /* + * Hypervisor failure. Restore old pte and return -1 + * similar to __hash_page_* + */ + if (unlikely(slot == -2)) { + *ptep = __pte(old_pte); + hash_failure_debug(ea, access, vsid, trap, ssize, + mmu_psize, old_pte); + return -1; + } new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX); } @@ -131,9 +139,5 @@ repeat: * No need to use ldarx/stdcx here */ *ptep = __pte(new_pte & ~_PAGE_BUSY); - - err = 0; - - out: - return err; + return 0; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 123f7070238a..9bb249c3046e 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -9,6 +9,7 @@ #include <linux/mm.h> #include <linux/io.h> +#include <linux/slab.h> #include <linux/hugetlb.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 4ec900af332f..6a6975dc2654 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -30,7 +30,8 @@ #include <linux/highmem.h> #include <linux/initrd.h> #include <linux/pagemap.h> -#include <linux/lmb.h> +#include <linux/memblock.h> +#include <linux/gfp.h> #include <asm/pgalloc.h> #include <asm/prom.h> @@ -47,7 +48,7 @@ #include "mmu_decl.h" #if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL) -/* The ammount of lowmem must be within 0xF0000000 - KERNELBASE. */ +/* The amount of lowmem must be within 0xF0000000 - KERNELBASE. */ #if (CONFIG_LOWMEM_SIZE > (0xF0000000 - PAGE_OFFSET)) #error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL" #endif @@ -135,17 +136,17 @@ void __init MMU_init(void) /* parse args from command line */ MMU_setup(); - if (lmb.memory.cnt > 1) { + if (memblock.memory.cnt > 1) { #ifndef CONFIG_WII - lmb.memory.cnt = 1; - lmb_analyze(); + memblock.memory.cnt = 1; + memblock_analyze(); printk(KERN_WARNING "Only using first contiguous memory region"); #else wii_memory_fixups(); #endif } - total_lowmem = total_memory = lmb_end_of_DRAM() - memstart_addr; + total_lowmem = total_memory = memblock_end_of_DRAM() - memstart_addr; lowmem_end_addr = memstart_addr + total_lowmem; #ifdef CONFIG_FSL_BOOKE @@ -160,8 +161,8 @@ void __init MMU_init(void) lowmem_end_addr = memstart_addr + total_lowmem; #ifndef CONFIG_HIGHMEM total_memory = total_lowmem; - lmb_enforce_memory_limit(lowmem_end_addr); - lmb_analyze(); + memblock_enforce_memory_limit(lowmem_end_addr); + memblock_analyze(); #endif /* CONFIG_HIGHMEM */ } @@ -199,7 +200,7 @@ void __init *early_get_page(void) if (init_bootmem_done) { p = alloc_bootmem_pages(PAGE_SIZE); } else { - p = __va(lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, + p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, __initial_memory_limit_addr)); } return p; diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 776f28d02b6b..71f1415e2472 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -40,8 +40,9 @@ #include <linux/nodemask.h> #include <linux/module.h> #include <linux/poison.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <linux/hugetlb.h> +#include <linux/slab.h> #include <asm/pgalloc.h> #include <asm/page.h> @@ -251,6 +252,47 @@ static void __meminit vmemmap_create_mapping(unsigned long start, } #endif /* CONFIG_PPC_BOOK3E */ +struct vmemmap_backing *vmemmap_list; + +static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node) +{ + static struct vmemmap_backing *next; + static int num_left; + + /* allocate a page when required and hand out chunks */ + if (!next || !num_left) { + next = vmemmap_alloc_block(PAGE_SIZE, node); + if (unlikely(!next)) { + WARN_ON(1); + return NULL; + } + num_left = PAGE_SIZE / sizeof(struct vmemmap_backing); + } + + num_left--; + + return next++; +} + +static __meminit void vmemmap_list_populate(unsigned long phys, + unsigned long start, + int node) +{ + struct vmemmap_backing *vmem_back; + + vmem_back = vmemmap_list_alloc(node); + if (unlikely(!vmem_back)) { + WARN_ON(1); + return; + } + + vmem_back->phys = phys; + vmem_back->virt_addr = start; + vmem_back->list = vmemmap_list; + + vmemmap_list = vmem_back; +} + int __meminit vmemmap_populate(struct page *start_page, unsigned long nr_pages, int node) { @@ -275,6 +317,8 @@ int __meminit vmemmap_populate(struct page *start_page, if (!p) return -ENOMEM; + vmemmap_list_populate(__pa(p), start, node); + pr_debug(" * %016lx..%016lx allocated at %p\n", start, start + page_size, p); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index b9b152558f9c..1a84a8d00005 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -22,6 +22,7 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/string.h> +#include <linux/gfp.h> #include <linux/types.h> #include <linux/mm.h> #include <linux/stddef.h> @@ -31,7 +32,7 @@ #include <linux/initrd.h> #include <linux/pagemap.h> #include <linux/suspend.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <linux/hugetlb.h> #include <asm/pgalloc.h> @@ -48,6 +49,7 @@ #include <asm/sparsemem.h> #include <asm/vdso.h> #include <asm/fixmap.h> +#include <asm/swiotlb.h> #include "mmu_decl.h" @@ -81,13 +83,13 @@ int page_is_ram(unsigned long pfn) #else unsigned long paddr = (pfn << PAGE_SHIFT); int i; - for (i=0; i < lmb.memory.cnt; i++) { + for (i=0; i < memblock.memory.cnt; i++) { unsigned long base; - base = lmb.memory.region[i].base; + base = memblock.memory.region[i].base; if ((paddr >= base) && - (paddr < (base + lmb.memory.region[i].size))) { + (paddr < (base + memblock.memory.region[i].size))) { return 1; } } @@ -140,14 +142,14 @@ int arch_add_memory(int nid, u64 start, u64 size) /* * walk_memory_resource() needs to make sure there is no holes in a given * memory range. PPC64 does not maintain the memory layout in /proc/iomem. - * Instead it maintains it in lmb.memory structures. Walk through the + * Instead it maintains it in memblock.memory structures. Walk through the * memory regions, find holes and callback for contiguous regions. */ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)) { - struct lmb_property res; + struct memblock_property res; unsigned long pfn, len; u64 end; int ret = -1; @@ -156,7 +158,7 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, res.size = (u64) nr_pages << PAGE_SHIFT; end = res.base + res.size - 1; - while ((res.base < end) && (lmb_find(&res) >= 0)) { + while ((res.base < end) && (memblock_find(&res) >= 0)) { pfn = (unsigned long)(res.base >> PAGE_SHIFT); len = (unsigned long)(res.size >> PAGE_SHIFT); ret = (*func)(pfn, len, arg); @@ -182,8 +184,8 @@ void __init do_init_bootmem(void) unsigned long total_pages; int boot_mapsize; - max_low_pfn = max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; - total_pages = (lmb_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT; + max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; + total_pages = (memblock_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT; #ifdef CONFIG_HIGHMEM total_pages = total_lowmem >> PAGE_SHIFT; max_low_pfn = lowmem_end_addr >> PAGE_SHIFT; @@ -196,16 +198,16 @@ void __init do_init_bootmem(void) */ bootmap_pages = bootmem_bootmap_pages(total_pages); - start = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); + start = memblock_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); min_low_pfn = MEMORY_START >> PAGE_SHIFT; boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn); /* Add active regions with valid PFNs */ - for (i = 0; i < lmb.memory.cnt; i++) { + for (i = 0; i < memblock.memory.cnt; i++) { unsigned long start_pfn, end_pfn; - start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT; - end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i); + start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; + end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); add_active_range(0, start_pfn, end_pfn); } @@ -216,17 +218,17 @@ void __init do_init_bootmem(void) free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT); /* reserve the sections we're already using */ - for (i = 0; i < lmb.reserved.cnt; i++) { - unsigned long addr = lmb.reserved.region[i].base + - lmb_size_bytes(&lmb.reserved, i) - 1; + for (i = 0; i < memblock.reserved.cnt; i++) { + unsigned long addr = memblock.reserved.region[i].base + + memblock_size_bytes(&memblock.reserved, i) - 1; if (addr < lowmem_end_addr) - reserve_bootmem(lmb.reserved.region[i].base, - lmb_size_bytes(&lmb.reserved, i), + reserve_bootmem(memblock.reserved.region[i].base, + memblock_size_bytes(&memblock.reserved, i), BOOTMEM_DEFAULT); - else if (lmb.reserved.region[i].base < lowmem_end_addr) { + else if (memblock.reserved.region[i].base < lowmem_end_addr) { unsigned long adjusted_size = lowmem_end_addr - - lmb.reserved.region[i].base; - reserve_bootmem(lmb.reserved.region[i].base, + memblock.reserved.region[i].base; + reserve_bootmem(memblock.reserved.region[i].base, adjusted_size, BOOTMEM_DEFAULT); } } @@ -234,9 +236,9 @@ void __init do_init_bootmem(void) free_bootmem_with_active_regions(0, max_pfn); /* reserve the sections we're already using */ - for (i = 0; i < lmb.reserved.cnt; i++) - reserve_bootmem(lmb.reserved.region[i].base, - lmb_size_bytes(&lmb.reserved, i), + for (i = 0; i < memblock.reserved.cnt; i++) + reserve_bootmem(memblock.reserved.region[i].base, + memblock_size_bytes(&memblock.reserved, i), BOOTMEM_DEFAULT); #endif @@ -249,20 +251,20 @@ void __init do_init_bootmem(void) /* mark pages that don't exist as nosave */ static int __init mark_nonram_nosave(void) { - unsigned long lmb_next_region_start_pfn, - lmb_region_max_pfn; + unsigned long memblock_next_region_start_pfn, + memblock_region_max_pfn; int i; - for (i = 0; i < lmb.memory.cnt - 1; i++) { - lmb_region_max_pfn = - (lmb.memory.region[i].base >> PAGE_SHIFT) + - (lmb.memory.region[i].size >> PAGE_SHIFT); - lmb_next_region_start_pfn = - lmb.memory.region[i+1].base >> PAGE_SHIFT; + for (i = 0; i < memblock.memory.cnt - 1; i++) { + memblock_region_max_pfn = + (memblock.memory.region[i].base >> PAGE_SHIFT) + + (memblock.memory.region[i].size >> PAGE_SHIFT); + memblock_next_region_start_pfn = + memblock.memory.region[i+1].base >> PAGE_SHIFT; - if (lmb_region_max_pfn < lmb_next_region_start_pfn) - register_nosave_region(lmb_region_max_pfn, - lmb_next_region_start_pfn); + if (memblock_region_max_pfn < memblock_next_region_start_pfn) + register_nosave_region(memblock_region_max_pfn, + memblock_next_region_start_pfn); } return 0; @@ -273,8 +275,8 @@ static int __init mark_nonram_nosave(void) */ void __init paging_init(void) { - unsigned long total_ram = lmb_phys_mem_size(); - phys_addr_t top_of_ram = lmb_end_of_DRAM(); + unsigned long total_ram = memblock_phys_mem_size(); + phys_addr_t top_of_ram = memblock_end_of_DRAM(); unsigned long max_zone_pfns[MAX_NR_ZONES]; #ifdef CONFIG_PPC32 @@ -320,7 +322,12 @@ void __init mem_init(void) struct page *page; unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; - num_physpages = lmb.memory.size >> PAGE_SHIFT; +#ifdef CONFIG_SWIOTLB + if (ppc_swiotlb_enable) + swiotlb_init(1); +#endif + + num_physpages = memblock.memory.size >> PAGE_SHIFT; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); #ifdef CONFIG_NEED_MULTIPLE_NODES @@ -357,7 +364,7 @@ void __init mem_init(void) highmem_mapnr = lowmem_end_addr >> PAGE_SHIFT; for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { struct page *page = pfn_to_page(pfn); - if (lmb_is_reserved(pfn << PAGE_SHIFT)) + if (memblock_is_reserved(pfn << PAGE_SHIFT)) continue; ClearPageReserved(page); init_page_count(page); @@ -494,13 +501,13 @@ EXPORT_SYMBOL(flush_icache_user_range); * This must always be called with the pte lock held. */ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, - pte_t pte) + pte_t *ptep) { #ifdef CONFIG_PPC_STD_MMU unsigned long access = 0, trap; /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ - if (!pte_young(pte) || address >= TASK_SIZE) + if (!pte_young(*ptep) || address >= TASK_SIZE) return; /* We try to figure out if we are coming from an instruction diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c index 0dfba2bf7f31..d0ee554e86e4 100644 --- a/arch/powerpc/mm/mmu_context_hash32.c +++ b/arch/powerpc/mm/mmu_context_hash32.c @@ -60,11 +60,7 @@ static unsigned long next_mmu_context; static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; - -/* - * Set up the context for a new address space. - */ -int init_new_context(struct task_struct *t, struct mm_struct *mm) +unsigned long __init_new_context(void) { unsigned long ctx = next_mmu_context; @@ -74,19 +70,38 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) ctx = 0; } next_mmu_context = (ctx + 1) & LAST_CONTEXT; - mm->context.id = ctx; + + return ctx; +} +EXPORT_SYMBOL_GPL(__init_new_context); + +/* + * Set up the context for a new address space. + */ +int init_new_context(struct task_struct *t, struct mm_struct *mm) +{ + mm->context.id = __init_new_context(); return 0; } /* + * Free a context ID. Make sure to call this with preempt disabled! + */ +void __destroy_context(unsigned long ctx) +{ + clear_bit(ctx, context_map); +} +EXPORT_SYMBOL_GPL(__destroy_context); + +/* * We're finished using the context for an address space. */ void destroy_context(struct mm_struct *mm) { preempt_disable(); if (mm->context.id != NO_CONTEXT) { - clear_bit(mm->context.id, context_map); + __destroy_context(mm->context.id); mm->context.id = NO_CONTEXT; } preempt_enable(); diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index b910d37aea1a..2535828aa84b 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -19,11 +19,12 @@ #include <linux/spinlock.h> #include <linux/idr.h> #include <linux/module.h> +#include <linux/gfp.h> #include <asm/mmu_context.h> static DEFINE_SPINLOCK(mmu_context_lock); -static DEFINE_IDR(mmu_context_idr); +static DEFINE_IDA(mmu_context_ida); /* * The proto-VSID space has 2^35 - 1 segments available for user mappings. @@ -39,11 +40,11 @@ int __init_new_context(void) int err; again: - if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) + if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL)) return -ENOMEM; spin_lock(&mmu_context_lock); - err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index); + err = ida_get_new_above(&mmu_context_ida, 1, &index); spin_unlock(&mmu_context_lock); if (err == -EAGAIN) @@ -53,7 +54,7 @@ again: if (index > MAX_CONTEXT) { spin_lock(&mmu_context_lock); - idr_remove(&mmu_context_idr, index); + ida_remove(&mmu_context_ida, index); spin_unlock(&mmu_context_lock); return -ENOMEM; } @@ -85,7 +86,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) void __destroy_context(int context_id) { spin_lock(&mmu_context_lock); - idr_remove(&mmu_context_idr, context_id); + ida_remove(&mmu_context_ida, context_id); spin_unlock(&mmu_context_lock); } EXPORT_SYMBOL_GPL(__destroy_context); diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 1044a634b6d0..ddfd7ad4e1d6 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -47,6 +47,7 @@ #include <linux/bootmem.h> #include <linux/notifier.h> #include <linux/cpu.h> +#include <linux/slab.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> @@ -56,7 +57,7 @@ static unsigned int next_context, nr_free_contexts; static unsigned long *context_map; static unsigned long *stale_map[NR_CPUS]; static struct mm_struct **context_mm; -static DEFINE_SPINLOCK(context_lock); +static DEFINE_RAW_SPINLOCK(context_lock); #define CTX_MAP_SIZE \ (sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1)) @@ -121,9 +122,9 @@ static unsigned int steal_context_smp(unsigned int id) /* This will happen if you have more CPUs than available contexts, * all we can do here is wait a bit and try again */ - spin_unlock(&context_lock); + raw_spin_unlock(&context_lock); cpu_relax(); - spin_lock(&context_lock); + raw_spin_lock(&context_lock); /* This will cause the caller to try again */ return MMU_NO_CONTEXT; @@ -194,7 +195,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) unsigned long *map; /* No lockless fast path .. yet */ - spin_lock(&context_lock); + raw_spin_lock(&context_lock); pr_hard("[%d] activating context for mm @%p, active=%d, id=%d", cpu, next, next->context.active, next->context.id); @@ -278,7 +279,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) /* Flick the MMU and release lock */ pr_hardcont(" -> %d\n", id); set_context(id, next->pgd); - spin_unlock(&context_lock); + raw_spin_unlock(&context_lock); } /* @@ -307,7 +308,7 @@ void destroy_context(struct mm_struct *mm) WARN_ON(mm->context.active != 0); - spin_lock_irqsave(&context_lock, flags); + raw_spin_lock_irqsave(&context_lock, flags); id = mm->context.id; if (id != MMU_NO_CONTEXT) { __clear_bit(id, context_map); @@ -318,7 +319,7 @@ void destroy_context(struct mm_struct *mm) context_mm[id] = NULL; nr_free_contexts++; } - spin_unlock_irqrestore(&context_lock, flags); + raw_spin_unlock_irqrestore(&context_lock, flags); } #ifdef CONFIG_SMP @@ -394,10 +395,18 @@ void __init mmu_context_init(void) * the PID/TID comparison is disabled, so we can use a TID of zero * to represent all kernel pages as shared among all contexts. * -- Dan + * + * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We + * should normally never have to steal though the facility is + * present if needed. + * -- BenH */ if (mmu_has_feature(MMU_FTR_TYPE_8xx)) { first_context = 0; last_context = 15; + } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) { + first_context = 1; + last_context = 65535; } else { first_context = 1; last_context = 255; diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index d49a77503e19..63b84a0d3b10 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -69,12 +69,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid, } #endif /* CONIFG_8xx */ -/* - * As of today, we don't support tlbivax broadcast on any - * implementation. When that becomes the case, this will be - * an extern. - */ -#ifdef CONFIG_PPC_BOOK3E +#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_PPC_47x) extern void _tlbivax_bcast(unsigned long address, unsigned int pid, unsigned int tsize, unsigned int ind); #else @@ -149,7 +144,15 @@ extern unsigned long mmu_mapin_ram(unsigned long top); extern void MMU_init_hw(void); extern unsigned long mmu_mapin_ram(unsigned long top); extern void adjust_total_lowmem(void); - +extern void loadcam_entry(unsigned int index); + +struct tlbcam { + u32 MAS0; + u32 MAS1; + unsigned long MAS2; + u32 MAS3; + u32 MAS7; +}; #elif defined(CONFIG_PPC32) /* anything 32-bit except 4xx or 8xx */ extern void MMU_init_hw(void); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b037d95eeadc..aa731af720c0 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -17,7 +17,7 @@ #include <linux/nodemask.h> #include <linux/cpu.h> #include <linux/notifier.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <linux/of.h> #include <linux/pfn.h> #include <asm/sparsemem.h> @@ -33,16 +33,41 @@ static int numa_debug; #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); } int numa_cpu_lookup_table[NR_CPUS]; -cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; +cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; struct pglist_data *node_data[MAX_NUMNODES]; EXPORT_SYMBOL(numa_cpu_lookup_table); -EXPORT_SYMBOL(numa_cpumask_lookup_table); +EXPORT_SYMBOL(node_to_cpumask_map); EXPORT_SYMBOL(node_data); static int min_common_depth; static int n_mem_addr_cells, n_mem_size_cells; +/* + * Allocate node_to_cpumask_map based on number of available nodes + * Requires node_possible_map to be valid. + * + * Note: node_to_cpumask() is not valid until after this is done. + */ +static void __init setup_node_to_cpumask_map(void) +{ + unsigned int node, num = 0; + + /* setup nr_node_ids if not done yet */ + if (nr_node_ids == MAX_NUMNODES) { + for_each_node_mask(node, node_possible_map) + num = node; + nr_node_ids = num + 1; + } + + /* allocate the map */ + for (node = 0; node < nr_node_ids; node++) + alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); + + /* cpumask_of_node() will now work */ + dbg("Node to cpumask map for %d nodes\n", nr_node_ids); +} + static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn, unsigned int *nid) { @@ -138,8 +163,8 @@ static void __cpuinit map_cpu_to_node(int cpu, int node) dbg("adding cpu %d to node %d\n", cpu, node); - if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) - cpu_set(cpu, numa_cpumask_lookup_table[node]); + if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node]))) + cpumask_set_cpu(cpu, node_to_cpumask_map[node]); } #ifdef CONFIG_HOTPLUG_CPU @@ -149,8 +174,8 @@ static void unmap_cpu_from_node(unsigned long cpu) dbg("removing cpu %lu from node %d\n", cpu, node); - if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) { - cpu_clear(cpu, numa_cpumask_lookup_table[node]); + if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) { + cpumask_set_cpu(cpu, node_to_cpumask_map[node]); } else { printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", cpu, node); @@ -242,10 +267,12 @@ EXPORT_SYMBOL_GPL(of_node_to_nid); */ static int __init find_min_common_depth(void) { - int depth; + int depth, index; const unsigned int *ref_points; struct device_node *rtas_root; unsigned int len; + struct device_node *chosen; + const char *vec5; rtas_root = of_find_node_by_path("/rtas"); @@ -258,11 +285,26 @@ static int __init find_min_common_depth(void) * configuration (should be all 0's) and the second is for a normal * NUMA configuration. */ + index = 1; ref_points = of_get_property(rtas_root, "ibm,associativity-reference-points", &len); + /* + * For form 1 affinity information we want the first field + */ +#define VEC5_AFFINITY_BYTE 5 +#define VEC5_AFFINITY 0x80 + chosen = of_find_node_by_path("/chosen"); + if (chosen) { + vec5 = of_get_property(chosen, "ibm,architecture-vec-5", NULL); + if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & VEC5_AFFINITY)) { + dbg("Using form 1 affinity\n"); + index = 0; + } + } + if ((len >= 2 * sizeof(unsigned int)) && ref_points) { - depth = ref_points[1]; + depth = ref_points[index]; } else { dbg("NUMA: ibm,associativity-reference-points not found.\n"); depth = -1; @@ -309,7 +351,7 @@ struct of_drconf_cell { #define DRCONF_MEM_RESERVED 0x00000080 /* - * Read the next lmb list entry from the ibm,dynamic-memory property + * Read the next memblock list entry from the ibm,dynamic-memory property * and return the information in the provided of_drconf_cell structure. */ static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp) @@ -330,8 +372,8 @@ static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp) /* * Retreive and validate the ibm,dynamic-memory property of the device tree. * - * The layout of the ibm,dynamic-memory property is a number N of lmb - * list entries followed by N lmb list entries. Each lmb list entry + * The layout of the ibm,dynamic-memory property is a number N of memblock + * list entries followed by N memblock list entries. Each memblock list entry * contains information as layed out in the of_drconf_cell struct above. */ static int of_get_drconf_memory(struct device_node *memory, const u32 **dm) @@ -451,7 +493,7 @@ static int __cpuinit numa_setup_cpu(unsigned long lcpu) nid = of_node_to_nid_single(cpu); if (nid < 0 || !node_online(nid)) - nid = any_online_node(NODE_MASK_ALL); + nid = first_online_node; out: map_cpu_to_node(lcpu, nid); @@ -498,19 +540,19 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start, unsigned long size) { /* - * We use lmb_end_of_DRAM() in here instead of memory_limit because + * We use memblock_end_of_DRAM() in here instead of memory_limit because * we've already adjusted it for the limit and it takes care of * having memory holes below the limit. Also, in the case of * iommu_is_off, memory_limit is not set but is implicitly enforced. */ - if (start + size <= lmb_end_of_DRAM()) + if (start + size <= memblock_end_of_DRAM()) return size; - if (start >= lmb_end_of_DRAM()) + if (start >= memblock_end_of_DRAM()) return 0; - return lmb_end_of_DRAM() - start; + return memblock_end_of_DRAM() - start; } /* @@ -689,7 +731,7 @@ new_range: } /* - * Now do the same thing for each LMB listed in the ibm,dynamic-memory + * Now do the same thing for each MEMBLOCK listed in the ibm,dynamic-memory * property in the ibm,dynamic-reconfiguration-memory node. */ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); @@ -701,8 +743,8 @@ new_range: static void __init setup_nonnuma(void) { - unsigned long top_of_ram = lmb_end_of_DRAM(); - unsigned long total_ram = lmb_phys_mem_size(); + unsigned long top_of_ram = memblock_end_of_DRAM(); + unsigned long total_ram = memblock_phys_mem_size(); unsigned long start_pfn, end_pfn; unsigned int i, nid = 0; @@ -711,9 +753,9 @@ static void __init setup_nonnuma(void) printk(KERN_DEBUG "Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20); - for (i = 0; i < lmb.memory.cnt; ++i) { - start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT; - end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i); + for (i = 0; i < memblock.memory.cnt; ++i) { + start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; + end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); fake_numa_create_new_node(end_pfn, &nid); add_active_range(nid, start_pfn, end_pfn); @@ -737,8 +779,9 @@ void __init dump_numa_cpu_topology(void) * If we used a CPU iterator here we would miss printing * the holes in the cpumap. */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) { + for (cpu = 0; cpu < nr_cpu_ids; cpu++) { + if (cpumask_test_cpu(cpu, + node_to_cpumask_map[node])) { if (count == 0) printk(" %u", cpu); ++count; @@ -750,7 +793,7 @@ void __init dump_numa_cpu_topology(void) } if (count > 1) - printk("-%u", NR_CPUS - 1); + printk("-%u", nr_cpu_ids - 1); printk("\n"); } } @@ -770,7 +813,7 @@ static void __init dump_numa_memory_topology(void) count = 0; - for (i = 0; i < lmb_end_of_DRAM(); + for (i = 0; i < memblock_end_of_DRAM(); i += (1 << SECTION_SIZE_BITS)) { if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) { if (count == 0) @@ -790,7 +833,7 @@ static void __init dump_numa_memory_topology(void) } /* - * Allocate some memory, satisfying the lmb or bootmem allocator where + * Allocate some memory, satisfying the memblock or bootmem allocator where * required. nid is the preferred node and end is the physical address of * the highest address in the node. * @@ -804,11 +847,11 @@ static void __init *careful_zallocation(int nid, unsigned long size, int new_nid; unsigned long ret_paddr; - ret_paddr = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT); + ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT); /* retry over all memory */ if (!ret_paddr) - ret_paddr = __lmb_alloc_base(size, align, lmb_end_of_DRAM()); + ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM()); if (!ret_paddr) panic("numa.c: cannot allocate %lu bytes for node %d", @@ -818,14 +861,14 @@ static void __init *careful_zallocation(int nid, unsigned long size, /* * We initialize the nodes in numeric order: 0, 1, 2... - * and hand over control from the LMB allocator to the + * and hand over control from the MEMBLOCK allocator to the * bootmem allocator. If this function is called for * node 5, then we know that all nodes <5 are using the - * bootmem allocator instead of the LMB allocator. + * bootmem allocator instead of the MEMBLOCK allocator. * * So, check the nid from which this allocation came * and double check to see if we need to use bootmem - * instead of the LMB. We don't free the LMB memory + * instead of the MEMBLOCK. We don't free the MEMBLOCK memory * since it would be useless. */ new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT); @@ -850,9 +893,9 @@ static void mark_reserved_regions_for_nid(int nid) struct pglist_data *node = NODE_DATA(nid); int i; - for (i = 0; i < lmb.reserved.cnt; i++) { - unsigned long physbase = lmb.reserved.region[i].base; - unsigned long size = lmb.reserved.region[i].size; + for (i = 0; i < memblock.reserved.cnt; i++) { + unsigned long physbase = memblock.reserved.region[i].base; + unsigned long size = memblock.reserved.region[i].size; unsigned long start_pfn = physbase >> PAGE_SHIFT; unsigned long end_pfn = PFN_UP(physbase + size); struct node_active_region node_ar; @@ -860,7 +903,7 @@ static void mark_reserved_regions_for_nid(int nid) node->node_spanned_pages; /* - * Check to make sure that this lmb.reserved area is + * Check to make sure that this memblock.reserved area is * within the bounds of the node that we care about. * Checking the nid of the start and end points is not * sufficient because the reserved area could span the @@ -918,7 +961,7 @@ void __init do_init_bootmem(void) int nid; min_low_pfn = 0; - max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; + max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; max_pfn = max_low_pfn; if (parse_numa_properties()) @@ -926,10 +969,6 @@ void __init do_init_bootmem(void) else dump_numa_memory_topology(); - register_cpu_notifier(&ppc64_numa_nb); - cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, - (void *)(unsigned long)boot_cpuid); - for_each_online_node(nid) { unsigned long start_pfn, end_pfn; void *bootmem_vaddr; @@ -983,13 +1022,23 @@ void __init do_init_bootmem(void) } init_bootmem_done = 1; + + /* + * Now bootmem is initialised we can create the node to cpumask + * lookup tables and setup the cpu callback to populate them. + */ + setup_node_to_cpumask_map(); + + register_cpu_notifier(&ppc64_numa_nb); + cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, + (void *)(unsigned long)boot_cpuid); } void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = lmb_end_of_DRAM() >> PAGE_SHIFT; + max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT; free_area_init_nodes(max_zone_pfns); } @@ -1064,7 +1113,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, /* * Find the node associated with a hot added memory section for memory * represented in the device tree as a node (i.e. memory@XXXX) for - * each lmb. + * each memblock. */ int hot_add_node_scn_to_nid(unsigned long scn_addr) { @@ -1105,8 +1154,8 @@ int hot_add_node_scn_to_nid(unsigned long scn_addr) /* * Find the node associated with a hot added memory section. Section - * corresponds to a SPARSEMEM section, not an LMB. It is assumed that - * sections are fully contained within a single LMB. + * corresponds to a SPARSEMEM section, not an MEMBLOCK. It is assumed that + * sections are fully contained within a single MEMBLOCK. */ int hot_add_scn_to_nid(unsigned long scn_addr) { @@ -1114,7 +1163,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr) int nid, found = 0; if (!numa_enabled || (min_common_depth < 0)) - return any_online_node(NODE_MASK_ALL); + return first_online_node; memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (memory) { @@ -1125,7 +1174,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr) } if (nid < 0 || !node_online(nid)) - nid = any_online_node(NODE_MASK_ALL); + nid = first_online_node; if (NODE_DATA(nid)->node_spanned_pages) return nid; diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 99df697c601a..ebc2f38eb381 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -22,6 +22,7 @@ */ #include <linux/kernel.h> +#include <linux/gfp.h> #include <linux/mm.h> #include <linux/init.h> #include <linux/percpu.h> diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 573b3bd1c45b..a87ead0138b4 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -26,7 +26,8 @@ #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/highmem.h> -#include <linux/lmb.h> +#include <linux/memblock.h> +#include <linux/slab.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -114,11 +115,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *ptepage; -#ifdef CONFIG_HIGHPTE - gfp_t flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT | __GFP_ZERO; -#else gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO; -#endif ptepage = alloc_pages(flags, 0); if (!ptepage) @@ -145,6 +142,14 @@ ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags) /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ flags &= ~(_PAGE_USER | _PAGE_EXEC); +#ifdef _PAGE_BAP_SR + /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format + * which means that we just cleared supervisor access... oops ;-) This + * restores it + */ + flags |= _PAGE_BAP_SR; +#endif + return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); } EXPORT_SYMBOL(ioremap_flags); @@ -193,7 +198,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags, * mem_init() sets high_memory so only do the check after that. */ if (mem_init_done && (p < virt_to_phys(high_memory)) && - !(__allow_ioremap_reserved && lmb_is_region_reserved(p, size))) { + !(__allow_ioremap_reserved && memblock_is_region_reserved(p, size))) { printk("__ioremap(): phys addr 0x%llx is RAM lr %p\n", (unsigned long long)p, __builtin_return_address(0)); return NULL; @@ -326,7 +331,7 @@ void __init mapin_ram(void) s = mmu_mapin_ram(top); __mapin_ram_chunk(s, top); - top = lmb_end_of_DRAM(); + top = memblock_end_of_DRAM(); s = wii_mmu_mapin_mem2(top); __mapin_ram_chunk(s, top); } @@ -384,11 +389,7 @@ static int __change_page_attr(struct page *page, pgprot_t prot) return -EINVAL; __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0); wmb(); -#ifdef CONFIG_PPC_STD_MMU - flush_hash_pages(0, address, pmd_val(*kpmd), 1); -#else flush_tlb_page(NULL, address); -#endif pte_unmap(kpte); return 0; diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 853d5565eed5..21d6dfab7942 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -34,7 +34,8 @@ #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/bootmem.h> -#include <linux/lmb.h> +#include <linux/memblock.h> +#include <linux/slab.h> #include <asm/pgalloc.h> #include <asm/page.h> @@ -66,7 +67,7 @@ static void *early_alloc_pgtable(unsigned long size) if (init_bootmem_done) pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS)); else - pt = __va(lmb_alloc_base(size, size, + pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS))); memset(pt, 0, size); @@ -264,6 +265,14 @@ void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size, /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ flags &= ~(_PAGE_USER | _PAGE_EXEC); +#ifdef _PAGE_BAP_SR + /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format + * which means that we just cleared supervisor access... oops ;-) This + * restores it + */ + flags |= _PAGE_BAP_SR; +#endif + if (ppc_md.ioremap) return ppc_md.ioremap(addr, size, flags, caller); return __ioremap_caller(addr, size, flags, caller); diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index f11c2cdcb0fe..f8a01829d64f 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -26,7 +26,7 @@ #include <linux/mm.h> #include <linux/init.h> #include <linux/highmem.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <asm/prom.h> #include <asm/mmu.h> @@ -223,7 +223,7 @@ void __init MMU_init_hw(void) * Find some memory for the hash table. */ if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); - Hash = __va(lmb_alloc_base(Hash_size, Hash_size, + Hash = __va(memblock_alloc_base(Hash_size, Hash_size, __initial_memory_limit_addr)); cacheable_memzero(Hash, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 687fddaa24c5..446a01842a73 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -12,7 +12,7 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/lmb.h> +#include <linux/memblock.h> #include <asm/pgtable.h> #include <asm/mmu.h> @@ -252,7 +252,7 @@ void __init stabs_alloc(void) if (cpu == 0) continue; /* stab for CPU 0 is statically allocated */ - newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE, + newstab = memblock_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE, 1<<SID_SHIFT); newstab = (unsigned long)__va(newstab); diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index a040b81e93bd..e4f8f1fc81a5 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -10,7 +10,6 @@ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/gfp.h> -#include <linux/slab.h> #include <linux/types.h> #include <linux/mm.h> #include <linux/hugetlb.h> diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 282d9306361f..1ec06576f619 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -63,15 +63,21 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, if (huge) { #ifdef CONFIG_HUGETLB_PAGE psize = get_slice_psize(mm, addr); + /* Mask the address for the correct page size */ + addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1); #else BUG(); psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ #endif - } else + } else { psize = pte_pagesize_index(mm, addr, pte); + /* Mask the address for the standard page size. If we + * have a 64k page kernel, but the hardware does not + * support 64k pages, this might be different from the + * hardware page size encoded in the slice table. */ + addr &= PAGE_MASK; + } - /* Mask the address for the correct page size */ - addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1); /* Build full vaddr */ if (!is_kernel_addr(addr)) { diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index f288279e679d..8b04c54e596f 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -1,5 +1,5 @@ /* - * Low leve TLB miss handlers for Book3E + * Low level TLB miss handlers for Book3E * * Copyright (C) 2008-2009 * Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp. diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 2fbc680c2c71..d8695b02a968 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -34,7 +34,7 @@ #include <linux/pagemap.h> #include <linux/preempt.h> #include <linux/spinlock.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <asm/tlbflush.h> #include <asm/tlb.h> @@ -150,7 +150,7 @@ EXPORT_SYMBOL(local_flush_tlb_page); */ #ifdef CONFIG_SMP -static DEFINE_SPINLOCK(tlbivax_lock); +static DEFINE_RAW_SPINLOCK(tlbivax_lock); static int mm_is_core_local(struct mm_struct *mm) { @@ -232,10 +232,10 @@ void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) { int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL); if (lock) - spin_lock(&tlbivax_lock); + raw_spin_lock(&tlbivax_lock); _tlbivax_bcast(vmaddr, pid, tsize, ind); if (lock) - spin_unlock(&tlbivax_lock); + raw_spin_unlock(&tlbivax_lock); goto bail; } else { struct tlb_flush_param p = { @@ -426,7 +426,7 @@ static void __early_init_mmu(int boot_cpu) /* Set the global containing the top of the linear mapping * for use by the TLB miss code */ - linear_map_top = lmb_end_of_DRAM(); + linear_map_top = memblock_end_of_DRAM(); /* A sync won't hurt us after mucking around with * the MMU configuration diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index bbdc5b577b85..cfa768203d08 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -10,7 +10,7 @@ * - tlbil_va * - tlbil_pid * - tlbil_all - * - tlbivax_bcast (not yet) + * - tlbivax_bcast * * Code mostly moved over from misc_32.S * @@ -33,6 +33,7 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/processor.h> +#include <asm/bug.h> #if defined(CONFIG_40x) @@ -65,7 +66,7 @@ _GLOBAL(__tlbil_va) * Nothing to do for 8xx, everything is inline */ -#elif defined(CONFIG_44x) +#elif defined(CONFIG_44x) /* Includes 47x */ /* * 440 implementation uses tlbsx/we for tlbil_va and a full sweep @@ -73,7 +74,13 @@ _GLOBAL(__tlbil_va) */ _GLOBAL(__tlbil_va) mfspr r5,SPRN_MMUCR - rlwimi r5,r4,0,24,31 /* Set TID */ + mfmsr r10 + + /* + * We write 16 bits of STID since 47x supports that much, we + * will never be passed out of bounds values on 440 (hopefully) + */ + rlwimi r5,r4,0,16,31 /* We have to run the search with interrupts disabled, otherwise * an interrupt which causes a TLB miss can clobber the MMUCR @@ -83,24 +90,41 @@ _GLOBAL(__tlbil_va) * and restoring MMUCR, so only normal interrupts have to be * taken care of. */ - mfmsr r4 wrteei 0 mtspr SPRN_MMUCR,r5 - tlbsx. r3, 0, r3 - wrtee r4 - bne 1f + tlbsx. r6,0,r3 + bne 10f sync - /* There are only 64 TLB entries, so r3 < 64, - * which means bit 22, is clear. Since 22 is - * the V bit in the TLB_PAGEID, loading this +BEGIN_MMU_FTR_SECTION + b 2f +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) + /* On 440 There are only 64 TLB entries, so r3 < 64, which means bit + * 22, is clear. Since 22 is the V bit in the TLB_PAGEID, loading this * value will invalidate the TLB entry. */ - tlbwe r3, r3, PPC44x_TLB_PAGEID + tlbwe r6,r6,PPC44x_TLB_PAGEID isync -1: blr +10: wrtee r10 + blr +2: +#ifdef CONFIG_PPC_47x + oris r7,r6,0x8000 /* specify way explicitely */ + clrrwi r4,r3,12 /* get an EPN for the hashing with V = 0 */ + ori r4,r4,PPC47x_TLBE_SIZE + tlbwe r4,r7,0 /* write it */ + isync + wrtee r10 + blr +#else /* CONFIG_PPC_47x */ +1: trap + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; +#endif /* !CONFIG_PPC_47x */ _GLOBAL(_tlbil_all) _GLOBAL(_tlbil_pid) +BEGIN_MMU_FTR_SECTION + b 2f +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) li r3,0 sync @@ -115,6 +139,76 @@ _GLOBAL(_tlbil_pid) isync blr +2: +#ifdef CONFIG_PPC_47x + /* 476 variant. There's not simple way to do this, hopefully we'll + * try to limit the amount of such full invalidates + */ + mfmsr r11 /* Interrupts off */ + wrteei 0 + li r3,-1 /* Current set */ + lis r10,tlb_47x_boltmap@h + ori r10,r10,tlb_47x_boltmap@l + lis r7,0x8000 /* Specify way explicitely */ + + b 9f /* For each set */ + +1: li r9,4 /* Number of ways */ + li r4,0 /* Current way */ + li r6,0 /* Default entry value 0 */ + andi. r0,r8,1 /* Check if way 0 is bolted */ + mtctr r9 /* Load way counter */ + bne- 3f /* Bolted, skip loading it */ + +2: /* For each way */ + or r5,r3,r4 /* Make way|index for tlbre */ + rlwimi r5,r5,16,8,15 /* Copy index into position */ + tlbre r6,r5,0 /* Read entry */ +3: addis r4,r4,0x2000 /* Next way */ + andi. r0,r6,PPC47x_TLB0_VALID /* Valid entry ? */ + beq 4f /* Nope, skip it */ + rlwimi r7,r5,0,1,2 /* Insert way number */ + rlwinm r6,r6,0,21,19 /* Clear V */ + tlbwe r6,r7,0 /* Write it */ +4: bdnz 2b /* Loop for each way */ + srwi r8,r8,1 /* Next boltmap bit */ +9: cmpwi cr1,r3,255 /* Last set done ? */ + addi r3,r3,1 /* Next set */ + beq cr1,1f /* End of loop */ + andi. r0,r3,0x1f /* Need to load a new boltmap word ? */ + bne 1b /* No, loop */ + lwz r8,0(r10) /* Load boltmap entry */ + addi r10,r10,4 /* Next word */ + b 1b /* Then loop */ +1: isync /* Sync shadows */ + wrtee r11 +#else /* CONFIG_PPC_47x */ +1: trap + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; +#endif /* !CONFIG_PPC_47x */ + blr + +#ifdef CONFIG_PPC_47x +/* + * _tlbivax_bcast is only on 47x. We don't bother doing a runtime + * check though, it will blow up soon enough if we mistakenly try + * to use it on a 440. + */ +_GLOBAL(_tlbivax_bcast) + mfspr r5,SPRN_MMUCR + mfmsr r10 + rlwimi r5,r4,0,16,31 + wrteei 0 + mtspr SPRN_MMUCR,r5 +/* tlbivax 0,r3 - use .long to avoid binutils deps */ + .long 0x7c000624 | (r3 << 11) + isync + eieio + tlbsync + sync + wrtee r10 + blr +#endif /* CONFIG_PPC_47x */ #elif defined(CONFIG_FSL_BOOKE) /* @@ -271,3 +365,31 @@ _GLOBAL(set_context) #else #error Unsupported processor type ! #endif + +#if defined(CONFIG_FSL_BOOKE) +/* + * extern void loadcam_entry(unsigned int index) + * + * Load TLBCAM[index] entry in to the L2 CAM MMU + */ +_GLOBAL(loadcam_entry) + LOAD_REG_ADDR(r4, TLBCAM) + mulli r5,r3,TLBCAM_SIZE + add r3,r5,r4 + lwz r4,TLBCAM_MAS0(r3) + mtspr SPRN_MAS0,r4 + lwz r4,TLBCAM_MAS1(r3) + mtspr SPRN_MAS1,r4 + PPC_LL r4,TLBCAM_MAS2(r3) + mtspr SPRN_MAS2,r4 + lwz r4,TLBCAM_MAS3(r3) + mtspr SPRN_MAS3,r4 +BEGIN_MMU_FTR_SECTION + lwz r4,TLBCAM_MAS7(r3) + mtspr SPRN_MAS7,r4 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) + isync + tlbwe + isync + blr +#endif |