diff options
author | James Morris <jmorris@namei.org> | 2009-12-09 11:01:03 +0300 |
---|---|---|
committer | James Morris <jmorris@namei.org> | 2009-12-09 11:01:03 +0300 |
commit | 1ad1f10cd915744bbe52b19423653b38287d827d (patch) | |
tree | ae072aace36b45a55d80b8cbf1b6d92523a88ea0 /arch/arm/mm | |
parent | 08e3daff217059c84c360cc71212686e0a7995af (diff) | |
parent | 2b876f95d03e226394b5d360c86127cbefaf614b (diff) | |
download | linux-1ad1f10cd915744bbe52b19423653b38287d827d.tar.xz |
Merge branch 'master' into next
Diffstat (limited to 'arch/arm/mm')
-rw-r--r-- | arch/arm/mm/Kconfig | 13 | ||||
-rw-r--r-- | arch/arm/mm/Makefile | 4 | ||||
-rw-r--r-- | arch/arm/mm/cache-l2x0.c | 25 | ||||
-rw-r--r-- | arch/arm/mm/cache-tauros2.c | 263 | ||||
-rw-r--r-- | arch/arm/mm/copypage-v6.c | 8 | ||||
-rw-r--r-- | arch/arm/mm/dma-mapping.c | 497 | ||||
-rw-r--r-- | arch/arm/mm/fault-armv.c | 9 | ||||
-rw-r--r-- | arch/arm/mm/flush.c | 49 | ||||
-rw-r--r-- | arch/arm/mm/mm.h | 2 | ||||
-rw-r--r-- | arch/arm/mm/mmu.c | 4 | ||||
-rw-r--r-- | arch/arm/mm/proc-v6.S | 33 | ||||
-rw-r--r-- | arch/arm/mm/proc-xsc3.S | 2 | ||||
-rw-r--r-- | arch/arm/mm/vmregion.c | 131 | ||||
-rw-r--r-- | arch/arm/mm/vmregion.h | 29 |
14 files changed, 716 insertions, 353 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 9264d814cd7a..dd4698c67cc3 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -388,7 +388,7 @@ config CPU_FEROCEON_OLD_ID # ARMv6 config CPU_V6 - bool "Support ARM V6 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX + bool "Support ARM V6 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX || ARCH_DOVE select CPU_32v6 select CPU_ABRT_EV6 select CPU_PABRT_V6 @@ -764,6 +764,15 @@ config CACHE_L2X0 help This option enables the L2x0 PrimeCell. +config CACHE_TAUROS2 + bool "Enable the Tauros2 L2 cache controller" + depends on ARCH_DOVE + default y + select OUTER_CACHE + help + This option enables the Tauros2 L2 cache controller (as + found on PJ1/PJ4). + config CACHE_XSC3L2 bool "Enable the L2 cache on XScale3" depends on CPU_XSC3 @@ -774,5 +783,5 @@ config CACHE_XSC3L2 config ARM_L1_CACHE_SHIFT int - default 6 if ARCH_OMAP3 + default 6 if ARCH_OMAP3 || ARCH_S5PC1XX default 5 diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 055cb2aa8134..827e238e5d4a 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -6,7 +6,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ iomap.o obj-$(CONFIG_MMU) += fault-armv.o flush.o ioremap.o mmap.o \ - pgd.o mmu.o + pgd.o mmu.o vmregion.o ifneq ($(CONFIG_MMU),y) obj-y += nommu.o @@ -87,4 +87,4 @@ obj-$(CONFIG_CPU_V7) += proc-v7.o obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o obj-$(CONFIG_CACHE_XSC3L2) += cache-xsc3l2.o - +obj-$(CONFIG_CACHE_TAUROS2) += cache-tauros2.o diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index b480f1d3591f..747f9a9021bb 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -99,18 +99,25 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask) l2x0_base = base; - /* disable L2X0 */ - writel(0, l2x0_base + L2X0_CTRL); + /* + * Check if l2x0 controller is already enabled. + * If you are booting from non-secure mode + * accessing the below registers will fault. + */ + if (!(readl(l2x0_base + L2X0_CTRL) & 1)) { - aux = readl(l2x0_base + L2X0_AUX_CTRL); - aux &= aux_mask; - aux |= aux_val; - writel(aux, l2x0_base + L2X0_AUX_CTRL); + /* l2x0 controller is disabled */ - l2x0_inv_all(); + aux = readl(l2x0_base + L2X0_AUX_CTRL); + aux &= aux_mask; + aux |= aux_val; + writel(aux, l2x0_base + L2X0_AUX_CTRL); - /* enable L2X0 */ - writel(1, l2x0_base + L2X0_CTRL); + l2x0_inv_all(); + + /* enable L2X0 */ + writel(1, l2x0_base + L2X0_CTRL); + } outer_cache.inv_range = l2x0_inv_range; outer_cache.clean_range = l2x0_clean_range; diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c new file mode 100644 index 000000000000..50868651890f --- /dev/null +++ b/arch/arm/mm/cache-tauros2.c @@ -0,0 +1,263 @@ +/* + * arch/arm/mm/cache-tauros2.c - Tauros2 L2 cache controller support + * + * Copyright (C) 2008 Marvell Semiconductor + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + * + * References: + * - PJ1 CPU Core Datasheet, + * Document ID MV-S104837-01, Rev 0.7, January 24 2008. + * - PJ4 CPU Core Datasheet, + * Document ID MV-S105190-00, Rev 0.7, March 14 2008. + */ + +#include <linux/init.h> +#include <asm/cacheflush.h> +#include <asm/hardware/cache-tauros2.h> + + +/* + * When Tauros2 is used on a CPU that supports the v7 hierarchical + * cache operations, the cache handling code in proc-v7.S takes care + * of everything, including handling DMA coherency. + * + * So, we only need to register outer cache operations here if we're + * being used on a pre-v7 CPU, and we only need to build support for + * outer cache operations into the kernel image if the kernel has been + * configured to support a pre-v7 CPU. + */ +#if __LINUX_ARM_ARCH__ < 7 +/* + * Low-level cache maintenance operations. + */ +static inline void tauros2_clean_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c11, 3" : : "r" (addr)); +} + +static inline void tauros2_clean_inv_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c15, 3" : : "r" (addr)); +} + +static inline void tauros2_inv_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c7, 3" : : "r" (addr)); +} + + +/* + * Linux primitives. + * + * Note that the end addresses passed to Linux primitives are + * noninclusive. + */ +#define CACHE_LINE_SIZE 32 + +static void tauros2_inv_range(unsigned long start, unsigned long end) +{ + /* + * Clean and invalidate partial first cache line. + */ + if (start & (CACHE_LINE_SIZE - 1)) { + tauros2_clean_inv_pa(start & ~(CACHE_LINE_SIZE - 1)); + start = (start | (CACHE_LINE_SIZE - 1)) + 1; + } + + /* + * Clean and invalidate partial last cache line. + */ + if (end & (CACHE_LINE_SIZE - 1)) { + tauros2_clean_inv_pa(end & ~(CACHE_LINE_SIZE - 1)); + end &= ~(CACHE_LINE_SIZE - 1); + } + + /* + * Invalidate all full cache lines between 'start' and 'end'. + */ + while (start < end) { + tauros2_inv_pa(start); + start += CACHE_LINE_SIZE; + } + + dsb(); +} + +static void tauros2_clean_range(unsigned long start, unsigned long end) +{ + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + tauros2_clean_pa(start); + start += CACHE_LINE_SIZE; + } + + dsb(); +} + +static void tauros2_flush_range(unsigned long start, unsigned long end) +{ + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + tauros2_clean_inv_pa(start); + start += CACHE_LINE_SIZE; + } + + dsb(); +} +#endif + +static inline u32 __init read_extra_features(void) +{ + u32 u; + + __asm__("mrc p15, 1, %0, c15, c1, 0" : "=r" (u)); + + return u; +} + +static inline void __init write_extra_features(u32 u) +{ + __asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u)); +} + +static void __init disable_l2_prefetch(void) +{ + u32 u; + + /* + * Read the CPU Extra Features register and verify that the + * Disable L2 Prefetch bit is set. + */ + u = read_extra_features(); + if (!(u & 0x01000000)) { + printk(KERN_INFO "Tauros2: Disabling L2 prefetch.\n"); + write_extra_features(u | 0x01000000); + } +} + +static inline int __init cpuid_scheme(void) +{ + extern int processor_id; + + return !!((processor_id & 0x000f0000) == 0x000f0000); +} + +static inline u32 __init read_mmfr3(void) +{ + u32 mmfr3; + + __asm__("mrc p15, 0, %0, c0, c1, 7\n" : "=r" (mmfr3)); + + return mmfr3; +} + +static inline u32 __init read_actlr(void) +{ + u32 actlr; + + __asm__("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); + + return actlr; +} + +static inline void __init write_actlr(u32 actlr) +{ + __asm__("mcr p15, 0, %0, c1, c0, 1\n" : : "r" (actlr)); +} + +void __init tauros2_init(void) +{ + extern int processor_id; + char *mode; + + disable_l2_prefetch(); + +#ifdef CONFIG_CPU_32v5 + if ((processor_id & 0xff0f0000) == 0x56050000) { + u32 feat; + + /* + * v5 CPUs with Tauros2 have the L2 cache enable bit + * located in the CPU Extra Features register. + */ + feat = read_extra_features(); + if (!(feat & 0x00400000)) { + printk(KERN_INFO "Tauros2: Enabling L2 cache.\n"); + write_extra_features(feat | 0x00400000); + } + + mode = "ARMv5"; + outer_cache.inv_range = tauros2_inv_range; + outer_cache.clean_range = tauros2_clean_range; + outer_cache.flush_range = tauros2_flush_range; + } +#endif + +#ifdef CONFIG_CPU_32v6 + /* + * Check whether this CPU lacks support for the v7 hierarchical + * cache ops. (PJ4 is in its v6 personality mode if the MMFR3 + * register indicates no support for the v7 hierarchical cache + * ops.) + */ + if (cpuid_scheme() && (read_mmfr3() & 0xf) == 0) { + /* + * When Tauros2 is used in an ARMv6 system, the L2 + * enable bit is in the ARMv6 ARM-mandated position + * (bit [26] of the System Control Register). + */ + if (!(get_cr() & 0x04000000)) { + printk(KERN_INFO "Tauros2: Enabling L2 cache.\n"); + adjust_cr(0x04000000, 0x04000000); + } + + mode = "ARMv6"; + outer_cache.inv_range = tauros2_inv_range; + outer_cache.clean_range = tauros2_clean_range; + outer_cache.flush_range = tauros2_flush_range; + } +#endif + +#ifdef CONFIG_CPU_32v7 + /* + * Check whether this CPU has support for the v7 hierarchical + * cache ops. (PJ4 is in its v7 personality mode if the MMFR3 + * register indicates support for the v7 hierarchical cache + * ops.) + * + * (Although strictly speaking there may exist CPUs that + * implement the v7 cache ops but are only ARMv6 CPUs (due to + * not complying with all of the other ARMv7 requirements), + * there are no real-life examples of Tauros2 being used on + * such CPUs as of yet.) + */ + if (cpuid_scheme() && (read_mmfr3() & 0xf) == 1) { + u32 actlr; + + /* + * When Tauros2 is used in an ARMv7 system, the L2 + * enable bit is located in the Auxiliary System Control + * Register (which is the only register allowed by the + * ARMv7 spec to contain fine-grained cache control bits). + */ + actlr = read_actlr(); + if (!(actlr & 0x00000002)) { + printk(KERN_INFO "Tauros2: Enabling L2 cache.\n"); + write_actlr(actlr | 0x00000002); + } + + mode = "ARMv7"; + } +#endif + + if (mode == NULL) { + printk(KERN_CRIT "Tauros2: Unable to detect CPU mode.\n"); + return; + } + + printk(KERN_INFO "Tauros2: L2 cache support initialised " + "in %s mode.\n", mode); +} diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c index 4127a7bddfe5..841f355319bf 100644 --- a/arch/arm/mm/copypage-v6.c +++ b/arch/arm/mm/copypage-v6.c @@ -41,6 +41,14 @@ static void v6_copy_user_highpage_nonaliasing(struct page *to, kfrom = kmap_atomic(from, KM_USER0); kto = kmap_atomic(to, KM_USER1); copy_page(kto, kfrom); +#ifdef CONFIG_HIGHMEM + /* + * kmap_atomic() doesn't set the page virtual address, and + * kunmap_atomic() takes care of cache flushing already. + */ + if (page_address(to) != NULL) +#endif + __cpuc_flush_dcache_page(kto); kunmap_atomic(kto, KM_USER1); kunmap_atomic(kfrom, KM_USER0); } diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index b9590a7085ca..26325cb5d368 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -63,194 +63,152 @@ static u64 get_coherent_dma_mask(struct device *dev) return mask; } -#ifdef CONFIG_MMU /* - * These are the page tables (2MB each) covering uncached, DMA consistent allocations + * Allocate a DMA buffer for 'dev' of size 'size' using the + * specified gfp mask. Note that 'size' must be page aligned. */ -static pte_t *consistent_pte[NUM_CONSISTENT_PTES]; -static DEFINE_SPINLOCK(consistent_lock); +static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gfp) +{ + unsigned long order = get_order(size); + struct page *page, *p, *e; + void *ptr; + u64 mask = get_coherent_dma_mask(dev); -/* - * VM region handling support. - * - * This should become something generic, handling VM region allocations for - * vmalloc and similar (ioremap, module space, etc). - * - * I envisage vmalloc()'s supporting vm_struct becoming: - * - * struct vm_struct { - * struct vm_region region; - * unsigned long flags; - * struct page **pages; - * unsigned int nr_pages; - * unsigned long phys_addr; - * }; - * - * get_vm_area() would then call vm_region_alloc with an appropriate - * struct vm_region head (eg): - * - * struct vm_region vmalloc_head = { - * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), - * .vm_start = VMALLOC_START, - * .vm_end = VMALLOC_END, - * }; - * - * However, vmalloc_head.vm_start is variable (typically, it is dependent on - * the amount of RAM found at boot time.) I would imagine that get_vm_area() - * would have to initialise this each time prior to calling vm_region_alloc(). - */ -struct arm_vm_region { - struct list_head vm_list; - unsigned long vm_start; - unsigned long vm_end; - struct page *vm_pages; - int vm_active; -}; +#ifdef CONFIG_DMA_API_DEBUG + u64 limit = (mask + 1) & ~mask; + if (limit && size >= limit) { + dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", + size, mask); + return NULL; + } +#endif -static struct arm_vm_region consistent_head = { - .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), - .vm_start = CONSISTENT_BASE, - .vm_end = CONSISTENT_END, -}; + if (!mask) + return NULL; -static struct arm_vm_region * -arm_vm_region_alloc(struct arm_vm_region *head, size_t size, gfp_t gfp) -{ - unsigned long addr = head->vm_start, end = head->vm_end - size; - unsigned long flags; - struct arm_vm_region *c, *new; - - new = kmalloc(sizeof(struct arm_vm_region), gfp); - if (!new) - goto out; - - spin_lock_irqsave(&consistent_lock, flags); - - list_for_each_entry(c, &head->vm_list, vm_list) { - if ((addr + size) < addr) - goto nospc; - if ((addr + size) <= c->vm_start) - goto found; - addr = c->vm_end; - if (addr > end) - goto nospc; - } + if (mask < 0xffffffffULL) + gfp |= GFP_DMA; + + page = alloc_pages(gfp, order); + if (!page) + return NULL; - found: /* - * Insert this entry _before_ the one we found. + * Now split the huge page and free the excess pages */ - list_add_tail(&new->vm_list, &c->vm_list); - new->vm_start = addr; - new->vm_end = addr + size; - new->vm_active = 1; - - spin_unlock_irqrestore(&consistent_lock, flags); - return new; - - nospc: - spin_unlock_irqrestore(&consistent_lock, flags); - kfree(new); - out: - return NULL; + split_page(page, order); + for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) + __free_page(p); + + /* + * Ensure that the allocated pages are zeroed, and that any data + * lurking in the kernel direct-mapped region is invalidated. + */ + ptr = page_address(page); + memset(ptr, 0, size); + dmac_flush_range(ptr, ptr + size); + outer_flush_range(__pa(ptr), __pa(ptr) + size); + + return page; } -static struct arm_vm_region *arm_vm_region_find(struct arm_vm_region *head, unsigned long addr) +/* + * Free a DMA buffer. 'size' must be page aligned. + */ +static void __dma_free_buffer(struct page *page, size_t size) { - struct arm_vm_region *c; - - list_for_each_entry(c, &head->vm_list, vm_list) { - if (c->vm_active && c->vm_start == addr) - goto out; + struct page *e = page + (size >> PAGE_SHIFT); + + while (page < e) { + __free_page(page); + page++; } - c = NULL; - out: - return c; } +#ifdef CONFIG_MMU +/* + * These are the page tables (2MB each) covering uncached, DMA consistent allocations + */ +static pte_t *consistent_pte[NUM_CONSISTENT_PTES]; + +#include "vmregion.h" + +static struct arm_vmregion_head consistent_head = { + .vm_lock = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock), + .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), + .vm_start = CONSISTENT_BASE, + .vm_end = CONSISTENT_END, +}; + #ifdef CONFIG_HUGETLB_PAGE #error ARM Coherent DMA allocator does not (yet) support huge TLB #endif -static void * -__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, - pgprot_t prot) +/* + * Initialise the consistent memory allocation. + */ +static int __init consistent_init(void) { - struct page *page; - struct arm_vm_region *c; - unsigned long order; - u64 mask = get_coherent_dma_mask(dev); - u64 limit; + int ret = 0; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int i = 0; + u32 base = CONSISTENT_BASE; - if (!consistent_pte[0]) { - printk(KERN_ERR "%s: not initialised\n", __func__); - dump_stack(); - return NULL; - } + do { + pgd = pgd_offset(&init_mm, base); + pmd = pmd_alloc(&init_mm, pgd, base); + if (!pmd) { + printk(KERN_ERR "%s: no pmd tables\n", __func__); + ret = -ENOMEM; + break; + } + WARN_ON(!pmd_none(*pmd)); - if (!mask) - goto no_page; + pte = pte_alloc_kernel(pmd, base); + if (!pte) { + printk(KERN_ERR "%s: no pte tables\n", __func__); + ret = -ENOMEM; + break; + } - /* - * Sanity check the allocation size. - */ - size = PAGE_ALIGN(size); - limit = (mask + 1) & ~mask; - if ((limit && size >= limit) || - size >= (CONSISTENT_END - CONSISTENT_BASE)) { - printk(KERN_WARNING "coherent allocation too big " - "(requested %#x mask %#llx)\n", size, mask); - goto no_page; - } + consistent_pte[i++] = pte; + base += (1 << PGDIR_SHIFT); + } while (base < CONSISTENT_END); - order = get_order(size); + return ret; +} - if (mask < 0xffffffffULL) - gfp |= GFP_DMA; +core_initcall(consistent_init); - page = alloc_pages(gfp, order); - if (!page) - goto no_page; +static void * +__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot) +{ + struct arm_vmregion *c; - /* - * Invalidate any data that might be lurking in the - * kernel direct-mapped region for device DMA. - */ - { - void *ptr = page_address(page); - memset(ptr, 0, size); - dmac_flush_range(ptr, ptr + size); - outer_flush_range(__pa(ptr), __pa(ptr) + size); + if (!consistent_pte[0]) { + printk(KERN_ERR "%s: not initialised\n", __func__); + dump_stack(); + return NULL; } /* * Allocate a virtual address in the consistent mapping region. */ - c = arm_vm_region_alloc(&consistent_head, size, + c = arm_vmregion_alloc(&consistent_head, size, gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); if (c) { pte_t *pte; - struct page *end = page + (1 << order); int idx = CONSISTENT_PTE_INDEX(c->vm_start); u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); pte = consistent_pte[idx] + off; c->vm_pages = page; - split_page(page, order); - - /* - * Set the "dma handle" - */ - *handle = page_to_dma(dev, page); - do { BUG_ON(!pte_none(*pte)); - /* - * x86 does not mark the pages reserved... - */ - SetPageReserved(page); set_pte_ext(pte, mk_pte(page, prot), 0); page++; pte++; @@ -261,48 +219,90 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, } } while (size -= PAGE_SIZE); - /* - * Free the otherwise unused pages. - */ - while (page < end) { - __free_page(page); - page++; - } - return (void *)c->vm_start; } - - if (page) - __free_pages(page, order); - no_page: - *handle = ~0; return NULL; } + +static void __dma_free_remap(void *cpu_addr, size_t size) +{ + struct arm_vmregion *c; + unsigned long addr; + pte_t *ptep; + int idx; + u32 off; + + c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); + if (!c) { + printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", + __func__, cpu_addr); + dump_stack(); + return; + } + + if ((c->vm_end - c->vm_start) != size) { + printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", + __func__, c->vm_end - c->vm_start, size); + dump_stack(); + size = c->vm_end - c->vm_start; + } + + idx = CONSISTENT_PTE_INDEX(c->vm_start); + off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); + ptep = consistent_pte[idx] + off; + addr = c->vm_start; + do { + pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); + + ptep++; + addr += PAGE_SIZE; + off++; + if (off >= PTRS_PER_PTE) { + off = 0; + ptep = consistent_pte[++idx]; + } + + if (pte_none(pte) || !pte_present(pte)) + printk(KERN_CRIT "%s: bad page in kernel page table\n", + __func__); + } while (size -= PAGE_SIZE); + + flush_tlb_kernel_range(c->vm_start, c->vm_end); + + arm_vmregion_free(&consistent_head, c); +} + #else /* !CONFIG_MMU */ + +#define __dma_alloc_remap(page, size, gfp, prot) page_address(page) +#define __dma_free_remap(addr, size) do { } while (0) + +#endif /* CONFIG_MMU */ + static void * __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, pgprot_t prot) { - void *virt; - u64 mask = get_coherent_dma_mask(dev); + struct page *page; + void *addr; - if (!mask) - goto error; + *handle = ~0; + size = PAGE_ALIGN(size); - if (mask < 0xffffffffULL) - gfp |= GFP_DMA; - virt = kmalloc(size, gfp); - if (!virt) - goto error; + page = __dma_alloc_buffer(dev, size, gfp); + if (!page) + return NULL; - *handle = virt_to_dma(dev, virt); - return virt; + if (!arch_is_coherent()) + addr = __dma_alloc_remap(page, size, gfp, prot); + else + addr = page_address(page); -error: - *handle = ~0; - return NULL; + if (addr) + *handle = page_to_dma(dev, page); + + return addr; } -#endif /* CONFIG_MMU */ /* * Allocate DMA-coherent memory space and return both the kernel remapped @@ -316,19 +316,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gf if (dma_alloc_from_coherent(dev, size, handle, &memory)) return memory; - if (arch_is_coherent()) { - void *virt; - - virt = kmalloc(size, gfp); - if (!virt) - return NULL; - *handle = virt_to_dma(dev, virt); - - return virt; - } - return __dma_alloc(dev, size, handle, gfp, - pgprot_noncached(pgprot_kernel)); + pgprot_dmacoherent(pgprot_kernel)); } EXPORT_SYMBOL(dma_alloc_coherent); @@ -349,15 +338,12 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma, { int ret = -ENXIO; #ifdef CONFIG_MMU - unsigned long flags, user_size, kern_size; - struct arm_vm_region *c; + unsigned long user_size, kern_size; + struct arm_vmregion *c; user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; - spin_lock_irqsave(&consistent_lock, flags); - c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr); - spin_unlock_irqrestore(&consistent_lock, flags); - + c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); if (c) { unsigned long off = vma->vm_pgoff; @@ -379,7 +365,7 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma, int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size) { - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot); return dma_mmap(dev, vma, cpu_addr, dma_addr, size); } EXPORT_SYMBOL(dma_mmap_coherent); @@ -396,144 +382,23 @@ EXPORT_SYMBOL(dma_mmap_writecombine); * free a page as defined by the above mapping. * Must not be called with IRQs disabled. */ -#ifdef CONFIG_MMU void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) { - struct arm_vm_region *c; - unsigned long flags, addr; - pte_t *ptep; - int idx; - u32 off; - WARN_ON(irqs_disabled()); if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) return; - if (arch_is_coherent()) { - kfree(cpu_addr); - return; - } - size = PAGE_ALIGN(size); - spin_lock_irqsave(&consistent_lock, flags); - c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr); - if (!c) - goto no_area; - - c->vm_active = 0; - spin_unlock_irqrestore(&consistent_lock, flags); - - if ((c->vm_end - c->vm_start) != size) { - printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", - __func__, c->vm_end - c->vm_start, size); - dump_stack(); - size = c->vm_end - c->vm_start; - } - - idx = CONSISTENT_PTE_INDEX(c->vm_start); - off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); - ptep = consistent_pte[idx] + off; - addr = c->vm_start; - do { - pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); - unsigned long pfn; - - ptep++; - addr += PAGE_SIZE; - off++; - if (off >= PTRS_PER_PTE) { - off = 0; - ptep = consistent_pte[++idx]; - } - - if (!pte_none(pte) && pte_present(pte)) { - pfn = pte_pfn(pte); - - if (pfn_valid(pfn)) { - struct page *page = pfn_to_page(pfn); - - /* - * x86 does not mark the pages reserved... - */ - ClearPageReserved(page); - - __free_page(page); - continue; - } - } - - printk(KERN_CRIT "%s: bad page in kernel page table\n", - __func__); - } while (size -= PAGE_SIZE); - - flush_tlb_kernel_range(c->vm_start, c->vm_end); - - spin_lock_irqsave(&consistent_lock, flags); - list_del(&c->vm_list); - spin_unlock_irqrestore(&consistent_lock, flags); - - kfree(c); - return; + if (!arch_is_coherent()) + __dma_free_remap(cpu_addr, size); - no_area: - spin_unlock_irqrestore(&consistent_lock, flags); - printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", - __func__, cpu_addr); - dump_stack(); + __dma_free_buffer(dma_to_page(dev, handle), size); } -#else /* !CONFIG_MMU */ -void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) -{ - if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) - return; - kfree(cpu_addr); -} -#endif /* CONFIG_MMU */ EXPORT_SYMBOL(dma_free_coherent); /* - * Initialise the consistent memory allocation. - */ -static int __init consistent_init(void) -{ - int ret = 0; -#ifdef CONFIG_MMU - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - int i = 0; - u32 base = CONSISTENT_BASE; - - do { - pgd = pgd_offset(&init_mm, base); - pmd = pmd_alloc(&init_mm, pgd, base); - if (!pmd) { - printk(KERN_ERR "%s: no pmd tables\n", __func__); - ret = -ENOMEM; - break; - } - WARN_ON(!pmd_none(*pmd)); - - pte = pte_alloc_kernel(pmd, base); - if (!pte) { - printk(KERN_ERR "%s: no pte tables\n", __func__); - ret = -ENOMEM; - break; - } - - consistent_pte[i++] = pte; - base += (1 << PGDIR_SHIFT); - } while (base < CONSISTENT_END); -#endif /* !CONFIG_MMU */ - - return ret; -} - -core_initcall(consistent_init); - -/* * Make an area consistent for devices. * Note: Drivers should NOT use this function directly, as it will break * platforms with CONFIG_DMABOUNCE. diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index d0d17b6a3703..729602291958 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -23,6 +23,8 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> +#include "mm.h" + static unsigned long shared_pte_mask = L_PTE_MT_BUFFERABLE; /* @@ -151,7 +153,14 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte) if (!pfn_valid(pfn)) return; + /* + * The zero page is never written to, so never has any dirty + * cache lines, and therefore never needs to be flushed. + */ page = pfn_to_page(pfn); + if (page == ZERO_PAGE(0)) + return; + mapping = page_mapping(page); #ifndef CONFIG_SMP if (test_and_clear_bit(PG_dcache_dirty, &page->flags)) diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 7f294f307c83..329594e760cd 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -35,14 +35,12 @@ static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr) : : "r" (to), "r" (to + PAGE_SIZE - L1_CACHE_BYTES), "r" (zero) : "cc"); - __flush_icache_all(); } void flush_cache_mm(struct mm_struct *mm) { if (cache_is_vivt()) { - if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) - __cpuc_flush_user_all(); + vivt_flush_cache_mm(mm); return; } @@ -52,16 +50,13 @@ void flush_cache_mm(struct mm_struct *mm) : : "r" (0) : "cc"); - __flush_icache_all(); } } void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { if (cache_is_vivt()) { - if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) - __cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end), - vma->vm_flags); + vivt_flush_cache_range(vma, start, end); return; } @@ -71,22 +66,26 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned : : "r" (0) : "cc"); - __flush_icache_all(); } + + if (vma->vm_flags & VM_EXEC) + __flush_icache_all(); } void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn) { if (cache_is_vivt()) { - if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { - unsigned long addr = user_addr & PAGE_MASK; - __cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags); - } + vivt_flush_cache_page(vma, user_addr, pfn); return; } - if (cache_is_vipt_aliasing()) + if (cache_is_vipt_aliasing()) { flush_pfn_alias(pfn, user_addr); + __flush_icache_all(); + } + + if (vma->vm_flags & VM_EXEC && icache_is_vivt_asid_tagged()) + __flush_icache_all(); } void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, @@ -94,15 +93,13 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, unsigned long len, int write) { if (cache_is_vivt()) { - if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { - unsigned long addr = (unsigned long)kaddr; - __cpuc_coherent_kern_range(addr, addr + len); - } + vivt_flush_ptrace_access(vma, page, uaddr, kaddr, len, write); return; } if (cache_is_vipt_aliasing()) { flush_pfn_alias(page_to_pfn(page), uaddr); + __flush_icache_all(); return; } @@ -120,6 +117,8 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, void __flush_dcache_page(struct address_space *mapping, struct page *page) { + void *addr = page_address(page); + /* * Writeback any data associated with the kernel mapping of this * page. This ensures that data in the physical page is mutually @@ -130,9 +129,9 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page) * kmap_atomic() doesn't set the page virtual address, and * kunmap_atomic() takes care of cache flushing already. */ - if (page_address(page)) + if (addr) #endif - __cpuc_flush_dcache_page(page_address(page)); + __cpuc_flush_dcache_page(addr); /* * If this is a page cache page, and we have an aliasing VIPT cache, @@ -196,7 +195,16 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p */ void flush_dcache_page(struct page *page) { - struct address_space *mapping = page_mapping(page); + struct address_space *mapping; + + /* + * The zero page is never written to, so never has any dirty + * cache lines, and therefore never needs to be flushed. + */ + if (page == ZERO_PAGE(0)) + return; + + mapping = page_mapping(page); #ifndef CONFIG_SMP if (!PageHighMem(page) && mapping && !mapping_mapped(mapping)) @@ -242,6 +250,7 @@ void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned l * userspace address only. */ flush_pfn_alias(pfn, vmaddr); + __flush_icache_all(); } /* diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index c4f6f05198e0..a888363398f8 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -24,6 +24,8 @@ struct mem_type { const struct mem_type *get_mem_type(unsigned int type); +extern void __flush_dcache_page(struct address_space *mapping, struct page *page); + #endif struct map_desc; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index ea67be0223ac..8c7fbd19a4b3 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -881,7 +881,7 @@ void __init reserve_node_zero(pg_data_t *pgdat) BOOTMEM_EXCLUSIVE); } - if (machine_is_treo680()) { + if (machine_is_treo680() || machine_is_centro()) { reserve_bootmem_node(pgdat, 0xa0000000, 0x1000, BOOTMEM_EXCLUSIVE); reserve_bootmem_node(pgdat, 0xa2000000, 0x1000, @@ -1036,7 +1036,7 @@ void __init paging_init(struct machine_desc *mdesc) */ zero_page = alloc_bootmem_low_pages(PAGE_SIZE); empty_zero_page = virt_to_page(zero_page); - flush_dcache_page(empty_zero_page); + __flush_dcache_page(NULL, empty_zero_page); } /* diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 70f75d2e3ead..5485c821101c 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -130,9 +130,16 @@ ENTRY(cpu_v6_set_pte_ext) - + .type cpu_v6_name, #object cpu_v6_name: .asciz "ARMv6-compatible processor" + .size cpu_v6_name, . - cpu_v6_name + + .type cpu_pj4_name, #object +cpu_pj4_name: + .asciz "Marvell PJ4 processor" + .size cpu_pj4_name, . - cpu_pj4_name + .align __INIT @@ -241,3 +248,27 @@ __v6_proc_info: .long v6_user_fns .long v6_cache_fns .size __v6_proc_info, . - __v6_proc_info + + .type __pj4_v6_proc_info, #object +__pj4_v6_proc_info: + .long 0x560f5810 + .long 0xff0ffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_SECT_XN | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __v6_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_pj4_name + .long v6_processor_functions + .long v6wbi_tlb_fns + .long v6_user_fns + .long v6_cache_fns + .size __pj4_v6_proc_info, . - __pj4_v6_proc_info diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index 2028f3702881..fab134e29826 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -396,7 +396,7 @@ __xsc3_setup: orr r4, r4, #0x18 @ cache the page table in L2 mcr p15, 0, r4, c2, c0, 0 @ load page table pointer - mov r0, #0 @ don't allow CP access + mov r0, #1 << 6 @ cp6 access for early sched_clock mcr p15, 0, r0, c15, c1, 0 @ write CP access register mrc p15, 0, r0, c1, c0, 1 @ get auxiliary control reg diff --git a/arch/arm/mm/vmregion.c b/arch/arm/mm/vmregion.c new file mode 100644 index 000000000000..19e09bdb1b8a --- /dev/null +++ b/arch/arm/mm/vmregion.c @@ -0,0 +1,131 @@ +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/slab.h> + +#include "vmregion.h" + +/* + * VM region handling support. + * + * This should become something generic, handling VM region allocations for + * vmalloc and similar (ioremap, module space, etc). + * + * I envisage vmalloc()'s supporting vm_struct becoming: + * + * struct vm_struct { + * struct vmregion region; + * unsigned long flags; + * struct page **pages; + * unsigned int nr_pages; + * unsigned long phys_addr; + * }; + * + * get_vm_area() would then call vmregion_alloc with an appropriate + * struct vmregion head (eg): + * + * struct vmregion vmalloc_head = { + * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), + * .vm_start = VMALLOC_START, + * .vm_end = VMALLOC_END, + * }; + * + * However, vmalloc_head.vm_start is variable (typically, it is dependent on + * the amount of RAM found at boot time.) I would imagine that get_vm_area() + * would have to initialise this each time prior to calling vmregion_alloc(). + */ + +struct arm_vmregion * +arm_vmregion_alloc(struct arm_vmregion_head *head, size_t size, gfp_t gfp) +{ + unsigned long addr = head->vm_start, end = head->vm_end - size; + unsigned long flags; + struct arm_vmregion *c, *new; + + if (head->vm_end - head->vm_start < size) { + printk(KERN_WARNING "%s: allocation too big (requested %#x)\n", + __func__, size); + goto out; + } + + new = kmalloc(sizeof(struct arm_vmregion), gfp); + if (!new) + goto out; + + spin_lock_irqsave(&head->vm_lock, flags); + + list_for_each_entry(c, &head->vm_list, vm_list) { + if ((addr + size) < addr) + goto nospc; + if ((addr + size) <= c->vm_start) + goto found; + addr = c->vm_end; + if (addr > end) + goto nospc; + } + + found: + /* + * Insert this entry _before_ the one we found. + */ + list_add_tail(&new->vm_list, &c->vm_list); + new->vm_start = addr; + new->vm_end = addr + size; + new->vm_active = 1; + + spin_unlock_irqrestore(&head->vm_lock, flags); + return new; + + nospc: + spin_unlock_irqrestore(&head->vm_lock, flags); + kfree(new); + out: + return NULL; +} + +static struct arm_vmregion *__arm_vmregion_find(struct arm_vmregion_head *head, unsigned long addr) +{ + struct arm_vmregion *c; + + list_for_each_entry(c, &head->vm_list, vm_list) { + if (c->vm_active && c->vm_start == addr) + goto out; + } + c = NULL; + out: + return c; +} + +struct arm_vmregion *arm_vmregion_find(struct arm_vmregion_head *head, unsigned long addr) +{ + struct arm_vmregion *c; + unsigned long flags; + + spin_lock_irqsave(&head->vm_lock, flags); + c = __arm_vmregion_find(head, addr); + spin_unlock_irqrestore(&head->vm_lock, flags); + return c; +} + +struct arm_vmregion *arm_vmregion_find_remove(struct arm_vmregion_head *head, unsigned long addr) +{ + struct arm_vmregion *c; + unsigned long flags; + + spin_lock_irqsave(&head->vm_lock, flags); + c = __arm_vmregion_find(head, addr); + if (c) + c->vm_active = 0; + spin_unlock_irqrestore(&head->vm_lock, flags); + return c; +} + +void arm_vmregion_free(struct arm_vmregion_head *head, struct arm_vmregion *c) +{ + unsigned long flags; + + spin_lock_irqsave(&head->vm_lock, flags); + list_del(&c->vm_list); + spin_unlock_irqrestore(&head->vm_lock, flags); + + kfree(c); +} diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h new file mode 100644 index 000000000000..6b2cdbdf3a85 --- /dev/null +++ b/arch/arm/mm/vmregion.h @@ -0,0 +1,29 @@ +#ifndef VMREGION_H +#define VMREGION_H + +#include <linux/spinlock.h> +#include <linux/list.h> + +struct page; + +struct arm_vmregion_head { + spinlock_t vm_lock; + struct list_head vm_list; + unsigned long vm_start; + unsigned long vm_end; +}; + +struct arm_vmregion { + struct list_head vm_list; + unsigned long vm_start; + unsigned long vm_end; + struct page *vm_pages; + int vm_active; +}; + +struct arm_vmregion *arm_vmregion_alloc(struct arm_vmregion_head *, size_t, gfp_t); +struct arm_vmregion *arm_vmregion_find(struct arm_vmregion_head *, unsigned long); +struct arm_vmregion *arm_vmregion_find_remove(struct arm_vmregion_head *, unsigned long); +void arm_vmregion_free(struct arm_vmregion_head *, struct arm_vmregion *); + +#endif |