diff options
Diffstat (limited to 'arch/arm/mm')
52 files changed, 1520 insertions, 639 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 33ed048502a3..d1193884d76d 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -180,8 +180,21 @@ config CPU_ARM925T # ARM926T config CPU_ARM926T bool "Support ARM926T processor" - depends on ARCH_INTEGRATOR || ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX || MACH_REALVIEW_EB || ARCH_PNX4008 || ARCH_NETX || CPU_S3C2412 || ARCH_AT91SAM9260 || ARCH_AT91SAM9261 || ARCH_AT91SAM9263 || ARCH_AT91SAM9RL || ARCH_AT91CAP9 || ARCH_NS9XXX || ARCH_DAVINCI - default y if ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX || ARCH_PNX4008 || ARCH_NETX || CPU_S3C2412 || ARCH_AT91SAM9260 || ARCH_AT91SAM9261 || ARCH_AT91SAM9263 || ARCH_AT91SAM9RL || ARCH_AT91CAP9 || ARCH_NS9XXX || ARCH_DAVINCI + depends on ARCH_INTEGRATOR || ARCH_VERSATILE_PB || \ + MACH_VERSATILE_AB || ARCH_OMAP730 || \ + ARCH_OMAP16XX || MACH_REALVIEW_EB || \ + ARCH_PNX4008 || ARCH_NETX || CPU_S3C2412 || \ + ARCH_AT91SAM9260 || ARCH_AT91SAM9261 || \ + ARCH_AT91SAM9263 || ARCH_AT91SAM9RL || \ + ARCH_AT91SAM9G20 || ARCH_AT91CAP9 || \ + ARCH_NS9XXX || ARCH_DAVINCI || ARCH_MX2 + default y if ARCH_VERSATILE_PB || MACH_VERSATILE_AB || \ + ARCH_OMAP730 || ARCH_OMAP16XX || \ + ARCH_PNX4008 || ARCH_NETX || CPU_S3C2412 || \ + ARCH_AT91SAM9260 || ARCH_AT91SAM9261 || \ + ARCH_AT91SAM9263 || ARCH_AT91SAM9RL || \ + ARCH_AT91SAM9G20 || ARCH_AT91CAP9 || \ + ARCH_NS9XXX || ARCH_DAVINCI || ARCH_MX2 select CPU_32v5 select CPU_ABRT_EV5TJ select CPU_PABRT_NOIFAR @@ -365,7 +378,7 @@ config CPU_XSC3 # Feroceon config CPU_FEROCEON bool - depends on ARCH_ORION5X + depends on ARCH_ORION5X || ARCH_LOKI || ARCH_KIRKWOOD || ARCH_MV78XX0 default y select CPU_32v5 select CPU_ABRT_EV5T @@ -373,7 +386,7 @@ config CPU_FEROCEON select CPU_CACHE_VIVT select CPU_CP15_MMU select CPU_COPY_FEROCEON if MMU - select CPU_TLB_V4WBI if MMU + select CPU_TLB_FEROCEON if MMU config CPU_FEROCEON_OLD_ID bool "Accept early Feroceon cores with an ARM926 ID" @@ -415,7 +428,7 @@ config CPU_32v6K # ARMv7 config CPU_V7 bool "Support ARM V7 processor" - depends on ARCH_INTEGRATOR || MACH_REALVIEW_EB + depends on ARCH_INTEGRATOR || MACH_REALVIEW_EB || ARCH_OMAP3 select CPU_32v6K select CPU_32v7 select CPU_ABRT_EV7 @@ -551,6 +564,11 @@ config CPU_TLB_V4WBI ARM Architecture Version 4 TLB with writeback cache and invalidate instruction cache entry. +config CPU_TLB_FEROCEON + bool + help + Feroceon TLB (v4wbi with non-outer-cachable page table walks). + config CPU_TLB_V6 bool @@ -709,6 +727,22 @@ config OUTER_CACHE bool default n +config CACHE_FEROCEON_L2 + bool "Enable the Feroceon L2 cache controller" + depends on ARCH_KIRKWOOD || ARCH_MV78XX0 + default y + select OUTER_CACHE + help + This option enables the Feroceon L2 cache controller. + +config CACHE_FEROCEON_L2_WRITETHROUGH + bool "Force Feroceon L2 cache write through" + depends on CACHE_FEROCEON_L2 + default n + help + Say Y here to use the Feroceon L2 cache in writethrough mode. + Unless you specifically require this, say N for writeback mode. + config CACHE_L2X0 bool "Enable the L2x0 outer cache controller" depends on REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || MACH_REALVIEW_PB1176 @@ -716,3 +750,11 @@ config CACHE_L2X0 select OUTER_CACHE help This option enables the L2x0 PrimeCell. + +config CACHE_XSC3L2 + bool "Enable the L2 cache on XScale3" + depends on CPU_XSC3 + default y + select OUTER_CACHE + help + This option enables the L2 cache on XScale3. diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 32b2d2d213a6..480f78a3611a 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux arm-specific parts of the memory manager. # -obj-y := consistent.o extable.o fault.o init.o \ +obj-y := dma-mapping.o extable.o fault.o init.o \ iomap.o obj-$(CONFIG_MMU) += fault-armv.o flush.o ioremap.o mmap.o \ @@ -46,6 +46,7 @@ obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o +obj-$(CONFIG_CPU_TLB_FEROCEON) += tlb-v4wbi.o # reuse v4wbi TLB functions obj-$(CONFIG_CPU_TLB_V6) += tlb-v6.o obj-$(CONFIG_CPU_TLB_V7) += tlb-v7.o @@ -73,4 +74,7 @@ obj-$(CONFIG_CPU_FEROCEON) += proc-feroceon.o obj-$(CONFIG_CPU_V6) += proc-v6.o obj-$(CONFIG_CPU_V7) += proc-v7.o +obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o +obj-$(CONFIG_CACHE_XSC3L2) += cache-xsc3l2.o + diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S index eb90bce38e14..2e6dc040c654 100644 --- a/arch/arm/mm/abort-ev7.S +++ b/arch/arm/mm/abort-ev7.S @@ -30,3 +30,4 @@ ENTRY(v7_early_abort) * New designs should not need to patch up faults. */ mov pc, lr +ENDPROC(v7_early_abort) diff --git a/arch/arm/mm/abort-nommu.S b/arch/arm/mm/abort-nommu.S index a7cc7f9ee45d..625e580945b5 100644 --- a/arch/arm/mm/abort-nommu.S +++ b/arch/arm/mm/abort-nommu.S @@ -17,3 +17,4 @@ ENTRY(nommu_early_abort) mov r0, #0 @ clear r0, r1 (no FSR/FAR) mov r1, #0 mov pc, lr +ENDPROC(nommu_early_abort) diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index e162cca5917f..133e65d166b3 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -17,8 +17,8 @@ #include <linux/string.h> #include <linux/proc_fs.h> #include <linux/init.h> +#include <linux/uaccess.h> -#include <asm/uaccess.h> #include <asm/unaligned.h> #include "fault.h" diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c new file mode 100644 index 000000000000..13cdae8b0d44 --- /dev/null +++ b/arch/arm/mm/cache-feroceon-l2.c @@ -0,0 +1,326 @@ +/* + * arch/arm/mm/cache-feroceon-l2.c - Feroceon L2 cache controller support + * + * Copyright (C) 2008 Marvell Semiconductor + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + * + * References: + * - Unified Layer 2 Cache for Feroceon CPU Cores, + * Document ID MV-S104858-00, Rev. A, October 23 2007. + */ + +#include <linux/init.h> +#include <asm/cacheflush.h> +#include <plat/cache-feroceon-l2.h> + + +/* + * Low-level cache maintenance operations. + * + * As well as the regular 'clean/invalidate/flush L2 cache line by + * MVA' instructions, the Feroceon L2 cache controller also features + * 'clean/invalidate L2 range by MVA' operations. + * + * Cache range operations are initiated by writing the start and + * end addresses to successive cp15 registers, and process every + * cache line whose first byte address lies in the inclusive range + * [start:end]. + * + * The cache range operations stall the CPU pipeline until completion. + * + * The range operations require two successive cp15 writes, in + * between which we don't want to be preempted. + */ +static inline void l2_clean_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr)); +} + +static inline void l2_clean_mva_range(unsigned long start, unsigned long end) +{ + unsigned long flags; + + /* + * Make sure 'start' and 'end' reference the same page, as + * L2 is PIPT and range operations only do a TLB lookup on + * the start address. + */ + BUG_ON((start ^ end) >> PAGE_SHIFT); + + raw_local_irq_save(flags); + __asm__("mcr p15, 1, %0, c15, c9, 4\n\t" + "mcr p15, 1, %1, c15, c9, 5" + : : "r" (start), "r" (end)); + raw_local_irq_restore(flags); +} + +static inline void l2_clean_pa_range(unsigned long start, unsigned long end) +{ + l2_clean_mva_range(__phys_to_virt(start), __phys_to_virt(end)); +} + +static inline void l2_clean_inv_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c15, c10, 3" : : "r" (addr)); +} + +static inline void l2_inv_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c15, c11, 3" : : "r" (addr)); +} + +static inline void l2_inv_mva_range(unsigned long start, unsigned long end) +{ + unsigned long flags; + + /* + * Make sure 'start' and 'end' reference the same page, as + * L2 is PIPT and range operations only do a TLB lookup on + * the start address. + */ + BUG_ON((start ^ end) >> PAGE_SHIFT); + + raw_local_irq_save(flags); + __asm__("mcr p15, 1, %0, c15, c11, 4\n\t" + "mcr p15, 1, %1, c15, c11, 5" + : : "r" (start), "r" (end)); + raw_local_irq_restore(flags); +} + +static inline void l2_inv_pa_range(unsigned long start, unsigned long end) +{ + l2_inv_mva_range(__phys_to_virt(start), __phys_to_virt(end)); +} + + +/* + * Linux primitives. + * + * Note that the end addresses passed to Linux primitives are + * noninclusive, while the hardware cache range operations use + * inclusive start and end addresses. + */ +#define CACHE_LINE_SIZE 32 +#define MAX_RANGE_SIZE 1024 + +static int l2_wt_override; + +static unsigned long calc_range_end(unsigned long start, unsigned long end) +{ + unsigned long range_end; + + BUG_ON(start & (CACHE_LINE_SIZE - 1)); + BUG_ON(end & (CACHE_LINE_SIZE - 1)); + + /* + * Try to process all cache lines between 'start' and 'end'. + */ + range_end = end; + + /* + * Limit the number of cache lines processed at once, + * since cache range operations stall the CPU pipeline + * until completion. + */ + if (range_end > start + MAX_RANGE_SIZE) + range_end = start + MAX_RANGE_SIZE; + + /* + * Cache range operations can't straddle a page boundary. + */ + if (range_end > (start | (PAGE_SIZE - 1)) + 1) + range_end = (start | (PAGE_SIZE - 1)) + 1; + + return range_end; +} + +static void feroceon_l2_inv_range(unsigned long start, unsigned long end) +{ + /* + * Clean and invalidate partial first cache line. + */ + if (start & (CACHE_LINE_SIZE - 1)) { + l2_clean_inv_pa(start & ~(CACHE_LINE_SIZE - 1)); + start = (start | (CACHE_LINE_SIZE - 1)) + 1; + } + + /* + * Clean and invalidate partial last cache line. + */ + if (end & (CACHE_LINE_SIZE - 1)) { + l2_clean_inv_pa(end & ~(CACHE_LINE_SIZE - 1)); + end &= ~(CACHE_LINE_SIZE - 1); + } + + /* + * Invalidate all full cache lines between 'start' and 'end'. + */ + while (start != end) { + unsigned long range_end = calc_range_end(start, end); + l2_inv_pa_range(start, range_end - CACHE_LINE_SIZE); + start = range_end; + } + + dsb(); +} + +static void feroceon_l2_clean_range(unsigned long start, unsigned long end) +{ + /* + * If L2 is forced to WT, the L2 will always be clean and we + * don't need to do anything here. + */ + if (!l2_wt_override) { + start &= ~(CACHE_LINE_SIZE - 1); + end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1); + while (start != end) { + unsigned long range_end = calc_range_end(start, end); + l2_clean_pa_range(start, range_end - CACHE_LINE_SIZE); + start = range_end; + } + } + + dsb(); +} + +static void feroceon_l2_flush_range(unsigned long start, unsigned long end) +{ + start &= ~(CACHE_LINE_SIZE - 1); + end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1); + while (start != end) { + unsigned long range_end = calc_range_end(start, end); + if (!l2_wt_override) + l2_clean_pa_range(start, range_end - CACHE_LINE_SIZE); + l2_inv_pa_range(start, range_end - CACHE_LINE_SIZE); + start = range_end; + } + + dsb(); +} + + +/* + * Routines to disable and re-enable the D-cache and I-cache at run + * time. These are necessary because the L2 cache can only be enabled + * or disabled while the L1 Dcache and Icache are both disabled. + */ +static int __init flush_and_disable_dcache(void) +{ + u32 cr; + + cr = get_cr(); + if (cr & CR_C) { + unsigned long flags; + + raw_local_irq_save(flags); + flush_cache_all(); + set_cr(cr & ~CR_C); + raw_local_irq_restore(flags); + return 1; + } + return 0; +} + +static void __init enable_dcache(void) +{ + u32 cr; + + cr = get_cr(); + set_cr(cr | CR_C); +} + +static void __init __invalidate_icache(void) +{ + int dummy; + + __asm__ __volatile__("mcr p15, 0, %0, c7, c5, 0" : "=r" (dummy)); +} + +static int __init invalidate_and_disable_icache(void) +{ + u32 cr; + + cr = get_cr(); + if (cr & CR_I) { + set_cr(cr & ~CR_I); + __invalidate_icache(); + return 1; + } + return 0; +} + +static void __init enable_icache(void) +{ + u32 cr; + + cr = get_cr(); + set_cr(cr | CR_I); +} + +static inline u32 read_extra_features(void) +{ + u32 u; + + __asm__("mrc p15, 1, %0, c15, c1, 0" : "=r" (u)); + + return u; +} + +static inline void write_extra_features(u32 u) +{ + __asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u)); +} + +static void __init disable_l2_prefetch(void) +{ + u32 u; + + /* + * Read the CPU Extra Features register and verify that the + * Disable L2 Prefetch bit is set. + */ + u = read_extra_features(); + if (!(u & 0x01000000)) { + printk(KERN_INFO "Feroceon L2: Disabling L2 prefetch.\n"); + write_extra_features(u | 0x01000000); + } +} + +static void __init enable_l2(void) +{ + u32 u; + + u = read_extra_features(); + if (!(u & 0x00400000)) { + int i, d; + + printk(KERN_INFO "Feroceon L2: Enabling L2\n"); + + d = flush_and_disable_dcache(); + i = invalidate_and_disable_icache(); + write_extra_features(u | 0x00400000); + if (i) + enable_icache(); + if (d) + enable_dcache(); + } +} + +void __init feroceon_l2_init(int __l2_wt_override) +{ + l2_wt_override = __l2_wt_override; + + disable_l2_prefetch(); + + outer_cache.inv_range = feroceon_l2_inv_range; + outer_cache.clean_range = feroceon_l2_clean_range; + outer_cache.flush_range = feroceon_l2_flush_range; + + enable_l2(); + + printk(KERN_INFO "Feroceon L2: Cache support initialised%s.\n", + l2_wt_override ? ", in WT override mode" : ""); +} diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 76b800a95191..b480f1d3591f 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -18,9 +18,9 @@ */ #include <linux/init.h> #include <linux/spinlock.h> +#include <linux/io.h> #include <asm/cacheflush.h> -#include <asm/io.h> #include <asm/hardware/cache-l2x0.h> #define CACHE_LINE_SIZE 32 diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S index e1994788cf0e..3b3639eb7ca5 100644 --- a/arch/arm/mm/cache-v3.S +++ b/arch/arm/mm/cache-v3.S @@ -9,7 +9,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> -#include <asm/hardware.h> +#include <mach/hardware.h> #include <asm/page.h> #include "proc-macros.S" diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S index b2908063ed6a..33926c9fcda6 100644 --- a/arch/arm/mm/cache-v4.S +++ b/arch/arm/mm/cache-v4.S @@ -9,7 +9,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> -#include <asm/hardware.h> +#include <mach/hardware.h> #include <asm/page.h> #include "proc-macros.S" diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S index 9bcabd86c6f3..51a9b0b273b6 100644 --- a/arch/arm/mm/cache-v4wt.S +++ b/arch/arm/mm/cache-v4wt.S @@ -13,7 +13,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> -#include <asm/hardware.h> +#include <mach/hardware.h> #include <asm/page.h> #include "proc-macros.S" diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 35ffc4d95997..d19c2bec2b1f 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -66,6 +66,7 @@ finished: mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr isb mov pc, lr +ENDPROC(v7_flush_dcache_all) /* * v7_flush_cache_all() @@ -85,6 +86,7 @@ ENTRY(v7_flush_kern_cache_all) mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate ldmfd sp!, {r4-r5, r7, r9-r11, lr} mov pc, lr +ENDPROC(v7_flush_kern_cache_all) /* * v7_flush_cache_all() @@ -110,6 +112,8 @@ ENTRY(v7_flush_user_cache_all) */ ENTRY(v7_flush_user_cache_range) mov pc, lr +ENDPROC(v7_flush_user_cache_all) +ENDPROC(v7_flush_user_cache_range) /* * v7_coherent_kern_range(start,end) @@ -155,6 +159,8 @@ ENTRY(v7_coherent_user_range) dsb isb mov pc, lr +ENDPROC(v7_coherent_kern_range) +ENDPROC(v7_coherent_user_range) /* * v7_flush_kern_dcache_page(kaddr) @@ -174,6 +180,7 @@ ENTRY(v7_flush_kern_dcache_page) blo 1b dsb mov pc, lr +ENDPROC(v7_flush_kern_dcache_page) /* * v7_dma_inv_range(start,end) @@ -202,6 +209,7 @@ ENTRY(v7_dma_inv_range) blo 1b dsb mov pc, lr +ENDPROC(v7_dma_inv_range) /* * v7_dma_clean_range(start,end) @@ -219,6 +227,7 @@ ENTRY(v7_dma_clean_range) blo 1b dsb mov pc, lr +ENDPROC(v7_dma_clean_range) /* * v7_dma_flush_range(start,end) @@ -236,6 +245,7 @@ ENTRY(v7_dma_flush_range) blo 1b dsb mov pc, lr +ENDPROC(v7_dma_flush_range) __INITDATA diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c new file mode 100644 index 000000000000..10b1bae1a258 --- /dev/null +++ b/arch/arm/mm/cache-xsc3l2.c @@ -0,0 +1,183 @@ +/* + * arch/arm/mm/cache-xsc3l2.c - XScale3 L2 cache controller support + * + * Copyright (C) 2007 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/io.h> + +#include <asm/system.h> +#include <asm/cputype.h> +#include <asm/cacheflush.h> + +#define CR_L2 (1 << 26) + +#define CACHE_LINE_SIZE 32 +#define CACHE_LINE_SHIFT 5 +#define CACHE_WAY_PER_SET 8 + +#define CACHE_WAY_SIZE(l2ctype) (8192 << (((l2ctype) >> 8) & 0xf)) +#define CACHE_SET_SIZE(l2ctype) (CACHE_WAY_SIZE(l2ctype) >> CACHE_LINE_SHIFT) + +static inline int xsc3_l2_present(void) +{ + unsigned long l2ctype; + + __asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype)); + + return !!(l2ctype & 0xf8); +} + +static inline void xsc3_l2_clean_mva(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c11, 1" : : "r" (addr)); +} + +static inline void xsc3_l2_clean_pa(unsigned long addr) +{ + xsc3_l2_clean_mva(__phys_to_virt(addr)); +} + +static inline void xsc3_l2_inv_mva(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c7, 1" : : "r" (addr)); +} + +static inline void xsc3_l2_inv_pa(unsigned long addr) +{ + xsc3_l2_inv_mva(__phys_to_virt(addr)); +} + +static inline void xsc3_l2_inv_all(void) +{ + unsigned long l2ctype, set_way; + int set, way; + + __asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype)); + + for (set = 0; set < CACHE_SET_SIZE(l2ctype); set++) { + for (way = 0; way < CACHE_WAY_PER_SET; way++) { + set_way = (way << 29) | (set << 5); + __asm__("mcr p15, 1, %0, c7, c11, 2" : : "r"(set_way)); + } + } + + dsb(); +} + +static void xsc3_l2_inv_range(unsigned long start, unsigned long end) +{ + if (start == 0 && end == -1ul) { + xsc3_l2_inv_all(); + return; + } + + /* + * Clean and invalidate partial first cache line. + */ + if (start & (CACHE_LINE_SIZE - 1)) { + xsc3_l2_clean_pa(start & ~(CACHE_LINE_SIZE - 1)); + xsc3_l2_inv_pa(start & ~(CACHE_LINE_SIZE - 1)); + start = (start | (CACHE_LINE_SIZE - 1)) + 1; + } + + /* + * Clean and invalidate partial last cache line. + */ + if (end & (CACHE_LINE_SIZE - 1)) { + xsc3_l2_clean_pa(end & ~(CACHE_LINE_SIZE - 1)); + xsc3_l2_inv_pa(end & ~(CACHE_LINE_SIZE - 1)); + end &= ~(CACHE_LINE_SIZE - 1); + } + + /* + * Invalidate all full cache lines between 'start' and 'end'. + */ + while (start != end) { + xsc3_l2_inv_pa(start); + start += CACHE_LINE_SIZE; + } + + dsb(); +} + +static void xsc3_l2_clean_range(unsigned long start, unsigned long end) +{ + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + xsc3_l2_clean_pa(start); + start += CACHE_LINE_SIZE; + } + + dsb(); +} + +/* + * optimize L2 flush all operation by set/way format + */ +static inline void xsc3_l2_flush_all(void) +{ + unsigned long l2ctype, set_way; + int set, way; + + __asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype)); + + for (set = 0; set < CACHE_SET_SIZE(l2ctype); set++) { + for (way = 0; way < CACHE_WAY_PER_SET; way++) { + set_way = (way << 29) | (set << 5); + __asm__("mcr p15, 1, %0, c7, c15, 2" : : "r"(set_way)); + } + } + + dsb(); +} + +static void xsc3_l2_flush_range(unsigned long start, unsigned long end) +{ + if (start == 0 && end == -1ul) { + xsc3_l2_flush_all(); + return; + } + + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + xsc3_l2_clean_pa(start); + xsc3_l2_inv_pa(start); + start += CACHE_LINE_SIZE; + } + + dsb(); +} + +static int __init xsc3_l2_init(void) +{ + if (!cpu_is_xsc3() || !xsc3_l2_present()) + return 0; + + if (!(get_cr() & CR_L2)) { + pr_info("XScale3 L2 cache enabled.\n"); + adjust_cr(CR_L2, CR_L2); + xsc3_l2_inv_all(); + } + + outer_cache.inv_range = xsc3_l2_inv_range; + outer_cache.clean_range = xsc3_l2_clean_range; + outer_cache.flush_range = xsc3_l2_flush_range; + + return 0; +} +core_initcall(xsc3_l2_init); diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c index ded0e96d069d..8d33e2549344 100644 --- a/arch/arm/mm/copypage-v4mc.c +++ b/arch/arm/mm/copypage-v4mc.c @@ -28,7 +28,7 @@ * specific hacks for copying pages efficiently. */ #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ - L_PTE_CACHEABLE) + L_PTE_MT_MINICACHE) static DEFINE_SPINLOCK(minicache_lock); diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c index 3adb79257f43..0e21c0767580 100644 --- a/arch/arm/mm/copypage-v6.c +++ b/arch/arm/mm/copypage-v6.c @@ -16,6 +16,7 @@ #include <asm/shmparam.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> +#include <asm/cachetype.h> #include "mm.h" diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c index 2e455f82a4d5..bad49331bbf9 100644 --- a/arch/arm/mm/copypage-xscale.c +++ b/arch/arm/mm/copypage-xscale.c @@ -30,7 +30,7 @@ #define COPYPAGE_MINICACHE 0xffff8000 #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ - L_PTE_CACHEABLE) + L_PTE_MT_MINICACHE) static DEFINE_SPINLOCK(minicache_lock); diff --git a/arch/arm/mm/discontig.c b/arch/arm/mm/discontig.c index 1e5602189507..c8c0c4b0f0a3 100644 --- a/arch/arm/mm/discontig.c +++ b/arch/arm/mm/discontig.c @@ -21,26 +21,24 @@ * Our node_data structure for discontiguous memory. */ -static bootmem_data_t node_bootmem_data[MAX_NUMNODES]; - pg_data_t discontig_node_data[MAX_NUMNODES] = { - { .bdata = &node_bootmem_data[0] }, - { .bdata = &node_bootmem_data[1] }, - { .bdata = &node_bootmem_data[2] }, - { .bdata = &node_bootmem_data[3] }, + { .bdata = &bootmem_node_data[0] }, + { .bdata = &bootmem_node_data[1] }, + { .bdata = &bootmem_node_data[2] }, + { .bdata = &bootmem_node_data[3] }, #if MAX_NUMNODES == 16 - { .bdata = &node_bootmem_data[4] }, - { .bdata = &node_bootmem_data[5] }, - { .bdata = &node_bootmem_data[6] }, - { .bdata = &node_bootmem_data[7] }, - { .bdata = &node_bootmem_data[8] }, - { .bdata = &node_bootmem_data[9] }, - { .bdata = &node_bootmem_data[10] }, - { .bdata = &node_bootmem_data[11] }, - { .bdata = &node_bootmem_data[12] }, - { .bdata = &node_bootmem_data[13] }, - { .bdata = &node_bootmem_data[14] }, - { .bdata = &node_bootmem_data[15] }, + { .bdata = &bootmem_node_data[4] }, + { .bdata = &bootmem_node_data[5] }, + { .bdata = &bootmem_node_data[6] }, + { .bdata = &bootmem_node_data[7] }, + { .bdata = &bootmem_node_data[8] }, + { .bdata = &bootmem_node_data[9] }, + { .bdata = &bootmem_node_data[10] }, + { .bdata = &bootmem_node_data[11] }, + { .bdata = &bootmem_node_data[12] }, + { .bdata = &bootmem_node_data[13] }, + { .bdata = &bootmem_node_data[14] }, + { .bdata = &bootmem_node_data[15] }, #endif }; diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/dma-mapping.c index 333a82a3717e..67960017dc8f 100644 --- a/arch/arm/mm/consistent.c +++ b/arch/arm/mm/dma-mapping.c @@ -1,5 +1,5 @@ /* - * linux/arch/arm/mm/consistent.c + * linux/arch/arm/mm/dma-mapping.c * * Copyright (C) 2000-2004 Russell King * @@ -274,6 +274,11 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) { + void *memory; + + if (dma_alloc_from_coherent(dev, size, handle, &memory)) + return memory; + if (arch_is_coherent()) { void *virt; @@ -362,6 +367,9 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr WARN_ON(irqs_disabled()); + if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) + return; + if (arch_is_coherent()) { kfree(cpu_addr); return; @@ -504,3 +512,105 @@ void dma_cache_maint(const void *start, size_t size, int direction) } } EXPORT_SYMBOL(dma_cache_maint); + +/** + * dma_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * This is the scatter-gather version of the dma_map_single interface. + * Here the scatter gather list elements are each tagged with the + * appropriate dma address and length. They are obtained via + * sg_dma_{address,length}. + * + * Device ownership issues as mentioned for dma_map_single are the same + * here. + */ +int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir) +{ + struct scatterlist *s; + int i, j; + + for_each_sg(sg, s, nents, i) { + s->dma_address = dma_map_page(dev, sg_page(s), s->offset, + s->length, dir); + if (dma_mapping_error(dev, s->dma_address)) + goto bad_mapping; + } + return nents; + + bad_mapping: + for_each_sg(sg, s, i, j) + dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); + return 0; +} +EXPORT_SYMBOL(dma_map_sg); + +/** + * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to unmap (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). + */ +void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) + dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); +} +EXPORT_SYMBOL(dma_unmap_sg); + +/** + * dma_sync_sg_for_cpu + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0, + sg_dma_len(s), dir); + } +} +EXPORT_SYMBOL(dma_sync_sg_for_cpu); + +/** + * dma_sync_sg_for_device + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + if (!dmabounce_sync_for_device(dev, sg_dma_address(s), 0, + sg_dma_len(s), dir)) + continue; + + if (!arch_is_coherent()) + dma_cache_maint(sg_virt(s), s->length, dir); + } +} +EXPORT_SYMBOL(dma_sync_sg_for_device); diff --git a/arch/arm/mm/extable.c b/arch/arm/mm/extable.c index 9592c3ee4cb2..9d285626bc7d 100644 --- a/arch/arm/mm/extable.c +++ b/arch/arm/mm/extable.c @@ -2,7 +2,7 @@ * linux/arch/arm/mm/extable.c */ #include <linux/module.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> int fixup_exception(struct pt_regs *regs) { diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 44558d5f9313..81d0b8772de3 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -17,11 +17,13 @@ #include <linux/init.h> #include <linux/pagemap.h> +#include <asm/bugs.h> #include <asm/cacheflush.h> +#include <asm/cachetype.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> -static unsigned long shared_pte_mask = L_PTE_CACHEABLE; +static unsigned long shared_pte_mask = L_PTE_MT_BUFFERABLE; /* * We take the easy way out of this problem - we make the @@ -37,7 +39,7 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address) pgd_t *pgd; pmd_t *pmd; pte_t *pte, entry; - int ret = 0; + int ret; pgd = pgd_offset(vma->vm_mm, address); if (pgd_none(*pgd)) @@ -55,15 +57,20 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address) entry = *pte; /* + * If this page is present, it's actually being shared. + */ + ret = pte_present(entry); + + /* * If this page isn't present, or is already setup to * fault (ie, is old), we can safely ignore any issues. */ - if (pte_present(entry) && pte_val(entry) & shared_pte_mask) { + if (ret && (pte_val(entry) & L_PTE_MT_MASK) != shared_pte_mask) { flush_cache_page(vma, address, pte_pfn(entry)); - pte_val(entry) &= ~shared_pte_mask; + pte_val(entry) &= ~L_PTE_MT_MASK; + pte_val(entry) |= shared_pte_mask; set_pte_at(vma->vm_mm, address, pte, entry); flush_tlb_page(vma, address); - ret = 1; } pte_unmap(pte); return ret; @@ -144,13 +151,17 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte) page = pfn_to_page(pfn); mapping = page_mapping(page); if (mapping) { +#ifndef CONFIG_SMP int dirty = test_and_clear_bit(PG_dcache_dirty, &page->flags); if (dirty) __flush_dcache_page(mapping, page); +#endif if (cache_is_vivt()) make_coherent(mapping, vma, addr, pfn); + else if (vma->vm_flags & VM_EXEC) + __flush_icache_all(); } } @@ -189,7 +200,7 @@ void __init check_writebuffer_bugs(void) unsigned long *p1, *p2; pgprot_t prot = __pgprot(L_PTE_PRESENT|L_PTE_YOUNG| L_PTE_DIRTY|L_PTE_WRITE| - L_PTE_BUFFERABLE); + L_PTE_MT_BUFFERABLE); p1 = vmap(&page, 1, VM_IOREMAP, prot); p2 = vmap(&page, 1, VM_IOREMAP, prot); @@ -210,7 +221,7 @@ void __init check_writebuffer_bugs(void) if (v) { printk("failed, %s\n", reason); - shared_pte_mask |= L_PTE_BUFFERABLE; + shared_pte_mask = L_PTE_MT_UNCACHED; } else { printk("ok\n"); } diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 28ad7ab1c0cd..2df8d9facf57 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -13,11 +13,11 @@ #include <linux/mm.h> #include <linux/init.h> #include <linux/kprobes.h> +#include <linux/uaccess.h> #include <asm/system.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> -#include <asm/uaccess.h> #include "fault.h" @@ -72,9 +72,8 @@ void show_pte(struct mm_struct *mm, unsigned long addr) } pmd = pmd_offset(pgd, addr); -#if PTRS_PER_PMD != 1 - printk(", *pmd=%08lx", pmd_val(*pmd)); -#endif + if (PTRS_PER_PMD != 1) + printk(", *pmd=%08lx", pmd_val(*pmd)); if (pmd_none(*pmd)) break; diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 9df507d36e0b..0fa9bf388f0b 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -12,6 +12,7 @@ #include <linux/pagemap.h> #include <asm/cacheflush.h> +#include <asm/cachetype.h> #include <asm/system.h> #include <asm/tlbflush.h> @@ -199,6 +200,8 @@ void flush_dcache_page(struct page *page) __flush_dcache_page(mapping, page); if (mapping && cache_is_vivt()) __flush_dcache_aliases(mapping, page); + else if (mapping) + __flush_icache_all(); } } EXPORT_SYMBOL(flush_dcache_page); diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index b657f1719af0..82c4b4217989 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -26,9 +26,42 @@ #include "mm.h" -extern void _text, _etext, __data_start, _end, __init_begin, __init_end; -extern unsigned long phys_initrd_start; -extern unsigned long phys_initrd_size; +static unsigned long phys_initrd_start __initdata = 0; +static unsigned long phys_initrd_size __initdata = 0; + +static void __init early_initrd(char **p) +{ + unsigned long start, size; + + start = memparse(*p, p); + if (**p == ',') { + size = memparse((*p) + 1, p); + + phys_initrd_start = start; + phys_initrd_size = size; + } +} +__early_param("initrd=", early_initrd); + +static int __init parse_tag_initrd(const struct tag *tag) +{ + printk(KERN_WARNING "ATAG_INITRD is deprecated; " + "please update your bootloader.\n"); + phys_initrd_start = __virt_to_phys(tag->u.initrd.start); + phys_initrd_size = tag->u.initrd.size; + return 0; +} + +__tagtable(ATAG_INITRD, parse_tag_initrd); + +static int __init parse_tag_initrd2(const struct tag *tag) +{ + phys_initrd_start = tag->u.initrd.start; + phys_initrd_size = tag->u.initrd.size; + return 0; +} + +__tagtable(ATAG_INITRD2, parse_tag_initrd2); /* * This is used to pass memory configuration data from paging_init @@ -36,10 +69,6 @@ extern unsigned long phys_initrd_size; */ static struct meminfo meminfo = { 0, }; -#define for_each_nodebank(iter,mi,no) \ - for (iter = 0; iter < mi->nr_banks; iter++) \ - if (mi->bank[iter].node == no) - void show_mem(void) { int free = 0, total = 0, reserved = 0; @@ -50,14 +79,15 @@ void show_mem(void) show_free_areas(); for_each_online_node(node) { pg_data_t *n = NODE_DATA(node); - struct page *map = n->node_mem_map - n->node_start_pfn; + struct page *map = pgdat_page_nr(n, 0) - n->node_start_pfn; for_each_nodebank (i,mi,node) { + struct membank *bank = &mi->bank[i]; unsigned int pfn1, pfn2; struct page *page, *end; - pfn1 = __phys_to_pfn(mi->bank[i].start); - pfn2 = __phys_to_pfn(mi->bank[i].size + mi->bank[i].start); + pfn1 = bank_pfn_start(bank); + pfn2 = bank_pfn_end(bank); page = map + pfn1; end = map + pfn2; @@ -96,17 +126,17 @@ void show_mem(void) static unsigned int __init find_bootmap_pfn(int node, struct meminfo *mi, unsigned int bootmap_pages) { - unsigned int start_pfn, bank, bootmap_pfn; + unsigned int start_pfn, i, bootmap_pfn; start_pfn = PAGE_ALIGN(__pa(&_end)) >> PAGE_SHIFT; bootmap_pfn = 0; - for_each_nodebank(bank, mi, node) { + for_each_nodebank(i, mi, node) { + struct membank *bank = &mi->bank[i]; unsigned int start, end; - start = mi->bank[bank].start >> PAGE_SHIFT; - end = (mi->bank[bank].size + - mi->bank[bank].start) >> PAGE_SHIFT; + start = bank_pfn_start(bank); + end = bank_pfn_end(bank); if (end < start_pfn) continue; @@ -145,20 +175,17 @@ static int __init check_initrd(struct meminfo *mi) initrd_node = -1; for (i = 0; i < mi->nr_banks; i++) { - unsigned long bank_end; - - bank_end = mi->bank[i].start + mi->bank[i].size; - - if (mi->bank[i].start <= phys_initrd_start && - end <= bank_end) - initrd_node = mi->bank[i].node; + struct membank *bank = &mi->bank[i]; + if (bank_phys_start(bank) <= phys_initrd_start && + end <= bank_phys_end(bank)) + initrd_node = bank->node; } } if (initrd_node == -1) { - printk(KERN_ERR "initrd (0x%08lx - 0x%08lx) extends beyond " + printk(KERN_ERR "INITRD: 0x%08lx+0x%08lx extends beyond " "physical memory - disabling initrd\n", - phys_initrd_start, end); + phys_initrd_start, phys_initrd_size); phys_initrd_start = phys_initrd_size = 0; } #endif @@ -171,19 +198,17 @@ static inline void map_memory_bank(struct membank *bank) #ifdef CONFIG_MMU struct map_desc map; - map.pfn = __phys_to_pfn(bank->start); - map.virtual = __phys_to_virt(bank->start); - map.length = bank->size; + map.pfn = bank_pfn_start(bank); + map.virtual = __phys_to_virt(bank_phys_start(bank)); + map.length = bank_phys_size(bank); map.type = MT_MEMORY; create_mapping(&map); #endif } -static unsigned long __init -bootmem_init_node(int node, int initrd_node, struct meminfo *mi) +static unsigned long __init bootmem_init_node(int node, struct meminfo *mi) { - unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; unsigned long start_pfn, end_pfn, boot_pfn; unsigned int boot_pages; pg_data_t *pgdat; @@ -199,8 +224,8 @@ bootmem_init_node(int node, int initrd_node, struct meminfo *mi) struct membank *bank = &mi->bank[i]; unsigned long start, end; - start = bank->start >> PAGE_SHIFT; - end = (bank->start + bank->size) >> PAGE_SHIFT; + start = bank_pfn_start(bank); + end = bank_pfn_end(bank); if (start_pfn > start) start_pfn = start; @@ -230,8 +255,11 @@ bootmem_init_node(int node, int initrd_node, struct meminfo *mi) pgdat = NODE_DATA(node); init_bootmem_node(pgdat, boot_pfn, start_pfn, end_pfn); - for_each_nodebank(i, mi, node) - free_bootmem_node(pgdat, mi->bank[i].start, mi->bank[i].size); + for_each_nodebank(i, mi, node) { + struct membank *bank = &mi->bank[i]; + free_bootmem_node(pgdat, bank_phys_start(bank), bank_phys_size(bank)); + memory_present(node, bank_pfn_start(bank), bank_pfn_end(bank)); + } /* * Reserve the bootmem bitmap for this node. @@ -239,23 +267,39 @@ bootmem_init_node(int node, int initrd_node, struct meminfo *mi) reserve_bootmem_node(pgdat, boot_pfn << PAGE_SHIFT, boot_pages << PAGE_SHIFT, BOOTMEM_DEFAULT); + return end_pfn; +} + +static void __init bootmem_reserve_initrd(int node) +{ #ifdef CONFIG_BLK_DEV_INITRD - /* - * If the initrd is in this node, reserve its memory. - */ - if (node == initrd_node) { - reserve_bootmem_node(pgdat, phys_initrd_start, - phys_initrd_size, BOOTMEM_DEFAULT); + pg_data_t *pgdat = NODE_DATA(node); + int res; + + res = reserve_bootmem_node(pgdat, phys_initrd_start, + phys_initrd_size, BOOTMEM_EXCLUSIVE); + + if (res == 0) { initrd_start = __phys_to_virt(phys_initrd_start); initrd_end = initrd_start + phys_initrd_size; + } else { + printk(KERN_ERR + "INITRD: 0x%08lx+0x%08lx overlaps in-use " + "memory region - disabling initrd\n", + phys_initrd_start, phys_initrd_size); } #endif +} - /* - * Finally, reserve any node zero regions. - */ - if (node == 0) - reserve_node_zero(pgdat); +static void __init bootmem_free_node(int node, struct meminfo *mi) +{ + unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; + unsigned long start_pfn, end_pfn; + pg_data_t *pgdat = NODE_DATA(node); + int i; + + start_pfn = pgdat->bdata->node_min_pfn; + end_pfn = pgdat->bdata->node_low_pfn; /* * initialise the zones within this node. @@ -276,7 +320,7 @@ bootmem_init_node(int node, int initrd_node, struct meminfo *mi) */ zhole_size[0] = zone_size[0]; for_each_nodebank(i, mi, node) - zhole_size[0] -= mi->bank[i].size >> PAGE_SHIFT; + zhole_size[0] -= bank_pfn_size(&mi->bank[i]); /* * Adjust the sizes according to any special requirements for @@ -284,22 +328,13 @@ bootmem_init_node(int node, int initrd_node, struct meminfo *mi) */ arch_adjust_zones(node, zone_size, zhole_size); - free_area_init_node(node, pgdat, zone_size, start_pfn, zhole_size); - - return end_pfn; + free_area_init_node(node, zone_size, start_pfn, zhole_size); } void __init bootmem_init(struct meminfo *mi) { unsigned long memend_pfn = 0; - int node, initrd_node, i; - - /* - * Invalidate the node number for empty or invalid memory banks - */ - for (i = 0; i < mi->nr_banks; i++) - if (mi->bank[i].size == 0 || mi->bank[i].node >= MAX_NUMNODES) - mi->bank[i].node = -1; + int node, initrd_node; memcpy(&meminfo, mi, sizeof(meminfo)); @@ -312,9 +347,19 @@ void __init bootmem_init(struct meminfo *mi) * Run through each node initialising the bootmem allocator. */ for_each_node(node) { - unsigned long end_pfn; + unsigned long end_pfn = bootmem_init_node(node, mi); - end_pfn = bootmem_init_node(node, initrd_node, mi); + /* + * Reserve any special node zero regions. + */ + if (node == 0) + reserve_node_zero(NODE_DATA(node)); + + /* + * If the initrd is in this node, reserve its memory. + */ + if (node == initrd_node) + bootmem_reserve_initrd(node); /* * Remember the highest memory PFN. @@ -323,6 +368,19 @@ void __init bootmem_init(struct meminfo *mi) memend_pfn = end_pfn; } + /* + * sparse_init() needs the bootmem allocator up and running. + */ + sparse_init(); + + /* + * Now free memory in each node - free_area_init_node needs + * the sparse mem_map arrays initialized by sparse_init() + * for memmap_init_zone(), otherwise all PFNs are invalid. + */ + for_each_node(node) + bootmem_free_node(node, mi); + high_memory = __va(memend_pfn << PAGE_SHIFT); /* @@ -393,7 +451,9 @@ static void __init free_unused_memmap_node(int node, struct meminfo *mi) * information on the command line. */ for_each_nodebank(i, mi, node) { - bank_start = mi->bank[i].start >> PAGE_SHIFT; + struct membank *bank = &mi->bank[i]; + + bank_start = bank_pfn_start(bank); if (bank_start < prev_bank_end) { printk(KERN_ERR "MEM: unordered memory banks. " "Not freeing memmap.\n"); @@ -407,8 +467,7 @@ static void __init free_unused_memmap_node(int node, struct meminfo *mi) if (prev_bank_end && prev_bank_end != bank_start) free_memmap(node, prev_bank_end, bank_start); - prev_bank_end = (mi->bank[i].start + - mi->bank[i].size) >> PAGE_SHIFT; + prev_bank_end = bank_pfn_end(bank); } } @@ -453,8 +512,8 @@ void __init mem_init(void) num_physpages = 0; for (i = 0; i < meminfo.nr_banks; i++) { - num_physpages += meminfo.bank[i].size >> PAGE_SHIFT; - printk(" %ldMB", meminfo.bank[i].size >> 20); + num_physpages += bank_pfn_size(&meminfo.bank[i]); + printk(" %ldMB", bank_phys_size(&meminfo.bank[i]) >> 20); } printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT)); diff --git a/arch/arm/mm/iomap.c b/arch/arm/mm/iomap.c index 7429f8c01015..ffad039cbb73 100644 --- a/arch/arm/mm/iomap.c +++ b/arch/arm/mm/iomap.c @@ -7,8 +7,7 @@ #include <linux/module.h> #include <linux/pci.h> #include <linux/ioport.h> - -#include <asm/io.h> +#include <linux/io.h> #ifdef __io void __iomem *ioport_map(unsigned long port, unsigned int nr) diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 303a7ff6bfd2..18373f73f2fc 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -24,9 +24,10 @@ #include <linux/errno.h> #include <linux/mm.h> #include <linux/vmalloc.h> +#include <linux/io.h> +#include <asm/cputype.h> #include <asm/cacheflush.h> -#include <asm/io.h> #include <asm/mmu_context.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> @@ -55,8 +56,7 @@ static int remap_area_pte(pmd_t *pmd, unsigned long addr, unsigned long end, if (!pte_none(*pte)) goto bad; - set_pte_ext(pte, pfn_pte(phys_addr >> PAGE_SHIFT, prot), - type->prot_pte_ext); + set_pte_ext(pte, pfn_pte(phys_addr >> PAGE_SHIFT, prot), 0); phys_addr += PAGE_SIZE; } while (pte++, addr += PAGE_SIZE, addr != end); return 0; @@ -259,7 +259,7 @@ remap_area_supersections(unsigned long virt, unsigned long pfn, * caller shouldn't need to know that small detail. * * 'flags' are the extra L_PTE_ flags that you want to specify for this - * mapping. See include/asm-arm/proc-armv/pgtable.h for more information. + * mapping. See <asm/pgtable.h> for more information. */ void __iomem * __arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, @@ -332,15 +332,14 @@ __arm_ioremap(unsigned long phys_addr, size_t size, unsigned int mtype) } EXPORT_SYMBOL(__arm_ioremap); -void __iounmap(volatile void __iomem *addr) +void __iounmap(volatile void __iomem *io_addr) { + void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr); #ifndef CONFIG_SMP struct vm_struct **p, *tmp; #endif unsigned int section_mapping = 0; - addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long)addr); - #ifndef CONFIG_SMP /* * If this is a section based mapping we need to handle it @@ -351,7 +350,7 @@ void __iounmap(volatile void __iomem *addr) */ write_lock(&vmlist_lock); for (p = &vmlist ; (tmp = *p) ; p = &tmp->next) { - if((tmp->flags & VM_IOREMAP) && (tmp->addr == addr)) { + if ((tmp->flags & VM_IOREMAP) && (tmp->addr == addr)) { if (tmp->flags & VM_ARM_SECTION_MAPPING) { *p = tmp->next; unmap_area_sections((unsigned long)tmp->addr, @@ -366,6 +365,6 @@ void __iounmap(volatile void __iomem *addr) #endif if (!section_mapping) - vunmap((void __force *)addr); + vunmap(addr); } EXPORT_SYMBOL(__iounmap); diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 7647c597fc59..5d9f53907b4e 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -18,7 +18,6 @@ static inline pmd_t *pmd_off_k(unsigned long virt) struct mem_type { unsigned int prot_pte; - unsigned int prot_pte_ext; unsigned int prot_l1; unsigned int prot_sect; unsigned int domain; @@ -35,3 +34,5 @@ struct pglist_data; void __init create_mapping(struct map_desc *md); void __init bootmem_init(struct meminfo *mi); void reserve_node_zero(struct pglist_data *pgdat); + +extern void _text, _stext, _etext, __data_start, _end, __init_begin, __init_end; diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 3f6dc40b8353..5358fcc7f61e 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -6,6 +6,8 @@ #include <linux/mman.h> #include <linux/shm.h> #include <linux/sched.h> +#include <linux/io.h> +#include <asm/cputype.h> #include <asm/system.h> #define COLOUR_ALIGN(addr,pgoff) \ @@ -37,8 +39,8 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, * caches alias. This is indicated by bits 9 and 21 of the * cache type register. */ - cache_type = read_cpuid(CPUID_CACHETYPE); - if (cache_type != read_cpuid(CPUID_ID)) { + cache_type = read_cpuid_cachetype(); + if (cache_type != read_cpuid_id()) { aliasing = (cache_type | cache_type >> 12) & (1 << 11); if (aliasing) do_align = filp || flags & MAP_SHARED; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 2d6d682c206a..8ba754064559 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -15,6 +15,7 @@ #include <linux/mman.h> #include <linux/nodemask.h> +#include <asm/cputype.h> #include <asm/mach-types.h> #include <asm/setup.h> #include <asm/sizes.h> @@ -27,9 +28,6 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -extern void _stext, _etext, __data_start, _end; -extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; - /* * empty_zero_page is a special page that is used for * zero-initialized data and COW. @@ -68,27 +66,27 @@ static struct cachepolicy cache_policies[] __initdata = { .policy = "uncached", .cr_mask = CR_W|CR_C, .pmd = PMD_SECT_UNCACHED, - .pte = 0, + .pte = L_PTE_MT_UNCACHED, }, { .policy = "buffered", .cr_mask = CR_C, .pmd = PMD_SECT_BUFFERED, - .pte = PTE_BUFFERABLE, + .pte = L_PTE_MT_BUFFERABLE, }, { .policy = "writethrough", .cr_mask = 0, .pmd = PMD_SECT_WT, - .pte = PTE_CACHEABLE, + .pte = L_PTE_MT_WRITETHROUGH, }, { .policy = "writeback", .cr_mask = 0, .pmd = PMD_SECT_WB, - .pte = PTE_BUFFERABLE|PTE_CACHEABLE, + .pte = L_PTE_MT_WRITEBACK, }, { .policy = "writealloc", .cr_mask = 0, .pmd = PMD_SECT_WBWA, - .pte = PTE_BUFFERABLE|PTE_CACHEABLE, + .pte = L_PTE_MT_WRITEALLOC, } }; @@ -186,29 +184,28 @@ void adjust_cr(unsigned long mask, unsigned long set) static struct mem_type mem_types[] = { [MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */ - .prot_pte = PROT_PTE_DEVICE, + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED | + L_PTE_SHARED, .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PROT_SECT_DEVICE | PMD_SECT_UNCACHED, .domain = DOMAIN_IO, }, [MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */ - .prot_pte = PROT_PTE_DEVICE, - .prot_pte_ext = PTE_EXT_TEX(2), + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED, .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PROT_SECT_DEVICE | PMD_SECT_TEX(2), .domain = DOMAIN_IO, }, [MT_DEVICE_CACHED] = { /* ioremap_cached */ - .prot_pte = PROT_PTE_DEVICE | L_PTE_CACHEABLE | L_PTE_BUFFERABLE, + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED, .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB, .domain = DOMAIN_IO, }, - [MT_DEVICE_IXP2000] = { /* IXP2400 requires XCB=101 for on-chip I/O */ - .prot_pte = PROT_PTE_DEVICE, + [MT_DEVICE_WC] = { /* ioremap_wc */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC, .prot_l1 = PMD_TYPE_TABLE, - .prot_sect = PROT_SECT_DEVICE | PMD_SECT_BUFFERABLE | - PMD_SECT_TEX(1), + .prot_sect = PROT_SECT_DEVICE | PMD_SECT_BUFFERABLE, .domain = DOMAIN_IO, }, [MT_CACHECLEAN] = { @@ -253,7 +250,7 @@ static void __init build_mem_type_table(void) { struct cachepolicy *cp; unsigned int cr = get_cr(); - unsigned int user_pgprot, kern_pgprot; + unsigned int user_pgprot, kern_pgprot, vecs_pgprot; int cpu_arch = cpu_architecture(); int i; @@ -271,6 +268,20 @@ static void __init build_mem_type_table(void) cachepolicy = CPOLICY_WRITEBACK; ecc_mask = 0; } +#ifdef CONFIG_SMP + cachepolicy = CPOLICY_WRITEALLOC; +#endif + + /* + * On non-Xscale3 ARMv5-and-older systems, use CB=01 + * (Uncached/Buffered) for ioremap_wc() mappings. On XScale3 + * and ARMv6+, use TEXCB=00100 mappings (Inner/Outer Uncacheable + * in xsc3 parlance, Uncached Normal in ARMv6 parlance). + */ + if (cpu_is_xsc3() || cpu_arch >= CPU_ARCH_ARMv6) { + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); + mem_types[MT_DEVICE_WC].prot_sect &= ~PMD_SECT_BUFFERABLE; + } /* * ARMv5 and lower, bit 4 must be set for page tables. @@ -292,7 +303,15 @@ static void __init build_mem_type_table(void) } cp = &cache_policies[cachepolicy]; - kern_pgprot = user_pgprot = cp->pte; + vecs_pgprot = kern_pgprot = user_pgprot = cp->pte; + +#ifndef CONFIG_SMP + /* + * Only use write-through for non-SMP systems + */ + if (cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH) + vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte; +#endif /* * Enable CPU-specific coherency if supported. @@ -320,7 +339,6 @@ static void __init build_mem_type_table(void) /* * Mark the device area as "shared device" */ - mem_types[MT_DEVICE].prot_pte |= L_PTE_BUFFERABLE; mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED; #ifdef CONFIG_SMP @@ -329,30 +347,21 @@ static void __init build_mem_type_table(void) */ user_pgprot |= L_PTE_SHARED; kern_pgprot |= L_PTE_SHARED; + vecs_pgprot |= L_PTE_SHARED; mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; #endif } for (i = 0; i < 16; i++) { unsigned long v = pgprot_val(protection_map[i]); - v = (v & ~(L_PTE_BUFFERABLE|L_PTE_CACHEABLE)) | user_pgprot; - protection_map[i] = __pgprot(v); + protection_map[i] = __pgprot(v | user_pgprot); } - mem_types[MT_LOW_VECTORS].prot_pte |= kern_pgprot; - mem_types[MT_HIGH_VECTORS].prot_pte |= kern_pgprot; + mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot; + mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot; - if (cpu_arch >= CPU_ARCH_ARMv5) { -#ifndef CONFIG_SMP - /* - * Only use write-through for non-SMP systems - */ - mem_types[MT_LOW_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE; - mem_types[MT_HIGH_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE; -#endif - } else { + if (cpu_arch < CPU_ARCH_ARMv5) mem_types[MT_MINICLEAN].prot_sect &= ~PMD_SECT_TEX(1); - } pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot); pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | @@ -400,8 +409,7 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, pte = pte_offset_kernel(pmd, addr); do { - set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), - type->prot_pte_ext); + set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0); pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); } @@ -568,6 +576,77 @@ void __init iotable_init(struct map_desc *io_desc, int nr) create_mapping(io_desc + i); } +static unsigned long __initdata vmalloc_reserve = SZ_128M; + +/* + * vmalloc=size forces the vmalloc area to be exactly 'size' + * bytes. This can be used to increase (or decrease) the vmalloc + * area - the default is 128m. + */ +static void __init early_vmalloc(char **arg) +{ + vmalloc_reserve = memparse(*arg, arg); + + if (vmalloc_reserve < SZ_16M) { + vmalloc_reserve = SZ_16M; + printk(KERN_WARNING + "vmalloc area too small, limiting to %luMB\n", + vmalloc_reserve >> 20); + } +} +__early_param("vmalloc=", early_vmalloc); + +#define VMALLOC_MIN (void *)(VMALLOC_END - vmalloc_reserve) + +static int __init check_membank_valid(struct membank *mb) +{ + /* + * Check whether this memory region has non-zero size or + * invalid node number. + */ + if (mb->size == 0 || mb->node >= MAX_NUMNODES) + return 0; + + /* + * Check whether this memory region would entirely overlap + * the vmalloc area. + */ + if (phys_to_virt(mb->start) >= VMALLOC_MIN) { + printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx " + "(vmalloc region overlap).\n", + mb->start, mb->start + mb->size - 1); + return 0; + } + + /* + * Check whether this memory region would partially overlap + * the vmalloc area. + */ + if (phys_to_virt(mb->start + mb->size) < phys_to_virt(mb->start) || + phys_to_virt(mb->start + mb->size) > VMALLOC_MIN) { + unsigned long newsize = VMALLOC_MIN - phys_to_virt(mb->start); + + printk(KERN_NOTICE "Truncating RAM at %.8lx-%.8lx " + "to -%.8lx (vmalloc region overlap).\n", + mb->start, mb->start + mb->size - 1, + mb->start + newsize - 1); + mb->size = newsize; + } + + return 1; +} + +static void __init sanity_check_meminfo(struct meminfo *mi) +{ + int i, j; + + for (i = 0, j = 0; i < mi->nr_banks; i++) { + if (check_membank_valid(&mi->bank[i])) + mi->bank[j++] = mi->bank[i]; + } + mi->nr_banks = j; +} + static inline void prepare_page_table(struct meminfo *mi) { unsigned long addr; @@ -753,6 +832,7 @@ void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc) void *zero_page; build_mem_type_table(); + sanity_check_meminfo(mi); prepare_page_table(mi); bootmem_init(mi); devicemaps_init(mdesc); diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 63c62fdea521..07b62b238979 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -7,16 +7,14 @@ #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/bootmem.h> +#include <linux/io.h> #include <asm/cacheflush.h> -#include <asm/io.h> #include <asm/page.h> #include <asm/mach/arch.h> #include "mm.h" -extern void _stext, __data_start, _end; - /* * Reserve the various regions of node 0 */ @@ -43,12 +41,26 @@ void __init reserve_node_zero(pg_data_t *pgdat) BOOTMEM_DEFAULT); } +static void __init sanity_check_meminfo(struct meminfo *mi) +{ + int i, j; + + for (i = 0, j = 0; i < mi->nr_banks; i++) { + struct membank *mb = &mi->bank[i]; + + if (mb->size != 0 && mb->node < MAX_NUMNODES) + mi->bank[j++] = mi->bank[i]; + } + mi->nr_banks = j; +} + /* * paging_init() sets up the page tables, initialises the zone memory * maps, and sets up the zero page, bad page and bad page tables. */ void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc) { + sanity_check_meminfo(mi); bootmem_init(mi); } diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index 5673f4d6113b..b5551bf010aa 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -29,7 +29,7 @@ #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/ptrace.h> @@ -399,29 +399,7 @@ ENTRY(cpu_arm1020_switch_mm) .align 5 ENTRY(cpu_arm1020_set_pte_ext) #ifdef CONFIG_MMU - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - eor r3, r1, #0x0a @ C & small page? - tst r3, #0x0b - biceq r2, r2, #4 -#endif - str r2, [r0] @ hardware version + armv3_set_pte_ext mov r0, r0 #ifndef CONFIG_CPU_DCACHE_DISABLE mcr p15, 0, r0, c7, c10, 4 diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index 4343fdb0e9e5..8bc6740c29eb 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -29,7 +29,7 @@ #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/ptrace.h> @@ -383,29 +383,7 @@ ENTRY(cpu_arm1020e_switch_mm) .align 5 ENTRY(cpu_arm1020e_set_pte_ext) #ifdef CONFIG_MMU - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - eor r3, r1, #0x0a @ C & small page? - tst r3, #0x0b - biceq r2, r2, #4 -#endif - str r2, [r0] @ hardware version + armv3_set_pte_ext mov r0, r0 #ifndef CONFIG_CPU_DCACHE_DISABLE mcr p15, 0, r0, c7, c10, 1 @ clean D entry diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index 2a4ea1659e96..2cd03e66c0a3 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -18,7 +18,7 @@ #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/ptrace.h> @@ -365,29 +365,7 @@ ENTRY(cpu_arm1022_switch_mm) .align 5 ENTRY(cpu_arm1022_set_pte_ext) #ifdef CONFIG_MMU - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - eor r3, r1, #0x0a @ C & small page? - tst r3, #0x0b - biceq r2, r2, #4 -#endif - str r2, [r0] @ hardware version + armv3_set_pte_ext mov r0, r0 #ifndef CONFIG_CPU_DCACHE_DISABLE mcr p15, 0, r0, c7, c10, 1 @ clean D entry diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index 77a1babd421c..ad961a897f6e 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -18,7 +18,7 @@ #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/ptrace.h> @@ -354,29 +354,7 @@ ENTRY(cpu_arm1026_switch_mm) .align 5 ENTRY(cpu_arm1026_set_pte_ext) #ifdef CONFIG_MMU - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - eor r3, r1, #0x0a @ C & small page? - tst r3, #0x0b - biceq r2, r2, #4 -#endif - str r2, [r0] @ hardware version + armv3_set_pte_ext mov r0, r0 #ifndef CONFIG_CPU_DCACHE_DISABLE mcr p15, 0, r0, c7, c10, 1 @ clean D entry diff --git a/arch/arm/mm/proc-arm6_7.S b/arch/arm/mm/proc-arm6_7.S index c371fc87776e..80d6e1de069a 100644 --- a/arch/arm/mm/proc-arm6_7.S +++ b/arch/arm/mm/proc-arm6_7.S @@ -15,11 +15,13 @@ #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/ptrace.h> +#include "proc-macros.S" + ENTRY(cpu_arm6_dcache_clean_area) ENTRY(cpu_arm7_dcache_clean_area) mov pc, lr @@ -214,30 +216,13 @@ ENTRY(cpu_arm7_switch_mm) * : r1 = value to set * Purpose : Set a PTE and flush it out of any WB cache */ - .align 5 + .align 5 ENTRY(cpu_arm6_set_pte_ext) ENTRY(cpu_arm7_set_pte_ext) #ifdef CONFIG_MMU - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young - movne r2, #0 - - str r2, [r0] @ hardware version + armv3_set_pte_ext wc_disable=0 #endif /* CONFIG_MMU */ - mov pc, lr + mov pc, lr /* * Function: _arm6_7_reset diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S index d64f8e6f75ab..85ae18695f10 100644 --- a/arch/arm/mm/proc-arm720.S +++ b/arch/arm/mm/proc-arm720.S @@ -36,7 +36,7 @@ #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/ptrace.h> @@ -93,29 +93,12 @@ ENTRY(cpu_arm720_switch_mm) * : r1 = value to set * Purpose : Set a PTE and flush it out of any WB cache */ - .align 5 + .align 5 ENTRY(cpu_arm720_set_pte_ext) #ifdef CONFIG_MMU - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young - movne r2, #0 - - str r2, [r0] @ hardware version + armv3_set_pte_ext wc_disable=0 #endif - mov pc, lr + mov pc, lr /* * Function: arm720_reset @@ -231,7 +214,7 @@ cpu_arm720_name: .align /* - * See linux/include/asm-arm/procinfo.h for a definition of this structure. + * See <asm/procinfo.h> for a definition of this structure. */ .section ".proc.info.init", #alloc, #execinstr diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S index 3a57376c8bc9..4f95bee63e95 100644 --- a/arch/arm/mm/proc-arm740.S +++ b/arch/arm/mm/proc-arm740.S @@ -12,7 +12,7 @@ #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/ptrace.h> diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S index 7b3ecdeb5370..93e05fa7bed4 100644 --- a/arch/arm/mm/proc-arm7tdmi.S +++ b/arch/arm/mm/proc-arm7tdmi.S @@ -12,7 +12,7 @@ #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/ptrace.h> diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 28cdb060df45..914d688394fc 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -28,7 +28,7 @@ #include <linux/linkage.h> #include <linux/init.h> #include <asm/assembler.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/page.h> @@ -351,33 +351,11 @@ ENTRY(cpu_arm920_switch_mm) .align 5 ENTRY(cpu_arm920_set_pte_ext) #ifdef CONFIG_MMU - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - eor r3, r2, #0x0a @ C & small page? - tst r3, #0x0b - biceq r2, r2, #4 -#endif - str r2, [r0] @ hardware version + armv3_set_pte_ext mov r0, r0 mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif /* CONFIG_MMU */ +#endif mov pc, lr __INIT diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index 94ddcb4a4b76..51c9c9859e58 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -29,7 +29,7 @@ #include <linux/linkage.h> #include <linux/init.h> #include <asm/assembler.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/page.h> @@ -355,29 +355,7 @@ ENTRY(cpu_arm922_switch_mm) .align 5 ENTRY(cpu_arm922_set_pte_ext) #ifdef CONFIG_MMU - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - eor r3, r2, #0x0a @ C & small page? - tst r3, #0x0b - biceq r2, r2, #4 -#endif - str r2, [r0] @ hardware version + armv3_set_pte_ext mov r0, r0 mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index d045812f3399..2724526d89c1 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -52,7 +52,7 @@ #include <linux/linkage.h> #include <linux/init.h> #include <asm/assembler.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/page.h> @@ -398,29 +398,7 @@ ENTRY(cpu_arm925_switch_mm) .align 5 ENTRY(cpu_arm925_set_pte_ext) #ifdef CONFIG_MMU - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - eor r3, r2, #0x0a @ C & small page? - tst r3, #0x0b - biceq r2, r2, #4 -#endif - str r2, [r0] @ hardware version + armv3_set_pte_ext mov r0, r0 #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH mcr p15, 0, r0, c7, c10, 1 @ clean D entry diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index 4cd33169a7c9..54466937bff9 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -28,7 +28,7 @@ #include <linux/linkage.h> #include <linux/init.h> #include <asm/assembler.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/page.h> @@ -359,29 +359,7 @@ ENTRY(cpu_arm926_switch_mm) .align 5 ENTRY(cpu_arm926_set_pte_ext) #ifdef CONFIG_MMU - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - eor r3, r2, #0x0a @ C & small page? - tst r3, #0x0b - biceq r2, r2, #4 -#endif - str r2, [r0] @ hardware version + armv3_set_pte_ext mov r0, r0 #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH mcr p15, 0, r0, c7, c10, 1 @ clean D entry diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index 1a3d63df8e90..f595117caf55 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -11,10 +11,11 @@ #include <linux/linkage.h> #include <linux/init.h> #include <asm/assembler.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/ptrace.h> +#include "proc-macros.S" /* ARM940T has a 4KB DCache comprising 256 lines of 4 words */ #define CACHE_DLINESIZE 16 diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index 82d579ac9b98..e03f6ff1fb26 100644 --- a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -13,10 +13,11 @@ #include <linux/linkage.h> #include <linux/init.h> #include <asm/assembler.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/ptrace.h> +#include "proc-macros.S" /* * ARM946E-S is synthesizable to have 0KB to 1MB sized D-Cache, diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S index c85c1f50e396..be6c11d2b3fb 100644 --- a/arch/arm/mm/proc-arm9tdmi.S +++ b/arch/arm/mm/proc-arm9tdmi.S @@ -12,7 +12,7 @@ #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/ptrace.h> diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index a02c1712b52d..0fe1f8fc3488 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -22,7 +22,7 @@ #include <linux/linkage.h> #include <linux/init.h> #include <asm/assembler.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/page.h> @@ -44,11 +44,31 @@ */ #define CACHE_DLINESIZE 32 + .bss + .align 3 +__cache_params_loc: + .space 8 + .text +__cache_params: + .word __cache_params_loc + /* * cpu_feroceon_proc_init() */ ENTRY(cpu_feroceon_proc_init) + mrc p15, 0, r0, c0, c0, 1 @ read cache type register + ldr r1, __cache_params + mov r2, #(16 << 5) + tst r0, #(1 << 16) @ get way + mov r0, r0, lsr #18 @ get cache size order + movne r3, #((4 - 1) << 30) @ 4-way + and r0, r0, #0xf + moveq r3, #0 @ 1-way + mov r2, r2, lsl r0 @ actual cache size + movne r2, r2, lsr #2 @ turned into # of sets + sub r2, r2, #(1 << 5) + stmia r1, {r2, r3} mov pc, lr /* @@ -59,6 +79,14 @@ ENTRY(cpu_feroceon_proc_fin) mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE msr cpsr_c, ip bl feroceon_flush_kern_cache_all + +#if defined(CONFIG_CACHE_FEROCEON_L2) && \ + !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) + mov r0, #0 + mcr p15, 1, r0, c15, c9, 0 @ clean L2 + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. @@ -117,11 +145,19 @@ ENTRY(feroceon_flush_user_cache_all) */ ENTRY(feroceon_flush_kern_cache_all) mov r2, #VM_EXEC - mov ip, #0 + __flush_whole_cache: -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate - bne 1b + ldr r1, __cache_params + ldmia r1, {r1, r3} +1: orr ip, r1, r3 +2: mcr p15, 0, ip, c7, c14, 2 @ clean + invalidate D set/way + subs ip, ip, #(1 << 30) @ next way + bcs 2b + subs r1, r1, #(1 << 5) @ next set + bcs 1b + tst r2, #VM_EXEC + mov ip, #0 mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr @@ -138,7 +174,6 @@ __flush_whole_cache: */ .align 5 ENTRY(feroceon_flush_user_cache_range) - mov ip, #0 sub r3, r1, r0 @ calculate total size cmp r3, #CACHE_DLIMIT bgt __flush_whole_cache @@ -152,6 +187,7 @@ ENTRY(feroceon_flush_user_cache_range) cmp r0, r1 blo 1b tst r2, #VM_EXEC + mov ip, #0 mcrne p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr @@ -209,6 +245,20 @@ ENTRY(feroceon_flush_kern_dcache_page) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr + .align 5 +ENTRY(feroceon_range_flush_kern_dcache_page) + mrs r2, cpsr + add r1, r0, #PAGE_SZ - CACHE_DLINESIZE @ top addr is inclusive + orr r3, r2, #PSR_I_BIT + msr cpsr_c, r3 @ disable interrupts + mcr p15, 5, r0, c15, c15, 0 @ D clean/inv range start + mcr p15, 5, r1, c15, c15, 1 @ D clean/inv range top + msr cpsr_c, r2 @ restore interrupts + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + /* * dma_inv_range(start, end) * @@ -225,10 +275,10 @@ ENTRY(feroceon_flush_kern_dcache_page) .align 5 ENTRY(feroceon_dma_inv_range) tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean D entry tst r1, #CACHE_DLINESIZE - 1 mcrne p15, 0, r1, c7, c10, 1 @ clean D entry - bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry add r0, r0, #CACHE_DLINESIZE cmp r0, r1 @@ -236,6 +286,22 @@ ENTRY(feroceon_dma_inv_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr + .align 5 +ENTRY(feroceon_range_dma_inv_range) + mrs r2, cpsr + tst r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry + cmp r1, r0 + subne r1, r1, #1 @ top address is inclusive + orr r3, r2, #PSR_I_BIT + msr cpsr_c, r3 @ disable interrupts + mcr p15, 5, r0, c15, c14, 0 @ D inv range start + mcr p15, 5, r1, c15, c14, 1 @ D inv range top + msr cpsr_c, r2 @ restore interrupts + mov pc, lr + /* * dma_clean_range(start, end) * @@ -256,6 +322,19 @@ ENTRY(feroceon_dma_clean_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr + .align 5 +ENTRY(feroceon_range_dma_clean_range) + mrs r2, cpsr + cmp r1, r0 + subne r1, r1, #1 @ top address is inclusive + orr r3, r2, #PSR_I_BIT + msr cpsr_c, r3 @ disable interrupts + mcr p15, 5, r0, c15, c13, 0 @ D clean range start + mcr p15, 5, r1, c15, c13, 1 @ D clean range top + msr cpsr_c, r2 @ restore interrupts + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + /* * dma_flush_range(start, end) * @@ -274,6 +353,19 @@ ENTRY(feroceon_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr + .align 5 +ENTRY(feroceon_range_dma_flush_range) + mrs r2, cpsr + cmp r1, r0 + subne r1, r1, #1 @ top address is inclusive + orr r3, r2, #PSR_I_BIT + msr cpsr_c, r3 @ disable interrupts + mcr p15, 5, r0, c15, c15, 0 @ D clean/inv range start + mcr p15, 5, r1, c15, c15, 1 @ D clean/inv range top + msr cpsr_c, r2 @ restore interrupts + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + ENTRY(feroceon_cache_fns) .long feroceon_flush_kern_cache_all .long feroceon_flush_user_cache_all @@ -285,12 +377,35 @@ ENTRY(feroceon_cache_fns) .long feroceon_dma_clean_range .long feroceon_dma_flush_range +ENTRY(feroceon_range_cache_fns) + .long feroceon_flush_kern_cache_all + .long feroceon_flush_user_cache_all + .long feroceon_flush_user_cache_range + .long feroceon_coherent_kern_range + .long feroceon_coherent_user_range + .long feroceon_range_flush_kern_dcache_page + .long feroceon_range_dma_inv_range + .long feroceon_range_dma_clean_range + .long feroceon_range_dma_flush_range + .align 5 ENTRY(cpu_feroceon_dcache_clean_area) +#if defined(CONFIG_CACHE_FEROCEON_L2) && \ + !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) + mov r2, r0 + mov r3, r1 +#endif 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHE_DLINESIZE subs r1, r1, #CACHE_DLINESIZE bhi 1b +#if defined(CONFIG_CACHE_FEROCEON_L2) && \ + !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) +1: mcr p15, 1, r2, c15, c9, 1 @ clean L2 entry + add r2, r2, #CACHE_DLINESIZE + subs r3, r3, #CACHE_DLINESIZE + bhi 1b +#endif mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr @@ -306,16 +421,25 @@ ENTRY(cpu_feroceon_dcache_clean_area) .align 5 ENTRY(cpu_feroceon_switch_mm) #ifdef CONFIG_MMU - mov ip, #0 -@ && 'Clean & Invalidate whole DCache' -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate - bne 1b - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB + /* + * Note: we wish to call __flush_whole_cache but we need to preserve + * lr to do so. The only way without touching main memory is to + * use r2 which is normally used to test the VM_EXEC flag, and + * compensate locally for the skipped ops if it is not set. + */ + mov r2, lr @ abuse r2 to preserve lr + bl __flush_whole_cache + @ if r2 contains the VM_EXEC bit then the next 2 ops are done already + tst r2, #VM_EXEC + mcreq p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcreq p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif + mov pc, r2 +#else mov pc, lr +#endif /* * cpu_feroceon_set_pte_ext(ptep, pte, ext) @@ -325,26 +449,13 @@ ENTRY(cpu_feroceon_switch_mm) .align 5 ENTRY(cpu_feroceon_set_pte_ext) #ifdef CONFIG_MMU - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 - - str r2, [r0] @ hardware version + armv3_set_pte_ext wc_disable=0 mov r0, r0 mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#if defined(CONFIG_CACHE_FEROCEON_L2) && \ + !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) + mcr p15, 1, r0, c15, c9, 1 @ clean L2 entry +#endif mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif mov pc, lr @@ -369,14 +480,15 @@ __feroceon_setup: .size __feroceon_setup, . - __feroceon_setup /* - * R - * .RVI ZFRS BLDP WCAM - * .011 0001 ..11 0101 + * B + * R P + * .RVI UFRS BLDP WCAM + * .011 .001 ..11 0101 * */ .type feroceon_crval, #object feroceon_crval: - crval clear=0x00007f3f, mmuset=0x00003135, ucset=0x00001134 + crval clear=0x0000773f, mmuset=0x00003135, ucset=0x00001134 __INITDATA @@ -414,6 +526,21 @@ cpu_feroceon_name: .asciz "Feroceon" .size cpu_feroceon_name, . - cpu_feroceon_name + .type cpu_88fr531_name, #object +cpu_88fr531_name: + .asciz "Feroceon 88FR531-vd" + .size cpu_88fr531_name, . - cpu_88fr531_name + + .type cpu_88fr571_name, #object +cpu_88fr571_name: + .asciz "Feroceon 88FR571-vd" + .size cpu_88fr571_name, . - cpu_88fr571_name + + .type cpu_88fr131_name, #object +cpu_88fr131_name: + .asciz "Feroceon 88FR131" + .size cpu_88fr131_name, . - cpu_88fr131_name + .align .section ".proc.info.init", #alloc, #execinstr @@ -421,15 +548,15 @@ cpu_feroceon_name: #ifdef CONFIG_CPU_FEROCEON_OLD_ID .type __feroceon_old_id_proc_info,#object __feroceon_old_id_proc_info: - .long 0x41069260 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ + .long 0x41009260 + .long 0xff00fff0 + .long PMD_TYPE_SECT | \ PMD_SECT_BUFFERABLE | \ PMD_SECT_CACHEABLE | \ PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ + .long PMD_TYPE_SECT | \ PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ @@ -445,17 +572,17 @@ __feroceon_old_id_proc_info: .size __feroceon_old_id_proc_info, . - __feroceon_old_id_proc_info #endif - .type __feroceon_proc_info,#object -__feroceon_proc_info: + .type __88fr531_proc_info,#object +__88fr531_proc_info: .long 0x56055310 .long 0xfffffff0 - .long PMD_TYPE_SECT | \ + .long PMD_TYPE_SECT | \ PMD_SECT_BUFFERABLE | \ PMD_SECT_CACHEABLE | \ PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ + .long PMD_TYPE_SECT | \ PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ @@ -463,9 +590,59 @@ __feroceon_proc_info: .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_feroceon_name + .long cpu_88fr531_name .long feroceon_processor_functions .long v4wbi_tlb_fns .long feroceon_user_fns .long feroceon_cache_fns - .size __feroceon_proc_info, . - __feroceon_proc_info + .size __88fr531_proc_info, . - __88fr531_proc_info + + .type __88fr571_proc_info,#object +__88fr571_proc_info: + .long 0x56155710 + .long 0xfffffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __feroceon_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_88fr571_name + .long feroceon_processor_functions + .long v4wbi_tlb_fns + .long feroceon_user_fns + .long feroceon_range_cache_fns + .size __88fr571_proc_info, . - __88fr571_proc_info + + .type __88fr131_proc_info,#object +__88fr131_proc_info: + .long 0x56251310 + .long 0xfffffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __feroceon_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_88fr131_name + .long feroceon_processor_functions + .long v4wbi_tlb_fns + .long feroceon_user_fns + .long feroceon_range_cache_fns + .size __88fr131_proc_info, . - __88fr131_proc_info diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index b13150052a76..54b1f721dec8 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -71,3 +71,173 @@ mov \reg, #16 @ size offset mov \reg, \reg, lsl \tmp @ actual cache line size .endm + + +/* + * Sanity check the PTE configuration for the code below - which makes + * certain assumptions about how these bits are layed out. + */ +#if L_PTE_SHARED != PTE_EXT_SHARED +#error PTE shared bit mismatch +#endif +#if L_PTE_BUFFERABLE != PTE_BUFFERABLE +#error PTE bufferable bit mismatch +#endif +#if L_PTE_CACHEABLE != PTE_CACHEABLE +#error PTE cacheable bit mismatch +#endif +#if (L_PTE_EXEC+L_PTE_USER+L_PTE_WRITE+L_PTE_DIRTY+L_PTE_YOUNG+\ + L_PTE_FILE+L_PTE_PRESENT) > L_PTE_SHARED +#error Invalid Linux PTE bit settings +#endif + +/* + * The ARMv6 and ARMv7 set_pte_ext translation function. + * + * Permission translation: + * YUWD APX AP1 AP0 SVC User + * 0xxx 0 0 0 no acc no acc + * 100x 1 0 1 r/o no acc + * 10x0 1 0 1 r/o no acc + * 1011 0 0 1 r/w no acc + * 110x 0 1 0 r/w r/o + * 11x0 0 1 0 r/w r/o + * 1111 0 1 1 r/w r/w + */ + .macro armv6_mt_table pfx +\pfx\()_mt_table: + .long 0x00 @ L_PTE_MT_UNCACHED + .long PTE_EXT_TEX(1) @ L_PTE_MT_BUFFERABLE + .long PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK + .long PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED + .long 0x00 @ unused + .long 0x00 @ L_PTE_MT_MINICACHE (not present) + .long PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC + .long 0x00 @ unused + .long PTE_EXT_TEX(1) @ L_PTE_MT_DEV_WC + .long 0x00 @ unused + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED + .long PTE_EXT_TEX(2) @ L_PTE_MT_DEV_NONSHARED + .long 0x00 @ unused + .long 0x00 @ unused + .long 0x00 @ unused + .endm + + .macro armv6_set_pte_ext pfx + str r1, [r0], #-2048 @ linux version + + bic r3, r1, #0x000003fc + bic r3, r3, #PTE_TYPE_MASK + orr r3, r3, r2 + orr r3, r3, #PTE_EXT_AP0 | 2 + + adr ip, \pfx\()_mt_table + and r2, r1, #L_PTE_MT_MASK + ldr r2, [ip, r2] + + tst r1, #L_PTE_WRITE + tstne r1, #L_PTE_DIRTY + orreq r3, r3, #PTE_EXT_APX + + tst r1, #L_PTE_USER + orrne r3, r3, #PTE_EXT_AP1 + tstne r3, #PTE_EXT_APX + bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 + + tst r1, #L_PTE_EXEC + orreq r3, r3, #PTE_EXT_XN + + orr r3, r3, r2 + + tst r1, #L_PTE_YOUNG + tstne r1, #L_PTE_PRESENT + moveq r3, #0 + + str r3, [r0] + mcr p15, 0, r0, c7, c10, 1 @ flush_pte + .endm + + +/* + * The ARMv3, ARMv4 and ARMv5 set_pte_ext translation function, + * covering most CPUs except Xscale and Xscale 3. + * + * Permission translation: + * YUWD AP SVC User + * 0xxx 0x00 no acc no acc + * 100x 0x00 r/o no acc + * 10x0 0x00 r/o no acc + * 1011 0x55 r/w no acc + * 110x 0xaa r/w r/o + * 11x0 0xaa r/w r/o + * 1111 0xff r/w r/w + */ + .macro armv3_set_pte_ext wc_disable=1 + str r1, [r0], #-2048 @ linux version + + eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits + bic r2, r2, #PTE_TYPE_MASK + orr r2, r2, #PTE_TYPE_SMALL + + tst r3, #L_PTE_USER @ user? + orrne r2, r2, #PTE_SMALL_AP_URO_SRW + + tst r3, #L_PTE_WRITE | L_PTE_DIRTY @ write and dirty? + orreq r2, r2, #PTE_SMALL_AP_UNO_SRW + + tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? + movne r2, #0 + + .if \wc_disable +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + tst r2, #PTE_CACHEABLE + bicne r2, r2, #PTE_BUFFERABLE +#endif + .endif + str r2, [r0] @ hardware version + .endm + + +/* + * Xscale set_pte_ext translation, split into two halves to cope + * with work-arounds. r3 must be preserved by code between these + * two macros. + * + * Permission translation: + * YUWD AP SVC User + * 0xxx 00 no acc no acc + * 100x 00 r/o no acc + * 10x0 00 r/o no acc + * 1011 01 r/w no acc + * 110x 10 r/w r/o + * 11x0 10 r/w r/o + * 1111 11 r/w r/w + */ + .macro xscale_set_pte_ext_prologue + str r1, [r0], #-2048 @ linux version + + eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits + orr r2, r2, #PTE_TYPE_EXT @ extended page + + tst r3, #L_PTE_USER @ user? + orrne r2, r2, #PTE_EXT_AP_URO_SRW @ yes -> user r/o, system r/w + + tst r3, #L_PTE_WRITE | L_PTE_DIRTY @ write and dirty? + orreq r2, r2, #PTE_EXT_AP_UNO_SRW @ yes -> user n/a, system r/w + @ combined with user -> user r/w + .endm + + .macro xscale_set_pte_ext_epilogue + tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? + movne r2, #0 @ no -> fault + + str r2, [r0] @ hardware version + mov ip, #0 + mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + .endm diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S index 9818195dbf11..90a7e5279f29 100644 --- a/arch/arm/mm/proc-sa110.S +++ b/arch/arm/mm/proc-sa110.S @@ -17,8 +17,8 @@ #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> -#include <asm/elf.h> -#include <asm/hardware.h> +#include <asm/hwcap.h> +#include <mach/hardware.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/ptrace.h> @@ -153,24 +153,7 @@ ENTRY(cpu_sa110_switch_mm) .align 5 ENTRY(cpu_sa110_set_pte_ext) #ifdef CONFIG_MMU - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 - - str r2, [r0] @ hardware version + armv3_set_pte_ext wc_disable=0 mov r0, r0 mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index c5fe27ad2892..451e2d953e2a 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -22,8 +22,8 @@ #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> -#include <asm/elf.h> -#include <asm/hardware.h> +#include <asm/hwcap.h> +#include <mach/hardware.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> @@ -166,24 +166,7 @@ ENTRY(cpu_sa1100_switch_mm) .align 5 ENTRY(cpu_sa1100_set_pte_ext) #ifdef CONFIG_MMU - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 - - str r2, [r0] @ hardware version + armv3_set_pte_ext wc_disable=0 mov r0, r0 mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 5702ec58b2a2..294943b85973 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -13,7 +13,7 @@ #include <linux/linkage.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> @@ -114,46 +114,12 @@ ENTRY(cpu_v6_switch_mm) * (hardware version is stored at -1024 bytes) * - pte - PTE value to store * - ext - value for extended PTE bits - * - * Permissions: - * YUWD APX AP1 AP0 SVC User - * 0xxx 0 0 0 no acc no acc - * 100x 1 0 1 r/o no acc - * 10x0 1 0 1 r/o no acc - * 1011 0 0 1 r/w no acc - * 110x 0 1 0 r/w r/o - * 11x0 0 1 0 r/w r/o - * 1111 0 1 1 r/w r/w */ + armv6_mt_table cpu_v6 + ENTRY(cpu_v6_set_pte_ext) #ifdef CONFIG_MMU - str r1, [r0], #-2048 @ linux version - - bic r3, r1, #0x000003f0 - bic r3, r3, #0x00000003 - orr r3, r3, r2 - orr r3, r3, #PTE_EXT_AP0 | 2 - - tst r1, #L_PTE_WRITE - tstne r1, #L_PTE_DIRTY - orreq r3, r3, #PTE_EXT_APX - - tst r1, #L_PTE_USER - orrne r3, r3, #PTE_EXT_AP1 - tstne r3, #PTE_EXT_APX - bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 - - tst r1, #L_PTE_YOUNG - biceq r3, r3, #PTE_EXT_APX | PTE_EXT_AP_MASK - - tst r1, #L_PTE_EXEC - orreq r3, r3, #PTE_EXT_XN - - tst r1, #L_PTE_PRESENT - moveq r3, #0 - - str r3, [r0] - mcr p15, 0, r0, c7, c10, 1 @ flush_pte + armv6_set_pte_ext cpu_v6 #endif mov pc, lr diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index b49f9a4c82c8..34e424041927 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -12,7 +12,7 @@ #include <linux/linkage.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> @@ -25,9 +25,11 @@ ENTRY(cpu_v7_proc_init) mov pc, lr +ENDPROC(cpu_v7_proc_init) ENTRY(cpu_v7_proc_fin) mov pc, lr +ENDPROC(cpu_v7_proc_fin) /* * cpu_v7_reset(loc) @@ -43,6 +45,7 @@ ENTRY(cpu_v7_proc_fin) .align 5 ENTRY(cpu_v7_reset) mov pc, r0 +ENDPROC(cpu_v7_reset) /* * cpu_v7_do_idle() @@ -52,8 +55,9 @@ ENTRY(cpu_v7_reset) * IRQs are already disabled. */ ENTRY(cpu_v7_do_idle) - .long 0xe320f003 @ ARM V7 WFI instruction + wfi mov pc, lr +ENDPROC(cpu_v7_do_idle) ENTRY(cpu_v7_dcache_clean_area) #ifndef TLB_CAN_READ_FROM_L1_CACHE @@ -65,6 +69,7 @@ ENTRY(cpu_v7_dcache_clean_area) dsb #endif mov pc, lr +ENDPROC(cpu_v7_dcache_clean_area) /* * cpu_v7_switch_mm(pgd_phys, tsk) @@ -89,6 +94,7 @@ ENTRY(cpu_v7_switch_mm) isb #endif mov pc, lr +ENDPROC(cpu_v7_switch_mm) /* * cpu_v7_set_pte_ext(ptep, pte) @@ -99,26 +105,19 @@ ENTRY(cpu_v7_switch_mm) * (hardware version is stored at -1024 bytes) * - pte - PTE value to store * - ext - value for extended PTE bits - * - * Permissions: - * YUWD APX AP1 AP0 SVC User - * 0xxx 0 0 0 no acc no acc - * 100x 1 0 1 r/o no acc - * 10x0 1 0 1 r/o no acc - * 1011 0 0 1 r/w no acc - * 110x 0 1 0 r/w r/o - * 11x0 0 1 0 r/w r/o - * 1111 0 1 1 r/w r/w */ ENTRY(cpu_v7_set_pte_ext) #ifdef CONFIG_MMU str r1, [r0], #-2048 @ linux version bic r3, r1, #0x000003f0 - bic r3, r3, #0x00000003 + bic r3, r3, #PTE_TYPE_MASK orr r3, r3, r2 orr r3, r3, #PTE_EXT_AP0 | 2 + tst r2, #1 << 4 + orrne r3, r3, #PTE_EXT_TEX(1) + tst r1, #L_PTE_WRITE tstne r1, #L_PTE_DIRTY orreq r3, r3, #PTE_EXT_APX @@ -128,19 +127,18 @@ ENTRY(cpu_v7_set_pte_ext) tstne r3, #PTE_EXT_APX bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 - tst r1, #L_PTE_YOUNG - biceq r3, r3, #PTE_EXT_APX | PTE_EXT_AP_MASK - tst r1, #L_PTE_EXEC orreq r3, r3, #PTE_EXT_XN - tst r1, #L_PTE_PRESENT + tst r1, #L_PTE_YOUNG + tstne r1, #L_PTE_PRESENT moveq r3, #0 str r3, [r0] mcr p15, 0, r0, c7, c10, 1 @ flush_pte #endif mov pc, lr +ENDPROC(cpu_v7_set_pte_ext) cpu_v7_name: .ascii "ARMv7 Processor" @@ -182,12 +180,17 @@ __v7_setup: mov r10, #0x1f @ domains 0, 1 = manager mcr p15, 0, r10, c3, c0, 0 @ load domain access register #endif + ldr r5, =0x40e040e0 + ldr r6, =0xff0aa1a8 + mcr p15, 0, r5, c10, c2, 0 @ write PRRR + mcr p15, 0, r6, c10, c2, 1 @ write NMRR adr r5, v7_crval ldmia r5, {r5, r6} mrc p15, 0, r0, c1, c0, 0 @ read control register bic r0, r0, r5 @ clear bits them orr r0, r0, r6 @ set them mov pc, lr @ return to head.S:__ret +ENDPROC(__v7_setup) /* * V X F I D LR @@ -197,7 +200,7 @@ __v7_setup: */ .type v7_crval, #object v7_crval: - crval clear=0x0120c302, mmuset=0x00c0387d, ucset=0x00c0187c + crval clear=0x0120c302, mmuset=0x10c0387d, ucset=0x00c0187c __v7_setup_stack: .space 4 * 11 @ 11 registers diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index 3533741a76f6..04dc8b65401b 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -27,8 +27,8 @@ #include <linux/linkage.h> #include <linux/init.h> #include <asm/assembler.h> -#include <asm/elf.h> -#include <asm/hardware.h> +#include <asm/hwcap.h> +#include <mach/hardware.h> #include <asm/pgtable.h> #include <asm/pgtable-hwdef.h> #include <asm/page.h> @@ -52,11 +52,6 @@ #define CACHESIZE 32768 /* - * Run with L2 enabled. - */ -#define L2_CACHE_ENABLE 1 - -/* * This macro is used to wait for a CP15 write and is needed when we * have to ensure that the last operation to the coprocessor was * completed before continuing with operation. @@ -265,12 +260,9 @@ ENTRY(xsc3_dma_inv_range) tst r0, #CACHELINESIZE - 1 bic r0, r0, #CACHELINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean L1 D line - mcrne p15, 1, r0, c7, c11, 1 @ clean L2 line tst r1, #CACHELINESIZE - 1 mcrne p15, 0, r1, c7, c10, 1 @ clean L1 D line - mcrne p15, 1, r1, c7, c11, 1 @ clean L2 line 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate L1 D line - mcr p15, 1, r0, c7, c7, 1 @ invalidate L2 line add r0, r0, #CACHELINESIZE cmp r0, r1 blo 1b @@ -288,7 +280,6 @@ ENTRY(xsc3_dma_inv_range) ENTRY(xsc3_dma_clean_range) bic r0, r0, #CACHELINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line - mcr p15, 1, r0, c7, c11, 1 @ clean L2 line add r0, r0, #CACHELINESIZE cmp r0, r1 blo 1b @@ -306,8 +297,6 @@ ENTRY(xsc3_dma_clean_range) ENTRY(xsc3_dma_flush_range) bic r0, r0, #CACHELINESIZE - 1 1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line - mcr p15, 1, r0, c7, c11, 1 @ clean L2 line - mcr p15, 1, r0, c7, c7, 1 @ invalidate L2 line add r0, r0, #CACHELINESIZE cmp r0, r1 blo 1b @@ -347,9 +336,7 @@ ENTRY(cpu_xsc3_switch_mm) mcr p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB mcr p15, 0, ip, c7, c10, 4 @ data write barrier mcr p15, 0, ip, c7, c5, 4 @ prefetch flush -#ifdef L2_CACHE_ENABLE orr r0, r0, #0x18 @ cache the page table in L2 -#endif mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs cpwait_ret lr, ip @@ -358,40 +345,38 @@ ENTRY(cpu_xsc3_switch_mm) * cpu_xsc3_set_pte_ext(ptep, pte, ext) * * Set a PTE and flush it out - * */ +cpu_xsc3_mt_table: + .long 0x00 @ L_PTE_MT_UNCACHED + .long PTE_EXT_TEX(1) @ L_PTE_MT_BUFFERABLE + .long PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK + .long PTE_EXT_TEX(1) | PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED + .long 0x00 @ unused + .long 0x00 @ L_PTE_MT_MINICACHE (not present) + .long PTE_EXT_TEX(5) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC (not present?) + .long 0x00 @ unused + .long PTE_EXT_TEX(1) @ L_PTE_MT_DEV_WC + .long 0x00 @ unused + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED + .long PTE_EXT_TEX(2) @ L_PTE_MT_DEV_NONSHARED + .long 0x00 @ unused + .long 0x00 @ unused + .long 0x00 @ unused + .align 5 ENTRY(cpu_xsc3_set_pte_ext) - str r1, [r0], #-2048 @ linux version + xscale_set_pte_ext_prologue - bic r2, r1, #0xff0 @ keep C, B bits - orr r2, r2, #PTE_TYPE_EXT @ extended page tst r1, #L_PTE_SHARED @ shared? - orrne r2, r2, #0x200 - - eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - tst r3, #L_PTE_USER @ user? - orrne r2, r2, #PTE_EXT_AP_URO_SRW @ yes -> user r/o, system r/w - - tst r3, #L_PTE_WRITE | L_PTE_DIRTY @ write and dirty? - orreq r2, r2, #PTE_EXT_AP_UNO_SRW @ yes -> user n/a, system r/w - @ combined with user -> user r/w - -#if L2_CACHE_ENABLE - @ If it's cacheable, it needs to be in L2 also. - eor ip, r1, #L_PTE_CACHEABLE - tst ip, #L_PTE_CACHEABLE - orreq r2, r2, #PTE_EXT_TEX(0x5) -#endif - - tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? - movne r2, #0 @ no -> fault - - str r2, [r0] @ hardware version - mov ip, #0 - mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line - mcr p15, 0, ip, c7, c10, 4 @ data write barrier + and r1, r1, #L_PTE_MT_MASK + adr ip, cpu_xsc3_mt_table + ldr ip, [ip, r1] + orrne r2, r2, #PTE_EXT_COHERENT @ interlock: mask in coherent bit + bic r2, r2, #0x0c @ clear old C,B bits + orr r2, r2, ip + + xscale_set_pte_ext_epilogue mov pc, lr .ltorg @@ -408,9 +393,7 @@ __xsc3_setup: mcr p15, 0, ip, c7, c10, 4 @ data write barrier mcr p15, 0, ip, c7, c5, 4 @ prefetch flush mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs -#if L2_CACHE_ENABLE orr r4, r4, #0x18 @ cache the page table in L2 -#endif mcr p15, 0, r4, c2, c0, 0 @ load page table pointer mov r0, #0 @ don't allow CP access @@ -418,9 +401,7 @@ __xsc3_setup: mrc p15, 0, r0, c1, c0, 1 @ get auxiliary control reg and r0, r0, #2 @ preserve bit P bit setting -#if L2_CACHE_ENABLE orr r0, r0, #(1 << 10) @ enable L2 for LLR cache -#endif mcr p15, 0, r0, c1, c0, 1 @ set auxiliary control reg adr r5, xsc3_crval @@ -429,9 +410,6 @@ __xsc3_setup: bic r0, r0, r5 @ ..V. ..R. .... ..A. orr r0, r0, r6 @ ..VI Z..S .... .C.M (mmu) @ ...I Z..S .... .... (uc) -#if L2_CACHE_ENABLE - orr r0, r0, #0x04000000 @ L2 enable -#endif mov pc, lr .size __xsc3_setup, . - __xsc3_setup diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index 2dd85273976f..0cce37b93937 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -23,7 +23,7 @@ #include <linux/linkage.h> #include <linux/init.h> #include <asm/assembler.h> -#include <asm/elf.h> +#include <asm/hwcap.h> #include <asm/pgtable.h> #include <asm/pgtable-hwdef.h> #include <asm/page.h> @@ -406,8 +406,6 @@ ENTRY(cpu_xscale_dcache_clean_area) /* =============================== PageTable ============================== */ -#define PTE_CACHE_WRITE_ALLOCATE 0 - /* * cpu_xscale_switch_mm(pgd) * @@ -431,56 +429,42 @@ ENTRY(cpu_xscale_switch_mm) * * Errata 40: must set memory to write-through for user read-only pages. */ +cpu_xscale_mt_table: + .long 0x00 @ L_PTE_MT_UNCACHED + .long PTE_BUFFERABLE @ L_PTE_MT_BUFFERABLE + .long PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK + .long PTE_EXT_TEX(1) | PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED + .long 0x00 @ unused + .long PTE_EXT_TEX(1) | PTE_CACHEABLE @ L_PTE_MT_MINICACHE + .long PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC + .long 0x00 @ unused + .long PTE_BUFFERABLE @ L_PTE_MT_DEV_WC + .long 0x00 @ unused + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED + .long 0x00 @ L_PTE_MT_DEV_NONSHARED + .long 0x00 @ unused + .long 0x00 @ unused + .long 0x00 @ unused + .align 5 ENTRY(cpu_xscale_set_pte_ext) - str r1, [r0], #-2048 @ linux version - - bic r2, r1, #0xff0 - orr r2, r2, #PTE_TYPE_EXT @ extended page - - eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - tst r3, #L_PTE_USER @ User? - orrne r2, r2, #PTE_EXT_AP_URO_SRW @ yes -> user r/o, system r/w - - tst r3, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_EXT_AP_UNO_SRW @ yes -> user n/a, system r/w - @ combined with user -> user r/w - - @ - @ Handle the X bit. We want to set this bit for the minicache - @ (U = E = B = W = 0, C = 1) or when write allocate is enabled, - @ and we have a writeable, cacheable region. If we ignore the - @ U and E bits, we can allow user space to use the minicache as - @ well. - @ - @ X = (C & ~W & ~B) | (C & W & B & write_allocate) - @ - eor ip, r1, #L_PTE_CACHEABLE - tst ip, #L_PTE_CACHEABLE | L_PTE_WRITE | L_PTE_BUFFERABLE -#if PTE_CACHE_WRITE_ALLOCATE - eorne ip, r1, #L_PTE_CACHEABLE | L_PTE_WRITE | L_PTE_BUFFERABLE - tstne ip, #L_PTE_CACHEABLE | L_PTE_WRITE | L_PTE_BUFFERABLE -#endif - orreq r2, r2, #PTE_EXT_TEX(1) + xscale_set_pte_ext_prologue @ - @ Erratum 40: The B bit must be cleared for a user read-only - @ cacheable page. - @ - @ B = B & ~(U & C & ~W) + @ Erratum 40: must set memory to write-through for user read-only pages @ - and ip, r1, #L_PTE_USER | L_PTE_WRITE | L_PTE_CACHEABLE - teq ip, #L_PTE_USER | L_PTE_CACHEABLE - biceq r2, r2, #PTE_BUFFERABLE + and ip, r1, #(L_PTE_MT_MASK | L_PTE_USER | L_PTE_WRITE) & ~(4 << 2) + teq ip, #L_PTE_MT_WRITEBACK | L_PTE_USER - tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 @ no -> fault + moveq r1, #L_PTE_MT_WRITETHROUGH + and r1, r1, #L_PTE_MT_MASK + adr ip, cpu_xscale_mt_table + ldr ip, [ip, r1] + bic r2, r2, #0x0c + orr r2, r2, ip - str r2, [r0] @ hardware version - mov ip, #0 - mcr p15, 0, r0, c7, c10, 1 @ Clean D cache line - mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + xscale_set_pte_ext_epilogue mov pc, lr diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S index b56dda8052f7..24ba5109f2e7 100644 --- a/arch/arm/mm/tlb-v7.S +++ b/arch/arm/mm/tlb-v7.S @@ -51,6 +51,7 @@ ENTRY(v7wbi_flush_user_tlb_range) mcr p15, 0, ip, c7, c5, 6 @ flush BTAC/BTB dsb mov pc, lr +ENDPROC(v7wbi_flush_user_tlb_range) /* * v7wbi_flush_kern_tlb_range(start,end) @@ -77,6 +78,7 @@ ENTRY(v7wbi_flush_kern_tlb_range) dsb isb mov pc, lr +ENDPROC(v7wbi_flush_kern_tlb_range) .section ".text.init", #alloc, #execinstr |