Diffstat (limited to 'arch/arm/mm')
49 files changed, 1562 insertions, 507 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index baf638487a2d..101105e52610 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -399,7 +399,7 @@ config CPU_V6 config CPU_32v6K bool "Support ARM V6K processor extensions" if !SMP depends on CPU_V6 - default y if SMP && !ARCH_MX3 + default y if SMP && !(ARCH_MX3 || ARCH_OMAP2) help Say Y here if your ARMv6 processor supports the 'K' extension. This enables the kernel to use some instructions not present @@ -410,7 +410,7 @@ config CPU_32v6K # ARMv7 config CPU_V7 bool "Support ARM V7 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX - select CPU_32v6K + select CPU_32v6K if !ARCH_OMAP2 select CPU_32v7 select CPU_ABRT_EV7 select CPU_PABRT_V7 @@ -572,6 +572,8 @@ config CPU_TLB_V6 config CPU_TLB_V7 bool +config VERIFY_PERMISSION_FAULT + bool endif config CPU_HAS_ASID @@ -733,9 +735,34 @@ config NEEDS_SYSCALL_FOR_CMPXCHG Forget about fast user space cmpxchg support. It is just not possible. +config DMA_CACHE_RWFO + bool "Enable read/write for ownership DMA cache maintenance" + depends on CPU_V6 && SMP + default y + help + The Snoop Control Unit on ARM11MPCore does not detect the + cache maintenance operations and the dma_{map,unmap}_area() + functions may leave stale cache entries on other CPUs. By + enabling this option, Read or Write For Ownership in the ARMv6 + DMA cache maintenance functions is performed. These LDR/STR + instructions change the cache line state to shared or modified + so that the cache operation has the desired effect. + + Note that the workaround is only valid on processors that do + not perform speculative loads into the D-cache. For such + processors, if cache maintenance operations are not broadcast + in hardware, other workarounds are needed (e.g. cache + maintenance broadcasting in software via FIQ). + config OUTER_CACHE bool +config OUTER_CACHE_SYNC + bool + help + The outer cache has an outer_cache_fns.sync function pointer + that can be used to drain the write buffer of the outer cache. + config CACHE_FEROCEON_L2 bool "Enable the Feroceon L2 cache controller" depends on ARCH_KIRKWOOD || ARCH_MV78XX0 @@ -754,15 +781,17 @@ config CACHE_FEROCEON_L2_WRITETHROUGH config CACHE_L2X0 bool "Enable the L2x0 outer cache controller" depends on REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || MACH_REALVIEW_PB1176 || \ - REALVIEW_EB_A9MP || ARCH_MX35 || ARCH_MX31 || MACH_REALVIEW_PBX || ARCH_NOMADIK + REALVIEW_EB_A9MP || ARCH_MX35 || ARCH_MX31 || MACH_REALVIEW_PBX || \ + ARCH_NOMADIK || ARCH_OMAP4 || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 default y select OUTER_CACHE + select OUTER_CACHE_SYNC help This option enables the L2x0 PrimeCell. config CACHE_TAUROS2 bool "Enable the Tauros2 L2 cache controller" - depends on ARCH_DOVE + depends on (ARCH_DOVE || ARCH_MMP) default y select OUTER_CACHE help @@ -779,5 +808,32 @@ config CACHE_XSC3L2 config ARM_L1_CACHE_SHIFT int - default 6 if ARCH_OMAP3 || ARCH_S5PC1XX + default 6 if ARM_L1_CACHE_SHIFT_6 default 5 + +config ARM_DMA_MEM_BUFFERABLE + bool "Use non-cacheable memory for DMA" if CPU_V6 && !CPU_V7 + depends on !(MACH_REALVIEW_PB1176 || REALVIEW_EB_ARM11MP || \ + MACH_REALVIEW_PB11MP) + default y if CPU_V6 || CPU_V7 + help + Historically, the kernel has used strongly ordered mappings to + provide DMA coherent memory. With the advent of ARMv7, mapping + memory with differing types results in unpredictable behaviour, + so on these CPUs, this option is forced on.
+ + Multiple mappings with differing attributes are also unpredictable + on ARMv6 CPUs, but since they do not have aggressive speculative + prefetch, no harm appears to occur. + + However, drivers may be missing the necessary barriers for ARMv6, + and therefore turning this on may result in unpredictable driver + behaviour. Therefore, we offer this as an option. + + You are recommended to say 'Y' here and debug any affected drivers. + +config ARCH_HAS_BARRIERS + bool + help + This option allows the use of custom mandatory barriers + included via the mach/barriers.h file. diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S index 2e6dc040c654..ec88b157d3bb 100644 --- a/arch/arm/mm/abort-ev7.S +++ b/arch/arm/mm/abort-ev7.S @@ -29,5 +29,26 @@ ENTRY(v7_early_abort) * V6 code adjusts the returned DFSR. * New designs should not need to patch up faults. */ + +#if defined(CONFIG_VERIFY_PERMISSION_FAULT) + /* + * Detect erroneous permission failures and fix them up + */ + ldr r3, =0x40d @ On permission fault + and r3, r1, r3 + cmp r3, #0x0d + movne pc, lr + + mcr p15, 0, r0, c7, c8, 0 @ Retranslate FAR + isb + mrc p15, 0, r2, c7, c4, 0 @ Read the PAR + and r3, r2, #0x7b @ On translation fault + cmp r3, #0x0b + movne pc, lr + bic r1, r1, #0xf @ Fix up FSR FS[5:0] + and r2, r2, #0x7e + orr r1, r1, r2, LSR #1 +#endif + mov pc, lr ENDPROC(v7_early_abort) diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index b270d6228fe2..6f98c358989a 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -11,11 +11,13 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include <linux/moduleparam.h> #include <linux/compiler.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/uaccess.h> @@ -77,6 +79,8 @@ static unsigned long ai_dword; static unsigned long ai_multi; static int ai_usermode; +core_param(alignment, ai_usermode, int, 0600); + #define UM_WARN (1 << 0) #define UM_FIXUP (1 << 1) #define UM_SIGNAL (1 << 2) @@ -91,36 +95,29 @@ static const char *usermode_action[] = { "signal+warn" }; -static int -proc_alignment_read(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int alignment_proc_show(struct seq_file *m, void *v) { - char *p = page; - int len; - - p += sprintf(p, "User:\t\t%lu\n", ai_user); - p += sprintf(p, "System:\t\t%lu\n", ai_sys); - p += sprintf(p, "Skipped:\t%lu\n", ai_skipped); - p += sprintf(p, "Half:\t\t%lu\n", ai_half); - p += sprintf(p, "Word:\t\t%lu\n", ai_word); + seq_printf(m, "User:\t\t%lu\n", ai_user); + seq_printf(m, "System:\t\t%lu\n", ai_sys); + seq_printf(m, "Skipped:\t%lu\n", ai_skipped); + seq_printf(m, "Half:\t\t%lu\n", ai_half); + seq_printf(m, "Word:\t\t%lu\n", ai_word); if (cpu_architecture() >= CPU_ARCH_ARMv5TE) - p += sprintf(p, "DWord:\t\t%lu\n", ai_dword); - p += sprintf(p, "Multi:\t\t%lu\n", ai_multi); - p += sprintf(p, "User faults:\t%i (%s)\n", ai_usermode, + seq_printf(m, "DWord:\t\t%lu\n", ai_dword); + seq_printf(m, "Multi:\t\t%lu\n", ai_multi); + seq_printf(m, "User faults:\t%i (%s)\n", ai_usermode, usermode_action[ai_usermode]); - len = (p - page) - off; - if (len < 0) - len = 0; - - *eof = (len <= count) ?
1 : 0; - *start = page + off; + return 0; +} - return len; +static int alignment_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, alignment_proc_show, NULL); } -static int proc_alignment_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static ssize_t alignment_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { char mode; @@ -133,6 +130,13 @@ static int proc_alignment_write(struct file *file, const char __user *buffer, return count; } +static const struct file_operations alignment_proc_fops = { + .open = alignment_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = alignment_proc_write, +}; #endif /* CONFIG_PROC_FS */ union offset_union { @@ -163,15 +167,15 @@ union offset_union { THUMB( "1: "ins" %1, [%2]\n" ) \ THUMB( " add %2, %2, #1\n" ) \ "2:\n" \ - " .section .fixup,\"ax\"\n" \ + " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %0, #1\n" \ " b 2b\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ + " .popsection\n" \ + " .pushsection __ex_table,\"a\"\n" \ " .align 3\n" \ " .long 1b, 3b\n" \ - " .previous\n" \ + " .popsection\n" \ : "=r" (err), "=&r" (val), "=r" (addr) \ : "0" (err), "2" (addr)) @@ -223,16 +227,16 @@ union offset_union { " mov %1, %1, "NEXT_BYTE"\n" \ "2: "ins" %1, [%2]\n" \ "3:\n" \ - " .section .fixup,\"ax\"\n" \ + " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ "4: mov %0, #1\n" \ " b 3b\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ + " .popsection\n" \ + " .pushsection __ex_table,\"a\"\n" \ " .align 3\n" \ " .long 1b, 4b\n" \ " .long 2b, 4b\n" \ - " .previous\n" \ + " .popsection\n" \ : "=r" (err), "=&r" (v), "=&r" (a) \ : "0" (err), "1" (v), "2" (a)); \ if (err) \ @@ -263,18 +267,18 @@ union offset_union { " mov %1, %1, "NEXT_BYTE"\n" \ "4: "ins" %1, [%2]\n" \ "5:\n" \ - " .section .fixup,\"ax\"\n" \ + " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ "6: mov %0, #1\n" \ " b 5b\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ + " .popsection\n" \ + " .pushsection __ex_table,\"a\"\n" \ " .align 3\n" \ " .long 1b, 6b\n" \ " .long 2b, 6b\n" \ " .long 3b, 6b\n" \ " .long 4b, 6b\n" \ - " .previous\n" \ + " .popsection\n" \ : "=r" (err), "=&r" (v), "=&r" (a) \ : "0" (err), "1" (v), "2" (a)); \ if (err) \ @@ -898,16 +902,10 @@ static int __init alignment_init(void) #ifdef CONFIG_PROC_FS struct proc_dir_entry *res; - res = proc_mkdir("cpu", NULL); + res = proc_create("cpu/alignment", S_IWUSR | S_IRUGO, NULL, + &alignment_proc_fops); if (!res) return -ENOMEM; - - res = create_proc_entry("alignment", S_IWUSR | S_IRUGO, res); - if (!res) - return -ENOMEM; - - res->read_proc = proc_alignment_read; - res->write_proc = proc_alignment_write; #endif /* diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S index a89444a3c016..7148e53e6078 100644 --- a/arch/arm/mm/cache-fa.S +++ b/arch/arm/mm/cache-fa.S @@ -157,7 +157,7 @@ ENTRY(fa_flush_kern_dcache_area) * - start - virtual start address * - end - virtual end address */ -ENTRY(fa_dma_inv_range) +fa_dma_inv_range: tst r0, #CACHE_DLINESIZE - 1 bic r0, r0, #CACHE_DLINESIZE - 1 mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry @@ -180,7 +180,7 @@ ENTRY(fa_dma_inv_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(fa_dma_clean_range) +fa_dma_clean_range: bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHE_DLINESIZE @@ -205,6 +205,30 @@ 
ENTRY(fa_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ drain write buffer mov pc, lr +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(fa_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq fa_dma_clean_range + bcs fa_dma_inv_range + b fa_dma_flush_range +ENDPROC(fa_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(fa_dma_unmap_area) + mov pc, lr +ENDPROC(fa_dma_unmap_area) + __INITDATA .type fa_cache_fns, #object @@ -215,7 +239,7 @@ ENTRY(fa_cache_fns) .long fa_coherent_kern_range .long fa_coherent_user_range .long fa_flush_kern_dcache_area - .long fa_dma_inv_range - .long fa_dma_clean_range + .long fa_dma_map_area + .long fa_dma_unmap_area .long fa_dma_flush_range .size fa_cache_fns, . - fa_cache_fns diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index cb8fc6573b1b..9982eb385c0f 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -27,29 +27,90 @@ static void __iomem *l2x0_base; static DEFINE_SPINLOCK(l2x0_lock); +static uint32_t l2x0_way_mask; /* Bitmask of active ways */ static inline void cache_wait(void __iomem *reg, unsigned long mask) { /* wait for the operation to complete */ - while (readl(reg) & mask) + while (readl_relaxed(reg) & mask) ; } static inline void cache_sync(void) { void __iomem *base = l2x0_base; - writel(0, base + L2X0_CACHE_SYNC); + writel_relaxed(0, base + L2X0_CACHE_SYNC); cache_wait(base + L2X0_CACHE_SYNC, 1); } +static inline void l2x0_clean_line(unsigned long addr) +{ + void __iomem *base = l2x0_base; + cache_wait(base + L2X0_CLEAN_LINE_PA, 1); + writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA); +} + +static inline void l2x0_inv_line(unsigned long addr) +{ + void __iomem *base = l2x0_base; + cache_wait(base + L2X0_INV_LINE_PA, 1); + writel_relaxed(addr, base + L2X0_INV_LINE_PA); +} + +#ifdef CONFIG_PL310_ERRATA_588369 +static void debug_writel(unsigned long val) +{ + extern void omap_smc1(u32 fn, u32 arg); + + /* + * Texas Instruments secure monitor API to modify the + * PL310 Debug Control Register.
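
The following sketch (not part of the patch, and condensed: in the actual hunks the debug_writel() bracketing lives in the range functions while the errata l2x0_flush_line() does the split maintenance) shows the shape of the erratum 588369 workaround, which replaces the single clean-and-invalidate-by-PA operation:

	/* illustration only: errata-mode flush of one L2 line */
	debug_writel(0x03);     /* enter PL310 debug mode via the secure monitor */
	l2x0_flush_line(addr);  /* clean by PA, then a separate invalidate by PA */
	debug_writel(0x00);     /* restore normal operation */
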
+ */ + omap_smc1(0x100, val); +} + +static inline void l2x0_flush_line(unsigned long addr) +{ + void __iomem *base = l2x0_base; + + /* Clean by PA followed by Invalidate by PA */ + cache_wait(base + L2X0_CLEAN_LINE_PA, 1); + writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA); + cache_wait(base + L2X0_INV_LINE_PA, 1); + writel_relaxed(addr, base + L2X0_INV_LINE_PA); +} +#else + +/* Optimised out for non-errata case */ +static inline void debug_writel(unsigned long val) +{ +} + +static inline void l2x0_flush_line(unsigned long addr) +{ + void __iomem *base = l2x0_base; + cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); + writel_relaxed(addr, base + L2X0_CLEAN_INV_LINE_PA); +} +#endif + +static void l2x0_cache_sync(void) +{ + unsigned long flags; + + spin_lock_irqsave(&l2x0_lock, flags); + cache_sync(); + spin_unlock_irqrestore(&l2x0_lock, flags); +} + static inline void l2x0_inv_all(void) { unsigned long flags; /* invalidate all ways */ spin_lock_irqsave(&l2x0_lock, flags); - writel(0xff, l2x0_base + L2X0_INV_WAY); - cache_wait(l2x0_base + L2X0_INV_WAY, 0xff); + writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY); + cache_wait(l2x0_base + L2X0_INV_WAY, l2x0_way_mask); cache_sync(); spin_unlock_irqrestore(&l2x0_lock, flags); } @@ -62,23 +123,24 @@ static void l2x0_inv_range(unsigned long start, unsigned long end) spin_lock_irqsave(&l2x0_lock, flags); if (start & (CACHE_LINE_SIZE - 1)) { start &= ~(CACHE_LINE_SIZE - 1); - cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); - writel(start, base + L2X0_CLEAN_INV_LINE_PA); + debug_writel(0x03); + l2x0_flush_line(start); + debug_writel(0x00); start += CACHE_LINE_SIZE; } if (end & (CACHE_LINE_SIZE - 1)) { end &= ~(CACHE_LINE_SIZE - 1); - cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); - writel(end, base + L2X0_CLEAN_INV_LINE_PA); + debug_writel(0x03); + l2x0_flush_line(end); + debug_writel(0x00); } while (start < end) { unsigned long blk_end = start + min(end - start, 4096UL); while (start < blk_end) { - cache_wait(base + L2X0_INV_LINE_PA, 1); - writel(start, base + L2X0_INV_LINE_PA); + l2x0_inv_line(start); start += CACHE_LINE_SIZE; } @@ -103,8 +165,7 @@ static void l2x0_clean_range(unsigned long start, unsigned long end) unsigned long blk_end = start + min(end - start, 4096UL); while (start < blk_end) { - cache_wait(base + L2X0_CLEAN_LINE_PA, 1); - writel(start, base + L2X0_CLEAN_LINE_PA); + l2x0_clean_line(start); start += CACHE_LINE_SIZE; } @@ -128,11 +189,12 @@ static void l2x0_flush_range(unsigned long start, unsigned long end) while (start < end) { unsigned long blk_end = start + min(end - start, 4096UL); + debug_writel(0x03); while (start < blk_end) { - cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); - writel(start, base + L2X0_CLEAN_INV_LINE_PA); + l2x0_flush_line(start); start += CACHE_LINE_SIZE; } + debug_writel(0x00); if (blk_end < end) { spin_unlock_irqrestore(&l2x0_lock, flags); @@ -147,32 +209,62 @@ static void l2x0_flush_range(unsigned long start, unsigned long end) void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask) { __u32 aux; + __u32 cache_id; + int ways; + const char *type; l2x0_base = base; + cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); + aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); + + aux &= aux_mask; + aux |= aux_val; + + /* Determine the number of ways */ + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { + case L2X0_CACHE_ID_PART_L310: + if (aux & (1 << 16)) + ways = 16; + else + ways = 8; + type = "L310"; + break; + case L2X0_CACHE_ID_PART_L210: + ways = (aux >> 13) & 0xf; + type = "L210"; + break; + 
default: + /* Assume unknown chips have 8 ways */ + ways = 8; + type = "L2x0 series"; + break; + } + + l2x0_way_mask = (1 << ways) - 1; + /* * Check if l2x0 controller is already enabled. * If you are booting from non-secure mode * accessing the below registers will fault. */ - if (!(readl(l2x0_base + L2X0_CTRL) & 1)) { + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { /* l2x0 controller is disabled */ - - aux = readl(l2x0_base + L2X0_AUX_CTRL); - aux &= aux_mask; - aux |= aux_val; - writel(aux, l2x0_base + L2X0_AUX_CTRL); + writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL); l2x0_inv_all(); /* enable L2X0 */ - writel(1, l2x0_base + L2X0_CTRL); + writel_relaxed(1, l2x0_base + L2X0_CTRL); } outer_cache.inv_range = l2x0_inv_range; outer_cache.clean_range = l2x0_clean_range; outer_cache.flush_range = l2x0_flush_range; + outer_cache.sync = l2x0_cache_sync; - printk(KERN_INFO "L2X0 cache controller enabled\n"); + printk(KERN_INFO "%s cache controller enabled\n", type); + printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n", + ways, cache_id, aux); } diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S index 2a482731ea36..c2ff3c599fee 100644 --- a/arch/arm/mm/cache-v3.S +++ b/arch/arm/mm/cache-v3.S @@ -84,20 +84,6 @@ ENTRY(v3_flush_kern_dcache_area) /* FALLTHROUGH */ /* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_dma_inv_range) - /* FALLTHROUGH */ - -/* * dma_flush_range(start, end) * * Clean and invalidate the specified virtual address range. @@ -108,18 +94,29 @@ ENTRY(v3_dma_inv_range) ENTRY(v3_dma_flush_range) mov r0, #0 mcr p15, 0, r0, c7, c0, 0 @ flush ID cache + mov pc, lr + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v3_dma_unmap_area) + teq r2, #DMA_TO_DEVICE + bne v3_dma_flush_range /* FALLTHROUGH */ /* - * dma_clean_range(start, end) - * - * Clean (write back) the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction */ -ENTRY(v3_dma_clean_range) +ENTRY(v3_dma_map_area) mov pc, lr +ENDPROC(v3_dma_unmap_area) +ENDPROC(v3_dma_map_area) __INITDATA @@ -131,7 +128,7 @@ ENTRY(v3_cache_fns) .long v3_coherent_kern_range .long v3_coherent_user_range .long v3_flush_kern_dcache_area - .long v3_dma_inv_range - .long v3_dma_clean_range + .long v3_dma_map_area + .long v3_dma_unmap_area .long v3_dma_flush_range .size v3_cache_fns, . - v3_cache_fns diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S index 5c7da3e372e9..4810f7e3e813 100644 --- a/arch/arm/mm/cache-v4.S +++ b/arch/arm/mm/cache-v4.S @@ -94,20 +94,6 @@ ENTRY(v4_flush_kern_dcache_area) /* FALLTHROUGH */ /* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4_dma_inv_range) - /* FALLTHROUGH */ - -/* * dma_flush_range(start, end) * * Clean and invalidate the specified virtual address range. 
@@ -120,18 +106,29 @@ ENTRY(v4_dma_flush_range) mov r0, #0 mcr p15, 0, r0, c7, c7, 0 @ flush ID cache #endif + mov pc, lr + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4_dma_unmap_area) + teq r2, #DMA_TO_DEVICE + bne v4_dma_flush_range /* FALLTHROUGH */ /* - * dma_clean_range(start, end) - * - * Clean (write back) the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction */ -ENTRY(v4_dma_clean_range) +ENTRY(v4_dma_map_area) mov pc, lr +ENDPROC(v4_dma_unmap_area) +ENDPROC(v4_dma_map_area) __INITDATA @@ -143,7 +140,7 @@ ENTRY(v4_cache_fns) .long v4_coherent_kern_range .long v4_coherent_user_range .long v4_flush_kern_dcache_area - .long v4_dma_inv_range - .long v4_dma_clean_range + .long v4_dma_map_area + .long v4_dma_unmap_area .long v4_dma_flush_range .size v4_cache_fns, . - v4_cache_fns diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S index 3dbedf1ec0e7..df8368afa102 100644 --- a/arch/arm/mm/cache-v4wb.S +++ b/arch/arm/mm/cache-v4wb.S @@ -173,7 +173,7 @@ ENTRY(v4wb_coherent_user_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(v4wb_dma_inv_range) +v4wb_dma_inv_range: tst r0, #CACHE_DLINESIZE - 1 bic r0, r0, #CACHE_DLINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean D entry @@ -194,7 +194,7 @@ ENTRY(v4wb_dma_inv_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(v4wb_dma_clean_range) +v4wb_dma_clean_range: bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHE_DLINESIZE @@ -216,6 +216,30 @@ ENTRY(v4wb_dma_clean_range) .globl v4wb_dma_flush_range .set v4wb_dma_flush_range, v4wb_coherent_kern_range +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4wb_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq v4wb_dma_clean_range + bcs v4wb_dma_inv_range + b v4wb_dma_flush_range +ENDPROC(v4wb_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4wb_dma_unmap_area) + mov pc, lr +ENDPROC(v4wb_dma_unmap_area) + __INITDATA .type v4wb_cache_fns, #object @@ -226,7 +250,7 @@ ENTRY(v4wb_cache_fns) .long v4wb_coherent_kern_range .long v4wb_coherent_user_range .long v4wb_flush_kern_dcache_area - .long v4wb_dma_inv_range - .long v4wb_dma_clean_range + .long v4wb_dma_map_area + .long v4wb_dma_unmap_area .long v4wb_dma_flush_range .size v4wb_cache_fns, . - v4wb_cache_fns diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S index b3b7410270b4..45c70312f43b 100644 --- a/arch/arm/mm/cache-v4wt.S +++ b/arch/arm/mm/cache-v4wt.S @@ -142,23 +142,12 @@ ENTRY(v4wt_flush_kern_dcache_area) * - start - virtual start address * - end - virtual end address */ -ENTRY(v4wt_dma_inv_range) +v4wt_dma_inv_range: bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - /* FALLTHROUGH */ - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. 
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4wt_dma_clean_range) mov pc, lr /* @@ -172,6 +161,29 @@ ENTRY(v4wt_dma_clean_range) .globl v4wt_dma_flush_range .equ v4wt_dma_flush_range, v4wt_dma_inv_range +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4wt_dma_unmap_area) + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + bne v4wt_dma_inv_range + /* FALLTHROUGH */ + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4wt_dma_map_area) + mov pc, lr +ENDPROC(v4wt_dma_unmap_area) +ENDPROC(v4wt_dma_map_area) + __INITDATA .type v4wt_cache_fns, #object @@ -182,7 +194,7 @@ ENTRY(v4wt_cache_fns) .long v4wt_coherent_kern_range .long v4wt_coherent_user_range .long v4wt_flush_kern_dcache_area - .long v4wt_dma_inv_range - .long v4wt_dma_clean_range + .long v4wt_dma_map_area + .long v4wt_dma_unmap_area .long v4wt_dma_flush_range .size v4wt_cache_fns, . - v4wt_cache_fns diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index 4ba0a24ce6f5..86aa689ef1aa 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -195,7 +195,7 @@ ENTRY(v6_flush_kern_dcache_area) * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(v6_dma_inv_range) +v6_dma_inv_range: tst r0, #D_CACHE_LINE_SIZE - 1 bic r0, r0, #D_CACHE_LINE_SIZE - 1 #ifdef HARVARD_CACHE @@ -211,6 +211,10 @@ ENTRY(v6_dma_inv_range) mcrne p15, 0, r1, c7, c15, 1 @ clean & invalidate unified line #endif 1: +#ifdef CONFIG_DMA_CACHE_RWFO + ldr r2, [r0] @ read for ownership + str r2, [r0] @ write for ownership +#endif #ifdef HARVARD_CACHE mcr p15, 0, r0, c7, c6, 1 @ invalidate D line #else @@ -228,9 +232,12 @@ ENTRY(v6_dma_inv_range) * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(v6_dma_clean_range) +v6_dma_clean_range: bic r0, r0, #D_CACHE_LINE_SIZE - 1 1: +#ifdef CONFIG_DMA_CACHE_RWFO + ldr r2, [r0] @ read for ownership +#endif #ifdef HARVARD_CACHE mcr p15, 0, r0, c7, c10, 1 @ clean D line #else @@ -251,6 +258,10 @@ ENTRY(v6_dma_clean_range) ENTRY(v6_dma_flush_range) bic r0, r0, #D_CACHE_LINE_SIZE - 1 1: +#ifdef CONFIG_DMA_CACHE_RWFO + ldr r2, [r0] @ read for ownership + str r2, [r0] @ write for ownership +#endif #ifdef HARVARD_CACHE mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line #else @@ -263,6 +274,40 @@ ENTRY(v6_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ drain write buffer mov pc, lr +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v6_dma_map_area) + add r1, r1, r0 + teq r2, #DMA_FROM_DEVICE + beq v6_dma_inv_range +#ifndef CONFIG_DMA_CACHE_RWFO + b v6_dma_clean_range +#else + teq r2, #DMA_TO_DEVICE + beq v6_dma_clean_range + b v6_dma_flush_range +#endif +ENDPROC(v6_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v6_dma_unmap_area) +#ifndef CONFIG_DMA_CACHE_RWFO + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + bne v6_dma_inv_range +#endif + mov pc, lr +ENDPROC(v6_dma_unmap_area) + __INITDATA .type v6_cache_fns, #object @@ -273,7 +318,7 @@ ENTRY(v6_cache_fns) .long v6_coherent_kern_range .long v6_coherent_user_range .long v6_flush_kern_dcache_area - .long v6_dma_inv_range - .long v6_dma_clean_range + .long 
v6_dma_map_area + .long v6_dma_unmap_area .long v6_dma_flush_range .size v6_cache_fns, . - v6_cache_fns diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 9073db849fb4..37c8157e116e 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -91,7 +91,11 @@ ENTRY(v7_flush_kern_cache_all) THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) bl v7_flush_dcache_all mov r0, #0 +#ifdef CONFIG_SMP + mcr p15, 0, r0, c7, c1, 0 @ invalidate I-cache inner shareable +#else mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate +#endif ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) mov pc, lr @@ -167,7 +171,11 @@ ENTRY(v7_coherent_user_range) cmp r0, r1 blo 1b mov r0, #0 +#ifdef CONFIG_SMP + mcr p15, 0, r0, c7, c1, 6 @ invalidate BTB Inner Shareable +#else mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB +#endif dsb isb mov pc, lr @@ -216,7 +224,7 @@ ENDPROC(v7_flush_kern_dcache_area) * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(v7_dma_inv_range) +v7_dma_inv_range: dcache_line_size r2, r3 sub r3, r2, #1 tst r0, r3 @@ -240,7 +248,7 @@ ENDPROC(v7_dma_inv_range) * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(v7_dma_clean_range) +v7_dma_clean_range: dcache_line_size r2, r3 sub r3, r2, #1 bic r0, r0, r3 @@ -271,6 +279,32 @@ ENTRY(v7_dma_flush_range) mov pc, lr ENDPROC(v7_dma_flush_range) +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v7_dma_map_area) + add r1, r1, r0 + teq r2, #DMA_FROM_DEVICE + beq v7_dma_inv_range + b v7_dma_clean_range +ENDPROC(v7_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v7_dma_unmap_area) + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + bne v7_dma_inv_range + mov pc, lr +ENDPROC(v7_dma_unmap_area) + __INITDATA .type v7_cache_fns, #object @@ -281,7 +315,7 @@ ENTRY(v7_cache_fns) .long v7_coherent_kern_range .long v7_coherent_user_range .long v7_flush_kern_dcache_area - .long v7_dma_inv_range - .long v7_dma_clean_range + .long v7_dma_map_area + .long v7_dma_unmap_area .long v7_dma_flush_range .size v7_cache_fns, . 
- v7_cache_fns diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index a9e22e31eaa1..b0ee9ba3cfab 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -10,12 +10,17 @@ #include <linux/init.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/smp.h> +#include <linux/percpu.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> static DEFINE_SPINLOCK(cpu_asid_lock); unsigned int cpu_last_asid = ASID_FIRST_VERSION; +#ifdef CONFIG_SMP +DEFINE_PER_CPU(struct mm_struct *, current_mm); +#endif /* * We fork()ed a process, and we need a new context for the child @@ -26,13 +31,109 @@ unsigned int cpu_last_asid = ASID_FIRST_VERSION; void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) { mm->context.id = 0; + spin_lock_init(&mm->context.id_lock); } +static void flush_context(void) +{ + /* set the reserved ASID before flushing the TLB */ + asm("mcr p15, 0, %0, c13, c0, 1\n" : : "r" (0)); + isb(); + local_flush_tlb_all(); + if (icache_is_vivt_asid_tagged()) { + __flush_icache_all(); + dsb(); + } +} + +#ifdef CONFIG_SMP + +static void set_mm_context(struct mm_struct *mm, unsigned int asid) +{ + unsigned long flags; + + /* + * Locking needed for multi-threaded applications where the + * same mm->context.id could be set from different CPUs during + * the broadcast. This function is also called via IPI so the + * mm->context.id_lock has to be IRQ-safe. + */ + spin_lock_irqsave(&mm->context.id_lock, flags); + if (likely((mm->context.id ^ cpu_last_asid) >> ASID_BITS)) { + /* + * Old version of ASID found. Set the new one and + * reset mm_cpumask(mm). + */ + mm->context.id = asid; + cpumask_clear(mm_cpumask(mm)); + } + spin_unlock_irqrestore(&mm->context.id_lock, flags); + + /* + * Set the mm_cpumask(mm) bit for the current CPU. + */ + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); +} + +/* + * Reset the ASID on the current CPU. This function call is broadcast + * from the CPU handling the ASID rollover and holding cpu_asid_lock. + */ +static void reset_context(void *info) +{ + unsigned int asid; + unsigned int cpu = smp_processor_id(); + struct mm_struct *mm = per_cpu(current_mm, cpu); + + /* + * Check if a current_mm was set on this CPU as it might still + * be in the early booting stages and using the reserved ASID. + */ + if (!mm) + return; + + smp_rmb(); + asid = cpu_last_asid + cpu + 1; + + flush_context(); + set_mm_context(mm, asid); + + /* set the new ASID */ + asm("mcr p15, 0, %0, c13, c0, 1\n" : : "r" (mm->context.id)); + isb(); +} + +#else + +static inline void set_mm_context(struct mm_struct *mm, unsigned int asid) +{ + mm->context.id = asid; + cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id())); +} + +#endif + void __new_context(struct mm_struct *mm) { unsigned int asid; spin_lock(&cpu_asid_lock); +#ifdef CONFIG_SMP + /* + * Check the ASID again, in case the change was broadcast from + * another CPU before we acquired the lock. + */ + if (unlikely(((mm->context.id ^ cpu_last_asid) >> ASID_BITS) == 0)) { + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + spin_unlock(&cpu_asid_lock); + return; + } +#endif + /* + * At this point, it is guaranteed that the current mm (with + * an old ASID) isn't active on any other CPU since the ASIDs + * are changed simultaneously via IPI. + */ asid = ++cpu_last_asid; if (asid == 0) asid = cpu_last_asid = ASID_FIRST_VERSION; @@ -42,20 +143,15 @@ void __new_context(struct mm_struct *mm) * to start a new version and flush the TLB. 
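
A worked example of the rollover numbering implemented below may help; the values are illustrative only (assuming ASID_BITS == 8 and a hypothetical 4-CPU system):

	/* cpu_last_asid == 0x2ff; the next ++ yields 0x300, whose low
	 * 8 bits are zero, so a new generation starts:
	 *   the CPU that hit the rollover takes  asid = 0x300 + cpu + 1
	 *   every other CPU, via the IPI'd reset_context(), likewise
	 *   takes cpu_last_asid + cpu + 1
	 * then cpu_last_asid += NR_CPUS (0x300 + 4 = 0x304), so later
	 * allocations continue from 0x305 and no two CPUs can collide. */
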
*/ if (unlikely((asid & ~ASID_MASK) == 0)) { - asid = ++cpu_last_asid; - /* set the reserved ASID before flushing the TLB */ - asm("mcr p15, 0, %0, c13, c0, 1 @ set reserved context ID\n" - : - : "r" (0)); - isb(); - flush_tlb_all(); - if (icache_is_vivt_asid_tagged()) { - __flush_icache_all(); - dsb(); - } + asid = cpu_last_asid + smp_processor_id() + 1; + flush_context(); +#ifdef CONFIG_SMP + smp_wmb(); + smp_call_function(reset_context, NULL, 1); +#endif + cpu_last_asid += NR_CPUS; } - spin_unlock(&cpu_asid_lock); - cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id())); - mm->context.id = asid; + set_mm_context(mm, asid); + spin_unlock(&cpu_asid_lock); } diff --git a/arch/arm/mm/copypage-fa.c b/arch/arm/mm/copypage-fa.c index b2a6008b0111..d2852e1635b1 100644 --- a/arch/arm/mm/copypage-fa.c +++ b/arch/arm/mm/copypage-fa.c @@ -40,7 +40,7 @@ fa_copy_user_page(void *kto, const void *kfrom) } void fa_copy_user_highpage(struct page *to, struct page *from, - unsigned long vaddr) + unsigned long vaddr, struct vm_area_struct *vma) { void *kto, *kfrom; diff --git a/arch/arm/mm/copypage-feroceon.c b/arch/arm/mm/copypage-feroceon.c index 70997d5bee2d..ac163de7dc01 100644 --- a/arch/arm/mm/copypage-feroceon.c +++ b/arch/arm/mm/copypage-feroceon.c @@ -18,7 +18,7 @@ feroceon_copy_user_page(void *kto, const void *kfrom) { asm("\ stmfd sp!, {r4-r9, lr} \n\ - mov ip, %0 \n\ + mov ip, %2 \n\ 1: mov lr, r1 \n\ ldmia r1!, {r2 - r9} \n\ pld [lr, #32] \n\ @@ -64,16 +64,17 @@ feroceon_copy_user_page(void *kto, const void *kfrom) mcr p15, 0, ip, c7, c10, 4 @ drain WB\n\ ldmfd sp!, {r4-r9, pc}" : - : "I" (PAGE_SIZE)); + : "r" (kto), "r" (kfrom), "I" (PAGE_SIZE)); } void feroceon_copy_user_highpage(struct page *to, struct page *from, - unsigned long vaddr) + unsigned long vaddr, struct vm_area_struct *vma) { void *kto, *kfrom; kto = kmap_atomic(to, KM_USER0); kfrom = kmap_atomic(from, KM_USER1); + flush_cache_page(vma, vaddr, page_to_pfn(from)); feroceon_copy_user_page(kto, kfrom); kunmap_atomic(kfrom, KM_USER1); kunmap_atomic(kto, KM_USER0); diff --git a/arch/arm/mm/copypage-v3.c b/arch/arm/mm/copypage-v3.c index de9c06854ad7..f72303e1d804 100644 --- a/arch/arm/mm/copypage-v3.c +++ b/arch/arm/mm/copypage-v3.c @@ -38,7 +38,7 @@ v3_copy_user_page(void *kto, const void *kfrom) } void v3_copy_user_highpage(struct page *to, struct page *from, - unsigned long vaddr) + unsigned long vaddr, struct vm_area_struct *vma) { void *kto, *kfrom; diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c index 7370a7142b04..598c51ad5071 100644 --- a/arch/arm/mm/copypage-v4mc.c +++ b/arch/arm/mm/copypage-v4mc.c @@ -69,7 +69,7 @@ mc_copy_user_page(void *from, void *to) } void v4_mc_copy_user_highpage(struct page *to, struct page *from, - unsigned long vaddr) + unsigned long vaddr, struct vm_area_struct *vma) { void *kto = kmap_atomic(to, KM_USER1); diff --git a/arch/arm/mm/copypage-v4wb.c b/arch/arm/mm/copypage-v4wb.c index 9ab098414227..cb589cbb2b6c 100644 --- a/arch/arm/mm/copypage-v4wb.c +++ b/arch/arm/mm/copypage-v4wb.c @@ -27,7 +27,7 @@ v4wb_copy_user_page(void *kto, const void *kfrom) { asm("\ stmfd sp!, {r4, lr} @ 2\n\ - mov r2, %0 @ 1\n\ + mov r2, %2 @ 1\n\ ldmia r1!, {r3, r4, ip, lr} @ 4\n\ 1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line\n\ stmia r0!, {r3, r4, ip, lr} @ 4\n\ @@ -44,16 +44,17 @@ v4wb_copy_user_page(void *kto, const void *kfrom) mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB\n\ ldmfd sp!, {r4, pc} @ 3" : - : "I" (PAGE_SIZE / 64)); + : "r" (kto), "r" (kfrom), "I" (PAGE_SIZE / 64)); } void 
v4wb_copy_user_highpage(struct page *to, struct page *from, - unsigned long vaddr) + unsigned long vaddr, struct vm_area_struct *vma) { void *kto, *kfrom; kto = kmap_atomic(to, KM_USER0); kfrom = kmap_atomic(from, KM_USER1); + flush_cache_page(vma, vaddr, page_to_pfn(from)); v4wb_copy_user_page(kto, kfrom); kunmap_atomic(kfrom, KM_USER1); kunmap_atomic(kto, KM_USER0); diff --git a/arch/arm/mm/copypage-v4wt.c b/arch/arm/mm/copypage-v4wt.c index 300efafd6643..30c7d048a324 100644 --- a/arch/arm/mm/copypage-v4wt.c +++ b/arch/arm/mm/copypage-v4wt.c @@ -25,7 +25,7 @@ v4wt_copy_user_page(void *kto, const void *kfrom) { asm("\ stmfd sp!, {r4, lr} @ 2\n\ - mov r2, %0 @ 1\n\ + mov r2, %2 @ 1\n\ ldmia r1!, {r3, r4, ip, lr} @ 4\n\ 1: stmia r0!, {r3, r4, ip, lr} @ 4\n\ ldmia r1!, {r3, r4, ip, lr} @ 4+1\n\ @@ -40,11 +40,11 @@ v4wt_copy_user_page(void *kto, const void *kfrom) mcr p15, 0, r2, c7, c7, 0 @ flush ID cache\n\ ldmfd sp!, {r4, pc} @ 3" : - : "I" (PAGE_SIZE / 64)); + : "r" (kto), "r" (kfrom), "I" (PAGE_SIZE / 64)); } void v4wt_copy_user_highpage(struct page *to, struct page *from, - unsigned long vaddr) + unsigned long vaddr, struct vm_area_struct *vma) { void *kto, *kfrom; diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c index 0fa1319273de..f55fa1044f72 100644 --- a/arch/arm/mm/copypage-v6.c +++ b/arch/arm/mm/copypage-v6.c @@ -34,21 +34,14 @@ static DEFINE_SPINLOCK(v6_lock); * attack the kernel's existing mapping of these pages. */ static void v6_copy_user_highpage_nonaliasing(struct page *to, - struct page *from, unsigned long vaddr) + struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { void *kto, *kfrom; kfrom = kmap_atomic(from, KM_USER0); kto = kmap_atomic(to, KM_USER1); copy_page(kto, kfrom); -#ifdef CONFIG_HIGHMEM - /* - * kmap_atomic() doesn't set the page virtual address, and - * kunmap_atomic() takes care of cache flushing already. - */ - if (page_address(to) != NULL) -#endif - __cpuc_flush_dcache_area(kto, PAGE_SIZE); + __cpuc_flush_dcache_area(kto, PAGE_SIZE); kunmap_atomic(kto, KM_USER1); kunmap_atomic(kfrom, KM_USER0); } @@ -81,7 +74,7 @@ static void discard_old_kernel_data(void *kto) * Copy the page, taking account of the cache colour. 
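
As a sketch of what "colour" means here (assuming ARM's usual definitions of SHMLBA as four pages and CACHE_COLOUR() from asm/cacheflush.h, neither of which appears in this diff):

	/* CACHE_COLOUR(vaddr) == ((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT)
	 * With 4K pages that gives four colours; e.g. a user address of
	 * 0x4000b000: 0x4000b000 & 0x3fff = 0x3000, >> 12 = colour 3.
	 * The temporary kernel aliases used for the copy are then offset
	 * by 3 << PAGE_SHIFT so they hit the same cache sets as the user
	 * mapping, avoiding a VIPT alias mismatch. */
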
*/ static void v6_copy_user_highpage_aliasing(struct page *to, - struct page *from, unsigned long vaddr) + struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { unsigned int offset = CACHE_COLOUR(vaddr); unsigned long kfrom, kto; diff --git a/arch/arm/mm/copypage-xsc3.c b/arch/arm/mm/copypage-xsc3.c index bc4525f5ab23..f9cde0702f1e 100644 --- a/arch/arm/mm/copypage-xsc3.c +++ b/arch/arm/mm/copypage-xsc3.c @@ -34,7 +34,7 @@ xsc3_mc_copy_user_page(void *kto, const void *kfrom) { asm("\ stmfd sp!, {r4, r5, lr} \n\ - mov lr, %0 \n\ + mov lr, %2 \n\ \n\ pld [r1, #0] \n\ pld [r1, #32] \n\ @@ -67,16 +67,17 @@ xsc3_mc_copy_user_page(void *kto, const void *kfrom) \n\ ldmfd sp!, {r4, r5, pc}" : - : "I" (PAGE_SIZE / 64 - 1)); + : "r" (kto), "r" (kfrom), "I" (PAGE_SIZE / 64 - 1)); } void xsc3_mc_copy_user_highpage(struct page *to, struct page *from, - unsigned long vaddr) + unsigned long vaddr, struct vm_area_struct *vma) { void *kto, *kfrom; kto = kmap_atomic(to, KM_USER0); kfrom = kmap_atomic(from, KM_USER1); + flush_cache_page(vma, vaddr, page_to_pfn(from)); xsc3_mc_copy_user_page(kto, kfrom); kunmap_atomic(kfrom, KM_USER1); kunmap_atomic(kto, KM_USER0); diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c index 76824d3e966a..9920c0ae2096 100644 --- a/arch/arm/mm/copypage-xscale.c +++ b/arch/arm/mm/copypage-xscale.c @@ -91,7 +91,7 @@ mc_copy_user_page(void *from, void *to) } void xscale_mc_copy_user_highpage(struct page *to, struct page *from, - unsigned long vaddr) + unsigned long vaddr, struct vm_area_struct *vma) { void *kto = kmap_atomic(to, KM_USER1); diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 26325cb5d368..9e7742f0a102 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -11,7 +11,7 @@ */ #include <linux/module.h> #include <linux/mm.h> -#include <linux/slab.h> +#include <linux/gfp.h> #include <linux/errno.h> #include <linux/list.h> #include <linux/init.h> @@ -24,18 +24,6 @@ #include <asm/tlbflush.h> #include <asm/sizes.h> -/* Sanity check size */ -#if (CONSISTENT_DMA_SIZE % SZ_2M) -#error "CONSISTENT_DMA_SIZE must be multiple of 2MiB" -#endif - -#define CONSISTENT_END (0xffe00000) -#define CONSISTENT_BASE (CONSISTENT_END - CONSISTENT_DMA_SIZE) - -#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) -#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PGDIR_SHIFT) -#define NUM_CONSISTENT_PTES (CONSISTENT_DMA_SIZE >> PGDIR_SHIFT) - static u64 get_coherent_dma_mask(struct device *dev) { u64 mask = ISA_DMA_THRESHOLD; @@ -126,6 +114,15 @@ static void __dma_free_buffer(struct page *page, size_t size) } #ifdef CONFIG_MMU +/* Sanity check size */ +#if (CONSISTENT_DMA_SIZE % SZ_2M) +#error "CONSISTENT_DMA_SIZE must be multiple of 2MiB" +#endif + +#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) +#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PGDIR_SHIFT) +#define NUM_CONSISTENT_PTES (CONSISTENT_DMA_SIZE >> PGDIR_SHIFT) + /* * These are the page tables (2MB each) covering uncached, DMA consistent allocations */ @@ -404,78 +401,44 @@ EXPORT_SYMBOL(dma_free_coherent); * platforms with CONFIG_DMABOUNCE. 
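
The map/unmap split introduced below encodes a simple ownership protocol; this summary merely restates what the new helpers do, per direction:

	/* CPU hands the buffer to the device (map, ___dma_*_cpu_to_dev):
	 *   DMA_TO_DEVICE        clean (writeback), inner then outer
	 *   DMA_FROM_DEVICE      invalidate
	 *   DMA_BIDIRECTIONAL    clean
	 * device hands it back (unmap, ___dma_*_dev_to_cpu):
	 *   DMA_TO_DEVICE        nothing to do
	 *   DMA_FROM_DEVICE and DMA_BIDIRECTIONAL
	 *                        invalidate, outer before inner */
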
* Use the driver DMA support - see dma-mapping.h (dma_sync_*) */ -void dma_cache_maint(const void *start, size_t size, int direction) +void ___dma_single_cpu_to_dev(const void *kaddr, size_t size, + enum dma_data_direction dir) { - void (*inner_op)(const void *, const void *); - void (*outer_op)(unsigned long, unsigned long); - - BUG_ON(!virt_addr_valid(start) || !virt_addr_valid(start + size - 1)); - - switch (direction) { - case DMA_FROM_DEVICE: /* invalidate only */ - inner_op = dmac_inv_range; - outer_op = outer_inv_range; - break; - case DMA_TO_DEVICE: /* writeback only */ - inner_op = dmac_clean_range; - outer_op = outer_clean_range; - break; - case DMA_BIDIRECTIONAL: /* writeback and invalidate */ - inner_op = dmac_flush_range; - outer_op = outer_flush_range; - break; - default: - BUG(); - } + unsigned long paddr; + + BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1)); + + dmac_map_area(kaddr, size, dir); - inner_op(start, start + size); - outer_op(__pa(start), __pa(start) + size); + paddr = __pa(kaddr); + if (dir == DMA_FROM_DEVICE) { + outer_inv_range(paddr, paddr + size); + } else { + outer_clean_range(paddr, paddr + size); + } + /* FIXME: non-speculating: flush on bidirectional mappings? */ } -EXPORT_SYMBOL(dma_cache_maint); +EXPORT_SYMBOL(___dma_single_cpu_to_dev); -static void dma_cache_maint_contiguous(struct page *page, unsigned long offset, - size_t size, int direction) +void ___dma_single_dev_to_cpu(const void *kaddr, size_t size, + enum dma_data_direction dir) { - void *vaddr; - unsigned long paddr; - void (*inner_op)(const void *, const void *); - void (*outer_op)(unsigned long, unsigned long); - - switch (direction) { - case DMA_FROM_DEVICE: /* invalidate only */ - inner_op = dmac_inv_range; - outer_op = outer_inv_range; - break; - case DMA_TO_DEVICE: /* writeback only */ - inner_op = dmac_clean_range; - outer_op = outer_clean_range; - break; - case DMA_BIDIRECTIONAL: /* writeback and invalidate */ - inner_op = dmac_flush_range; - outer_op = outer_flush_range; - break; - default: - BUG(); - } + BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1)); - if (!PageHighMem(page)) { - vaddr = page_address(page) + offset; - inner_op(vaddr, vaddr + size); - } else { - vaddr = kmap_high_get(page); - if (vaddr) { - vaddr += offset; - inner_op(vaddr, vaddr + size); - kunmap_high(page); - } + /* FIXME: non-speculating: not required */ + /* don't bother invalidating if DMA to device */ + if (dir != DMA_TO_DEVICE) { + unsigned long paddr = __pa(kaddr); + outer_inv_range(paddr, paddr + size); } - paddr = page_to_phys(page) + offset; - outer_op(paddr, paddr + size); + dmac_unmap_area(kaddr, size, dir); } +EXPORT_SYMBOL(___dma_single_dev_to_cpu); -void dma_cache_maint_page(struct page *page, unsigned long offset, - size_t size, int dir) +static void dma_cache_maint_page(struct page *page, unsigned long offset, + size_t size, enum dma_data_direction dir, + void (*op)(const void *, size_t, int)) { /* * A single sg entry may refer to multiple physically contiguous @@ -486,20 +449,67 @@ void dma_cache_maint_page(struct page *page, unsigned long offset, size_t left = size; do { size_t len = left; - if (PageHighMem(page) && len + offset > PAGE_SIZE) { - if (offset >= PAGE_SIZE) { - page += offset / PAGE_SIZE; - offset %= PAGE_SIZE; + void *vaddr; + + if (PageHighMem(page)) { + if (len + offset > PAGE_SIZE) { + if (offset >= PAGE_SIZE) { + page += offset / PAGE_SIZE; + offset %= PAGE_SIZE; + } + len = PAGE_SIZE - offset; } - len = PAGE_SIZE - offset; + vaddr = 
kmap_high_get(page); + if (vaddr) { + vaddr += offset; + op(vaddr, len, dir); + kunmap_high(page); + } else if (cache_is_vipt()) { + pte_t saved_pte; + vaddr = kmap_high_l1_vipt(page, &saved_pte); + op(vaddr + offset, len, dir); + kunmap_high_l1_vipt(page, saved_pte); + } + } else { + vaddr = page_address(page) + offset; + op(vaddr, len, dir); } - dma_cache_maint_contiguous(page, offset, len, dir); offset = 0; page++; left -= len; } while (left); } -EXPORT_SYMBOL(dma_cache_maint_page); + +void ___dma_page_cpu_to_dev(struct page *page, unsigned long off, + size_t size, enum dma_data_direction dir) +{ + unsigned long paddr; + + dma_cache_maint_page(page, off, size, dir, dmac_map_area); + + paddr = page_to_phys(page) + off; + if (dir == DMA_FROM_DEVICE) { + outer_inv_range(paddr, paddr + size); + } else { + outer_clean_range(paddr, paddr + size); + } + /* FIXME: non-speculating: flush on bidirectional mappings? */ +} +EXPORT_SYMBOL(___dma_page_cpu_to_dev); + +void ___dma_page_dev_to_cpu(struct page *page, unsigned long off, + size_t size, enum dma_data_direction dir) +{ + unsigned long paddr = page_to_phys(page) + off; + + /* FIXME: non-speculating: not required */ + /* don't bother invalidating if DMA to device */ + if (dir != DMA_TO_DEVICE) + outer_inv_range(paddr, paddr + size); + + dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); +} +EXPORT_SYMBOL(___dma_page_dev_to_cpu); /** * dma_map_sg - map a set of SG buffers for streaming mode DMA @@ -573,8 +583,12 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int i; for_each_sg(sg, s, nents, i) { - dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0, - sg_dma_len(s), dir); + if (!dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0, + sg_dma_len(s), dir)) + continue; + + __dma_page_dev_to_cpu(sg_page(s), s->offset, + s->length, dir); } } EXPORT_SYMBOL(dma_sync_sg_for_cpu); @@ -597,9 +611,8 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, sg_dma_len(s), dir)) continue; - if (!arch_is_coherent()) - dma_cache_maint_page(sg_page(s), s->offset, - s->length, dir); + __dma_page_cpu_to_dev(sg_page(s), s->offset, + s->length, dir); } } EXPORT_SYMBOL(dma_sync_sg_for_device); diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 56ee15321b00..9b906dec1ca1 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -16,6 +16,7 @@ #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/pagemap.h> +#include <linux/gfp.h> #include <asm/bugs.h> #include <asm/cacheflush.h> @@ -36,28 +37,12 @@ static unsigned long shared_pte_mask = L_PTE_MT_BUFFERABLE; * Therefore those configurations which might call adjust_pte (those * without CONFIG_CPU_CACHE_VIPT) cannot support split page_table_lock. */ -static int adjust_pte(struct vm_area_struct *vma, unsigned long address) +static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address, + unsigned long pfn, pte_t *ptep) { - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte, entry; + pte_t entry = *ptep; int ret; - pgd = pgd_offset(vma->vm_mm, address); - if (pgd_none(*pgd)) - goto no_pgd; - if (pgd_bad(*pgd)) - goto bad_pgd; - - pmd = pmd_offset(pgd, address); - if (pmd_none(*pmd)) - goto no_pmd; - if (pmd_bad(*pmd)) - goto bad_pmd; - - pte = pte_offset_map(pmd, address); - entry = *pte; - /* * If this page is present, it's actually being shared. */ @@ -68,33 +53,55 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address) * fault (ie, is old), we can safely ignore any issues. 
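
A concrete instance of the situation handled here, with hypothetical addresses:

	/* task A maps file page P at 0x8000 -> colour 0 on a VIVT cache
	 * task B maps the same page at 0xb000 -> colour 3
	 * When B's PTE is installed, make_coherent() finds A's mapping via
	 * the address_space and do_adjust_pte() rewrites each present alias:
	 *   pte_val(entry) &= ~L_PTE_MT_MASK;
	 *   pte_val(entry) |= shared_pte_mask;  /- L_PTE_MT_BUFFERABLE by
	 *                                          default, i.e. uncached -/
	 * so writes through one alias can no longer linger in cache lines
	 * the other alias never sees. */
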
*/ if (ret && (pte_val(entry) & L_PTE_MT_MASK) != shared_pte_mask) { - unsigned long pfn = pte_pfn(entry); flush_cache_page(vma, address, pfn); outer_flush_range((pfn << PAGE_SHIFT), (pfn << PAGE_SHIFT) + PAGE_SIZE); pte_val(entry) &= ~L_PTE_MT_MASK; pte_val(entry) |= shared_pte_mask; - set_pte_at(vma->vm_mm, address, pte, entry); + set_pte_at(vma->vm_mm, address, ptep, entry); flush_tlb_page(vma, address); } - pte_unmap(pte); + return ret; +} + +static int adjust_pte(struct vm_area_struct *vma, unsigned long address, + unsigned long pfn) +{ + spinlock_t *ptl; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int ret; + + pgd = pgd_offset(vma->vm_mm, address); + if (pgd_none_or_clear_bad(pgd)) + return 0; + + pmd = pmd_offset(pgd, address); + if (pmd_none_or_clear_bad(pmd)) + return 0; -bad_pgd: - pgd_ERROR(*pgd); - pgd_clear(pgd); -no_pgd: - return 0; - -bad_pmd: - pmd_ERROR(*pmd); - pmd_clear(pmd); -no_pmd: - return 0; + /* + * This is called while another page table is mapped, so we + * must use the nested version. This also means we need to + * open-code the spin-locking. + */ + ptl = pte_lockptr(vma->vm_mm, pmd); + pte = pte_offset_map_nested(pmd, address); + spin_lock(ptl); + + ret = do_adjust_pte(vma, address, pfn, pte); + + spin_unlock(ptl); + pte_unmap_nested(pte); + + return ret; } static void -make_coherent(struct address_space *mapping, struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) +make_coherent(struct address_space *mapping, struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, unsigned long pfn) { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *mpnt; @@ -122,13 +129,11 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, unsigne if (!(mpnt->vm_flags & VM_MAYSHARE)) continue; offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; - aliases += adjust_pte(mpnt, mpnt->vm_start + offset); + aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn); } flush_dcache_mmap_unlock(mapping); if (aliases) - adjust_pte(vma, addr); - else - flush_cache_page(vma, addr, pfn); + do_adjust_pte(vma, addr, pfn, ptep); } /* @@ -144,9 +149,10 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, unsigne * * Note that the pte lock will be held. 
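
A note on why adjust_pte() above uses the _nested helpers and open-codes the locking; the KM_PTE0/KM_PTE1 slot detail is recalled from the kmap implementation of this era and is an assumption, not shown in this diff:

	/* update_mmu_cache() runs with the faulting page table already
	 * mapped (KM_PTE0 on highmem configurations), so mapping a second
	 * process's page table needs the second slot:
	 *   pte_offset_map(pmd, addr)         -> kmap_atomic(..., KM_PTE0)
	 *   pte_offset_map_nested(pmd, addr)  -> kmap_atomic(..., KM_PTE1)
	 * and the split pte lock (pte_lockptr()) must be taken by hand,
	 * since pte_offset_map_lock() only covers the non-nested case. */
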
*/ -void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte) +void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep) { - unsigned long pfn = pte_pfn(pte); + unsigned long pfn = pte_pfn(*ptep); struct address_space *mapping; struct page *page; @@ -168,7 +174,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte) #endif if (mapping) { if (cache_is_vivt()) - make_coherent(mapping, vma, addr, pfn); + make_coherent(mapping, vma, addr, ptep, pfn); else if (vma->vm_flags & VM_EXEC) __flush_icache_all(); } diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 10e06801afb3..cbfb2edcf7d1 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -18,6 +18,7 @@ #include <linux/page-flags.h> #include <linux/sched.h> #include <linux/highmem.h> +#include <linux/perf_event.h> #include <asm/system.h> #include <asm/pgtable.h> @@ -302,6 +303,12 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) fault = __do_page_fault(mm, addr, fsr, tsk); up_read(&mm->mmap_sem); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, addr); + if (fault & VM_FAULT_MAJOR) + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, addr); + else if (fault & VM_FAULT_MINOR) + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, addr); + /* * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR */ @@ -386,6 +393,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, if (addr < TASK_SIZE) return do_page_fault(addr, fsr, regs); + if (user_mode(regs)) + goto bad_area; + index = pgd_index(addr); /* @@ -456,7 +466,12 @@ static struct fsr_info { { do_bad, SIGILL, BUS_ADRALN, "alignment exception" }, { do_bad, SIGKILL, 0, "terminal exception" }, { do_bad, SIGILL, BUS_ADRALN, "alignment exception" }, +/* Do we need runtime check ? 
*/ +#if __LINUX_ARM_ARCH__ < 6 { do_bad, SIGBUS, 0, "external abort on linefetch" }, +#else + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "I-cache maintenance fault" }, +#endif { do_translation_fault, SIGSEGV, SEGV_MAPERR, "section translation fault" }, { do_bad, SIGBUS, 0, "external abort on linefetch" }, { do_page_fault, SIGSEGV, SEGV_MAPERR, "page translation fault" }, diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 6f3a4b7a3b82..c6844cb9b508 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -13,6 +13,8 @@ #include <asm/cacheflush.h> #include <asm/cachetype.h> +#include <asm/highmem.h> +#include <asm/smp_plat.h> #include <asm/system.h> #include <asm/tlbflush.h> @@ -87,13 +89,26 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsig if (vma->vm_flags & VM_EXEC && icache_is_vivt_asid_tagged()) __flush_icache_all(); } +#else +#define flush_pfn_alias(pfn,vaddr) do { } while (0) +#endif + +#ifdef CONFIG_SMP +static void flush_ptrace_access_other(void *args) +{ + __flush_icache_all(); +} +#endif +static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, - unsigned long uaddr, void *kaddr, - unsigned long len, int write) + unsigned long uaddr, void *kaddr, unsigned long len) { if (cache_is_vivt()) { - vivt_flush_ptrace_access(vma, page, uaddr, kaddr, len, write); + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { + unsigned long addr = (unsigned long)kaddr; + __cpuc_coherent_kern_range(addr, addr + len); + } return; } @@ -104,34 +119,59 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, } /* VIPT non-aliasing cache */ - if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)) && - vma->vm_flags & VM_EXEC) { + if (vma->vm_flags & VM_EXEC) { unsigned long addr = (unsigned long)kaddr; - /* only flushing the kernel mapping on non-aliasing VIPT */ __cpuc_coherent_kern_range(addr, addr + len); +#ifdef CONFIG_SMP + if (cache_ops_need_broadcast()) + smp_call_function(flush_ptrace_access_other, + NULL, 1); +#endif } } -#else -#define flush_pfn_alias(pfn,vaddr) do { } while (0) + +/* + * Copy user data from/to a page which is mapped into a different + * process's address space. Really, we want to allow our "user + * space" model to handle this. + * + * Note that this code needs to run on the current CPU. + */ +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long uaddr, void *dst, const void *src, + unsigned long len) +{ +#ifdef CONFIG_SMP + preempt_disable(); #endif + memcpy(dst, src, len); + flush_ptrace_access(vma, page, uaddr, dst, len); +#ifdef CONFIG_SMP + preempt_enable(); +#endif +} void __flush_dcache_page(struct address_space *mapping, struct page *page) { - void *addr = page_address(page); - /* * Writeback any data associated with the kernel mapping of this * page. This ensures that data in the physical page is mutually * coherent with the kernel's mapping. */ -#ifdef CONFIG_HIGHMEM - /* - * kmap_atomic() doesn't set the page virtual address, and - * kunmap_atomic() takes care of cache flushing already.
- */ - if (addr) -#endif - __cpuc_flush_dcache_area(addr, PAGE_SIZE); + if (!PageHighMem(page)) { + __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); + } else { + void *addr = kmap_high_get(page); + if (addr) { + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_high(page); + } else if (cache_is_vipt()) { + pte_t saved_pte; + addr = kmap_high_l1_vipt(page, &saved_pte); + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_high_l1_vipt(page, saved_pte); + } + } /* * If this is a page cache page, and we have an aliasing VIPT cache, diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index 2be1ec7c1b41..6ab244062b4a 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -48,7 +48,16 @@ void *kmap_atomic(struct page *page, enum km_type type) debug_kmap_atomic(type); - kmap = kmap_high_get(page); +#ifdef CONFIG_DEBUG_HIGHMEM + /* + * There is no cache coherency issue when the cache is not VIVT, + * so force the dedicated kmap usage for better debugging purposes + * in that case. + */ + if (!cache_is_vivt()) + kmap = NULL; + else +#endif + kmap = kmap_high_get(page); if (kmap) return kmap; @@ -79,7 +88,8 @@ void kunmap_atomic(void *kvaddr, enum km_type type) unsigned int idx = type + KM_TYPE_NR * smp_processor_id(); if (kvaddr >= (void *)FIXADDR_START) { - __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); + if (cache_is_vivt()) + __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); set_pte_ext(TOP_PTE(vaddr), __pte(0), 0); @@ -124,3 +134,90 @@ struct page *kmap_atomic_to_page(const void *ptr) pte = TOP_PTE(vaddr); return pte_page(*pte); } + +#ifdef CONFIG_CPU_CACHE_VIPT + +#include <linux/percpu.h> + +/* + * The VIVT cache of a highmem page is always flushed before the page + * is unmapped. Hence unmapped highmem pages need no cache maintenance + * in that case. + * + * However, unmapped pages may still be cached with a VIPT cache, and + * it is not possible to perform cache maintenance on them using physical + * addresses unfortunately. So we have no choice but to set up a temporary + * virtual mapping for that purpose. + * + * Yet this VIPT cache maintenance may be triggered from DMA support + * functions which are possibly called from interrupt context. As we don't + * want to keep interrupts disabled all the time when such maintenance is + * taking place, we therefore allow for some reentrancy by preserving and + * restoring the previous fixmap entry before the interrupted context is + * resumed. If the reentrancy depth is 0 then there is no need to restore + * the previous fixmap, and leaving the current one in place allows it to + * be reused the next time without a TLB flush (common with DMA).
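
A worked nesting sequence for the depth counter described above, on a single CPU (illustrative; mirrors the code that follows):

	/* depth 0 -> 1: task calls kmap_high_l1_vipt(A); the fixmap entry
	 *               is rewritten to map A, and whatever it previously
	 *               held is kept in saved_pte
	 * IRQ,  1 -> 2: kmap_high_l1_vipt(B) saves A's pte and maps B
	 * IRQ,  2 -> 1: kunmap_high_l1_vipt() sees depth != 0 and B's pte
	 *               != saved pte, so A's mapping is restored (with a
	 *               local TLB flush)
	 * task, 1 -> 0: kunmap leaves A's entry in place, so the next kmap
	 *               of A on this CPU needs no PTE write or TLB flush */
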
+ */ + +static DEFINE_PER_CPU(int, kmap_high_l1_vipt_depth); + +void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte) +{ + unsigned int idx, cpu; + int *depth; + unsigned long vaddr, flags; + pte_t pte, *ptep; + + if (!in_interrupt()) + preempt_disable(); + + cpu = smp_processor_id(); + depth = &per_cpu(kmap_high_l1_vipt_depth, cpu); + + idx = KM_L1_CACHE + KM_TYPE_NR * cpu; + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + ptep = TOP_PTE(vaddr); + pte = mk_pte(page, kmap_prot); + + raw_local_irq_save(flags); + (*depth)++; + if (pte_val(*ptep) == pte_val(pte)) { + *saved_pte = pte; + } else { + *saved_pte = *ptep; + set_pte_ext(ptep, pte, 0); + local_flush_tlb_kernel_page(vaddr); + } + raw_local_irq_restore(flags); + + return (void *)vaddr; +} + +void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte) +{ + unsigned int idx, cpu = smp_processor_id(); + int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu); + unsigned long vaddr, flags; + pte_t pte, *ptep; + + idx = KM_L1_CACHE + KM_TYPE_NR * cpu; + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + ptep = TOP_PTE(vaddr); + pte = mk_pte(page, kmap_prot); + + BUG_ON(pte_val(*ptep) != pte_val(pte)); + BUG_ON(*depth <= 0); + + raw_local_irq_save(flags); + (*depth)--; + if (*depth != 0 && pte_val(pte) != pte_val(saved_pte)) { + set_pte_ext(ptep, saved_pte, 0); + local_flush_tlb_kernel_page(vaddr); + } + raw_local_irq_restore(flags); + + if (!in_interrupt()) + preempt_enable(); +} + +#endif /* CONFIG_CPU_CACHE_VIPT */ diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index a04ffbbbe253..f6a999465323 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -15,14 +15,15 @@ #include <linux/mman.h> #include <linux/nodemask.h> #include <linux/initrd.h> -#include <linux/sort.h> #include <linux/highmem.h> +#include <linux/gfp.h> #include <asm/mach-types.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/sizes.h> #include <asm/tlb.h> +#include <asm/fixmap.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> @@ -32,19 +33,21 @@ static unsigned long phys_initrd_start __initdata = 0; static unsigned long phys_initrd_size __initdata = 0; -static void __init early_initrd(char **p) +static int __init early_initrd(char *p) { unsigned long start, size; + char *endp; - start = memparse(*p, p); - if (**p == ',') { - size = memparse((*p) + 1, p); + start = memparse(p, &endp); + if (*endp == ',') { + size = memparse(endp + 1, NULL); phys_initrd_start = start; phys_initrd_size = size; } + return 0; } -__early_param("initrd=", early_initrd); +early_param("initrd", early_initrd); static int __init parse_tag_initrd(const struct tag *tag) { @@ -82,9 +85,6 @@ void show_mem(void) printk("Mem-info:\n"); show_free_areas(); for_each_online_node(node) { - pg_data_t *n = NODE_DATA(node); - struct page *map = pgdat_page_nr(n, 0) - n->node_start_pfn; - for_each_nodebank (i,mi,node) { struct membank *bank = &mi->bank[i]; unsigned int pfn1, pfn2; @@ -93,8 +93,8 @@ void show_mem(void) pfn1 = bank_pfn_start(bank); pfn2 = bank_pfn_end(bank); - page = map + pfn1; - end = map + pfn2; + page = pfn_to_page(pfn1); + end = pfn_to_page(pfn2 - 1) + 1; do { total++; @@ -223,20 +223,6 @@ static int __init check_initrd(struct meminfo *mi) return initrd_node; } -static inline void map_memory_bank(struct membank *bank) -{ -#ifdef CONFIG_MMU - struct map_desc map; - - map.pfn = bank_pfn_start(bank); - map.virtual = __phys_to_virt(bank_phys_start(bank)); - map.length = bank_phys_size(bank); - map.type = MT_MEMORY; - - create_mapping(&map); -#endif -} - static void 
__init bootmem_init_node(int node, struct meminfo *mi, unsigned long start_pfn, unsigned long end_pfn) { @@ -246,16 +232,6 @@ static void __init bootmem_init_node(int node, struct meminfo *mi, int i; /* - * Map the memory banks for this node. - */ - for_each_nodebank(i, mi, node) { - struct membank *bank = &mi->bank[i]; - - if (!bank->highmem) - map_memory_bank(bank); - } - - /* * Allocate the bootmem bitmap page. */ boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn); @@ -384,21 +360,12 @@ static void arm_memory_present(struct meminfo *mi, int node) } #endif -static int __init meminfo_cmp(const void *_a, const void *_b) -{ - const struct membank *a = _a, *b = _b; - long cmp = bank_pfn_start(a) - bank_pfn_start(b); - return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; -} - void __init bootmem_init(void) { struct meminfo *mi = &meminfo; unsigned long min, max_low, max_high; int node, initrd_node; - sort(&mi->bank, mi->nr_banks, sizeof(mi->bank[0]), meminfo_cmp, NULL); - /* * Locate which node contains the ramdisk image, if any. */ @@ -560,7 +527,7 @@ static void __init free_unused_memmap_node(int node, struct meminfo *mi) */ void __init mem_init(void) { - unsigned int codesize, datasize, initsize; + unsigned long reserved_pages, free_pages; int i, node; #ifndef CONFIG_DISCONTIGMEM @@ -596,6 +563,30 @@ void __init mem_init(void) totalram_pages += totalhigh_pages; #endif + reserved_pages = free_pages = 0; + + for_each_online_node(node) { + for_each_nodebank(i, &meminfo, node) { + struct membank *bank = &meminfo.bank[i]; + unsigned int pfn1, pfn2; + struct page *page, *end; + + pfn1 = bank_pfn_start(bank); + pfn2 = bank_pfn_end(bank); + + page = pfn_to_page(pfn1); + end = pfn_to_page(pfn2 - 1) + 1; + + do { + if (PageReserved(page)) + reserved_pages++; + else if (!page_count(page)) + free_pages++; + page++; + } while (page < end); + } + } + /* * Since our memory may not be contiguous, calculate the * real number of pages we have in this system @@ -608,16 +599,71 @@ void __init mem_init(void) } printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT)); - codesize = _etext - _text; - datasize = _end - _data; - initsize = __init_end - __init_begin; - - printk(KERN_NOTICE "Memory: %luKB available (%dK code, " - "%dK data, %dK init, %luK highmem)\n", - nr_free_pages() << (PAGE_SHIFT-10), codesize >> 10, - datasize >> 10, initsize >> 10, + printk(KERN_NOTICE "Memory: %luk/%luk available, %luk reserved, %luK highmem\n", + nr_free_pages() << (PAGE_SHIFT-10), + free_pages << (PAGE_SHIFT-10), + reserved_pages << (PAGE_SHIFT-10), totalhigh_pages << (PAGE_SHIFT-10)); +#define MLK(b, t) b, t, ((t) - (b)) >> 10 +#define MLM(b, t) b, t, ((t) - (b)) >> 20 +#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K) + + printk(KERN_NOTICE "Virtual kernel memory layout:\n" + " vector : 0x%08lx - 0x%08lx (%4ld kB)\n" + " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" +#ifdef CONFIG_MMU + " DMA : 0x%08lx - 0x%08lx (%4ld MB)\n" +#endif + " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" + " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" +#ifdef CONFIG_HIGHMEM + " pkmap : 0x%08lx - 0x%08lx (%4ld MB)\n" +#endif + " modules : 0x%08lx - 0x%08lx (%4ld MB)\n" + " .init : 0x%p" " - 0x%p" " (%4d kB)\n" + " .text : 0x%p" " - 0x%p" " (%4d kB)\n" + " .data : 0x%p" " - 0x%p" " (%4d kB)\n", + + MLK(UL(CONFIG_VECTORS_BASE), UL(CONFIG_VECTORS_BASE) + + (PAGE_SIZE)), + MLK(FIXADDR_START, FIXADDR_TOP), +#ifdef CONFIG_MMU + MLM(CONSISTENT_BASE, CONSISTENT_END), +#endif + MLM(VMALLOC_START, VMALLOC_END), + MLM(PAGE_OFFSET, (unsigned long)high_memory), 
+#ifdef CONFIG_HIGHMEM + MLM(PKMAP_BASE, (PKMAP_BASE) + (LAST_PKMAP) * + (PAGE_SIZE)), +#endif + MLM(MODULES_VADDR, MODULES_END), + + MLK_ROUNDUP(__init_begin, __init_end), + MLK_ROUNDUP(_text, _etext), + MLK_ROUNDUP(_data, _edata)); + +#undef MLK +#undef MLM +#undef MLK_ROUNDUP + + /* + * Check boundaries twice: Some fundamental inconsistencies can + * be detected at build time already. + */ +#ifdef CONFIG_MMU + BUILD_BUG_ON(VMALLOC_END > CONSISTENT_BASE); + BUG_ON(VMALLOC_END > CONSISTENT_BASE); + + BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); + BUG_ON(TASK_SIZE > MODULES_VADDR); +#endif + +#ifdef CONFIG_HIGHMEM + BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET); + BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET); +#endif + if (PAGE_SIZE >= 16384 && num_physpages <= 128) { extern int sysctl_overcommit_memory; /* @@ -632,10 +678,10 @@ void __init mem_init(void) void free_initmem(void) { #ifdef CONFIG_HAVE_TCM - extern char *__tcm_start, *__tcm_end; + extern char __tcm_start, __tcm_end; - totalram_pages += free_area(__phys_to_pfn(__pa(__tcm_start)), - __phys_to_pfn(__pa(__tcm_end)), + totalram_pages += free_area(__phys_to_pfn(__pa(&__tcm_start)), + __phys_to_pfn(__pa(&__tcm_end)), "TCM link"); #endif diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 0ab75c60f7cf..28c8b950ef04 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -139,8 +139,8 @@ void __check_kvm_seq(struct mm_struct *mm) * which requires the new ioremap'd region to be referenced, the CPU will * reference the _old_ region. * - * Note that get_vm_area() allocates a guard 4K page, so we need to mask - * the size back to 1MB aligned or we will overflow in the loop below. + * Note that get_vm_area_caller() allocates a guard 4K page, so we need to + * mask the size back to 1MB aligned or we will overflow in the loop below. */ static void unmap_area_sections(unsigned long virt, unsigned long size) { @@ -254,22 +254,8 @@ remap_area_supersections(unsigned long virt, unsigned long pfn, } #endif - -/* - * Remap an arbitrary physical address space into the kernel virtual - * address space. Needed when the kernel wants to access high addresses - * directly. - * - * NOTE! We need to allow non-page-aligned mappings too: we will obviously - * have to convert them into an offset in a page-aligned mapping, but the - * caller shouldn't need to know that small detail. - * - * 'flags' are the extra L_PTE_ flags that you want to specify for this - * mapping. See <asm/pgtable.h> for more information. 
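The paired checks added to mem_init() above ("Check boundaries twice") follow a common idiom: BUILD_BUG_ON() breaks the build when the compiler can prove the condition, while the duplicate BUG_ON() still catches it at boot if the values only become known per configuration. A minimal sketch (check_layout() is a made-up name for illustration):

/*
 * Sketch of the twin-check idiom used in mem_init() above.
 */
static void __init check_layout(void)
{
	/* compile-time constants: fails the build if wrong */
	BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET);
	/* the same test again, evaluated at boot as a safety net */
	BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET);
}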
- */ -void __iomem * -__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, - unsigned int mtype) +void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, + unsigned long offset, size_t size, unsigned int mtype, void *caller) { const struct mem_type *type; int err; @@ -291,7 +277,7 @@ __arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, */ size = PAGE_ALIGN(offset + size); - area = get_vm_area(size, VM_IOREMAP); + area = get_vm_area_caller(size, VM_IOREMAP, caller); if (!area) return NULL; addr = (unsigned long)area->addr; @@ -318,10 +304,9 @@ __arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, flush_cache_vmap(addr, addr + size); return (void __iomem *) (offset + addr); } -EXPORT_SYMBOL(__arm_ioremap_pfn); -void __iomem * -__arm_ioremap(unsigned long phys_addr, size_t size, unsigned int mtype) +void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size, + unsigned int mtype, void *caller) { unsigned long last_addr; unsigned long offset = phys_addr & ~PAGE_MASK; @@ -334,7 +319,33 @@ __arm_ioremap(unsigned long phys_addr, size_t size, unsigned int mtype) if (!size || last_addr < phys_addr) return NULL; - return __arm_ioremap_pfn(pfn, offset, size, mtype); + return __arm_ioremap_pfn_caller(pfn, offset, size, mtype, + caller); +} + +/* + * Remap an arbitrary physical address space into the kernel virtual + * address space. Needed when the kernel wants to access high addresses + * directly. + * + * NOTE! We need to allow non-page-aligned mappings too: we will obviously + * have to convert them into an offset in a page-aligned mapping, but the + * caller shouldn't need to know that small detail. + */ +void __iomem * +__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, + unsigned int mtype) +{ + return __arm_ioremap_pfn_caller(pfn, offset, size, mtype, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(__arm_ioremap_pfn); + +void __iomem * +__arm_ioremap(unsigned long phys_addr, size_t size, unsigned int mtype) +{ + return __arm_ioremap_caller(phys_addr, size, mtype, + __builtin_return_address(0)); } EXPORT_SYMBOL(__arm_ioremap); diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index a888363398f8..815d08eecbb0 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -28,10 +28,7 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page #endif -struct map_desc; -struct meminfo; struct pglist_data; -void __init create_mapping(struct map_desc *md); void __init bootmem_init(void); void reserve_node_zero(struct pglist_data *pgdat); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 761ffede6a23..285894171186 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -14,6 +14,7 @@ #include <linux/bootmem.h> #include <linux/mman.h> #include <linux/nodemask.h> +#include <linux/sort.h> #include <asm/cputype.h> #include <asm/mach-types.h> @@ -100,18 +101,17 @@ static struct cachepolicy cache_policies[] __initdata = { * writebuffer to be turned off. (Note: the write * buffer should not be on and the cache off). 
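The *_caller() plumbing added to ioremap.c above exists so the vmalloc layer can attribute each mapping to the driver that created it (via get_vm_area_caller() and, ultimately, /proc/vmallocinfo) rather than to the ioremap implementation itself. A hedged sketch of the pattern; my_ioremap() is a hypothetical wrapper, not part of this patch:

/*
 * Hypothetical wrapper: capture the call site with
 * __builtin_return_address(0) and hand it down, exactly as the
 * exported __arm_ioremap()/__arm_ioremap_pfn() wrappers now do.
 */
void __iomem *my_ioremap(unsigned long phys, size_t size)
{
	return __arm_ioremap_caller(phys, size, MT_DEVICE,
				    __builtin_return_address(0));
}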
*/ -static void __init early_cachepolicy(char **p) +static int __init early_cachepolicy(char *p) { int i; for (i = 0; i < ARRAY_SIZE(cache_policies); i++) { int len = strlen(cache_policies[i].policy); - if (memcmp(*p, cache_policies[i].policy, len) == 0) { + if (memcmp(p, cache_policies[i].policy, len) == 0) { cachepolicy = i; cr_alignment &= ~cache_policies[i].cr_mask; cr_no_alignment &= ~cache_policies[i].cr_mask; - *p += len; break; } } @@ -130,36 +130,37 @@ static void __init early_cachepolicy(char **p) } flush_cache_all(); set_cr(cr_alignment); + return 0; } -__early_param("cachepolicy=", early_cachepolicy); +early_param("cachepolicy", early_cachepolicy); -static void __init early_nocache(char **__unused) +static int __init early_nocache(char *__unused) { char *p = "buffered"; printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p); - early_cachepolicy(&p); + early_cachepolicy(p); + return 0; } -__early_param("nocache", early_nocache); +early_param("nocache", early_nocache); -static void __init early_nowrite(char **__unused) +static int __init early_nowrite(char *__unused) { char *p = "uncached"; printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p); - early_cachepolicy(&p); + early_cachepolicy(p); + return 0; } -__early_param("nowb", early_nowrite); +early_param("nowb", early_nowrite); -static void __init early_ecc(char **p) +static int __init early_ecc(char *p) { - if (memcmp(*p, "on", 2) == 0) { + if (memcmp(p, "on", 2) == 0) ecc_mask = PMD_PROTECTION; - *p += 2; - } else if (memcmp(*p, "off", 3) == 0) { + else if (memcmp(p, "off", 3) == 0) ecc_mask = 0; - *p += 3; - } + return 0; } -__early_param("ecc=", early_ecc); +early_param("ecc", early_ecc); static int __init noalign_setup(char *__unused) { @@ -420,6 +421,10 @@ static void __init build_mem_type_table(void) user_pgprot |= L_PTE_SHARED; kern_pgprot |= L_PTE_SHARED; vecs_pgprot |= L_PTE_SHARED; + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S; + mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED; + mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S; + mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; #endif @@ -599,7 +604,7 @@ static void __init create_36bit_mapping(struct map_desc *md, * offsets, and we take full advantage of sections and * supersections. */ -void __init create_mapping(struct map_desc *md) +static void __init create_mapping(struct map_desc *md) { unsigned long phys, addr, length, end; const struct mem_type *type; @@ -670,9 +675,9 @@ static unsigned long __initdata vmalloc_reserve = SZ_128M; * bytes. This can be used to increase (or decrease) the vmalloc * area - the default is 128m. 
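The conversions in this file all follow the same shape: the generic early_param() machinery hands the handler the value string directly and expects an int return, replacing the old ARM-private __early_param() char ** in/out convention (which is why the "*p += len" style pointer advancing disappears). A minimal sketch of the new pattern; "widget" is a made-up parameter for illustration:

/*
 * Sketch of the early_param() pattern adopted by this patch.
 */
static unsigned long widget_size __initdata = SZ_1M;

static int __init early_widget(char *p)
{
	widget_size = memparse(p, NULL);  /* accepts "4k", "16M", ... */
	return 0;                         /* 0 = parameter handled */
}
early_param("widget", early_widget);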
*/ -static void __init early_vmalloc(char **arg) +static int __init early_vmalloc(char *arg) { - vmalloc_reserve = memparse(*arg, arg); + vmalloc_reserve = memparse(arg, NULL); if (vmalloc_reserve < SZ_16M) { vmalloc_reserve = SZ_16M; @@ -687,8 +692,9 @@ static void __init early_vmalloc(char **arg) "vmalloc area is too big, limiting to %luMB\n", vmalloc_reserve >> 20); } + return 0; } -__early_param("vmalloc=", early_vmalloc); +early_param("vmalloc", early_vmalloc); #define VMALLOC_MIN (void *)(VMALLOC_END - vmalloc_reserve) @@ -864,9 +870,10 @@ void __init reserve_node_zero(pg_data_t *pgdat) if (machine_is_p720t()) res_size = 0x00014000; - /* H1940 and RX3715 need to reserve this for suspend */ + /* H1940, RX3715 and RX1950 need to reserve this for suspend */ - if (machine_is_h1940() || machine_is_rx3715()) { + if (machine_is_h1940() || machine_is_rx3715() + || machine_is_rx1950()) { reserve_bootmem_node(pgdat, 0x30003000, 0x1000, BOOTMEM_DEFAULT); reserve_bootmem_node(pgdat, 0x30081000, 0x1000, @@ -1012,6 +1019,39 @@ static void __init kmap_init(void) #endif } +static inline void map_memory_bank(struct membank *bank) +{ + struct map_desc map; + + map.pfn = bank_pfn_start(bank); + map.virtual = __phys_to_virt(bank_phys_start(bank)); + map.length = bank_phys_size(bank); + map.type = MT_MEMORY; + + create_mapping(&map); +} + +static void __init map_lowmem(void) +{ + struct meminfo *mi = &meminfo; + int i; + + /* Map all the lowmem memory banks. */ + for (i = 0; i < mi->nr_banks; i++) { + struct membank *bank = &mi->bank[i]; + + if (!bank->highmem) + map_memory_bank(bank); + } +} + +static int __init meminfo_cmp(const void *_a, const void *_b) +{ + const struct membank *a = _a, *b = _b; + long cmp = bank_pfn_start(a) - bank_pfn_start(b); + return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; +} + /* * paging_init() sets up the page tables, initialises the zone memory * maps, and sets up the zero page, bad page and bad page tables. @@ -1020,9 +1060,12 @@ void __init paging_init(struct machine_desc *mdesc) { void *zero_page; + sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL); + build_mem_type_table(); sanity_check_meminfo(); prepare_page_table(); + map_lowmem(); bootmem_init(); devicemaps_init(mdesc); kmap_init(); @@ -1049,10 +1092,12 @@ void setup_mm_for_reboot(char mode) pgd_t *pgd; int i; - if (current->mm && current->mm->pgd) - pgd = current->mm->pgd; - else - pgd = init_mm.pgd; + /* + * We need to access to user-mode page tables here. For kernel threads + * we don't have any user-mode mappings so we use the context that we + * "borrowed". 
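The "borrowed" context mentioned in the comment above is the active_mm convention: a kernel thread has current->mm == NULL, but current->active_mm always names the page tables it is currently running on, borrowed from the last user task. That is why the old mm/init_mm fallback can collapse to a single unconditional dereference. A hypothetical helper making the rule explicit:

/*
 * Hypothetical helper, illustrative only: active_mm is valid for
 * both user tasks (where it equals mm) and kernel threads (where
 * mm is NULL and active_mm is the borrowed context).
 */
static pgd_t *current_pgd(void)
{
	return current->active_mm->pgd;
}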
+ */ + pgd = current->active_mm->pgd; base_pmdval = PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT; if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale()) diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 374a8311bc84..33b327379f07 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -65,6 +65,15 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long uaddr, void *dst, const void *src, + unsigned long len) +{ + memcpy(dst, src, len); + if (vma->vm_flags & VM_EXEC) + __cpuc_coherent_user_range(uaddr, uaddr + len); +} + void __iomem *__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, unsigned int mtype) { @@ -74,6 +83,12 @@ void __iomem *__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, } EXPORT_SYMBOL(__arm_ioremap_pfn); +void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, + size_t size, unsigned int mtype, void *caller) +{ + return __arm_ioremap_pfn(pfn, offset, size, mtype); +} + void __iomem *__arm_ioremap(unsigned long phys_addr, size_t size, unsigned int mtype) { @@ -81,6 +96,12 @@ void __iomem *__arm_ioremap(unsigned long phys_addr, size_t size, } EXPORT_SYMBOL(__arm_ioremap); +void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size, + unsigned int mtype, void *caller) +{ + return __arm_ioremap(phys_addr, size, mtype); +} + void __iounmap(volatile void __iomem *addr) { } diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index 2690146161ba..be5f58e153bf 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -8,6 +8,7 @@ * published by the Free Software Foundation. */ #include <linux/mm.h> +#include <linux/gfp.h> #include <linux/highmem.h> #include <asm/pgalloc.h> diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index 8012e24282b2..72507c630ceb 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -265,7 +265,7 @@ ENTRY(arm1020_flush_kern_dcache_area) * * (same as v4wb) */ -ENTRY(arm1020_dma_inv_range) +arm1020_dma_inv_range: mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE tst r0, #CACHE_DLINESIZE - 1 @@ -295,7 +295,7 @@ ENTRY(arm1020_dma_inv_range) * * (same as v4wb) */ -ENTRY(arm1020_dma_clean_range) +arm1020_dma_clean_range: mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE bic r0, r0, #CACHE_DLINESIZE - 1 @@ -330,6 +330,30 @@ ENTRY(arm1020_dma_flush_range) mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1020_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm1020_dma_clean_range + bcs arm1020_dma_inv_range + b arm1020_dma_flush_range +ENDPROC(arm1020_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1020_dma_unmap_area) + mov pc, lr +ENDPROC(arm1020_dma_unmap_area) + ENTRY(arm1020_cache_fns) .long arm1020_flush_kern_cache_all .long arm1020_flush_user_cache_all @@ -337,8 +361,8 @@ ENTRY(arm1020_cache_fns) .long arm1020_coherent_kern_range .long arm1020_coherent_user_range .long arm1020_flush_kern_dcache_area - .long arm1020_dma_inv_range - .long arm1020_dma_clean_range + .long arm1020_dma_map_area + .long arm1020_dma_unmap_area .long arm1020_dma_flush_range .align 5 diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index 
41fe25d234f5..d27829805609 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -258,7 +258,7 @@ ENTRY(arm1020e_flush_kern_dcache_area) * * (same as v4wb) */ -ENTRY(arm1020e_dma_inv_range) +arm1020e_dma_inv_range: mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE tst r0, #CACHE_DLINESIZE - 1 @@ -284,7 +284,7 @@ ENTRY(arm1020e_dma_inv_range) * * (same as v4wb) */ -ENTRY(arm1020e_dma_clean_range) +arm1020e_dma_clean_range: mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE bic r0, r0, #CACHE_DLINESIZE - 1 @@ -316,6 +316,30 @@ ENTRY(arm1020e_dma_flush_range) mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1020e_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm1020e_dma_clean_range + bcs arm1020e_dma_inv_range + b arm1020e_dma_flush_range +ENDPROC(arm1020e_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1020e_dma_unmap_area) + mov pc, lr +ENDPROC(arm1020e_dma_unmap_area) + ENTRY(arm1020e_cache_fns) .long arm1020e_flush_kern_cache_all .long arm1020e_flush_user_cache_all @@ -323,8 +347,8 @@ ENTRY(arm1020e_cache_fns) .long arm1020e_coherent_kern_range .long arm1020e_coherent_user_range .long arm1020e_flush_kern_dcache_area - .long arm1020e_dma_inv_range - .long arm1020e_dma_clean_range + .long arm1020e_dma_map_area + .long arm1020e_dma_unmap_area .long arm1020e_dma_flush_range .align 5 diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index 20a5b1b31a70..ce13e4a827de 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -247,7 +247,7 @@ ENTRY(arm1022_flush_kern_dcache_area) * * (same as v4wb) */ -ENTRY(arm1022_dma_inv_range) +arm1022_dma_inv_range: mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE tst r0, #CACHE_DLINESIZE - 1 @@ -273,7 +273,7 @@ ENTRY(arm1022_dma_inv_range) * * (same as v4wb) */ -ENTRY(arm1022_dma_clean_range) +arm1022_dma_clean_range: mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE bic r0, r0, #CACHE_DLINESIZE - 1 @@ -305,6 +305,30 @@ ENTRY(arm1022_dma_flush_range) mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1022_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm1022_dma_clean_range + bcs arm1022_dma_inv_range + b arm1022_dma_flush_range +ENDPROC(arm1022_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1022_dma_unmap_area) + mov pc, lr +ENDPROC(arm1022_dma_unmap_area) + ENTRY(arm1022_cache_fns) .long arm1022_flush_kern_cache_all .long arm1022_flush_user_cache_all @@ -312,8 +336,8 @@ ENTRY(arm1022_cache_fns) .long arm1022_coherent_kern_range .long arm1022_coherent_user_range .long arm1022_flush_kern_dcache_area - .long arm1022_dma_inv_range - .long arm1022_dma_clean_range + .long arm1022_dma_map_area + .long arm1022_dma_unmap_area .long arm1022_dma_flush_range .align 5 diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index 96aedb10fcc4..636672a29c6d 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -241,7 +241,7 @@ ENTRY(arm1026_flush_kern_dcache_area) * * (same as v4wb) */ -ENTRY(arm1026_dma_inv_range) 
+arm1026_dma_inv_range: mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE tst r0, #CACHE_DLINESIZE - 1 @@ -267,7 +267,7 @@ ENTRY(arm1026_dma_inv_range) * * (same as v4wb) */ -ENTRY(arm1026_dma_clean_range) +arm1026_dma_clean_range: mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE bic r0, r0, #CACHE_DLINESIZE - 1 @@ -299,6 +299,30 @@ ENTRY(arm1026_dma_flush_range) mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1026_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm1026_dma_clean_range + bcs arm1026_dma_inv_range + b arm1026_dma_flush_range +ENDPROC(arm1026_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1026_dma_unmap_area) + mov pc, lr +ENDPROC(arm1026_dma_unmap_area) + ENTRY(arm1026_cache_fns) .long arm1026_flush_kern_cache_all .long arm1026_flush_user_cache_all @@ -306,8 +330,8 @@ ENTRY(arm1026_cache_fns) .long arm1026_coherent_kern_range .long arm1026_coherent_user_range .long arm1026_flush_kern_dcache_area - .long arm1026_dma_inv_range - .long arm1026_dma_clean_range + .long arm1026_dma_map_area + .long arm1026_dma_unmap_area .long arm1026_dma_flush_range .align 5 diff --git a/arch/arm/mm/proc-arm6_7.S b/arch/arm/mm/proc-arm6_7.S index 3f9cd3d8f6d5..795dc615f43b 100644 --- a/arch/arm/mm/proc-arm6_7.S +++ b/arch/arm/mm/proc-arm6_7.S @@ -41,7 +41,7 @@ ENTRY(cpu_arm7_dcache_clean_area) ENTRY(cpu_arm7_data_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR - ldr r8, [r0] @ read arm instruction + ldr r8, [r2] @ read arm instruction tst r8, #1 << 20 @ L = 0 -> write? orreq r1, r1, #1 << 11 @ yes. 
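The one-line proc-arm6_7.S fix above is subtle enough to deserve a note. The register roles below are taken from the abort-handler calling convention documented in entry-armv.S of this era, so treat the exact assignments as a best-effort reading:

/*
 * r0 = FAR (the data address that faulted), r1 = FSR, r2 = PC of
 * the aborting instruction.  Decoding the instruction's L bit must
 * therefore load through r2; the old "ldr r8, [r0]" read from the
 * faulting data address instead, which is precisely the address
 * that may be unmapped.
 */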
and r7, r8, #15 << 24 diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 471669e2d7cb..8be81992645d 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -239,7 +239,7 @@ ENTRY(arm920_flush_kern_dcache_area) * * (same as v4wb) */ -ENTRY(arm920_dma_inv_range) +arm920_dma_inv_range: tst r0, #CACHE_DLINESIZE - 1 bic r0, r0, #CACHE_DLINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean D entry @@ -262,7 +262,7 @@ ENTRY(arm920_dma_inv_range) * * (same as v4wb) */ -ENTRY(arm920_dma_clean_range) +arm920_dma_clean_range: bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHE_DLINESIZE @@ -288,6 +288,30 @@ ENTRY(arm920_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm920_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm920_dma_clean_range + bcs arm920_dma_inv_range + b arm920_dma_flush_range +ENDPROC(arm920_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm920_dma_unmap_area) + mov pc, lr +ENDPROC(arm920_dma_unmap_area) + ENTRY(arm920_cache_fns) .long arm920_flush_kern_cache_all .long arm920_flush_user_cache_all @@ -295,8 +319,8 @@ ENTRY(arm920_cache_fns) .long arm920_coherent_kern_range .long arm920_coherent_user_range .long arm920_flush_kern_dcache_area - .long arm920_dma_inv_range - .long arm920_dma_clean_range + .long arm920_dma_map_area + .long arm920_dma_unmap_area .long arm920_dma_flush_range #endif diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index ee111b00fa41..c0ff8e4b1074 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -241,7 +241,7 @@ ENTRY(arm922_flush_kern_dcache_area) * * (same as v4wb) */ -ENTRY(arm922_dma_inv_range) +arm922_dma_inv_range: tst r0, #CACHE_DLINESIZE - 1 bic r0, r0, #CACHE_DLINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean D entry @@ -264,7 +264,7 @@ ENTRY(arm922_dma_inv_range) * * (same as v4wb) */ -ENTRY(arm922_dma_clean_range) +arm922_dma_clean_range: bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHE_DLINESIZE @@ -290,6 +290,30 @@ ENTRY(arm922_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm922_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm922_dma_clean_range + bcs arm922_dma_inv_range + b arm922_dma_flush_range +ENDPROC(arm922_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm922_dma_unmap_area) + mov pc, lr +ENDPROC(arm922_dma_unmap_area) + ENTRY(arm922_cache_fns) .long arm922_flush_kern_cache_all .long arm922_flush_user_cache_all @@ -297,8 +321,8 @@ ENTRY(arm922_cache_fns) .long arm922_coherent_kern_range .long arm922_coherent_user_range .long arm922_flush_kern_dcache_area - .long arm922_dma_inv_range - .long arm922_dma_clean_range + .long arm922_dma_map_area + .long arm922_dma_unmap_area .long arm922_dma_flush_range #endif diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index 8deb5bde58e4..3c6cffe400f6 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S 
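Every proc-*.S file in this diff grows the same dma_map_area dispatch stub, so its control flow is worth spelling out once. A hedged C rendering follows; the dma_*_range helpers are stand-ins for the per-CPU assembly labels, not real kernel functions, while the enum dma_data_direction values (DMA_BIDIRECTIONAL = 0, DMA_TO_DEVICE = 1, DMA_FROM_DEVICE = 2) are the real ones:

/*
 * C rendering of the "add r1, r1, r0 / cmp r2, #DMA_TO_DEVICE /
 * beq clean / bcs inv / b flush" stub.  The cmp sets the carry for
 * any dir >= DMA_TO_DEVICE, so after beq takes the equal case, bcs
 * is left matching DMA_FROM_DEVICE only.
 */
static void dma_map_area_sketch(const void *start, size_t size, int dir)
{
	const void *end = start + size;		/* add r1, r1, r0 */

	if (dir == DMA_TO_DEVICE)		/* beq: device reads;   */
		dma_clean_range(start, end);	/* write back only      */
	else if (dir > DMA_TO_DEVICE)		/* bcs: device writes;  */
		dma_inv_range(start, end);	/* discard stale lines  */
	else					/* DMA_BIDIRECTIONAL:   */
		dma_flush_range(start, end);	/* clean + invalidate   */
}

The xscale_dma_a0_map_area variant later in the diff uses teq instead, sending everything except DMA_TO_DEVICE to the flush routine; that mirrors the pre-existing 80200 A0/A1 table, which already substituted dma_flush_range for dma_inv_range on those steppings.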
@@ -283,7 +283,7 @@ ENTRY(arm925_flush_kern_dcache_area) * * (same as v4wb) */ -ENTRY(arm925_dma_inv_range) +arm925_dma_inv_range: #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH tst r0, #CACHE_DLINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean D entry @@ -308,7 +308,7 @@ ENTRY(arm925_dma_inv_range) * * (same as v4wb) */ -ENTRY(arm925_dma_clean_range) +arm925_dma_clean_range: #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry @@ -341,6 +341,30 @@ ENTRY(arm925_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm925_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm925_dma_clean_range + bcs arm925_dma_inv_range + b arm925_dma_flush_range +ENDPROC(arm925_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm925_dma_unmap_area) + mov pc, lr +ENDPROC(arm925_dma_unmap_area) + ENTRY(arm925_cache_fns) .long arm925_flush_kern_cache_all .long arm925_flush_user_cache_all @@ -348,8 +372,8 @@ ENTRY(arm925_cache_fns) .long arm925_coherent_kern_range .long arm925_coherent_user_range .long arm925_flush_kern_dcache_area - .long arm925_dma_inv_range - .long arm925_dma_clean_range + .long arm925_dma_map_area + .long arm925_dma_unmap_area .long arm925_dma_flush_range ENTRY(cpu_arm925_dcache_clean_area) diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index 64db6e275a44..75b707c9cce1 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -246,7 +246,7 @@ ENTRY(arm926_flush_kern_dcache_area) * * (same as v4wb) */ -ENTRY(arm926_dma_inv_range) +arm926_dma_inv_range: #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH tst r0, #CACHE_DLINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean D entry @@ -271,7 +271,7 @@ ENTRY(arm926_dma_inv_range) * * (same as v4wb) */ -ENTRY(arm926_dma_clean_range) +arm926_dma_clean_range: #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry @@ -304,6 +304,30 @@ ENTRY(arm926_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm926_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm926_dma_clean_range + bcs arm926_dma_inv_range + b arm926_dma_flush_range +ENDPROC(arm926_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm926_dma_unmap_area) + mov pc, lr +ENDPROC(arm926_dma_unmap_area) + ENTRY(arm926_cache_fns) .long arm926_flush_kern_cache_all .long arm926_flush_user_cache_all @@ -311,8 +335,8 @@ ENTRY(arm926_cache_fns) .long arm926_coherent_kern_range .long arm926_coherent_user_range .long arm926_flush_kern_dcache_area - .long arm926_dma_inv_range - .long arm926_dma_clean_range + .long arm926_dma_map_area + .long arm926_dma_unmap_area .long arm926_dma_flush_range ENTRY(cpu_arm926_dcache_clean_area) diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index 8196b9f401fb..1af1657819eb 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -171,7 +171,7 @@ ENTRY(arm940_flush_kern_dcache_area) * - start - virtual start address * - end - 
virtual end address */ -ENTRY(arm940_dma_inv_range) +arm940_dma_inv_range: mov ip, #0 mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments 1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries @@ -192,7 +192,7 @@ ENTRY(arm940_dma_inv_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm940_dma_clean_range) +arm940_dma_clean_range: ENTRY(cpu_arm940_dcache_clean_area) mov ip, #0 #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -233,6 +233,30 @@ ENTRY(arm940_dma_flush_range) mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm940_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm940_dma_clean_range + bcs arm940_dma_inv_range + b arm940_dma_flush_range +ENDPROC(arm940_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm940_dma_unmap_area) + mov pc, lr +ENDPROC(arm940_dma_unmap_area) + ENTRY(arm940_cache_fns) .long arm940_flush_kern_cache_all .long arm940_flush_user_cache_all @@ -240,8 +264,8 @@ ENTRY(arm940_cache_fns) .long arm940_coherent_kern_range .long arm940_coherent_user_range .long arm940_flush_kern_dcache_area - .long arm940_dma_inv_range - .long arm940_dma_clean_range + .long arm940_dma_map_area + .long arm940_dma_unmap_area .long arm940_dma_flush_range __INIT diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index 9a951239c86c..1664b6aaff79 100644 --- a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -215,7 +215,7 @@ ENTRY(arm946_flush_kern_dcache_area) * - end - virtual end address * (same as arm926) */ -ENTRY(arm946_dma_inv_range) +arm946_dma_inv_range: #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH tst r0, #CACHE_DLINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean D entry @@ -240,7 +240,7 @@ ENTRY(arm946_dma_inv_range) * * (same as arm926) */ -ENTRY(arm946_dma_clean_range) +arm946_dma_clean_range: #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry @@ -275,6 +275,30 @@ ENTRY(arm946_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm946_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm946_dma_clean_range + bcs arm946_dma_inv_range + b arm946_dma_flush_range +ENDPROC(arm946_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm946_dma_unmap_area) + mov pc, lr +ENDPROC(arm946_dma_unmap_area) + ENTRY(arm946_cache_fns) .long arm946_flush_kern_cache_all .long arm946_flush_user_cache_all @@ -282,8 +306,8 @@ ENTRY(arm946_cache_fns) .long arm946_coherent_kern_range .long arm946_coherent_user_range .long arm946_flush_kern_dcache_area - .long arm946_dma_inv_range - .long arm946_dma_clean_range + .long arm946_dma_map_area + .long arm946_dma_unmap_area .long arm946_dma_flush_range diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index dbc39383e66a..53e632343849 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -274,7 +274,7 @@ ENTRY(feroceon_range_flush_kern_dcache_area) * (same as v4wb) */ .align 5 -ENTRY(feroceon_dma_inv_range) +feroceon_dma_inv_range: tst r0, 
#CACHE_DLINESIZE - 1 bic r0, r0, #CACHE_DLINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean D entry @@ -288,7 +288,7 @@ ENTRY(feroceon_dma_inv_range) mov pc, lr .align 5 -ENTRY(feroceon_range_dma_inv_range) +feroceon_range_dma_inv_range: mrs r2, cpsr tst r0, #CACHE_DLINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean D entry @@ -314,7 +314,7 @@ ENTRY(feroceon_range_dma_inv_range) * (same as v4wb) */ .align 5 -ENTRY(feroceon_dma_clean_range) +feroceon_dma_clean_range: bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHE_DLINESIZE @@ -324,7 +324,7 @@ ENTRY(feroceon_dma_clean_range) mov pc, lr .align 5 -ENTRY(feroceon_range_dma_clean_range) +feroceon_range_dma_clean_range: mrs r2, cpsr cmp r1, r0 subne r1, r1, #1 @ top address is inclusive @@ -367,6 +367,44 @@ ENTRY(feroceon_range_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(feroceon_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq feroceon_dma_clean_range + bcs feroceon_dma_inv_range + b feroceon_dma_flush_range +ENDPROC(feroceon_dma_map_area) + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(feroceon_range_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq feroceon_range_dma_clean_range + bcs feroceon_range_dma_inv_range + b feroceon_range_dma_flush_range +ENDPROC(feroceon_range_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(feroceon_dma_unmap_area) + mov pc, lr +ENDPROC(feroceon_dma_unmap_area) + ENTRY(feroceon_cache_fns) .long feroceon_flush_kern_cache_all .long feroceon_flush_user_cache_all @@ -374,8 +412,8 @@ ENTRY(feroceon_cache_fns) .long feroceon_coherent_kern_range .long feroceon_coherent_user_range .long feroceon_flush_kern_dcache_area - .long feroceon_dma_inv_range - .long feroceon_dma_clean_range + .long feroceon_dma_map_area + .long feroceon_dma_unmap_area .long feroceon_dma_flush_range ENTRY(feroceon_range_cache_fns) @@ -385,8 +423,8 @@ ENTRY(feroceon_range_cache_fns) .long feroceon_coherent_kern_range .long feroceon_coherent_user_range .long feroceon_range_flush_kern_dcache_area - .long feroceon_range_dma_inv_range - .long feroceon_range_dma_clean_range + .long feroceon_range_dma_map_area + .long feroceon_dma_unmap_area .long feroceon_range_dma_flush_range .align 5 diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index 9674d36cc97d..caa31154e7db 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -218,7 +218,7 @@ ENTRY(mohawk_flush_kern_dcache_area) * * (same as v4wb) */ -ENTRY(mohawk_dma_inv_range) +mohawk_dma_inv_range: tst r0, #CACHE_DLINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean D entry tst r1, #CACHE_DLINESIZE - 1 @@ -241,7 +241,7 @@ ENTRY(mohawk_dma_inv_range) * * (same as v4wb) */ -ENTRY(mohawk_dma_clean_range) +mohawk_dma_clean_range: bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHE_DLINESIZE @@ -268,6 +268,30 @@ ENTRY(mohawk_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(mohawk_dma_map_area) + add r1, r1, r0 + cmp r2, 
#DMA_TO_DEVICE + beq mohawk_dma_clean_range + bcs mohawk_dma_inv_range + b mohawk_dma_flush_range +ENDPROC(mohawk_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(mohawk_dma_unmap_area) + mov pc, lr +ENDPROC(mohawk_dma_unmap_area) + ENTRY(mohawk_cache_fns) .long mohawk_flush_kern_cache_all .long mohawk_flush_user_cache_all @@ -275,8 +299,8 @@ ENTRY(mohawk_cache_fns) .long mohawk_coherent_kern_range .long mohawk_coherent_user_range .long mohawk_flush_kern_dcache_area - .long mohawk_dma_inv_range - .long mohawk_dma_clean_range + .long mohawk_dma_map_area + .long mohawk_dma_unmap_area .long mohawk_dma_flush_range ENTRY(cpu_mohawk_dcache_clean_area) diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index ee7700242c19..5c47760c2064 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -45,7 +45,7 @@ ENTRY(cpu_sa1100_proc_init) mcr p15, 0, r0, c9, c0, 5 @ Allow read-buffer operations from userland mov pc, lr - .previous + .section .text /* * cpu_sa1100_proc_fin() diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index 8e4f6dca8997..e5797f1c1db7 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -257,7 +257,7 @@ ENTRY(xsc3_flush_kern_dcache_area) * - start - virtual start address * - end - virtual end address */ -ENTRY(xsc3_dma_inv_range) +xsc3_dma_inv_range: tst r0, #CACHELINESIZE - 1 bic r0, r0, #CACHELINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean L1 D line @@ -278,7 +278,7 @@ ENTRY(xsc3_dma_inv_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(xsc3_dma_clean_range) +xsc3_dma_clean_range: bic r0, r0, #CACHELINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line add r0, r0, #CACHELINESIZE @@ -304,6 +304,30 @@ ENTRY(xsc3_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ data write barrier mov pc, lr +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xsc3_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq xsc3_dma_clean_range + bcs xsc3_dma_inv_range + b xsc3_dma_flush_range +ENDPROC(xsc3_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xsc3_dma_unmap_area) + mov pc, lr +ENDPROC(xsc3_dma_unmap_area) + ENTRY(xsc3_cache_fns) .long xsc3_flush_kern_cache_all .long xsc3_flush_user_cache_all @@ -311,8 +335,8 @@ ENTRY(xsc3_cache_fns) .long xsc3_coherent_kern_range .long xsc3_coherent_user_range .long xsc3_flush_kern_dcache_area - .long xsc3_dma_inv_range - .long xsc3_dma_clean_range + .long xsc3_dma_map_area + .long xsc3_dma_unmap_area .long xsc3_dma_flush_range ENTRY(cpu_xsc3_dcache_clean_area) diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index 93df47265f2d..63037e2162f2 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -315,7 +315,7 @@ ENTRY(xscale_flush_kern_dcache_area) * - start - virtual start address * - end - virtual end address */ -ENTRY(xscale_dma_inv_range) +xscale_dma_inv_range: tst r0, #CACHELINESIZE - 1 bic r0, r0, #CACHELINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean D entry @@ -336,7 +336,7 @@ ENTRY(xscale_dma_inv_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(xscale_dma_clean_range) +xscale_dma_clean_range: bic r0, r0, #CACHELINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ 
clean D entry add r0, r0, #CACHELINESIZE @@ -363,6 +363,43 @@ ENTRY(xscale_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer mov pc, lr +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xscale_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq xscale_dma_clean_range + bcs xscale_dma_inv_range + b xscale_dma_flush_range +ENDPROC(xscale_dma_map_area) + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xscale_dma_a0_map_area) + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + beq xscale_dma_clean_range + b xscale_dma_flush_range +ENDPROC(xscale_dma_a0_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xscale_dma_unmap_area) + mov pc, lr +ENDPROC(xscale_dma_unmap_area) + ENTRY(xscale_cache_fns) .long xscale_flush_kern_cache_all .long xscale_flush_user_cache_all @@ -370,8 +407,8 @@ ENTRY(xscale_cache_fns) .long xscale_coherent_kern_range .long xscale_coherent_user_range .long xscale_flush_kern_dcache_area - .long xscale_dma_inv_range - .long xscale_dma_clean_range + .long xscale_dma_map_area + .long xscale_dma_unmap_area .long xscale_dma_flush_range /* @@ -394,8 +431,8 @@ ENTRY(xscale_80200_A0_A1_cache_fns) .long xscale_coherent_kern_range .long xscale_coherent_user_range .long xscale_flush_kern_dcache_area - .long xscale_dma_flush_range - .long xscale_dma_clean_range + .long xscale_dma_a0_map_area + .long xscale_dma_unmap_area .long xscale_dma_flush_range ENTRY(cpu_xscale_dcache_clean_area) diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S index 0cb1848bd876..f3f288a9546d 100644 --- a/arch/arm/mm/tlb-v7.S +++ b/arch/arm/mm/tlb-v7.S @@ -50,7 +50,11 @@ ENTRY(v7wbi_flush_user_tlb_range) cmp r0, r1 blo 1b mov ip, #0 +#ifdef CONFIG_SMP + mcr p15, 0, ip, c7, c1, 6 @ flush BTAC/BTB Inner Shareable +#else mcr p15, 0, ip, c7, c5, 6 @ flush BTAC/BTB +#endif dsb mov pc, lr ENDPROC(v7wbi_flush_user_tlb_range) @@ -79,7 +83,11 @@ ENTRY(v7wbi_flush_kern_tlb_range) cmp r0, r1 blo 1b mov r2, #0 +#ifdef CONFIG_SMP + mcr p15, 0, r2, c7, c1, 6 @ flush BTAC/BTB Inner Shareable +#else mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB +#endif dsb isb mov pc, lr
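For reference, the .long tables edited throughout these proc-*.S files populate a structure of function pointers; the sketch below is reconstructed from this era's asm/cacheflush.h, so treat the exact prototypes as a best-effort reading, though the field order is what the tables rely on:

/*
 * Reconstruction of struct cpu_cache_fns as the .long tables fill
 * it in.  This diff swaps the two DMA slots from the old
 * dma_inv_range/dma_clean_range pair to dma_map_area/dma_unmap_area
 * while keeping dma_flush_range as the final entry, which is why
 * every table edit touches exactly the same two lines.
 */
struct cpu_cache_fns {
	void (*flush_kern_all)(void);
	void (*flush_user_all)(void);
	void (*flush_user_range)(unsigned long, unsigned long, unsigned int);
	void (*coherent_kern_range)(unsigned long, unsigned long);
	void (*coherent_user_range)(unsigned long, unsigned long);
	void (*flush_kern_dcache_area)(void *, size_t);
	void (*dma_map_area)(const void *, size_t, int);
	void (*dma_unmap_area)(const void *, size_t, int);
	void (*dma_flush_range)(const void *, const void *);
};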