From 49cbe78637eb0503f45fc9b556ec08918a616534 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Tue, 20 Jan 2009 14:15:18 +0800 Subject: [ARM] pxa: add base support for Marvell's PXA168 processor line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit """The MarvellĀ® PXA168 processor is the first in a family of application processors targeted at mass market opportunities in computing and consumer devices. It balances high computing and multimedia performance with low power consumption to support extended battery life, and includes a wealth of integrated peripherals to reduce overall BOM cost .... """ See http://www.marvell.com/featured/pxa168.jsp for more information. 1. Marvell Mohawk core is a hybrid of xscale3 and its own ARM core, there are many enhancements like instructions for flushing the whole D-cache, and so on 2. Clock reuses Russell's common clkdev, and added the basic support for UART1/2. 3. Devices are a bit different from the 'mach-pxa' way, the platform devices are now dynamically allocated only when necessary (i.e. when pxa_register_device() is called). Description for each device are stored in an array of 'struct pxa_device_desc'. Now that: a. this array of device description is marked with __initdata and can be freed up system is fully up b. which means board code has to add all needed devices early in his initializing function c. platform specific data can now be marked as __initdata since they are allocated and copied by platform_device_add_data() 4. only the basic UART1/2/3 are added, more devices will come later. Signed-off-by: Jason Chagas Signed-off-by: Eric Miao --- arch/arm/include/asm/cacheflush.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/arm/include/asm/cacheflush.h') diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 6cbd8fdc9f1f..bfb0cb9aaa97 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -94,6 +94,14 @@ # endif #endif +#if defined(CONFIG_CPU_MOHAWK) +# ifdef _CACHE +# define MULTI_CACHE 1 +# else +# define _CACHE mohawk +# endif +#endif + #if defined(CONFIG_CPU_FEROCEON) # define MULTI_CACHE 1 #endif -- cgit v1.2.3 From 28853ac8fe5221de74a14f1182d7b2b383dfd85c Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas Date: Wed, 25 Mar 2009 13:10:01 +0200 Subject: ARM: Add support for FA526 v2 Adds support for Faraday FA526 core. This core is used at least by: Cortina Systems Gemini and Centroid family Cavium Networks ECONA family Grain Media GM8120 Pixelplus ImageARM Prolific PL-1029 Faraday IP evaluation boards v2: - move TLB_BTB to separate patch - update copyrights Signed-off-by: Paulius Zaleckas --- arch/arm/Makefile | 1 + arch/arm/boot/compressed/head.S | 26 ++++ arch/arm/include/asm/cacheflush.h | 8 ++ arch/arm/include/asm/page.h | 8 ++ arch/arm/include/asm/proc-fns.h | 8 ++ arch/arm/include/asm/system.h | 6 + arch/arm/include/asm/tlbflush.h | 19 +++ arch/arm/mm/Kconfig | 35 +++++- arch/arm/mm/Makefile | 4 + arch/arm/mm/cache-fa.S | 220 +++++++++++++++++++++++++++++++++ arch/arm/mm/copypage-fa.c | 86 +++++++++++++ arch/arm/mm/proc-fa526.S | 248 ++++++++++++++++++++++++++++++++++++++ arch/arm/mm/tlb-fa.S | 75 ++++++++++++ 13 files changed, 742 insertions(+), 2 deletions(-) create mode 100644 arch/arm/mm/cache-fa.S create mode 100644 arch/arm/mm/copypage-fa.c create mode 100644 arch/arm/mm/proc-fa526.S create mode 100644 arch/arm/mm/tlb-fa.S (limited to 'arch/arm/include/asm/cacheflush.h') diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 24e0f0187697..d29f9260fb1c 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -72,6 +72,7 @@ tune-$(CONFIG_CPU_ARM920T) :=-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM922T) :=-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM925T) :=-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM926T) :=-mtune=arm9tdmi +tune-$(CONFIG_CPU_FA526) :=-mtune=arm9tdmi tune-$(CONFIG_CPU_SA110) :=-mtune=strongarm110 tune-$(CONFIG_CPU_SA1100) :=-mtune=strongarm1100 tune-$(CONFIG_CPU_XSCALE) :=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 77d614232d81..def02483286a 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -459,6 +459,20 @@ __armv7_mmu_cache_on: mcr p15, 0, r0, c7, c5, 4 @ ISB mov pc, r12 +__fa526_cache_on: + mov r12, lr + bl __setup_mmu + mov r0, #0 + mcr p15, 0, r0, c7, c7, 0 @ Invalidate whole cache + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c8, c7, 0 @ flush UTLB + mrc p15, 0, r0, c1, c0, 0 @ read control reg + orr r0, r0, #0x1000 @ I-cache enable + bl __common_mmu_cache_on + mov r0, #0 + mcr p15, 0, r0, c8, c7, 0 @ flush UTLB + mov pc, r12 + __arm6_mmu_cache_on: mov r12, lr bl __setup_mmu @@ -636,6 +650,12 @@ proc_types: b __armv4_mmu_cache_off b __armv5tej_mmu_cache_flush + .word 0x66015261 @ FA526 + .word 0xff01fff1 + b __fa526_cache_on + b __armv4_mmu_cache_off + b __fa526_cache_flush + @ These match on the architecture ID .word 0x00020000 @ ARMv4T @@ -775,6 +795,12 @@ __armv4_mpu_cache_flush: mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr +__fa526_cache_flush: + mov r1, #0 + mcr p15, 0, r1, c7, c14, 0 @ clean and invalidate D cache + mcr p15, 0, r1, c7, c5, 0 @ flush I cache + mcr p15, 0, r1, c7, c10, 4 @ drain WB + mov pc, lr __armv6_mmu_cache_flush: mov r1, #0 diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 6cbd8fdc9f1f..a6b8b90ed57f 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -46,6 +46,14 @@ # define MULTI_CACHE 1 #endif +#if defined(CONFIG_CPU_FA526) +# ifdef _CACHE +# define MULTI_CACHE 1 +# else +# define _CACHE fa +# endif +#endif + #if defined(CONFIG_CPU_ARM926T) # ifdef _CACHE # define MULTI_CACHE 1 diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index f341c9dbd662..e6eb8a67b807 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -76,6 +76,14 @@ # endif #endif +#ifdef CONFIG_CPU_COPY_FA +# ifdef _USER +# define MULTI_USER 1 +# else +# define _USER fa +# endif +#endif + #ifdef CONFIG_CPU_SA1100 # ifdef _USER # define MULTI_USER 1 diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index db80203b68e0..00949281d3ee 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -89,6 +89,14 @@ # define CPU_NAME cpu_arm922 # endif # endif +# ifdef CONFIG_CPU_FA526 +# ifdef CPU_NAME +# undef MULTI_CPU +# define MULTI_CPU +# else +# define CPU_NAME cpu_fa526 +# endif +# endif # ifdef CONFIG_CPU_ARM925T # ifdef CPU_NAME # undef MULTI_CPU diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 811be55f338e..d6a4dad99c9b 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -125,6 +125,12 @@ extern unsigned int user_debug; : : "r" (0) : "memory") #define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ : : "r" (0) : "memory") +#elif defined(CONFIG_CPU_FA526) +#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ + : : "r" (0) : "memory") +#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ + : : "r" (0) : "memory") +#define dmb() __asm__ __volatile__ ("" : : : "memory") #else #define isb() __asm__ __volatile__ ("" : : : "memory") #define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index ffedd2494eab..a62218013c78 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -54,6 +54,7 @@ * v4wb - ARMv4 with write buffer without I TLB flush entry instruction * v4wbi - ARMv4 with write buffer with I TLB flush entry instruction * fr - Feroceon (v4wbi with non-outer-cacheable page table walks) + * fa - Faraday (v4 with write buffer with UTLB and branch target buffer (BTB)) * v6wbi - ARMv6 with write buffer with I TLB flush entry instruction * v7wbi - identical to v6wbi */ @@ -90,6 +91,22 @@ # define v4_always_flags (-1UL) #endif +#define fa_tlb_flags (TLB_WB | TLB_BTB | TLB_DCLEAN | \ + TLB_V4_U_FULL | TLB_V4_U_PAGE) + +#ifdef CONFIG_CPU_TLB_FA +# define fa_possible_flags fa_tlb_flags +# define fa_always_flags fa_tlb_flags +# ifdef _TLB +# define MULTI_TLB 1 +# else +# define _TLB fa +# endif +#else +# define fa_possible_flags 0 +# define fa_always_flags (-1UL) +#endif + #define v4wbi_tlb_flags (TLB_WB | TLB_DCLEAN | \ TLB_V4_I_FULL | TLB_V4_D_FULL | \ TLB_V4_I_PAGE | TLB_V4_D_PAGE) @@ -268,6 +285,7 @@ extern struct cpu_tlb_fns cpu_tlb; v4wbi_possible_flags | \ fr_possible_flags | \ v4wb_possible_flags | \ + fa_possible_flags | \ v6wbi_possible_flags | \ v7wbi_possible_flags) @@ -276,6 +294,7 @@ extern struct cpu_tlb_fns cpu_tlb; v4wbi_always_flags & \ fr_always_flags & \ v4wb_always_flags & \ + fa_always_flags & \ v6wbi_always_flags & \ v7wbi_always_flags) diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index d490f3773c01..bc3331863d9d 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -186,6 +186,24 @@ config CPU_ARM926T Say Y if you want support for the ARM926T processor. Otherwise, say N. +# FA526 +config CPU_FA526 + bool + select CPU_32v4 + select CPU_ABRT_EV4 + select CPU_PABRT_NOIFAR + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_CACHE_FA + select CPU_COPY_FA if MMU + select CPU_TLB_FA if MMU + help + The FA526 is a version of the ARMv4 compatible processor with + Branch Target Buffer, Unified TLB and cache line size 16. + + Say Y if you want support for the FA526 processor. + Otherwise, say N. + # ARM940T config CPU_ARM940T bool "Support ARM940T processor" if ARCH_INTEGRATOR @@ -484,6 +502,9 @@ config CPU_CACHE_VIVT config CPU_CACHE_VIPT bool +config CPU_CACHE_FA + bool + if MMU # The copy-page model config CPU_COPY_V3 @@ -498,6 +519,9 @@ config CPU_COPY_V4WB config CPU_COPY_FEROCEON bool +config CPU_COPY_FA + bool + config CPU_COPY_V6 bool @@ -528,6 +552,13 @@ config CPU_TLB_FEROCEON help Feroceon TLB (v4wbi with non-outer-cachable page table walks). +config CPU_TLB_FA + bool + help + Faraday ARM FA526 architecture, unified TLB with writeback cache + and invalidate instruction cache entry. Branch target buffer is + also supported. + config CPU_TLB_V6 bool @@ -638,7 +669,7 @@ config CPU_DCACHE_SIZE config CPU_DCACHE_WRITETHROUGH bool "Force write through D-cache" - depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020) && !CPU_DCACHE_DISABLE + depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_FA526) && !CPU_DCACHE_DISABLE default y if CPU_ARM925T help Say Y here to use the data cache in writethrough mode. Unless you @@ -653,7 +684,7 @@ config CPU_CACHE_ROUND_ROBIN config CPU_BPREDICT_DISABLE bool "Disable branch prediction" - depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7 + depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7 || CPU_FA526 help Say Y here to disable branch prediction. If unsure, say N. diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 480f78a3611a..40f941c2245c 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_CPU_CACHE_V4WT) += cache-v4wt.o obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o +obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o obj-$(CONFIG_CPU_COPY_V3) += copypage-v3.o obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o @@ -41,6 +42,7 @@ obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o context.o obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o obj-$(CONFIG_CPU_XSC3) += copypage-xsc3.o +obj-$(CONFIG_CPU_COPY_FA) += copypage-fa.o obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o @@ -49,6 +51,7 @@ obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o obj-$(CONFIG_CPU_TLB_FEROCEON) += tlb-v4wbi.o # reuse v4wbi TLB functions obj-$(CONFIG_CPU_TLB_V6) += tlb-v6.o obj-$(CONFIG_CPU_TLB_V7) += tlb-v7.o +obj-$(CONFIG_CPU_TLB_FA) += tlb-fa.o obj-$(CONFIG_CPU_ARM610) += proc-arm6_7.o obj-$(CONFIG_CPU_ARM710) += proc-arm6_7.o @@ -62,6 +65,7 @@ obj-$(CONFIG_CPU_ARM925T) += proc-arm925.o obj-$(CONFIG_CPU_ARM926T) += proc-arm926.o obj-$(CONFIG_CPU_ARM940T) += proc-arm940.o obj-$(CONFIG_CPU_ARM946E) += proc-arm946.o +obj-$(CONFIG_CPU_FA526) += proc-fa526.o obj-$(CONFIG_CPU_ARM1020) += proc-arm1020.o obj-$(CONFIG_CPU_ARM1020E) += proc-arm1020e.o obj-$(CONFIG_CPU_ARM1022) += proc-arm1022.o diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S new file mode 100644 index 000000000000..b63a8f7b95cf --- /dev/null +++ b/arch/arm/mm/cache-fa.S @@ -0,0 +1,220 @@ +/* + * linux/arch/arm/mm/cache-fa.S + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas + * + * Based on cache-v4wb.S: + * Copyright (C) 1997-2002 Russell king + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Processors: FA520 FA526 FA626 + */ +#include +#include +#include +#include + +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 16 + +/* + * The total size of the data cache. + */ +#ifdef CONFIG_ARCH_GEMINI +#define CACHE_DSIZE 8192 +#else +#define CACHE_DSIZE 16384 +#endif + +/* FIXME: put optimal value here. Current one is just estimation */ +#define CACHE_DLIMIT (CACHE_DSIZE * 2) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular address + * space. + */ +ENTRY(fa_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(fa_flush_kern_cache_all) + mov ip, #0 + mov r2, #VM_EXEC +__flush_whole_cache: + mcr p15, 0, ip, c7, c14, 0 @ clean/invalidate D cache + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive, page aligned) + * - end - end address (exclusive, page aligned) + * - flags - vma_area_struct flags describing address space + */ +ENTRY(fa_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT @ total size >= limit? + bhs __flush_whole_cache @ flush whole D cache + +1: tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I line + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ data write barrier + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_coherent_kern_range) + /* fall through */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * flush_kern_dcache_page(kaddr) + * + * Ensure that the data held in the page kaddr is written back + * to the page in question. + * + * - kaddr - kernel address (guaranteed to be page aligned) + */ +ENTRY(fa_flush_kern_dcache_page) + add r1, r0, #PAGE_SZ +1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_dma_inv_range) + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry + tst r1, #CACHE_DLINESIZE - 1 + bic r1, r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean (write back) the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_dma_clean_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_flush_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(fa_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + + __INITDATA + + .type fa_cache_fns, #object +ENTRY(fa_cache_fns) + .long fa_flush_kern_cache_all + .long fa_flush_user_cache_all + .long fa_flush_user_cache_range + .long fa_coherent_kern_range + .long fa_coherent_user_range + .long fa_flush_kern_dcache_page + .long fa_dma_inv_range + .long fa_dma_clean_range + .long fa_dma_flush_range + .size fa_cache_fns, . - fa_cache_fns diff --git a/arch/arm/mm/copypage-fa.c b/arch/arm/mm/copypage-fa.c new file mode 100644 index 000000000000..b2a6008b0111 --- /dev/null +++ b/arch/arm/mm/copypage-fa.c @@ -0,0 +1,86 @@ +/* + * linux/arch/arm/lib/copypage-fa.S + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas + * + * Based on copypage-v4wb.S: + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + +/* + * Faraday optimised copy_user_page + */ +static void __naked +fa_copy_user_page(void *kto, const void *kfrom) +{ + asm("\ + stmfd sp!, {r4, lr} @ 2\n\ + mov r2, %0 @ 1\n\ +1: ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + stmia r0, {r3, r4, ip, lr} @ 4\n\ + mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add r0, r0, #16 @ 1\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + stmia r0, {r3, r4, ip, lr} @ 4\n\ + mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add r0, r0, #16 @ 1\n\ + subs r2, r2, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r2, c7, c10, 4 @ 1 drain WB\n\ + ldmfd sp!, {r4, pc} @ 3" + : + : "I" (PAGE_SIZE / 32)); +} + +void fa_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to, KM_USER0); + kfrom = kmap_atomic(from, KM_USER1); + fa_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom, KM_USER1); + kunmap_atomic(kto, KM_USER0); +} + +/* + * Faraday optimised clear_user_page + * + * Same story as above. + */ +void fa_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page, KM_USER0); + asm volatile("\ + mov r1, %2 @ 1\n\ + mov r2, #0 @ 1\n\ + mov r3, #0 @ 1\n\ + mov ip, #0 @ 1\n\ + mov lr, #0 @ 1\n\ +1: stmia %0, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add %0, %0, #16 @ 1\n\ + stmia %0, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add %0, %0, #16 @ 1\n\ + subs r1, r1, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 32) + : "r1", "r2", "r3", "ip", "lr"); + kunmap_atomic(kaddr, KM_USER0); +} + +struct cpu_user_fns fa_user_fns __initdata = { + .cpu_clear_user_highpage = fa_clear_user_highpage, + .cpu_copy_user_highpage = fa_copy_user_highpage, +}; diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S new file mode 100644 index 000000000000..08b8a955d5d7 --- /dev/null +++ b/arch/arm/mm/proc-fa526.S @@ -0,0 +1,248 @@ +/* + * linux/arch/arm/mm/proc-fa526.S: MMU functions for FA526 + * + * Written by : Luke Lee + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * + * These are the low level assembler for performing cache and TLB + * functions on the fa526. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proc-macros.S" + +#define CACHE_DLINESIZE 16 + + .text +/* + * cpu_fa526_proc_init() + */ +ENTRY(cpu_fa526_proc_init) + mov pc, lr + +/* + * cpu_fa526_proc_fin() + */ +ENTRY(cpu_fa526_proc_fin) + stmfd sp!, {lr} + mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE + msr cpsr_c, ip + bl fa_flush_kern_cache_all + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + nop + nop + ldmfd sp!, {pc} + +/* + * cpu_fa526_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 4 +ENTRY(cpu_fa526_reset) +/* TODO: Use CP8 if possible... */ + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + bic ip, ip, #0x0800 @ BTB off + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + nop + nop + mov pc, r0 + +/* + * cpu_fa526_do_idle() + */ + .align 4 +ENTRY(cpu_fa526_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + + +ENTRY(cpu_fa526_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_fa526_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 4 +ENTRY(cpu_fa526_switch_mm) +#ifdef CONFIG_MMU + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else + mcr p15, 0, ip, c7, c14, 0 @ clean and invalidate whole D cache +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c5, 6 @ invalidate BTB since mm changed + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + mcr p15, 0, ip, c7, c5, 4 @ prefetch flush + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate UTLB +#endif + mov pc, lr + +/* + * cpu_fa526_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 4 +ENTRY(cpu_fa526_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + mov pc, lr + + __INIT + + .type __fa526_setup, #function +__fa526_setup: + /* On return of this routine, r0 must carry correct flags for CFG register */ + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + mcr p15, 0, r0, c7, c5, 5 @ invalidate IScratchpad RAM + + mov r0, #1 + mcr p15, 0, r0, c1, c1, 0 @ turn-on ECR + + mov r0, #0 + mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB All + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + + mov r0, #0x1f @ Domains 0, 1 = manager, 2 = client + mcr p15, 0, r0, c3, c0 @ load domain access register + + mrc p15, 0, r0, c1, c0 @ get control register v4 + ldr r5, fa526_cr1_clear + bic r0, r0, r5 + ldr r5, fa526_cr1_set + orr r0, r0, r5 + mov pc, lr + .size __fa526_setup, . - __fa526_setup + + /* + * .RVI ZFRS BLDP WCAM + * ..11 1001 .111 1101 + * + */ + .type fa526_cr1_clear, #object + .type fa526_cr1_set, #object +fa526_cr1_clear: + .word 0x3f3f +fa526_cr1_set: + .word 0x397D + + __INITDATA + +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + .type fa526_processor_functions, #object +fa526_processor_functions: + .word v4_early_abort + .word pabort_noifar + .word cpu_fa526_proc_init + .word cpu_fa526_proc_fin + .word cpu_fa526_reset + .word cpu_fa526_do_idle + .word cpu_fa526_dcache_clean_area + .word cpu_fa526_switch_mm + .word cpu_fa526_set_pte_ext + .size fa526_processor_functions, . - fa526_processor_functions + + .section ".rodata" + + .type cpu_arch_name, #object +cpu_arch_name: + .asciz "armv4" + .size cpu_arch_name, . - cpu_arch_name + + .type cpu_elf_name, #object +cpu_elf_name: + .asciz "v4" + .size cpu_elf_name, . - cpu_elf_name + + .type cpu_fa526_name, #object +cpu_fa526_name: + .asciz "FA526" + .size cpu_fa526_name, . - cpu_fa526_name + + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __fa526_proc_info,#object +__fa526_proc_info: + .long 0x66015261 + .long 0xff01fff1 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __fa526_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF + .long cpu_fa526_name + .long fa526_processor_functions + .long fa_tlb_fns + .long fa_user_fns + .long fa_cache_fns + .size __fa526_proc_info, . - __fa526_proc_info diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S new file mode 100644 index 000000000000..9694f1f6f485 --- /dev/null +++ b/arch/arm/mm/tlb-fa.S @@ -0,0 +1,75 @@ +/* + * linux/arch/arm/mm/tlb-fa.S + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas + * + * Based on tlb-v4wbi.S: + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARM architecture version 4, Faraday variation. + * This assume an unified TLBs, with a write buffer, and branch target buffer (BTB) + * + * Processors: FA520 FA526 FA626 + */ +#include +#include +#include +#include +#include "proc-macros.S" + + +/* + * flush_user_tlb_range(start, end, mm) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - range start address + * - end - range end address + * - mm - mm_struct describing address space + */ + .align 4 +ENTRY(fa_flush_user_tlb_range) + vma_vm_mm ip, r2 + act_mm r3 @ get current->active_mm + eors r3, ip, r3 @ == mm ? + movne pc, lr @ no, we dont do anything + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mcr p15, 0, r3, c7, c5, 6 @ invalidate BTB + mcr p15, 0, r3, c7, c10, 4 @ data write barrier + mov pc, lr + + +ENTRY(fa_flush_kern_tlb_range) + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mcr p15, 0, r3, c7, c5, 6 @ invalidate BTB + mcr p15, 0, r3, c7, c10, 4 @ data write barrier + mcr p15, 0, r3, c7, c5, 4 @ prefetch flush + mov pc, lr + + __INITDATA + + .type fa_tlb_fns, #object +ENTRY(fa_tlb_fns) + .long fa_flush_user_tlb_range + .long fa_flush_kern_tlb_range + .long fa_tlb_flags + .size fa_tlb_fns, . - fa_tlb_fns -- cgit v1.2.3