diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-03-04 00:06:09 +0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-03-04 00:06:09 +0400 |
commit | 8fd5e7a2d9574b3cac1c9264ad1aed3b613ed6fe (patch) | |
tree | 5696f5d31c6c75b71bfc4852fb234b773e266cfe | |
parent | 529e5fbcd8d3cb48cf824ac8fde91cc80a9e985f (diff) | |
parent | c60ac31542e93499b58dcfc1e3f6550ba5b5728e (diff) | |
download | linux-8fd5e7a2d9574b3cac1c9264ad1aed3b613ed6fe.tar.xz |
Merge tag 'metag-v3.9-rc1-v4' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/metag
Pull new ImgTec Meta architecture from James Hogan:
"This adds core architecture support for Imagination's Meta processor
cores, followed by some later miscellaneous arch/metag cleanups and
fixes which I kept separate to ease review:
- Support for basic Meta 1 (ATP) and Meta 2 (HTP) core architecture
- A few fixes all over, particularly for symbol prefixes
- A few privilege protection fixes
- Several cleanups (setup.c includes, split out a lot of
metag_ksyms.c)
- Fix some missing exports
- Convert hugetlb to use vm_unmapped_area()
- Copy device tree to non-init memory
- Provide dma_get_sgtable()"
* tag 'metag-v3.9-rc1-v4' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/metag: (61 commits)
metag: Provide dma_get_sgtable()
metag: prom.h: remove declaration of metag_dt_memblock_reserve()
metag: copy devicetree to non-init memory
metag: cleanup metag_ksyms.c includes
metag: move mm/init.c exports out of metag_ksyms.c
metag: move usercopy.c exports out of metag_ksyms.c
metag: move setup.c exports out of metag_ksyms.c
metag: move kick.c exports out of metag_ksyms.c
metag: move traps.c exports out of metag_ksyms.c
metag: move irq enable out of irqflags.h on SMP
genksyms: fix metag symbol prefix on crc symbols
metag: hugetlb: convert to vm_unmapped_area()
metag: export clear_page and copy_page
metag: export metag_code_cache_flush_all
metag: protect more non-MMU memory regions
metag: make TXPRIVEXT bits explicit
metag: kernel/setup.c: sort includes
perf: Enable building perf tools for Meta
metag: add boot time LNKGET/LNKSET check
metag: add __init to metag_cache_probe()
...
200 files changed, 26934 insertions, 8 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 0f3e8bbab8d7..45b3df936d2f 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -299,6 +299,8 @@ memory-hotplug.txt - Hotpluggable memory support, how to use and current status. memory.txt - info on typical Linux memory problems. +metag/ + - directory with info about Linux on Meta architecture. mips/ - directory with info about Linux on MIPS architecture. misc-devices/ diff --git a/Documentation/devicetree/bindings/metag/meta-intc.txt b/Documentation/devicetree/bindings/metag/meta-intc.txt new file mode 100644 index 000000000000..8c47dcbfabc6 --- /dev/null +++ b/Documentation/devicetree/bindings/metag/meta-intc.txt @@ -0,0 +1,82 @@ +* Meta External Trigger Controller Binding + +This binding specifies what properties must be available in the device tree +representation of a Meta external trigger controller. + +Required properties: + + - compatible: Specifies the compatibility list for the interrupt controller. + The type shall be <string> and the value shall include "img,meta-intc". + + - num-banks: Specifies the number of interrupt banks (each of which can + handle 32 interrupt sources). + + - interrupt-controller: The presence of this property identifies the node + as an interupt controller. No property value shall be defined. + + - #interrupt-cells: Specifies the number of cells needed to encode an + interrupt source. The type shall be a <u32> and the value shall be 2. + + - #address-cells: Specifies the number of cells needed to encode an + address. The type shall be <u32> and the value shall be 0. As such, + 'interrupt-map' nodes do not have to specify a parent unit address. + +Optional properties: + + - no-mask: The controller doesn't have any mask registers. + +* Interrupt Specifier Definition + + Interrupt specifiers consists of 2 cells encoded as follows: + + - <1st-cell>: The interrupt-number that identifies the interrupt source. + + - <2nd-cell>: The Linux interrupt flags containing level-sense information, + encoded as follows: + 1 = edge triggered + 4 = level-sensitive + +* Examples + +Example 1: + + /* + * Meta external trigger block + */ + intc: intc { + // This is an interrupt controller node. + interrupt-controller; + + // No address cells so that 'interrupt-map' nodes which + // reference this interrupt controller node do not need a parent + // address specifier. + #address-cells = <0>; + + // Two cells to encode interrupt sources. + #interrupt-cells = <2>; + + // Number of interrupt banks + num-banks = <2>; + + // No HWMASKEXT is available (specify on Chorus2 and Comet ES1) + no-mask; + + // Compatible with Meta hardware trigger block. + compatible = "img,meta-intc"; + }; + +Example 2: + + /* + * An interrupt generating device that is wired to a Meta external + * trigger block. + */ + uart1: uart@0x02004c00 { + // Interrupt source '5' that is level-sensitive. + // Note that there are only two cells as specified in the + // interrupt parent's '#interrupt-cells' property. + interrupts = <5 4 /* level */>; + + // The interrupt controller that this device is wired to. + interrupt-parent = <&intc>; + }; diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 3a54fca730c0..4609e81dbc37 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -978,6 +978,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. If specified, z/VM IUCV HVC accepts connections from listed z/VM user IDs only. + hwthread_map= [METAG] Comma-separated list of Linux cpu id to + hardware thread id mappings. + Format: <cpu>:<hwthread> + keep_bootcon [KNL] Do not unregister boot console at start. This is only useful for debugging when something happens in the window diff --git a/Documentation/metag/00-INDEX b/Documentation/metag/00-INDEX new file mode 100644 index 000000000000..db11c513bd5c --- /dev/null +++ b/Documentation/metag/00-INDEX @@ -0,0 +1,4 @@ +00-INDEX + - this file +kernel-ABI.txt + - Documents metag ABI details diff --git a/Documentation/metag/kernel-ABI.txt b/Documentation/metag/kernel-ABI.txt new file mode 100644 index 000000000000..7b8dee83b9c1 --- /dev/null +++ b/Documentation/metag/kernel-ABI.txt @@ -0,0 +1,256 @@ + ========================== + KERNEL ABIS FOR METAG ARCH + ========================== + +This document describes the Linux ABIs for the metag architecture, and has the +following sections: + + (*) Outline of registers + (*) Userland registers + (*) Kernel registers + (*) System call ABI + (*) Calling conventions + + +==================== +OUTLINE OF REGISTERS +==================== + +The main Meta core registers are arranged in units: + + UNIT Type DESCRIPTION GP EXT PRIV GLOBAL + ======= ======= =============== ======= ======= ======= ======= + CT Special Control unit + D0 General Data unit 0 0-7 8-15 16-31 16-31 + D1 General Data unit 1 0-7 8-15 16-31 16-31 + A0 General Address unit 0 0-3 4-7 8-15 8-15 + A1 General Address unit 1 0-3 4-7 8-15 8-15 + PC Special PC unit 0 1 + PORT Special Ports + TR Special Trigger unit 0-7 + TT Special Trace unit 0-5 + FX General FP unit 0-15 + +GP registers form part of the main context. + +Extended context registers (EXT) may not be present on all hardware threads and +can be context switched if support is enabled and the appropriate bits are set +in e.g. the D0.8 register to indicate what extended state to preserve. + +Global registers are shared between threads and are privilege protected. + +See arch/metag/include/asm/metag_regs.h for definitions relating to core +registers and the fields and bits they contain. See the TRMs for further details +about special registers. + +Several special registers are preserved in the main context, these are the +interesting ones: + + REG (ALIAS) PURPOSE + ======================= =============================================== + CT.1 (TXMODE) Processor mode bits (particularly for DSP) + CT.2 (TXSTATUS) Condition flags and LSM_STEP (MGET/MSET step) + CT.3 (TXRPT) Branch repeat counter + PC.0 (PC) Program counter + +Some of the general registers have special purposes in the ABI and therefore +have aliases: + + D0 REG (ALIAS) PURPOSE D1 REG (ALIAS) PURPOSE + =============== =============== =============== ======================= + D0.0 (D0Re0) 32bit result D1.0 (D1Re0) Top half of 64bit result + D0.1 (D0Ar6) Argument 6 D1.1 (D1Ar5) Argument 5 + D0.2 (D0Ar4) Argument 4 D1.2 (D1Ar3) Argument 3 + D0.3 (D0Ar2) Argument 2 D1.3 (D1Ar1) Argument 1 + D0.4 (D0FrT) Frame temp D1.4 (D1RtP) Return pointer + D0.5 Call preserved D1.5 Call preserved + D0.6 Call preserved D1.6 Call preserved + D0.7 Call preserved D1.7 Call preserved + + A0 REG (ALIAS) PURPOSE A1 REG (ALIAS) PURPOSE + =============== =============== =============== ======================= + A0.0 (A0StP) Stack pointer A1.0 (A1GbP) Global base pointer + A0.1 (A0FrP) Frame pointer A1.1 (A1LbP) Local base pointer + A0.2 A1.2 + A0.3 A1.3 + + +================== +USERLAND REGISTERS +================== + +All the general purpose D0, D1, A0, A1 registers are preserved when entering the +kernel (including asynchronous events such as interrupts and timer ticks) except +the following which have special purposes in the ABI: + + REGISTERS WHEN STATUS PURPOSE + =============== ======= =============== =============================== + D0.8 DSP Preserved ECH, determines what extended + DSP state to preserve. + A0.0 (A0StP) ALWAYS Preserved Stack >= A0StP may be clobbered + at any time by the creation of a + signal frame. + A1.0 (A1GbP) SMP Clobbered Used as temporary for loading + kernel stack pointer and saving + core context. + A0.15 !SMP Protected Stores kernel stack pointer. + A1.15 ALWAYS Protected Stores kernel base pointer. + +On UP A0.15 is used to store the kernel stack pointer for storing the userland +context. A0.15 is global between hardware threads though which means it cannot +be used on SMP for this purpose. Since no protected local registers are +available A1GbP is reserved for use as a temporary to allow a percpu stack +pointer to be loaded for storing the rest of the context. + + +================ +KERNEL REGISTERS +================ + +When in the kernel the following registers have special purposes in the ABI: + + REGISTERS WHEN STATUS PURPOSE + =============== ======= =============== =============================== + A0.0 (A0StP) ALWAYS Preserved Stack >= A0StP may be clobbered + at any time by the creation of + an irq signal frame. + A1.0 (A1GbP) ALWAYS Preserved Reserved (kernel base pointer). + + +=============== +SYSTEM CALL ABI +=============== + +When a system call is made, the following registers are effective: + + REGISTERS CALL RETURN + =============== ======================= =============================== + D0.0 (D0Re0) Return value (or -errno) + D1.0 (D1Re0) System call number Clobbered + D0.1 (D0Ar6) Syscall arg #6 Preserved + D1.1 (D1Ar5) Syscall arg #5 Preserved + D0.2 (D0Ar4) Syscall arg #4 Preserved + D1.2 (D1Ar3) Syscall arg #3 Preserved + D0.3 (D0Ar2) Syscall arg #2 Preserved + D1.3 (D1Ar1) Syscall arg #1 Preserved + +Due to the limited number of argument registers and some system calls with badly +aligned 64-bit arguments, 64-bit values are always packed in consecutive +arguments, even if this is contrary to the normal calling conventions (where the +two halves would go in a matching pair of data registers). + +For example fadvise64_64 usually has the signature: + + long sys_fadvise64_64(i32 fd, i64 offs, i64 len, i32 advice); + +But for metag fadvise64_64 is wrapped so that the 64-bit arguments are packed: + + long sys_fadvise64_64_metag(i32 fd, i32 offs_lo, + i32 offs_hi, i32 len_lo, + i32 len_hi, i32 advice) + +So the arguments are packed in the registers like this: + + D0 REG (ALIAS) VALUE D1 REG (ALIAS) VALUE + =============== =============== =============== ======================= + D0.1 (D0Ar6) advice D1.1 (D1Ar5) hi(len) + D0.2 (D0Ar4) lo(len) D1.2 (D1Ar3) hi(offs) + D0.3 (D0Ar2) lo(offs) D1.3 (D1Ar1) fd + + +=================== +CALLING CONVENTIONS +=================== + +These calling conventions apply to both user and kernel code. The stack grows +from low addresses to high addresses in the metag ABI. The stack pointer (A0StP) +should always point to the next free address on the stack and should at all +times be 64-bit aligned. The following registers are effective at the point of a +call: + + REGISTERS CALL RETURN + =============== ======================= =============================== + D0.0 (D0Re0) 32bit return value + D1.0 (D1Re0) Upper half of 64bit return value + D0.1 (D0Ar6) 32bit argument #6 Clobbered + D1.1 (D1Ar5) 32bit argument #5 Clobbered + D0.2 (D0Ar4) 32bit argument #4 Clobbered + D1.2 (D1Ar3) 32bit argument #3 Clobbered + D0.3 (D0Ar2) 32bit argument #2 Clobbered + D1.3 (D1Ar1) 32bit argument #1 Clobbered + D0.4 (D0FrT) Clobbered + D1.4 (D1RtP) Return pointer Clobbered + D{0-1}.{5-7} Preserved + A0.0 (A0StP) Stack pointer Preserved + A1.0 (A0GbP) Preserved + A0.1 (A0FrP) Frame pointer Preserved + A1.1 (A0LbP) Preserved + A{0-1},{2-3} Clobbered + +64-bit arguments are placed in matching pairs of registers (i.e. the same +register number in both D0 and D1 units), with the least significant half in D0 +and the most significant half in D1, leaving a gap where necessary. Futher +arguments are stored on the stack in reverse order (earlier arguments at higher +addresses): + + ADDRESS 0 1 2 3 4 5 6 7 + =============== ===== ===== ===== ===== ===== ===== ===== ===== + A0StP --> + A0StP-0x08 32bit argument #8 32bit argument #7 + A0StP-0x10 32bit argument #10 32bit argument #9 + +Function prologues tend to look a bit like this: + + /* If frame pointer in use, move it to frame temp register so it can be + easily pushed onto stack */ + MOV D0FrT,A0FrP + + /* If frame pointer in use, set it to stack pointer */ + ADD A0FrP,A0StP,#0 + + /* Preserve D0FrT, D1RtP, D{0-1}.{5-7} on stack, incrementing A0StP */ + MSETL [A0StP++],D0FrT,D0.5,D0.6,D0.7 + + /* Allocate some stack space for local variables */ + ADD A0StP,A0StP,#0x10 + +At this point the stack would look like this: + + ADDRESS 0 1 2 3 4 5 6 7 + =============== ===== ===== ===== ===== ===== ===== ===== ===== + A0StP --> + A0StP-0x08 + A0StP-0x10 + A0StP-0x18 Old D0.7 Old D1.7 + A0StP-0x20 Old D0.6 Old D1.6 + A0StP-0x28 Old D0.5 Old D1.5 + A0FrP --> Old A0FrP (frame ptr) Old D1RtP (return ptr) + A0FrP-0x08 32bit argument #8 32bit argument #7 + A0FrP-0x10 32bit argument #10 32bit argument #9 + +Function epilogues tend to differ depending on the use of a frame pointer. An +example of a frame pointer epilogue: + + /* Restore D0FrT, D1RtP, D{0-1}.{5-7} from stack, incrementing A0FrP */ + MGETL D0FrT,D0.5,D0.6,D0.7,[A0FrP++] + /* Restore stack pointer to where frame pointer was before increment */ + SUB A0StP,A0FrP,#0x20 + /* Restore frame pointer from frame temp */ + MOV A0FrP,D0FrT + /* Return to caller via restored return pointer */ + MOV PC,D1RtP + +If the function hasn't touched the frame pointer, MGETL cannot be safely used +with A0StP as it always increments and that would expose the stack to clobbering +by interrupts (kernel) or signals (user). Therefore it's common to see the MGETL +split into separate GETL instructions: + + /* Restore D0FrT, D1RtP, D{0-1}.{5-7} from stack */ + GETL D0FrT,D1RtP,[A0StP+#-0x30] + GETL D0.5,D1.5,[A0StP+#-0x28] + GETL D0.6,D1.6,[A0StP+#-0x20] + GETL D0.7,D1.7,[A0StP+#-0x18] + /* Restore stack pointer */ + SUB A0StP,A0StP,#0x30 + /* Return to caller via restored return pointer */ + MOV PC,D1RtP diff --git a/MAINTAINERS b/MAINTAINERS index aea0adf414dc..e95b1e944eb7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5204,6 +5204,18 @@ F: drivers/mtd/ F: include/linux/mtd/ F: include/uapi/mtd/ +METAG ARCHITECTURE +M: James Hogan <james.hogan@imgtec.com> +S: Supported +F: arch/metag/ +F: Documentation/metag/ +F: Documentation/devicetree/bindings/metag/ +F: drivers/clocksource/metag_generic.c +F: drivers/irqchip/irq-metag.c +F: drivers/irqchip/irq-metag-ext.c +F: drivers/tty/metag_da.c +F: fs/imgdafs/ + MICROBLAZE ARCHITECTURE M: Michal Simek <monstr@monstr.eu> L: microblaze-uclinux@itee.uq.edu.au (moderated for non-subscribers) diff --git a/arch/Kconfig b/arch/Kconfig index dcd91a85536a..5a1779c93940 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -103,6 +103,22 @@ config UPROBES If in doubt, say "N". +config HAVE_64BIT_ALIGNED_ACCESS + def_bool 64BIT && !HAVE_EFFICIENT_UNALIGNED_ACCESS + help + Some architectures require 64 bit accesses to be 64 bit + aligned, which also requires structs containing 64 bit values + to be 64 bit aligned too. This includes some 32 bit + architectures which can do 64 bit accesses, as well as 64 bit + architectures without unaligned access. + + This symbol should be selected by an architecture if 64 bit + accesses are required to be 64 bit aligned in this way even + though it is not a 64 bit architecture. + + See Documentation/unaligned-memory-access.txt for more + information on the topic of unaligned memory accesses. + config HAVE_EFFICIENT_UNALIGNED_ACCESS bool help diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig new file mode 100644 index 000000000000..afc8973d1488 --- /dev/null +++ b/arch/metag/Kconfig @@ -0,0 +1,290 @@ +config SYMBOL_PREFIX + string + default "_" + +config METAG + def_bool y + select EMBEDDED + select GENERIC_ATOMIC64 + select GENERIC_CLOCKEVENTS + select GENERIC_IRQ_SHOW + select GENERIC_SMP_IDLE_THREAD + select HAVE_64BIT_ALIGNED_ACCESS + select HAVE_ARCH_TRACEHOOK + select HAVE_C_RECORDMCOUNT + select HAVE_DEBUG_KMEMLEAK + select HAVE_DYNAMIC_FTRACE + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_TRACE_MCOUNT_TEST + select HAVE_GENERIC_HARDIRQS + select HAVE_KERNEL_BZIP2 + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZO + select HAVE_KERNEL_XZ + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select HAVE_MOD_ARCH_SPECIFIC + select HAVE_PERF_EVENTS + select HAVE_SYSCALL_TRACEPOINTS + select IRQ_DOMAIN + select MODULES_USE_ELF_RELA + select OF + select OF_EARLY_FLATTREE + select SPARSE_IRQ + +config STACKTRACE_SUPPORT + def_bool y + +config LOCKDEP_SUPPORT + def_bool y + +config HAVE_LATENCYTOP_SUPPORT + def_bool y + +config RWSEM_GENERIC_SPINLOCK + def_bool y + +config RWSEM_XCHGADD_ALGORITHM + bool + +config GENERIC_HWEIGHT + def_bool y + +config GENERIC_CALIBRATE_DELAY + def_bool y + +config GENERIC_GPIO + def_bool n + +config NO_IOPORT + def_bool y + +source "init/Kconfig" + +source "kernel/Kconfig.freezer" + +menu "Processor type and features" + +config MMU + def_bool y + +config STACK_GROWSUP + def_bool y + +config HOTPLUG_CPU + bool "Enable CPU hotplug support" + depends on SMP + help + Say Y here to allow turning CPUs off and on. CPUs can be + controlled through /sys/devices/system/cpu. + + Say N if you want to disable CPU hotplug. + +config HIGHMEM + bool "High Memory Support" + help + The address space of Meta processors is only 4 Gigabytes large + and it has to accommodate user address space, kernel address + space as well as some memory mapped IO. That means that, if you + have a large amount of physical memory and/or IO, not all of the + memory can be "permanently mapped" by the kernel. The physical + memory that is not permanently mapped is called "high memory". + + Depending on the selected kernel/user memory split, minimum + vmalloc space and actual amount of RAM, you may not need this + option which should result in a slightly faster kernel. + + If unsure, say n. + +source "arch/metag/mm/Kconfig" + +source "arch/metag/Kconfig.soc" + +config METAG_META12 + bool + help + Select this from the SoC config symbol to indicate that it contains a + Meta 1.2 core. + +config METAG_META21 + bool + help + Select this from the SoC config symbol to indicate that it contains a + Meta 2.1 core. + +config SMP + bool "Symmetric multi-processing support" + depends on METAG_META21 && METAG_META21_MMU + select USE_GENERIC_SMP_HELPERS + help + This enables support for systems with more than one thread running + Linux. If you have a system with only one thread running Linux, + say N. Otherwise, say Y. + +config NR_CPUS + int "Maximum number of CPUs (2-4)" if SMP + range 2 4 if SMP + default "1" if !SMP + default "4" if SMP + +config METAG_SMP_WRITE_REORDERING + bool + help + This attempts to prevent cache-memory incoherence due to external + reordering of writes from different hardware threads when SMP is + enabled. It adds fences (system event 0) to smp_mb and smp_rmb in an + attempt to catch some of the cases, and also before writes to shared + memory in LOCK1 protected atomics and spinlocks. + This will not completely prevent cache incoherency on affected cores. + +config METAG_LNKGET_AROUND_CACHE + bool + depends on METAG_META21 + help + This indicates that the LNKGET/LNKSET instructions go around the + cache, which requires some extra cache flushes when the memory needs + to be accessed by normal GET/SET instructions too. + +choice + prompt "Atomicity primitive" + default METAG_ATOMICITY_LNKGET + help + This option selects the mechanism for performing atomic operations. + +config METAG_ATOMICITY_IRQSOFF + depends on !SMP + bool "irqsoff" + help + This option disables interrupts to achieve atomicity. This mechanism + is not SMP-safe. + +config METAG_ATOMICITY_LNKGET + depends on METAG_META21 + bool "lnkget/lnkset" + help + This option uses the LNKGET and LNKSET instructions to achieve + atomicity. LNKGET/LNKSET are load-link/store-conditional instructions. + Choose this option if your system requires low latency. + +config METAG_ATOMICITY_LOCK1 + depends on SMP + bool "lock1" + help + This option uses the LOCK1 instruction for atomicity. This is mainly + provided as a debugging aid if the lnkget/lnkset atomicity primitive + isn't working properly. + +endchoice + +config METAG_FPU + bool "FPU Support" + depends on METAG_META21 + default y + help + This option allows processes to use FPU hardware available with this + CPU. If this option is not enabled FPU registers will not be saved + and restored on context-switch. + + If you plan on running programs which are compiled to use hard floats + say Y here. + +config METAG_DSP + bool "DSP Support" + help + This option allows processes to use DSP hardware available + with this CPU. If this option is not enabled DSP registers + will not be saved and restored on context-switch. + + If you plan on running DSP programs say Y here. + +config METAG_PERFCOUNTER_IRQS + bool "PerfCounters interrupt support" + depends on METAG_META21 + help + This option enables using interrupts to collect information from + Performance Counters. This option is supported in new META21 + (starting from HTP265). + + When disabled, Performance Counters information will be collected + based on Timer Interrupt. + +config METAG_DA + bool "DA support" + help + Say Y if you plan to use a DA debug adapter with Linux. The presence + of the DA will be detected automatically at boot, so it is safe to say + Y to this option even when booting without a DA. + + This enables support for services provided by DA JTAG debug adapters, + such as: + - communication over DA channels (such as the console driver). + - use of the DA filesystem. + +menu "Boot options" + +config METAG_BUILTIN_DTB + bool "Embed DTB in kernel image" + default y + help + Embeds a device tree binary in the kernel image. + +config METAG_BUILTIN_DTB_NAME + string "Built in DTB" + depends on METAG_BUILTIN_DTB + help + Set the name of the DTB to embed (leave blank to pick one + automatically based on kernel configuration). + +config CMDLINE_BOOL + bool "Default bootloader kernel arguments" + +config CMDLINE + string "Kernel command line" + depends on CMDLINE_BOOL + help + On some architectures there is currently no way for the boot loader + to pass arguments to the kernel. For these architectures, you should + supply some command-line options at build time by entering them + here. + +config CMDLINE_FORCE + bool "Force default kernel command string" + depends on CMDLINE_BOOL + help + Set this to have arguments from the default kernel command string + override those passed by the boot loader. + +endmenu + +source "kernel/Kconfig.preempt" + +source kernel/Kconfig.hz + +endmenu + +menu "Power management options" + +source kernel/power/Kconfig + +endmenu + +menu "Executable file formats" + +source "fs/Kconfig.binfmt" + +endmenu + +source "net/Kconfig" + +source "drivers/Kconfig" + +source "fs/Kconfig" + +source "arch/metag/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + +source "lib/Kconfig" diff --git a/arch/metag/Kconfig.debug b/arch/metag/Kconfig.debug new file mode 100644 index 000000000000..e45bbf6a7a5d --- /dev/null +++ b/arch/metag/Kconfig.debug @@ -0,0 +1,40 @@ +menu "Kernel hacking" + +config TRACE_IRQFLAGS_SUPPORT + bool + default y + +source "lib/Kconfig.debug" + +config DEBUG_STACKOVERFLOW + bool "Check for stack overflows" + depends on DEBUG_KERNEL + help + This option will cause messages to be printed if free stack space + drops below a certain limit. + +config 4KSTACKS + bool "Use 4Kb for kernel stacks instead of 8Kb" + depends on DEBUG_KERNEL + help + If you say Y here the kernel will use a 4Kb stacksize for the + kernel stack attached to each process/thread. This facilitates + running more threads on a system and also reduces the pressure + on the VM subsystem for higher order allocations. This option + will also use IRQ stacks to compensate for the reduced stackspace. + +config METAG_FUNCTION_TRACE + bool "Output Meta real-time trace data for function entry/exit" + help + If you say Y here the kernel will use the Meta hardware trace + unit to output information about function entry and exit that + can be used by a debugger for profiling and call-graphs. + +config METAG_POISON_CATCH_BUFFERS + bool "Poison catch buffer contents on kernel entry" + help + If you say Y here the kernel will write poison data to the + catch buffer registers on kernel entry. This will make any + problem with catch buffer handling much more apparent. + +endmenu diff --git a/arch/metag/Kconfig.soc b/arch/metag/Kconfig.soc new file mode 100644 index 000000000000..ec079cfb7c6a --- /dev/null +++ b/arch/metag/Kconfig.soc @@ -0,0 +1,55 @@ +choice + prompt "SoC Type" + default META21_FPGA + +config META12_FPGA + bool "Meta 1.2 FPGA" + select METAG_META12 + help + This is a Meta 1.2 FPGA bitstream, just a bare CPU. + +config META21_FPGA + bool "Meta 2.1 FPGA" + select METAG_META21 + help + This is a Meta 2.1 FPGA bitstream, just a bare CPU. + +endchoice + +menu "SoC configuration" + +if METAG_META21 + +# Meta 2.x specific options + +config METAG_META21_MMU + bool "Meta 2.x MMU mode" + default y + help + Use the Meta 2.x MMU in extended mode. + +config METAG_UNALIGNED + bool "Meta 2.x unaligned access checking" + default y + help + All memory accesses will be checked for alignment and an exception + raised on unaligned accesses. This feature does cost performance + but without it there will be no notification of this type of error. + +config METAG_USER_TCM + bool "Meta on-chip memory support for userland" + select GENERIC_ALLOCATOR + default y + help + Allow the on-chip memories of Meta SoCs to be used by user + applications. + +endif + +config METAG_HALT_ON_PANIC + bool "Halt the core on panic" + help + Halt the core when a panic occurs. This is useful when running + pre-production silicon or in an FPGA environment. + +endmenu diff --git a/arch/metag/Makefile b/arch/metag/Makefile new file mode 100644 index 000000000000..81bd6a1c7483 --- /dev/null +++ b/arch/metag/Makefile @@ -0,0 +1,87 @@ +# +# metag/Makefile +# +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. Remember to do have actions +# for "archclean" cleaning up for this architecture. +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1994 by Linus Torvalds +# 2007,2008,2012 by Imagination Technologies Ltd. +# + +LDFLAGS := +OBJCOPYFLAGS := -O binary -R .note -R .comment -S + +checkflags-$(CONFIG_METAG_META12) += -DMETAC_1_2 +checkflags-$(CONFIG_METAG_META21) += -DMETAC_2_1 +CHECKFLAGS += -D__metag__ $(checkflags-y) + +KBUILD_DEFCONFIG := meta2_defconfig + +sflags-$(CONFIG_METAG_META12) += -mmetac=1.2 +ifeq ($(CONFIG_METAG_META12),y) +# Only use TBI API 1.4 if DSP is enabled for META12 cores +sflags-$(CONFIG_METAG_DSP) += -DTBI_1_4 +endif +sflags-$(CONFIG_METAG_META21) += -mmetac=2.1 -DTBI_1_4 + +cflags-$(CONFIG_METAG_FUNCTION_TRACE) += -mhwtrace-leaf -mhwtrace-retpc +cflags-$(CONFIG_METAG_META21) += -mextensions=bex + +KBUILD_CFLAGS += -pipe +KBUILD_CFLAGS += -ffunction-sections + +KBUILD_CFLAGS += $(sflags-y) $(cflags-y) +KBUILD_AFLAGS += $(sflags-y) + +LDFLAGS_vmlinux := $(ldflags-y) + +head-y := arch/metag/kernel/head.o + +core-y += arch/metag/boot/dts/ +core-y += arch/metag/kernel/ +core-y += arch/metag/mm/ + +libs-y += arch/metag/lib/ +libs-y += arch/metag/tbx/ + +boot := arch/metag/boot + +boot_targets += uImage +boot_targets += uImage.gz +boot_targets += uImage.bz2 +boot_targets += uImage.xz +boot_targets += uImage.lzo +boot_targets += uImage.bin +boot_targets += vmlinux.bin + +PHONY += $(boot_targets) + +all: vmlinux.bin + +$(boot_targets): vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +%.dtb %.dtb.S %.dtb.o: scripts + $(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@ + +dtbs: scripts + $(Q)$(MAKE) $(build)=$(boot)/dts dtbs + +archclean: + $(Q)$(MAKE) $(clean)=$(boot) + +define archhelp + echo '* vmlinux.bin - Binary kernel image (arch/$(ARCH)/boot/vmlinux.bin)' + @echo ' uImage - Alias to bootable U-Boot image' + @echo ' uImage.bin - Kernel-only image for U-Boot (bin)' + @echo ' uImage.gz - Kernel-only image for U-Boot (gzip)' + @echo ' uImage.bz2 - Kernel-only image for U-Boot (bzip2)' + @echo ' uImage.xz - Kernel-only image for U-Boot (xz)' + @echo ' uImage.lzo - Kernel-only image for U-Boot (lzo)' + @echo ' dtbs - Build device tree blobs for enabled boards' +endef diff --git a/arch/metag/boot/.gitignore b/arch/metag/boot/.gitignore new file mode 100644 index 000000000000..a021da201156 --- /dev/null +++ b/arch/metag/boot/.gitignore @@ -0,0 +1,4 @@ +vmlinux* +uImage* +ramdisk.* +*.dtb diff --git a/arch/metag/boot/Makefile b/arch/metag/boot/Makefile new file mode 100644 index 000000000000..5a1f88cf91e3 --- /dev/null +++ b/arch/metag/boot/Makefile @@ -0,0 +1,68 @@ +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2007,2012 Imagination Technologies Ltd. +# + +suffix-y := bin +suffix-$(CONFIG_KERNEL_GZIP) := gz +suffix-$(CONFIG_KERNEL_BZIP2) := bz2 +suffix-$(CONFIG_KERNEL_XZ) := xz +suffix-$(CONFIG_KERNEL_LZO) := lzo + +targets += vmlinux.bin +targets += uImage +targets += uImage.gz +targets += uImage.bz2 +targets += uImage.xz +targets += uImage.lzo +targets += uImage.bin + +extra-y += vmlinux.bin +extra-y += vmlinux.bin.gz +extra-y += vmlinux.bin.bz2 +extra-y += vmlinux.bin.xz +extra-y += vmlinux.bin.lzo + +UIMAGE_LOADADDR = $(CONFIG_PAGE_OFFSET) + +ifeq ($(CONFIG_FUNCTION_TRACER),y) +orig_cflags := $(KBUILD_CFLAGS) +KBUILD_CFLAGS = $(subst -pg, , $(orig_cflags)) +endif + +$(obj)/vmlinux.bin: vmlinux FORCE + $(call if_changed,objcopy) + +$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) + +$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE + $(call if_changed,bzip2) + +$(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE + $(call if_changed,xzkern) + +$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE + $(call if_changed,lzo) + +$(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE + $(call if_changed,uimage,gzip) + +$(obj)/uImage.bz2: $(obj)/vmlinux.bin.bz2 FORCE + $(call if_changed,uimage,bzip2) + +$(obj)/uImage.xz: $(obj)/vmlinux.bin.xz FORCE + $(call if_changed,uimage,xz) + +$(obj)/uImage.lzo: $(obj)/vmlinux.bin.lzo FORCE + $(call if_changed,uimage,lzo) + +$(obj)/uImage.bin: $(obj)/vmlinux.bin FORCE + $(call if_changed,uimage,none) + +$(obj)/uImage: $(obj)/uImage.$(suffix-y) + @ln -sf $(notdir $<) $@ + @echo ' Image $@ is ready' diff --git a/arch/metag/boot/dts/Makefile b/arch/metag/boot/dts/Makefile new file mode 100644 index 000000000000..e0b5afd8bde8 --- /dev/null +++ b/arch/metag/boot/dts/Makefile @@ -0,0 +1,16 @@ +dtb-y += skeleton.dtb + +# Built-in dtb +builtindtb-y := skeleton + +ifneq ($(CONFIG_METAG_BUILTIN_DTB_NAME),"") + builtindtb-y := $(CONFIG_METAG_BUILTIN_DTB_NAME) +endif +obj-$(CONFIG_METAG_BUILTIN_DTB) += $(patsubst "%",%,$(builtindtb-y)).dtb.o + +targets += dtbs +targets += $(dtb-y) + +dtbs: $(addprefix $(obj)/, $(dtb-y)) + +clean-files += *.dtb diff --git a/arch/metag/boot/dts/skeleton.dts b/arch/metag/boot/dts/skeleton.dts new file mode 100644 index 000000000000..7244d1f0d555 --- /dev/null +++ b/arch/metag/boot/dts/skeleton.dts @@ -0,0 +1,10 @@ +/* + * Copyright (C) 2012 Imagination Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/dts-v1/; + +/include/ "skeleton.dtsi" diff --git a/arch/metag/boot/dts/skeleton.dtsi b/arch/metag/boot/dts/skeleton.dtsi new file mode 100644 index 000000000000..78229eacced7 --- /dev/null +++ b/arch/metag/boot/dts/skeleton.dtsi @@ -0,0 +1,14 @@ +/* + * Skeleton device tree; the bare minimum needed to boot; just include and + * add a compatible value. The bootloader will typically populate the memory + * node. + */ + +/ { + compatible = "img,meta"; + #address-cells = <1>; + #size-cells = <1>; + chosen { }; + aliases { }; + memory { device_type = "memory"; reg = <0 0>; }; +}; diff --git a/arch/metag/configs/meta1_defconfig b/arch/metag/configs/meta1_defconfig new file mode 100644 index 000000000000..c35a75e8ecfe --- /dev/null +++ b/arch/metag/configs/meta1_defconfig @@ -0,0 +1,40 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_SWAP is not set +CONFIG_LOG_BUF_SHIFT=13 +CONFIG_SYSFS_DEPRECATED=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_ELF_CORE is not set +CONFIG_SLAB=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_FLATMEM_MANUAL=y +CONFIG_META12_FPGA=y +CONFIG_METAG_DA=y +CONFIG_HZ_100=y +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FW_LOADER is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=1 +CONFIG_BLK_DEV_RAM_SIZE=16384 +# CONFIG_INPUT is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_DA_TTY=y +CONFIG_DA_CONSOLE=y +# CONFIG_DEVKMEM is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_DNOTIFY is not set +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +# CONFIG_SCHED_DEBUG is not set +CONFIG_DEBUG_INFO=y diff --git a/arch/metag/configs/meta2_defconfig b/arch/metag/configs/meta2_defconfig new file mode 100644 index 000000000000..fb3148410183 --- /dev/null +++ b/arch/metag/configs/meta2_defconfig @@ -0,0 +1,41 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_LOG_BUF_SHIFT=13 +CONFIG_SYSFS_DEPRECATED=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_ELF_CORE is not set +CONFIG_SLAB=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_METAG_L2C=y +CONFIG_FLATMEM_MANUAL=y +CONFIG_METAG_HALT_ON_PANIC=y +CONFIG_METAG_DA=y +CONFIG_HZ_100=y +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FW_LOADER is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=1 +CONFIG_BLK_DEV_RAM_SIZE=16384 +# CONFIG_INPUT is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_DA_TTY=y +CONFIG_DA_CONSOLE=y +# CONFIG_DEVKMEM is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_DNOTIFY is not set +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +# CONFIG_SCHED_DEBUG is not set +CONFIG_DEBUG_INFO=y diff --git a/arch/metag/configs/meta2_smp_defconfig b/arch/metag/configs/meta2_smp_defconfig new file mode 100644 index 000000000000..6c7b777ac276 --- /dev/null +++ b/arch/metag/configs/meta2_smp_defconfig @@ -0,0 +1,42 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_LOG_BUF_SHIFT=13 +CONFIG_SYSFS_DEPRECATED=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_ELF_CORE is not set +CONFIG_SLAB=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_METAG_L2C=y +CONFIG_FLATMEM_MANUAL=y +CONFIG_METAG_HALT_ON_PANIC=y +CONFIG_SMP=y +CONFIG_METAG_DA=y +CONFIG_HZ_100=y +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FW_LOADER is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=1 +CONFIG_BLK_DEV_RAM_SIZE=16384 +# CONFIG_INPUT is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_DA_TTY=y +CONFIG_DA_CONSOLE=y +# CONFIG_DEVKMEM is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_DNOTIFY is not set +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +# CONFIG_SCHED_DEBUG is not set +CONFIG_DEBUG_INFO=y diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild new file mode 100644 index 000000000000..6ae0ccb632cb --- /dev/null +++ b/arch/metag/include/asm/Kbuild @@ -0,0 +1,54 @@ +generic-y += auxvec.h +generic-y += bitsperlong.h +generic-y += bugs.h +generic-y += clkdev.h +generic-y += cputime.h +generic-y += current.h +generic-y += device.h +generic-y += dma.h +generic-y += emergency-restart.h +generic-y += errno.h +generic-y += exec.h +generic-y += fb.h +generic-y += fcntl.h +generic-y += futex.h +generic-y += hardirq.h +generic-y += hw_irq.h +generic-y += ioctl.h +generic-y += ioctls.h +generic-y += ipcbuf.h +generic-y += irq_regs.h +generic-y += kdebug.h +generic-y += kmap_types.h +generic-y += kvm_para.h +generic-y += local.h +generic-y += local64.h +generic-y += msgbuf.h +generic-y += mutex.h +generic-y += param.h +generic-y += pci.h +generic-y += percpu.h +generic-y += poll.h +generic-y += posix_types.h +generic-y += scatterlist.h +generic-y += sections.h +generic-y += sembuf.h +generic-y += serial.h +generic-y += shmbuf.h +generic-y += shmparam.h +generic-y += signal.h +generic-y += socket.h +generic-y += sockios.h +generic-y += stat.h +generic-y += statfs.h +generic-y += switch_to.h +generic-y += termbits.h +generic-y += termios.h +generic-y += timex.h +generic-y += trace_clock.h +generic-y += types.h +generic-y += ucontext.h +generic-y += unaligned.h +generic-y += user.h +generic-y += vga.h +generic-y += xor.h diff --git a/arch/metag/include/asm/atomic.h b/arch/metag/include/asm/atomic.h new file mode 100644 index 000000000000..307ecd2bd9a1 --- /dev/null +++ b/arch/metag/include/asm/atomic.h @@ -0,0 +1,53 @@ +#ifndef __ASM_METAG_ATOMIC_H +#define __ASM_METAG_ATOMIC_H + +#include <linux/compiler.h> +#include <linux/types.h> +#include <asm/cmpxchg.h> + +#if defined(CONFIG_METAG_ATOMICITY_IRQSOFF) +/* The simple UP case. */ +#include <asm-generic/atomic.h> +#else + +#if defined(CONFIG_METAG_ATOMICITY_LOCK1) +#include <asm/atomic_lock1.h> +#else +#include <asm/atomic_lnkget.h> +#endif + +#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) + +#define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_inc_return(v) atomic_add_return(1, (v)) + +/* + * atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) + +#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0) +#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) + +#define atomic_inc(v) atomic_add(1, (v)) +#define atomic_dec(v) atomic_sub(1, (v)) + +#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) + +#define smp_mb__before_atomic_dec() barrier() +#define smp_mb__after_atomic_dec() barrier() +#define smp_mb__before_atomic_inc() barrier() +#define smp_mb__after_atomic_inc() barrier() + +#endif + +#define atomic_dec_if_positive(v) atomic_sub_if_positive(1, v) + +#include <asm-generic/atomic64.h> + +#endif /* __ASM_METAG_ATOMIC_H */ diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h new file mode 100644 index 000000000000..d2e60a18986c --- /dev/null +++ b/arch/metag/include/asm/atomic_lnkget.h @@ -0,0 +1,234 @@ +#ifndef __ASM_METAG_ATOMIC_LNKGET_H +#define __ASM_METAG_ATOMIC_LNKGET_H + +#define ATOMIC_INIT(i) { (i) } + +#define atomic_set(v, i) ((v)->counter = (i)) + +#include <linux/compiler.h> + +#include <asm/barrier.h> + +/* + * None of these asm statements clobber memory as LNKSET writes around + * the cache so the memory it modifies cannot safely be read by any means + * other than these accessors. + */ + +static inline int atomic_read(const atomic_t *v) +{ + int temp; + + asm volatile ( + "LNKGETD %0, [%1]\n" + : "=da" (temp) + : "da" (&v->counter)); + + return temp; +} + +static inline void atomic_add(int i, atomic_t *v) +{ + int temp; + + asm volatile ( + "1: LNKGETD %0, [%1]\n" + " ADD %0, %0, %2\n" + " LNKSETD [%1], %0\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" + : "=&d" (temp) + : "da" (&v->counter), "bd" (i) + : "cc"); +} + +static inline void atomic_sub(int i, atomic_t *v) +{ + int temp; + + asm volatile ( + "1: LNKGETD %0, [%1]\n" + " SUB %0, %0, %2\n" + " LNKSETD [%1], %0\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" + : "=&d" (temp) + : "da" (&v->counter), "bd" (i) + : "cc"); +} + +static inline int atomic_add_return(int i, atomic_t *v) +{ + int result, temp; + + smp_mb(); + + asm volatile ( + "1: LNKGETD %1, [%2]\n" + " ADD %1, %1, %3\n" + " LNKSETD [%2], %1\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" + : "=&d" (temp), "=&da" (result) + : "da" (&v->counter), "bd" (i) + : "cc"); + + smp_mb(); + + return result; +} + +static inline int atomic_sub_return(int i, atomic_t *v) +{ + int result, temp; + + smp_mb(); + + asm volatile ( + "1: LNKGETD %1, [%2]\n" + " SUB %1, %1, %3\n" + " LNKSETD [%2], %1\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" + : "=&d" (temp), "=&da" (result) + : "da" (&v->counter), "bd" (i) + : "cc"); + + smp_mb(); + + return result; +} + +static inline void atomic_clear_mask(unsigned int mask, atomic_t *v) +{ + int temp; + + asm volatile ( + "1: LNKGETD %0, [%1]\n" + " AND %0, %0, %2\n" + " LNKSETD [%1] %0\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" + : "=&d" (temp) + : "da" (&v->counter), "bd" (~mask) + : "cc"); +} + +static inline void atomic_set_mask(unsigned int mask, atomic_t *v) +{ + int temp; + + asm volatile ( + "1: LNKGETD %0, [%1]\n" + " OR %0, %0, %2\n" + " LNKSETD [%1], %0\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" + : "=&d" (temp) + : "da" (&v->counter), "bd" (mask) + : "cc"); +} + +static inline int atomic_cmpxchg(atomic_t *v, int old, int new) +{ + int result, temp; + + smp_mb(); + + asm volatile ( + "1: LNKGETD %1, [%2]\n" + " CMP %1, %3\n" + " LNKSETDEQ [%2], %4\n" + " BNE 2f\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" + "2:\n" + : "=&d" (temp), "=&d" (result) + : "da" (&v->counter), "bd" (old), "da" (new) + : "cc"); + + smp_mb(); + + return result; +} + +static inline int atomic_xchg(atomic_t *v, int new) +{ + int temp, old; + + asm volatile ( + "1: LNKGETD %1, [%2]\n" + " LNKSETD [%2], %3\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" + : "=&d" (temp), "=&d" (old) + : "da" (&v->counter), "da" (new) + : "cc"); + + return old; +} + +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int result, temp; + + smp_mb(); + + asm volatile ( + "1: LNKGETD %1, [%2]\n" + " CMP %1, %3\n" + " ADD %0, %1, %4\n" + " LNKSETDNE [%2], %0\n" + " BEQ 2f\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" + "2:\n" + : "=&d" (temp), "=&d" (result) + : "da" (&v->counter), "bd" (u), "bd" (a) + : "cc"); + + smp_mb(); + + return result; +} + +static inline int atomic_sub_if_positive(int i, atomic_t *v) +{ + int result, temp; + + asm volatile ( + "1: LNKGETD %1, [%2]\n" + " SUBS %1, %1, %3\n" + " LNKSETDGE [%2], %1\n" + " BLT 2f\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" + "2:\n" + : "=&d" (temp), "=&da" (result) + : "da" (&v->counter), "bd" (i) + : "cc"); + + return result; +} + +#endif /* __ASM_METAG_ATOMIC_LNKGET_H */ diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h new file mode 100644 index 000000000000..e578955e674b --- /dev/null +++ b/arch/metag/include/asm/atomic_lock1.h @@ -0,0 +1,160 @@ +#ifndef __ASM_METAG_ATOMIC_LOCK1_H +#define __ASM_METAG_ATOMIC_LOCK1_H + +#define ATOMIC_INIT(i) { (i) } + +#include <linux/compiler.h> + +#include <asm/barrier.h> +#include <asm/global_lock.h> + +static inline int atomic_read(const atomic_t *v) +{ + return (v)->counter; +} + +/* + * atomic_set needs to be take the lock to protect atomic_add_unless from a + * possible race, as it reads the counter twice: + * + * CPU0 CPU1 + * atomic_add_unless(1, 0) + * ret = v->counter (non-zero) + * if (ret != u) v->counter = 0 + * v->counter += 1 (counter set to 1) + * + * Making atomic_set take the lock ensures that ordering and logical + * consistency is preserved. + */ +static inline int atomic_set(atomic_t *v, int i) +{ + unsigned long flags; + + __global_lock1(flags); + fence(); + v->counter = i; + __global_unlock1(flags); + return i; +} + +static inline void atomic_add(int i, atomic_t *v) +{ + unsigned long flags; + + __global_lock1(flags); + fence(); + v->counter += i; + __global_unlock1(flags); +} + +static inline void atomic_sub(int i, atomic_t *v) +{ + unsigned long flags; + + __global_lock1(flags); + fence(); + v->counter -= i; + __global_unlock1(flags); +} + +static inline int atomic_add_return(int i, atomic_t *v) +{ + unsigned long result; + unsigned long flags; + + __global_lock1(flags); + result = v->counter; + result += i; + fence(); + v->counter = result; + __global_unlock1(flags); + + return result; +} + +static inline int atomic_sub_return(int i, atomic_t *v) +{ + unsigned long result; + unsigned long flags; + + __global_lock1(flags); + result = v->counter; + result -= i; + fence(); + v->counter = result; + __global_unlock1(flags); + + return result; +} + +static inline void atomic_clear_mask(unsigned int mask, atomic_t *v) +{ + unsigned long flags; + + __global_lock1(flags); + fence(); + v->counter &= ~mask; + __global_unlock1(flags); +} + +static inline void atomic_set_mask(unsigned int mask, atomic_t *v) +{ + unsigned long flags; + + __global_lock1(flags); + fence(); + v->counter |= mask; + __global_unlock1(flags); +} + +static inline int atomic_cmpxchg(atomic_t *v, int old, int new) +{ + int ret; + unsigned long flags; + + __global_lock1(flags); + ret = v->counter; + if (ret == old) { + fence(); + v->counter = new; + } + __global_unlock1(flags); + + return ret; +} + +#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) + +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int ret; + unsigned long flags; + + __global_lock1(flags); + ret = v->counter; + if (ret != u) { + fence(); + v->counter += a; + } + __global_unlock1(flags); + + return ret; +} + +static inline int atomic_sub_if_positive(int i, atomic_t *v) +{ + int ret; + unsigned long flags; + + __global_lock1(flags); + ret = v->counter - 1; + if (ret >= 0) { + fence(); + v->counter = ret; + } + __global_unlock1(flags); + + return ret; +} + +#endif /* __ASM_METAG_ATOMIC_LOCK1_H */ diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h new file mode 100644 index 000000000000..c90bfc6bf648 --- /dev/null +++ b/arch/metag/include/asm/barrier.h @@ -0,0 +1,85 @@ +#ifndef _ASM_METAG_BARRIER_H +#define _ASM_METAG_BARRIER_H + +#include <asm/metag_mem.h> + +#define nop() asm volatile ("NOP") +#define mb() wmb() +#define rmb() barrier() + +#ifdef CONFIG_METAG_META21 + +/* HTP and above have a system event to fence writes */ +static inline void wr_fence(void) +{ + volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_FENCE; + barrier(); + *flushptr = 0; +} + +#else /* CONFIG_METAG_META21 */ + +/* + * ATP doesn't have system event to fence writes, so it is necessary to flush + * the processor write queues as well as possibly the write combiner (depending + * on the page being written). + * To ensure the write queues are flushed we do 4 writes to a system event + * register (in this case write combiner flush) which will also flush the write + * combiner. + */ +static inline void wr_fence(void) +{ + volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_COMBINE_FLUSH; + barrier(); + *flushptr = 0; + *flushptr = 0; + *flushptr = 0; + *flushptr = 0; +} + +#endif /* !CONFIG_METAG_META21 */ + +static inline void wmb(void) +{ + /* flush writes through the write combiner */ + wr_fence(); +} + +#define read_barrier_depends() do { } while (0) + +#ifndef CONFIG_SMP +#define fence() do { } while (0) +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#else + +#ifdef CONFIG_METAG_SMP_WRITE_REORDERING +/* + * Write to the atomic memory unlock system event register (command 0). This is + * needed before a write to shared memory in a critical section, to prevent + * external reordering of writes before the fence on other threads with writes + * after the fence on this thread (and to prevent the ensuing cache-memory + * incoherence). It is therefore ineffective if used after and on the same + * thread as a write. + */ +static inline void fence(void) +{ + volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_ATOMIC_UNLOCK; + barrier(); + *flushptr = 0; +} +#define smp_mb() fence() +#define smp_rmb() fence() +#define smp_wmb() barrier() +#else +#define fence() do { } while (0) +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#endif +#endif +#define smp_read_barrier_depends() do { } while (0) +#define set_mb(var, value) do { var = value; smp_mb(); } while (0) + +#endif /* _ASM_METAG_BARRIER_H */ diff --git a/arch/metag/include/asm/bitops.h b/arch/metag/include/asm/bitops.h new file mode 100644 index 000000000000..c0d0df0d1378 --- /dev/null +++ b/arch/metag/include/asm/bitops.h @@ -0,0 +1,132 @@ +#ifndef __ASM_METAG_BITOPS_H +#define __ASM_METAG_BITOPS_H + +#include <linux/compiler.h> +#include <asm/barrier.h> +#include <asm/global_lock.h> + +/* + * clear_bit() doesn't provide any barrier for the compiler. + */ +#define smp_mb__before_clear_bit() barrier() +#define smp_mb__after_clear_bit() barrier() + +#ifdef CONFIG_SMP +/* + * These functions are the basis of our bit ops. + */ +static inline void set_bit(unsigned int bit, volatile unsigned long *p) +{ + unsigned long flags; + unsigned long mask = 1UL << (bit & 31); + + p += bit >> 5; + + __global_lock1(flags); + fence(); + *p |= mask; + __global_unlock1(flags); +} + +static inline void clear_bit(unsigned int bit, volatile unsigned long *p) +{ + unsigned long flags; + unsigned long mask = 1UL << (bit & 31); + + p += bit >> 5; + + __global_lock1(flags); + fence(); + *p &= ~mask; + __global_unlock1(flags); +} + +static inline void change_bit(unsigned int bit, volatile unsigned long *p) +{ + unsigned long flags; + unsigned long mask = 1UL << (bit & 31); + + p += bit >> 5; + + __global_lock1(flags); + fence(); + *p ^= mask; + __global_unlock1(flags); +} + +static inline int test_and_set_bit(unsigned int bit, volatile unsigned long *p) +{ + unsigned long flags; + unsigned long old; + unsigned long mask = 1UL << (bit & 31); + + p += bit >> 5; + + __global_lock1(flags); + old = *p; + if (!(old & mask)) { + fence(); + *p = old | mask; + } + __global_unlock1(flags); + + return (old & mask) != 0; +} + +static inline int test_and_clear_bit(unsigned int bit, + volatile unsigned long *p) +{ + unsigned long flags; + unsigned long old; + unsigned long mask = 1UL << (bit & 31); + + p += bit >> 5; + + __global_lock1(flags); + old = *p; + if (old & mask) { + fence(); + *p = old & ~mask; + } + __global_unlock1(flags); + + return (old & mask) != 0; +} + +static inline int test_and_change_bit(unsigned int bit, + volatile unsigned long *p) +{ + unsigned long flags; + unsigned long old; + unsigned long mask = 1UL << (bit & 31); + + p += bit >> 5; + + __global_lock1(flags); + fence(); + old = *p; + *p = old ^ mask; + __global_unlock1(flags); + + return (old & mask) != 0; +} + +#else +#include <asm-generic/bitops/atomic.h> +#endif /* CONFIG_SMP */ + +#include <asm-generic/bitops/non-atomic.h> +#include <asm-generic/bitops/find.h> +#include <asm-generic/bitops/ffs.h> +#include <asm-generic/bitops/__ffs.h> +#include <asm-generic/bitops/ffz.h> +#include <asm-generic/bitops/fls.h> +#include <asm-generic/bitops/__fls.h> +#include <asm-generic/bitops/fls64.h> +#include <asm-generic/bitops/hweight.h> +#include <asm-generic/bitops/lock.h> +#include <asm-generic/bitops/sched.h> +#include <asm-generic/bitops/le.h> +#include <asm-generic/bitops/ext2-atomic.h> + +#endif /* __ASM_METAG_BITOPS_H */ diff --git a/arch/metag/include/asm/bug.h b/arch/metag/include/asm/bug.h new file mode 100644 index 000000000000..d04b48cefecc --- /dev/null +++ b/arch/metag/include/asm/bug.h @@ -0,0 +1,12 @@ +#ifndef _ASM_METAG_BUG_H +#define _ASM_METAG_BUG_H + +#include <asm-generic/bug.h> + +struct pt_regs; + +extern const char *trap_name(int trapno); +extern void die(const char *str, struct pt_regs *regs, long err, + unsigned long addr) __attribute__ ((noreturn)); + +#endif diff --git a/arch/metag/include/asm/cache.h b/arch/metag/include/asm/cache.h new file mode 100644 index 000000000000..a43b650cfdc0 --- /dev/null +++ b/arch/metag/include/asm/cache.h @@ -0,0 +1,23 @@ +#ifndef __ASM_METAG_CACHE_H +#define __ASM_METAG_CACHE_H + +/* L1 cache line size (64 bytes) */ +#define L1_CACHE_SHIFT 6 +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +/* Meta requires large data items to be 8 byte aligned. */ +#define ARCH_SLAB_MINALIGN 8 + +/* + * With an L2 cache, we may invalidate dirty lines, so we need to ensure DMA + * buffers have cache line alignment. + */ +#ifdef CONFIG_METAG_L2C +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +#else +#define ARCH_DMA_MINALIGN 8 +#endif + +#define __read_mostly __attribute__((__section__(".data..read_mostly"))) + +#endif diff --git a/arch/metag/include/asm/cacheflush.h b/arch/metag/include/asm/cacheflush.h new file mode 100644 index 000000000000..7787ec5e3ed0 --- /dev/null +++ b/arch/metag/include/asm/cacheflush.h @@ -0,0 +1,250 @@ +#ifndef _METAG_CACHEFLUSH_H +#define _METAG_CACHEFLUSH_H + +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/io.h> + +#include <asm/l2cache.h> +#include <asm/metag_isa.h> +#include <asm/metag_mem.h> + +void metag_cache_probe(void); + +void metag_data_cache_flush_all(const void *start); +void metag_code_cache_flush_all(const void *start); + +/* + * Routines to flush physical cache lines that may be used to cache data or code + * normally accessed via the linear address range supplied. The region flushed + * must either lie in local or global address space determined by the top bit of + * the pStart address. If Bytes is >= 4K then the whole of the related cache + * state will be flushed rather than a limited range. + */ +void metag_data_cache_flush(const void *start, int bytes); +void metag_code_cache_flush(const void *start, int bytes); + +#ifdef CONFIG_METAG_META12 + +/* Write through, virtually tagged, split I/D cache. */ + +static inline void __flush_cache_all(void) +{ + metag_code_cache_flush_all((void *) PAGE_OFFSET); + metag_data_cache_flush_all((void *) PAGE_OFFSET); +} + +#define flush_cache_all() __flush_cache_all() + +/* flush the entire user address space referenced in this mm structure */ +static inline void flush_cache_mm(struct mm_struct *mm) +{ + if (mm == current->mm) + __flush_cache_all(); +} + +#define flush_cache_dup_mm(mm) flush_cache_mm(mm) + +/* flush a range of addresses from this mm */ +static inline void flush_cache_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + flush_cache_mm(vma->vm_mm); +} + +static inline void flush_cache_page(struct vm_area_struct *vma, + unsigned long vmaddr, unsigned long pfn) +{ + flush_cache_mm(vma->vm_mm); +} + +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 +static inline void flush_dcache_page(struct page *page) +{ + metag_data_cache_flush_all((void *) PAGE_OFFSET); +} + +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) + +static inline void flush_icache_page(struct vm_area_struct *vma, + struct page *page) +{ + metag_code_cache_flush(page_to_virt(page), PAGE_SIZE); +} + +static inline void flush_cache_vmap(unsigned long start, unsigned long end) +{ + metag_data_cache_flush_all((void *) PAGE_OFFSET); +} + +static inline void flush_cache_vunmap(unsigned long start, unsigned long end) +{ + metag_data_cache_flush_all((void *) PAGE_OFFSET); +} + +#else + +/* Write through, physically tagged, split I/D cache. */ + +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) +#define flush_icache_page(vma, pg) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) + +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 +static inline void flush_dcache_page(struct page *page) +{ + /* FIXME: We can do better than this. All we are trying to do is + * make the i-cache coherent, we should use the PG_arch_1 bit like + * e.g. powerpc. + */ +#ifdef CONFIG_SMP + metag_out32(1, SYSC_ICACHE_FLUSH); +#else + metag_code_cache_flush_all((void *) PAGE_OFFSET); +#endif +} + +#endif + +/* Push n pages at kernel virtual address and clear the icache */ +static inline void flush_icache_range(unsigned long address, + unsigned long endaddr) +{ +#ifdef CONFIG_SMP + metag_out32(1, SYSC_ICACHE_FLUSH); +#else + metag_code_cache_flush((void *) address, endaddr - address); +#endif +} + +static inline void flush_cache_sigtramp(unsigned long addr, int size) +{ + /* + * Flush the icache in case there was previously some code + * fetched from this address, perhaps a previous sigtramp. + * + * We don't need to flush the dcache, it's write through and + * we just wrote the sigtramp code through it. + */ +#ifdef CONFIG_SMP + metag_out32(1, SYSC_ICACHE_FLUSH); +#else + metag_code_cache_flush((void *) addr, size); +#endif +} + +#ifdef CONFIG_METAG_L2C + +/* + * Perform a single specific CACHEWD operation on an address, masking lower bits + * of address first. + */ +static inline void cachewd_line(void *addr, unsigned int data) +{ + unsigned long masked = (unsigned long)addr & -0x40; + __builtin_meta2_cachewd((void *)masked, data); +} + +/* Perform a certain CACHEW op on each cache line in a range */ +static inline void cachew_region_op(void *start, unsigned long size, + unsigned int op) +{ + unsigned long offset = (unsigned long)start & 0x3f; + int i; + if (offset) { + size += offset; + start -= offset; + } + i = (size - 1) >> 6; + do { + __builtin_meta2_cachewd(start, op); + start += 0x40; + } while (i--); +} + +/* prevent write fence and flushbacks being reordered in L2 */ +static inline void l2c_fence_flush(void *addr) +{ + /* + * Synchronise by reading back and re-flushing. + * It is assumed this access will miss, as the caller should have just + * flushed the cache line. + */ + (void)(volatile u8 *)addr; + cachewd_line(addr, CACHEW_FLUSH_L1D_L2); +} + +/* prevent write fence and writebacks being reordered in L2 */ +static inline void l2c_fence(void *addr) +{ + /* + * A write back has occurred, but not necessarily an invalidate, so the + * readback in l2c_fence_flush() would hit in the cache and have no + * effect. Therefore fully flush the line first. + */ + cachewd_line(addr, CACHEW_FLUSH_L1D_L2); + l2c_fence_flush(addr); +} + +/* Used to keep memory consistent when doing DMA. */ +static inline void flush_dcache_region(void *start, unsigned long size) +{ + /* metag_data_cache_flush won't flush L2 cache lines if size >= 4096 */ + if (meta_l2c_is_enabled()) { + cachew_region_op(start, size, CACHEW_FLUSH_L1D_L2); + if (meta_l2c_is_writeback()) + l2c_fence_flush(start + size - 1); + } else { + metag_data_cache_flush(start, size); + } +} + +/* Write back dirty lines to memory (or do nothing if no writeback caches) */ +static inline void writeback_dcache_region(void *start, unsigned long size) +{ + if (meta_l2c_is_enabled() && meta_l2c_is_writeback()) { + cachew_region_op(start, size, CACHEW_WRITEBACK_L1D_L2); + l2c_fence(start + size - 1); + } +} + +/* Invalidate (may also write back if necessary) */ +static inline void invalidate_dcache_region(void *start, unsigned long size) +{ + if (meta_l2c_is_enabled()) + cachew_region_op(start, size, CACHEW_INVALIDATE_L1D_L2); + else + metag_data_cache_flush(start, size); +} +#else +#define flush_dcache_region(s, l) metag_data_cache_flush((s), (l)) +#define writeback_dcache_region(s, l) do {} while (0) +#define invalidate_dcache_region(s, l) flush_dcache_region((s), (l)) +#endif + +static inline void copy_to_user_page(struct vm_area_struct *vma, + struct page *page, unsigned long vaddr, + void *dst, const void *src, + unsigned long len) +{ + memcpy(dst, src, len); + flush_icache_range((unsigned long)dst, (unsigned long)dst + len); +} + +static inline void copy_from_user_page(struct vm_area_struct *vma, + struct page *page, unsigned long vaddr, + void *dst, const void *src, + unsigned long len) +{ + memcpy(dst, src, len); +} + +#endif /* _METAG_CACHEFLUSH_H */ diff --git a/arch/metag/include/asm/cachepart.h b/arch/metag/include/asm/cachepart.h new file mode 100644 index 000000000000..cf6b44e916b5 --- /dev/null +++ b/arch/metag/include/asm/cachepart.h @@ -0,0 +1,42 @@ +/* + * Meta cache partition manipulation. + * + * Copyright 2010 Imagination Technologies Ltd. + */ + +#ifndef _METAG_CACHEPART_H_ +#define _METAG_CACHEPART_H_ + +/** + * get_dcache_size() - Get size of data cache. + */ +unsigned int get_dcache_size(void); + +/** + * get_icache_size() - Get size of code cache. + */ +unsigned int get_icache_size(void); + +/** + * get_global_dcache_size() - Get the thread's global dcache. + * + * Returns the size of the current thread's global dcache partition. + */ +unsigned int get_global_dcache_size(void); + +/** + * get_global_icache_size() - Get the thread's global icache. + * + * Returns the size of the current thread's global icache partition. + */ +unsigned int get_global_icache_size(void); + +/** + * check_for_dache_aliasing() - Ensure that the bootloader has configured the + * dache and icache properly to avoid aliasing + * @thread_id: Hardware thread ID + * + */ +void check_for_cache_aliasing(int thread_id); + +#endif diff --git a/arch/metag/include/asm/checksum.h b/arch/metag/include/asm/checksum.h new file mode 100644 index 000000000000..999bf761a732 --- /dev/null +++ b/arch/metag/include/asm/checksum.h @@ -0,0 +1,92 @@ +#ifndef _METAG_CHECKSUM_H +#define _METAG_CHECKSUM_H + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +extern __wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * the same as csum_partial, but copies from src while it + * checksums + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ +extern __wsum csum_partial_copy(const void *src, void *dst, int len, + __wsum sum); + +/* + * the same as csum_partial_copy, but copies from user space. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ +extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *csum_err); + +#define csum_partial_copy_nocheck(src, dst, len, sum) \ + csum_partial_copy((src), (dst), (len), (sum)) + +/* + * Fold a partial checksum + */ +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (__force __sum16)~sum; +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + */ +extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + unsigned long len_proto = (proto + len) << 8; + asm ("ADD %0, %0, %1\n" + "ADDS %0, %0, %2\n" + "ADDCS %0, %0, #1\n" + "ADDS %0, %0, %3\n" + "ADDCS %0, %0, #1\n" + : "=d" (sum) + : "d" (daddr), "d" (saddr), "d" (len_proto), + "0" (sum) + : "cc"); + return sum; +} + +static inline __sum16 +csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +extern __sum16 ip_compute_csum(const void *buff, int len); + +#endif /* _METAG_CHECKSUM_H */ diff --git a/arch/metag/include/asm/clock.h b/arch/metag/include/asm/clock.h new file mode 100644 index 000000000000..3e2915a280c7 --- /dev/null +++ b/arch/metag/include/asm/clock.h @@ -0,0 +1,51 @@ +/* + * arch/metag/include/asm/clock.h + * + * Copyright (C) 2012 Imagination Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _METAG_CLOCK_H_ +#define _METAG_CLOCK_H_ + +#include <asm/mach/arch.h> + +/** + * struct meta_clock_desc - Meta Core clock callbacks. + * @get_core_freq: Get the frequency of the Meta core. If this is NULL, the + * core frequency will be determined like this: + * Meta 1: based on loops_per_jiffy. + * Meta 2: (EXPAND_TIMER_DIV + 1) MHz. + */ +struct meta_clock_desc { + unsigned long (*get_core_freq)(void); +}; + +extern struct meta_clock_desc _meta_clock; + +/* + * Set up the default clock, ensuring all callbacks are valid - only accessible + * during boot. + */ +void setup_meta_clocks(struct meta_clock_desc *desc); + +/** + * get_coreclock() - Get the frequency of the Meta core clock. + * + * Returns: The Meta core clock frequency in Hz. + */ +static inline unsigned long get_coreclock(void) +{ + /* + * Use the current clock callback. If set correctly this will provide + * the most accurate frequency as it can be calculated directly from the + * PLL configuration. otherwise a default callback will have been set + * instead. + */ + return _meta_clock.get_core_freq(); +} + +#endif /* _METAG_CLOCK_H_ */ diff --git a/arch/metag/include/asm/cmpxchg.h b/arch/metag/include/asm/cmpxchg.h new file mode 100644 index 000000000000..b1bc1be8540f --- /dev/null +++ b/arch/metag/include/asm/cmpxchg.h @@ -0,0 +1,65 @@ +#ifndef __ASM_METAG_CMPXCHG_H +#define __ASM_METAG_CMPXCHG_H + +#include <asm/barrier.h> + +#if defined(CONFIG_METAG_ATOMICITY_IRQSOFF) +#include <asm/cmpxchg_irq.h> +#elif defined(CONFIG_METAG_ATOMICITY_LOCK1) +#include <asm/cmpxchg_lock1.h> +#elif defined(CONFIG_METAG_ATOMICITY_LNKGET) +#include <asm/cmpxchg_lnkget.h> +#endif + +extern void __xchg_called_with_bad_pointer(void); + +#define __xchg(ptr, x, size) \ +({ \ + unsigned long __xchg__res; \ + volatile void *__xchg_ptr = (ptr); \ + switch (size) { \ + case 4: \ + __xchg__res = xchg_u32(__xchg_ptr, x); \ + break; \ + case 1: \ + __xchg__res = xchg_u8(__xchg_ptr, x); \ + break; \ + default: \ + __xchg_called_with_bad_pointer(); \ + __xchg__res = x; \ + break; \ + } \ + \ + __xchg__res; \ +}) + +#define xchg(ptr, x) \ + ((__typeof__(*(ptr)))__xchg((ptr), (unsigned long)(x), sizeof(*(ptr)))) + +/* This function doesn't exist, so you'll get a linker error + * if something tries to do an invalid cmpxchg(). */ +extern void __cmpxchg_called_with_bad_pointer(void); + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32(ptr, old, new); + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#define __HAVE_ARCH_CMPXCHG 1 + +#define cmpxchg(ptr, o, n) \ + ({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, \ + sizeof(*(ptr))); \ + }) + +#endif /* __ASM_METAG_CMPXCHG_H */ diff --git a/arch/metag/include/asm/cmpxchg_irq.h b/arch/metag/include/asm/cmpxchg_irq.h new file mode 100644 index 000000000000..649573168b05 --- /dev/null +++ b/arch/metag/include/asm/cmpxchg_irq.h @@ -0,0 +1,42 @@ +#ifndef __ASM_METAG_CMPXCHG_IRQ_H +#define __ASM_METAG_CMPXCHG_IRQ_H + +#include <linux/irqflags.h> + +static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val) +{ + unsigned long flags, retval; + + local_irq_save(flags); + retval = *m; + *m = val; + local_irq_restore(flags); + return retval; +} + +static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val) +{ + unsigned long flags, retval; + + local_irq_save(flags); + retval = *m; + *m = val & 0xff; + local_irq_restore(flags); + return retval; +} + +static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old, + unsigned long new) +{ + __u32 retval; + unsigned long flags; + + local_irq_save(flags); + retval = *m; + if (retval == old) + *m = new; + local_irq_restore(flags); /* implies memory barrier */ + return retval; +} + +#endif /* __ASM_METAG_CMPXCHG_IRQ_H */ diff --git a/arch/metag/include/asm/cmpxchg_lnkget.h b/arch/metag/include/asm/cmpxchg_lnkget.h new file mode 100644 index 000000000000..0154e2807ebb --- /dev/null +++ b/arch/metag/include/asm/cmpxchg_lnkget.h @@ -0,0 +1,86 @@ +#ifndef __ASM_METAG_CMPXCHG_LNKGET_H +#define __ASM_METAG_CMPXCHG_LNKGET_H + +static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val) +{ + int temp, old; + + smp_mb(); + + asm volatile ( + "1: LNKGETD %1, [%2]\n" + " LNKSETD [%2], %3\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" +#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE + " DCACHE [%2], %0\n" +#endif + : "=&d" (temp), "=&d" (old) + : "da" (m), "da" (val) + : "cc" + ); + + smp_mb(); + + return old; +} + +static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val) +{ + int temp, old; + + smp_mb(); + + asm volatile ( + "1: LNKGETD %1, [%2]\n" + " LNKSETD [%2], %3\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" +#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE + " DCACHE [%2], %0\n" +#endif + : "=&d" (temp), "=&d" (old) + : "da" (m), "da" (val & 0xff) + : "cc" + ); + + smp_mb(); + + return old; +} + +static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old, + unsigned long new) +{ + __u32 retval, temp; + + smp_mb(); + + asm volatile ( + "1: LNKGETD %1, [%2]\n" + " CMP %1, %3\n" + " LNKSETDEQ [%2], %4\n" + " BNE 2f\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" +#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE + " DCACHE [%2], %0\n" +#endif + "2:\n" + : "=&d" (temp), "=&da" (retval) + : "da" (m), "bd" (old), "da" (new) + : "cc" + ); + + smp_mb(); + + return retval; +} + +#endif /* __ASM_METAG_CMPXCHG_LNKGET_H */ diff --git a/arch/metag/include/asm/cmpxchg_lock1.h b/arch/metag/include/asm/cmpxchg_lock1.h new file mode 100644 index 000000000000..fd6850474969 --- /dev/null +++ b/arch/metag/include/asm/cmpxchg_lock1.h @@ -0,0 +1,48 @@ +#ifndef __ASM_METAG_CMPXCHG_LOCK1_H +#define __ASM_METAG_CMPXCHG_LOCK1_H + +#include <asm/global_lock.h> + +/* Use LOCK2 as these have to be atomic w.r.t. ordinary accesses. */ + +static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val) +{ + unsigned long flags, retval; + + __global_lock2(flags); + fence(); + retval = *m; + *m = val; + __global_unlock2(flags); + return retval; +} + +static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val) +{ + unsigned long flags, retval; + + __global_lock2(flags); + fence(); + retval = *m; + *m = val & 0xff; + __global_unlock2(flags); + return retval; +} + +static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old, + unsigned long new) +{ + __u32 retval; + unsigned long flags; + + __global_lock2(flags); + retval = *m; + if (retval == old) { + fence(); + *m = new; + } + __global_unlock2(flags); + return retval; +} + +#endif /* __ASM_METAG_CMPXCHG_LOCK1_H */ diff --git a/arch/metag/include/asm/core_reg.h b/arch/metag/include/asm/core_reg.h new file mode 100644 index 000000000000..bdbc3a51f31c --- /dev/null +++ b/arch/metag/include/asm/core_reg.h @@ -0,0 +1,35 @@ +#ifndef __ASM_METAG_CORE_REG_H_ +#define __ASM_METAG_CORE_REG_H_ + +#include <asm/metag_regs.h> + +extern void core_reg_write(int unit, int reg, int thread, unsigned int val); +extern unsigned int core_reg_read(int unit, int reg, int thread); + +/* + * These macros allow direct access from C to any register known to the + * assembler. Example candidates are TXTACTCYC, TXIDLECYC, and TXPRIVEXT. + */ + +#define __core_reg_get(reg) ({ \ + unsigned int __grvalue; \ + asm volatile("MOV %0," #reg \ + : "=r" (__grvalue)); \ + __grvalue; \ +}) + +#define __core_reg_set(reg, value) do { \ + unsigned int __srvalue = (value); \ + asm volatile("MOV " #reg ",%0" \ + : \ + : "r" (__srvalue)); \ +} while (0) + +#define __core_reg_swap(reg, value) do { \ + unsigned int __srvalue = (value); \ + asm volatile("SWAP " #reg ",%0" \ + : "+r" (__srvalue)); \ + (value) = __srvalue; \ +} while (0) + +#endif diff --git a/arch/metag/include/asm/cpu.h b/arch/metag/include/asm/cpu.h new file mode 100644 index 000000000000..decf12969268 --- /dev/null +++ b/arch/metag/include/asm/cpu.h @@ -0,0 +1,14 @@ +#ifndef _ASM_METAG_CPU_H +#define _ASM_METAG_CPU_H + +#include <linux/percpu.h> + +struct cpuinfo_metag { + struct cpu cpu; +#ifdef CONFIG_SMP + unsigned long loops_per_jiffy; +#endif +}; + +DECLARE_PER_CPU(struct cpuinfo_metag, cpu_data); +#endif /* _ASM_METAG_CPU_H */ diff --git a/arch/metag/include/asm/da.h b/arch/metag/include/asm/da.h new file mode 100644 index 000000000000..81bd5212fb03 --- /dev/null +++ b/arch/metag/include/asm/da.h @@ -0,0 +1,43 @@ +/* + * Meta DA JTAG debugger control. + * + * Copyright 2012 Imagination Technologies Ltd. + */ + +#ifndef _METAG_DA_H_ +#define _METAG_DA_H_ + +#ifdef CONFIG_METAG_DA + +#include <linux/init.h> +#include <linux/types.h> + +extern bool _metag_da_present; + +/** + * metag_da_enabled() - Find whether a DA is currently enabled. + * + * Returns: true if a DA was detected, false if not. + */ +static inline bool metag_da_enabled(void) +{ + return _metag_da_present; +} + +/** + * metag_da_probe() - Try and detect a connected DA. + * + * This is used at start up to detect whether a DA is active. + * + * Returns: 0 on detection, -err otherwise. + */ +int __init metag_da_probe(void); + +#else /* !CONFIG_METAG_DA */ + +#define metag_da_enabled() false +#define metag_da_probe() do {} while (0) + +#endif + +#endif /* _METAG_DA_H_ */ diff --git a/arch/metag/include/asm/delay.h b/arch/metag/include/asm/delay.h new file mode 100644 index 000000000000..9c92f996957a --- /dev/null +++ b/arch/metag/include/asm/delay.h @@ -0,0 +1,29 @@ +#ifndef _METAG_DELAY_H +#define _METAG_DELAY_H + +/* + * Copyright (C) 1993 Linus Torvalds + * + * Delay routines calling functions in arch/metag/lib/delay.c + */ + +/* Undefined functions to get compile-time errors */ +extern void __bad_udelay(void); +extern void __bad_ndelay(void); + +extern void __udelay(unsigned long usecs); +extern void __ndelay(unsigned long nsecs); +extern void __const_udelay(unsigned long xloops); +extern void __delay(unsigned long loops); + +/* 0x10c7 is 2**32 / 1000000 (rounded up) */ +#define udelay(n) (__builtin_constant_p(n) ? \ + ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \ + __udelay(n)) + +/* 0x5 is 2**32 / 1000000000 (rounded up) */ +#define ndelay(n) (__builtin_constant_p(n) ? \ + ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ + __ndelay(n)) + +#endif /* _METAG_DELAY_H */ diff --git a/arch/metag/include/asm/div64.h b/arch/metag/include/asm/div64.h new file mode 100644 index 000000000000..0fdd11676212 --- /dev/null +++ b/arch/metag/include/asm/div64.h @@ -0,0 +1,12 @@ +#ifndef __ASM_DIV64_H__ +#define __ASM_DIV64_H__ + +#include <asm-generic/div64.h> + +extern u64 div_u64(u64 dividend, u64 divisor); +extern s64 div_s64(s64 dividend, s64 divisor); + +#define div_u64 div_u64 +#define div_s64 div_s64 + +#endif diff --git a/arch/metag/include/asm/dma-mapping.h b/arch/metag/include/asm/dma-mapping.h new file mode 100644 index 000000000000..14b23efd9b7a --- /dev/null +++ b/arch/metag/include/asm/dma-mapping.h @@ -0,0 +1,190 @@ +#ifndef _ASM_METAG_DMA_MAPPING_H +#define _ASM_METAG_DMA_MAPPING_H + +#include <linux/mm.h> + +#include <asm/cache.h> +#include <asm/io.h> +#include <linux/scatterlist.h> +#include <asm/bug.h> + +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) + +void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag); + +void dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle); + +void dma_sync_for_device(void *vaddr, size_t size, int dma_direction); +void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction); + +int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size); + +int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size); + +static inline dma_addr_t +dma_map_single(struct device *dev, void *ptr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); + WARN_ON(size == 0); + dma_sync_for_device(ptr, size, direction); + return virt_to_phys(ptr); +} + +static inline void +dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); + dma_sync_for_cpu(phys_to_virt(dma_addr), size, direction); +} + +static inline int +dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, + enum dma_data_direction direction) +{ + struct scatterlist *sg; + int i; + + BUG_ON(!valid_dma_direction(direction)); + WARN_ON(nents == 0 || sglist[0].length == 0); + + for_each_sg(sglist, sg, nents, i) { + BUG_ON(!sg_page(sg)); + + sg->dma_address = sg_phys(sg); + dma_sync_for_device(sg_virt(sg), sg->length, direction); + } + + return nents; +} + +static inline dma_addr_t +dma_map_page(struct device *dev, struct page *page, unsigned long offset, + size_t size, enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); + dma_sync_for_device((void *)(page_to_phys(page) + offset), size, + direction); + return page_to_phys(page) + offset; +} + +static inline void +dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); + dma_sync_for_cpu(phys_to_virt(dma_address), size, direction); +} + + +static inline void +dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nhwentries, + enum dma_data_direction direction) +{ + struct scatterlist *sg; + int i; + + BUG_ON(!valid_dma_direction(direction)); + WARN_ON(nhwentries == 0 || sglist[0].length == 0); + + for_each_sg(sglist, sg, nhwentries, i) { + BUG_ON(!sg_page(sg)); + + sg->dma_address = sg_phys(sg); + dma_sync_for_cpu(sg_virt(sg), sg->length, direction); + } +} + +static inline void +dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction) +{ + dma_sync_for_cpu(phys_to_virt(dma_handle), size, direction); +} + +static inline void +dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + dma_sync_for_device(phys_to_virt(dma_handle), size, direction); +} + +static inline void +dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + dma_sync_for_cpu(phys_to_virt(dma_handle)+offset, size, + direction); +} + +static inline void +dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + dma_sync_for_device(phys_to_virt(dma_handle)+offset, size, + direction); +} + +static inline void +dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, + enum dma_data_direction direction) +{ + int i; + for (i = 0; i < nelems; i++, sg++) + dma_sync_for_cpu(sg_virt(sg), sg->length, direction); +} + +static inline void +dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, + enum dma_data_direction direction) +{ + int i; + for (i = 0; i < nelems; i++, sg++) + dma_sync_for_device(sg_virt(sg), sg->length, direction); +} + +static inline int +dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return 0; +} + +#define dma_supported(dev, mask) (1) + +static inline int +dma_set_mask(struct device *dev, u64 mask) +{ + if (!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + + *dev->dma_mask = mask; + + return 0; +} + +/* + * dma_alloc_noncoherent() returns non-cacheable memory, so there's no need to + * do any flushing here. + */ +static inline void +dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction) +{ +} + +/* drivers/base/dma-mapping.c */ +extern int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, + size_t size); + +#define dma_get_sgtable(d, t, v, h, s) dma_common_get_sgtable(d, t, v, h, s) + +#endif diff --git a/arch/metag/include/asm/elf.h b/arch/metag/include/asm/elf.h new file mode 100644 index 000000000000..d63b9d0e57dd --- /dev/null +++ b/arch/metag/include/asm/elf.h @@ -0,0 +1,128 @@ +#ifndef __ASM_METAG_ELF_H +#define __ASM_METAG_ELF_H + +#define EM_METAG 174 + +/* Meta relocations */ +#define R_METAG_HIADDR16 0 +#define R_METAG_LOADDR16 1 +#define R_METAG_ADDR32 2 +#define R_METAG_NONE 3 +#define R_METAG_RELBRANCH 4 +#define R_METAG_GETSETOFF 5 + +/* Backward compatability */ +#define R_METAG_REG32OP1 6 +#define R_METAG_REG32OP2 7 +#define R_METAG_REG32OP3 8 +#define R_METAG_REG16OP1 9 +#define R_METAG_REG16OP2 10 +#define R_METAG_REG16OP3 11 +#define R_METAG_REG32OP4 12 + +#define R_METAG_HIOG 13 +#define R_METAG_LOOG 14 + +/* GNU */ +#define R_METAG_GNU_VTINHERIT 30 +#define R_METAG_GNU_VTENTRY 31 + +/* PIC relocations */ +#define R_METAG_HI16_GOTOFF 32 +#define R_METAG_LO16_GOTOFF 33 +#define R_METAG_GETSET_GOTOFF 34 +#define R_METAG_GETSET_GOT 35 +#define R_METAG_HI16_GOTPC 36 +#define R_METAG_LO16_GOTPC 37 +#define R_METAG_HI16_PLT 38 +#define R_METAG_LO16_PLT 39 +#define R_METAG_RELBRANCH_PLT 40 +#define R_METAG_GOTOFF 41 +#define R_METAG_PLT 42 +#define R_METAG_COPY 43 +#define R_METAG_JMP_SLOT 44 +#define R_METAG_RELATIVE 45 +#define R_METAG_GLOB_DAT 46 + +/* + * ELF register definitions. + */ + +#include <asm/page.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/user.h> + +typedef unsigned long elf_greg_t; + +#define ELF_NGREG (sizeof(struct user_gp_regs) / sizeof(elf_greg_t)) +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef unsigned long elf_fpregset_t; + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) ((x)->e_machine == EM_METAG) + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS32 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_METAG + +#define ELF_PLAT_INIT(_r, load_addr) \ + do { _r->ctx.AX[0].U0 = 0; } while (0) + +#define USE_ELF_CORE_DUMP +#define CORE_DUMP_USE_REGSET +#define ELF_EXEC_PAGESIZE PAGE_SIZE + +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + +#define ELF_ET_DYN_BASE 0x08000000UL + +#define ELF_CORE_COPY_REGS(_dest, _regs) \ + memcpy((char *)&_dest, (char *)_regs, sizeof(struct pt_regs)); + +/* This yields a mask that user programs can use to figure out what + instruction set this cpu supports. */ + +#define ELF_HWCAP (0) + +/* This yields a string that ld.so will use to load implementation + specific libraries for optimization. This is more specific in + intent than poking at uname or /proc/cpuinfo. */ + +#define ELF_PLATFORM (NULL) + +#define SET_PERSONALITY(ex) \ + set_personality(PER_LINUX | (current->personality & (~PER_MASK))) + +#define STACK_RND_MASK (0) + +#ifdef CONFIG_METAG_USER_TCM + +struct elf32_phdr; +struct file; + +unsigned long __metag_elf_map(struct file *filep, unsigned long addr, + struct elf32_phdr *eppnt, int prot, int type, + unsigned long total_size); + +static inline unsigned long metag_elf_map(struct file *filep, + unsigned long addr, + struct elf32_phdr *eppnt, int prot, + int type, unsigned long total_size) +{ + return __metag_elf_map(filep, addr, eppnt, prot, type, total_size); +} +#define elf_map metag_elf_map + +#endif + +#endif diff --git a/arch/metag/include/asm/fixmap.h b/arch/metag/include/asm/fixmap.h new file mode 100644 index 000000000000..33312751c92b --- /dev/null +++ b/arch/metag/include/asm/fixmap.h @@ -0,0 +1,99 @@ +/* + * fixmap.h: compile-time virtual memory allocation + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998 Ingo Molnar + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + */ + +#ifndef _ASM_FIXMAP_H +#define _ASM_FIXMAP_H + +#include <asm/pgtable.h> +#ifdef CONFIG_HIGHMEM +#include <linux/threads.h> +#include <asm/kmap_types.h> +#endif + +/* + * Here we define all the compile-time 'special' virtual + * addresses. The point is to have a constant address at + * compile time, but to set the physical address only + * in the boot process. We allocate these special addresses + * from the end of the consistent memory region backwards. + * Also this lets us do fail-safe vmalloc(), we + * can guarantee that these special addresses and + * vmalloc()-ed addresses never overlap. + * + * these 'compile-time allocated' memory buffers are + * fixed-size 4k pages. (or larger if used with an increment + * higher than 1) use fixmap_set(idx,phys) to associate + * physical memory with fixmap indices. + * + * TLB entries of such buffers will not be flushed across + * task switches. + */ +enum fixed_addresses { +#define FIX_N_COLOURS 8 +#ifdef CONFIG_HIGHMEM + /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_BEGIN, + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, +#endif + __end_of_fixed_addresses +}; + +#define FIXADDR_TOP (CONSISTENT_START - PAGE_SIZE) +#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START ((FIXADDR_TOP - FIXADDR_SIZE) & PMD_MASK) + +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) +#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) + +extern void __this_fixmap_does_not_exist(void); +/* + * 'index to address' translation. If anyone tries to use the idx + * directly without tranlation, we catch the bug with a NULL-deference + * kernel oops. Illegal ranges of incoming indices are caught too. + */ +static inline unsigned long fix_to_virt(const unsigned int idx) +{ + /* + * this branch gets completely eliminated after inlining, + * except when someone tries to use fixaddr indices in an + * illegal way. (such as mixing up address types or using + * out-of-range indices). + * + * If it doesn't get removed, the linker will complain + * loudly with a reasonably clear error message.. + */ + if (idx >= __end_of_fixed_addresses) + __this_fixmap_does_not_exist(); + + return __fix_to_virt(idx); +} + +static inline unsigned long virt_to_fix(const unsigned long vaddr) +{ + BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); + return __virt_to_fix(vaddr); +} + +#define kmap_get_fixmap_pte(vaddr) \ + pte_offset_kernel( \ + pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), \ + (vaddr) \ + ) + +/* + * Called from pgtable_init() + */ +extern void fixrange_init(unsigned long start, unsigned long end, + pgd_t *pgd_base); + + +#endif diff --git a/arch/metag/include/asm/ftrace.h b/arch/metag/include/asm/ftrace.h new file mode 100644 index 000000000000..2901f0f7d944 --- /dev/null +++ b/arch/metag/include/asm/ftrace.h @@ -0,0 +1,23 @@ +#ifndef _ASM_METAG_FTRACE +#define _ASM_METAG_FTRACE + +#ifdef CONFIG_FUNCTION_TRACER +#define MCOUNT_INSN_SIZE 8 /* sizeof mcount call */ + +#ifndef __ASSEMBLY__ +extern void mcount_wrapper(void); +#define MCOUNT_ADDR ((long)(mcount_wrapper)) + +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} + +struct dyn_arch_ftrace { + /* No extra data needed on metag */ +}; +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_FUNCTION_TRACER */ + +#endif /* _ASM_METAG_FTRACE */ diff --git a/arch/metag/include/asm/global_lock.h b/arch/metag/include/asm/global_lock.h new file mode 100644 index 000000000000..fc831c88c22a --- /dev/null +++ b/arch/metag/include/asm/global_lock.h @@ -0,0 +1,100 @@ +#ifndef __ASM_METAG_GLOBAL_LOCK_H +#define __ASM_METAG_GLOBAL_LOCK_H + +#include <asm/metag_mem.h> + +/** + * __global_lock1() - Acquire global voluntary lock (LOCK1). + * @flags: Variable to store flags into. + * + * Acquires the Meta global voluntary lock (LOCK1), also taking care to disable + * all triggers so we cannot be interrupted, and to enforce a compiler barrier + * so that the compiler cannot reorder memory accesses across the lock. + * + * No other hardware thread will be able to acquire the voluntary or exclusive + * locks until the voluntary lock is released with @__global_unlock1, but they + * may continue to execute as long as they aren't trying to acquire either of + * the locks. + */ +#define __global_lock1(flags) do { \ + unsigned int __trval; \ + asm volatile("MOV %0,#0\n\t" \ + "SWAP %0,TXMASKI\n\t" \ + "LOCK1" \ + : "=r" (__trval) \ + : \ + : "memory"); \ + (flags) = __trval; \ +} while (0) + +/** + * __global_unlock1() - Release global voluntary lock (LOCK1). + * @flags: Variable to restore flags from. + * + * Releases the Meta global voluntary lock (LOCK1) acquired with + * @__global_lock1, also taking care to re-enable triggers, and to enforce a + * compiler barrier so that the compiler cannot reorder memory accesses across + * the unlock. + * + * This immediately allows another hardware thread to acquire the voluntary or + * exclusive locks. + */ +#define __global_unlock1(flags) do { \ + unsigned int __trval = (flags); \ + asm volatile("LOCK0\n\t" \ + "MOV TXMASKI,%0" \ + : \ + : "r" (__trval) \ + : "memory"); \ +} while (0) + +/** + * __global_lock2() - Acquire global exclusive lock (LOCK2). + * @flags: Variable to store flags into. + * + * Acquires the Meta global voluntary lock and global exclusive lock (LOCK2), + * also taking care to disable all triggers so we cannot be interrupted, to take + * the atomic lock (system event) and to enforce a compiler barrier so that the + * compiler cannot reorder memory accesses across the lock. + * + * No other hardware thread will be able to execute code until the locks are + * released with @__global_unlock2. + */ +#define __global_lock2(flags) do { \ + unsigned int __trval; \ + unsigned int __aloc_hi = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000; \ + asm volatile("MOV %0,#0\n\t" \ + "SWAP %0,TXMASKI\n\t" \ + "LOCK2\n\t" \ + "SETD [%1+#0x40],D1RtP" \ + : "=r&" (__trval) \ + : "u" (__aloc_hi) \ + : "memory"); \ + (flags) = __trval; \ +} while (0) + +/** + * __global_unlock2() - Release global exclusive lock (LOCK2). + * @flags: Variable to restore flags from. + * + * Releases the Meta global exclusive lock (LOCK2) and global voluntary lock + * acquired with @__global_lock2, also taking care to release the atomic lock + * (system event), re-enable triggers, and to enforce a compiler barrier so that + * the compiler cannot reorder memory accesses across the unlock. + * + * This immediately allows other hardware threads to continue executing and one + * of them to acquire locks. + */ +#define __global_unlock2(flags) do { \ + unsigned int __trval = (flags); \ + unsigned int __alock_hi = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000; \ + asm volatile("SETD [%1+#0x00],D1RtP\n\t" \ + "LOCK0\n\t" \ + "MOV TXMASKI,%0" \ + : \ + : "r" (__trval), \ + "u" (__alock_hi) \ + : "memory"); \ +} while (0) + +#endif /* __ASM_METAG_GLOBAL_LOCK_H */ diff --git a/arch/metag/include/asm/gpio.h b/arch/metag/include/asm/gpio.h new file mode 100644 index 000000000000..b3799d88ffcf --- /dev/null +++ b/arch/metag/include/asm/gpio.h @@ -0,0 +1,4 @@ +#ifndef __LINUX_GPIO_H +#warning Include linux/gpio.h instead of asm/gpio.h +#include <linux/gpio.h> +#endif diff --git a/arch/metag/include/asm/highmem.h b/arch/metag/include/asm/highmem.h new file mode 100644 index 000000000000..6646a15c73dd --- /dev/null +++ b/arch/metag/include/asm/highmem.h @@ -0,0 +1,62 @@ +#ifndef _ASM_HIGHMEM_H +#define _ASM_HIGHMEM_H + +#include <asm/cacheflush.h> +#include <asm/kmap_types.h> +#include <asm/fixmap.h> + +/* + * Right now we initialize only a single pte table. It can be extended + * easily, subsequent pte tables have to be allocated in one physical + * chunk of RAM. + */ +/* + * Ordering is (from lower to higher memory addresses): + * + * high_memory + * Persistent kmap area + * PKMAP_BASE + * fixed_addresses + * FIXADDR_START + * FIXADDR_TOP + * Vmalloc area + * VMALLOC_START + * VMALLOC_END + */ +#define PKMAP_BASE (FIXADDR_START - PMD_SIZE) +#define LAST_PKMAP PTRS_PER_PTE +#define LAST_PKMAP_MASK (LAST_PKMAP - 1) +#define PKMAP_NR(virt) (((virt) - PKMAP_BASE) >> PAGE_SHIFT) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) + +#define kmap_prot PAGE_KERNEL + +static inline void flush_cache_kmaps(void) +{ + flush_cache_all(); +} + +/* declarations for highmem.c */ +extern unsigned long highstart_pfn, highend_pfn; + +extern pte_t *pkmap_page_table; + +extern void *kmap_high(struct page *page); +extern void kunmap_high(struct page *page); + +extern void kmap_init(void); + +/* + * The following functions are already defined by <linux/highmem.h> + * when CONFIG_HIGHMEM is not set. + */ +#ifdef CONFIG_HIGHMEM +extern void *kmap(struct page *page); +extern void kunmap(struct page *page); +extern void *kmap_atomic(struct page *page); +extern void __kunmap_atomic(void *kvaddr); +extern void *kmap_atomic_pfn(unsigned long pfn); +extern struct page *kmap_atomic_to_page(void *ptr); +#endif + +#endif diff --git a/arch/metag/include/asm/hugetlb.h b/arch/metag/include/asm/hugetlb.h new file mode 100644 index 000000000000..f545477e61f3 --- /dev/null +++ b/arch/metag/include/asm/hugetlb.h @@ -0,0 +1,86 @@ +#ifndef _ASM_METAG_HUGETLB_H +#define _ASM_METAG_HUGETLB_H + +#include <asm/page.h> + + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, + unsigned long len) { + return 0; +} + +int prepare_hugepage_range(struct file *file, unsigned long addr, + unsigned long len); + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) +{ +} + +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, + unsigned long ceiling) +{ + free_pgd_range(tlb, addr, end, floor, ceiling); +} + +static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte_at(mm, addr, ptep, pte); +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +static inline void arch_clear_hugepage_flags(struct page *page) +{ +} + +#endif /* _ASM_METAG_HUGETLB_H */ diff --git a/arch/metag/include/asm/hwthread.h b/arch/metag/include/asm/hwthread.h new file mode 100644 index 000000000000..8f9786619b1d --- /dev/null +++ b/arch/metag/include/asm/hwthread.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2008 Imagination Technologies + */ +#ifndef __METAG_HWTHREAD_H +#define __METAG_HWTHREAD_H + +#include <linux/bug.h> +#include <linux/io.h> + +#include <asm/metag_mem.h> + +#define BAD_HWTHREAD_ID (0xFFU) +#define BAD_CPU_ID (0xFFU) + +extern u8 cpu_2_hwthread_id[]; +extern u8 hwthread_id_2_cpu[]; + +/* + * Each hardware thread's Control Unit registers are memory-mapped + * and can therefore be accessed by any other hardware thread. + * + * This helper function returns the memory address where "thread"'s + * register "regnum" is mapped. + */ +static inline +void __iomem *__CU_addr(unsigned int thread, unsigned int regnum) +{ + unsigned int base, thread_offset, thread_regnum; + + WARN_ON(thread == BAD_HWTHREAD_ID); + + base = T0UCTREG0; /* Control unit base */ + + thread_offset = TnUCTRX_STRIDE * thread; + thread_regnum = TXUCTREGn_STRIDE * regnum; + + return (void __iomem *)(base + thread_offset + thread_regnum); +} + +#endif /* __METAG_HWTHREAD_H */ diff --git a/arch/metag/include/asm/io.h b/arch/metag/include/asm/io.h new file mode 100644 index 000000000000..9359e5048442 --- /dev/null +++ b/arch/metag/include/asm/io.h @@ -0,0 +1,165 @@ +#ifndef _ASM_METAG_IO_H +#define _ASM_METAG_IO_H + +#include <linux/types.h> + +#define IO_SPACE_LIMIT 0 + +#define page_to_bus page_to_phys +#define bus_to_page phys_to_page + +/* + * Generic I/O + */ + +#define __raw_readb __raw_readb +static inline u8 __raw_readb(const volatile void __iomem *addr) +{ + u8 ret; + asm volatile("GETB %0,[%1]" + : "=da" (ret) + : "da" (addr) + : "memory"); + return ret; +} + +#define __raw_readw __raw_readw +static inline u16 __raw_readw(const volatile void __iomem *addr) +{ + u16 ret; + asm volatile("GETW %0,[%1]" + : "=da" (ret) + : "da" (addr) + : "memory"); + return ret; +} + +#define __raw_readl __raw_readl +static inline u32 __raw_readl(const volatile void __iomem *addr) +{ + u32 ret; + asm volatile("GETD %0,[%1]" + : "=da" (ret) + : "da" (addr) + : "memory"); + return ret; +} + +#define __raw_readq __raw_readq +static inline u64 __raw_readq(const volatile void __iomem *addr) +{ + u64 ret; + asm volatile("GETL %0,%t0,[%1]" + : "=da" (ret) + : "da" (addr) + : "memory"); + return ret; +} + +#define __raw_writeb __raw_writeb +static inline void __raw_writeb(u8 b, volatile void __iomem *addr) +{ + asm volatile("SETB [%0],%1" + : + : "da" (addr), + "da" (b) + : "memory"); +} + +#define __raw_writew __raw_writew +static inline void __raw_writew(u16 b, volatile void __iomem *addr) +{ + asm volatile("SETW [%0],%1" + : + : "da" (addr), + "da" (b) + : "memory"); +} + +#define __raw_writel __raw_writel +static inline void __raw_writel(u32 b, volatile void __iomem *addr) +{ + asm volatile("SETD [%0],%1" + : + : "da" (addr), + "da" (b) + : "memory"); +} + +#define __raw_writeq __raw_writeq +static inline void __raw_writeq(u64 b, volatile void __iomem *addr) +{ + asm volatile("SETL [%0],%1,%t1" + : + : "da" (addr), + "da" (b) + : "memory"); +} + +/* + * The generic io.h can define all the other generic accessors + */ + +#include <asm-generic/io.h> + +/* + * Despite being a 32bit architecture, Meta can do 64bit memory accesses + * (assuming the bus supports it). + */ + +#define readq __raw_readq +#define writeq __raw_writeq + +/* + * Meta specific I/O for accessing non-MMU areas. + * + * These can be provided with a physical address rather than an __iomem pointer + * and should only be used by core architecture code for accessing fixed core + * registers. Generic drivers should use ioremap and the generic I/O accessors. + */ + +#define metag_in8(addr) __raw_readb((volatile void __iomem *)(addr)) +#define metag_in16(addr) __raw_readw((volatile void __iomem *)(addr)) +#define metag_in32(addr) __raw_readl((volatile void __iomem *)(addr)) +#define metag_in64(addr) __raw_readq((volatile void __iomem *)(addr)) + +#define metag_out8(b, addr) __raw_writeb(b, (volatile void __iomem *)(addr)) +#define metag_out16(b, addr) __raw_writew(b, (volatile void __iomem *)(addr)) +#define metag_out32(b, addr) __raw_writel(b, (volatile void __iomem *)(addr)) +#define metag_out64(b, addr) __raw_writeq(b, (volatile void __iomem *)(addr)) + +/* + * io remapping functions + */ + +extern void __iomem *__ioremap(unsigned long offset, + size_t size, unsigned long flags); +extern void __iounmap(void __iomem *addr); + +/** + * ioremap - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + */ +#define ioremap(offset, size) \ + __ioremap((offset), (size), 0) + +#define ioremap_nocache(offset, size) \ + __ioremap((offset), (size), 0) + +#define ioremap_cached(offset, size) \ + __ioremap((offset), (size), _PAGE_CACHEABLE) + +#define ioremap_wc(offset, size) \ + __ioremap((offset), (size), _PAGE_WR_COMBINE) + +#define iounmap(addr) \ + __iounmap(addr) + +#endif /* _ASM_METAG_IO_H */ diff --git a/arch/metag/include/asm/irq.h b/arch/metag/include/asm/irq.h new file mode 100644 index 000000000000..be0c8f3c5a5d --- /dev/null +++ b/arch/metag/include/asm/irq.h @@ -0,0 +1,32 @@ +#ifndef __ASM_METAG_IRQ_H +#define __ASM_METAG_IRQ_H + +#ifdef CONFIG_4KSTACKS +extern void irq_ctx_init(int cpu); +extern void irq_ctx_exit(int cpu); +# define __ARCH_HAS_DO_SOFTIRQ +#else +# define irq_ctx_init(cpu) do { } while (0) +# define irq_ctx_exit(cpu) do { } while (0) +#endif + +void tbi_startup_interrupt(int); +void tbi_shutdown_interrupt(int); + +struct pt_regs; + +int tbisig_map(unsigned int hw); +extern void do_IRQ(int irq, struct pt_regs *regs); + +#ifdef CONFIG_METAG_SUSPEND_MEM +int traps_save_context(void); +int traps_restore_context(void); +#endif + +#include <asm-generic/irq.h> + +#ifdef CONFIG_HOTPLUG_CPU +extern void migrate_irqs(void); +#endif + +#endif /* __ASM_METAG_IRQ_H */ diff --git a/arch/metag/include/asm/irqflags.h b/arch/metag/include/asm/irqflags.h new file mode 100644 index 000000000000..339b16f062eb --- /dev/null +++ b/arch/metag/include/asm/irqflags.h @@ -0,0 +1,93 @@ +/* + * IRQ flags handling + * + * This file gets included from lowlevel asm headers too, to provide + * wrapped versions of the local_irq_*() APIs, based on the + * raw_local_irq_*() functions from the lowlevel headers. + */ +#ifndef _ASM_IRQFLAGS_H +#define _ASM_IRQFLAGS_H + +#ifndef __ASSEMBLY__ + +#include <asm/core_reg.h> +#include <asm/metag_regs.h> + +#define INTS_OFF_MASK TXSTATI_BGNDHALT_BIT + +#ifdef CONFIG_SMP +extern unsigned int get_trigger_mask(void); +#else + +extern unsigned int global_trigger_mask; + +static inline unsigned int get_trigger_mask(void) +{ + return global_trigger_mask; +} +#endif + +static inline unsigned long arch_local_save_flags(void) +{ + return __core_reg_get(TXMASKI); +} + +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + return (flags & ~INTS_OFF_MASK) == 0; +} + +static inline int arch_irqs_disabled(void) +{ + unsigned long flags = arch_local_save_flags(); + + return arch_irqs_disabled_flags(flags); +} + +static inline unsigned long __irqs_disabled(void) +{ + /* + * We shouldn't enable exceptions if they are not already + * enabled. This is required for chancalls to work correctly. + */ + return arch_local_save_flags() & INTS_OFF_MASK; +} + +/* + * For spinlocks, etc: + */ +static inline unsigned long arch_local_irq_save(void) +{ + unsigned long flags = __irqs_disabled(); + + asm volatile("SWAP %0,TXMASKI\n" : "=r" (flags) : "0" (flags) + : "memory"); + + return flags; +} + +static inline void arch_local_irq_restore(unsigned long flags) +{ + asm volatile("MOV TXMASKI,%0\n" : : "r" (flags) : "memory"); +} + +static inline void arch_local_irq_disable(void) +{ + unsigned long flags = __irqs_disabled(); + + asm volatile("MOV TXMASKI,%0\n" : : "r" (flags) : "memory"); +} + +#ifdef CONFIG_SMP +/* Avoid circular include dependencies through <linux/preempt.h> */ +void arch_local_irq_enable(void); +#else +static inline void arch_local_irq_enable(void) +{ + arch_local_irq_restore(get_trigger_mask()); +} +#endif + +#endif /* (__ASSEMBLY__) */ + +#endif /* !(_ASM_IRQFLAGS_H) */ diff --git a/arch/metag/include/asm/l2cache.h b/arch/metag/include/asm/l2cache.h new file mode 100644 index 000000000000..bffbeaa4d93b --- /dev/null +++ b/arch/metag/include/asm/l2cache.h @@ -0,0 +1,258 @@ +#ifndef _METAG_L2CACHE_H +#define _METAG_L2CACHE_H + +#ifdef CONFIG_METAG_L2C + +#include <asm/global_lock.h> +#include <asm/io.h> + +/* + * Store the last known value of pfenable (we don't want prefetch enabled while + * L2 is off). + */ +extern int l2c_pfenable; + +/* defined in arch/metag/drivers/core-sysfs.c */ +extern struct sysdev_class cache_sysclass; + +static inline void wr_fence(void); + +/* + * Functions for reading of L2 cache configuration. + */ + +/* Get raw L2 config register (CORE_CONFIG3) */ +static inline unsigned int meta_l2c_config(void) +{ + const unsigned int *corecfg3 = (const unsigned int *)METAC_CORE_CONFIG3; + return *corecfg3; +} + +/* Get whether the L2 is present */ +static inline int meta_l2c_is_present(void) +{ + return meta_l2c_config() & METAC_CORECFG3_L2C_HAVE_L2C_BIT; +} + +/* Get whether the L2 is configured for write-back instead of write-through */ +static inline int meta_l2c_is_writeback(void) +{ + return meta_l2c_config() & METAC_CORECFG3_L2C_MODE_BIT; +} + +/* Get whether the L2 is unified instead of separated code/data */ +static inline int meta_l2c_is_unified(void) +{ + return meta_l2c_config() & METAC_CORECFG3_L2C_UNIFIED_BIT; +} + +/* Get the L2 cache size in bytes */ +static inline unsigned int meta_l2c_size(void) +{ + unsigned int size_s; + if (!meta_l2c_is_present()) + return 0; + size_s = (meta_l2c_config() & METAC_CORECFG3_L2C_SIZE_BITS) + >> METAC_CORECFG3_L2C_SIZE_S; + /* L2CSIZE is in KiB */ + return 1024 << size_s; +} + +/* Get the number of ways in the L2 cache */ +static inline unsigned int meta_l2c_ways(void) +{ + unsigned int ways_s; + if (!meta_l2c_is_present()) + return 0; + ways_s = (meta_l2c_config() & METAC_CORECFG3_L2C_NUM_WAYS_BITS) + >> METAC_CORECFG3_L2C_NUM_WAYS_S; + return 0x1 << ways_s; +} + +/* Get the line size of the L2 cache */ +static inline unsigned int meta_l2c_linesize(void) +{ + unsigned int line_size; + if (!meta_l2c_is_present()) + return 0; + line_size = (meta_l2c_config() & METAC_CORECFG3_L2C_LINE_SIZE_BITS) + >> METAC_CORECFG3_L2C_LINE_SIZE_S; + switch (line_size) { + case METAC_CORECFG3_L2C_LINE_SIZE_64B: + return 64; + default: + return 0; + } +} + +/* Get the revision ID of the L2 cache */ +static inline unsigned int meta_l2c_revision(void) +{ + return (meta_l2c_config() & METAC_CORECFG3_L2C_REV_ID_BITS) + >> METAC_CORECFG3_L2C_REV_ID_S; +} + + +/* + * Start an initialisation of the L2 cachelines and wait for completion. + * This should only be done in a LOCK1 or LOCK2 critical section while the L2 + * is disabled. + */ +static inline void _meta_l2c_init(void) +{ + metag_out32(SYSC_L2C_INIT_INIT, SYSC_L2C_INIT); + while (metag_in32(SYSC_L2C_INIT) == SYSC_L2C_INIT_IN_PROGRESS) + /* do nothing */; +} + +/* + * Start a writeback of dirty L2 cachelines and wait for completion. + * This should only be done in a LOCK1 or LOCK2 critical section. + */ +static inline void _meta_l2c_purge(void) +{ + metag_out32(SYSC_L2C_PURGE_PURGE, SYSC_L2C_PURGE); + while (metag_in32(SYSC_L2C_PURGE) == SYSC_L2C_PURGE_IN_PROGRESS) + /* do nothing */; +} + +/* Set whether the L2 cache is enabled. */ +static inline void _meta_l2c_enable(int enabled) +{ + unsigned int enable; + + enable = metag_in32(SYSC_L2C_ENABLE); + if (enabled) + enable |= SYSC_L2C_ENABLE_ENABLE_BIT; + else + enable &= ~SYSC_L2C_ENABLE_ENABLE_BIT; + metag_out32(enable, SYSC_L2C_ENABLE); +} + +/* Set whether the L2 cache prefetch is enabled. */ +static inline void _meta_l2c_pf_enable(int pfenabled) +{ + unsigned int enable; + + enable = metag_in32(SYSC_L2C_ENABLE); + if (pfenabled) + enable |= SYSC_L2C_ENABLE_PFENABLE_BIT; + else + enable &= ~SYSC_L2C_ENABLE_PFENABLE_BIT; + metag_out32(enable, SYSC_L2C_ENABLE); +} + +/* Return whether the L2 cache is enabled */ +static inline int _meta_l2c_is_enabled(void) +{ + return metag_in32(SYSC_L2C_ENABLE) & SYSC_L2C_ENABLE_ENABLE_BIT; +} + +/* Return whether the L2 cache prefetch is enabled */ +static inline int _meta_l2c_pf_is_enabled(void) +{ + return metag_in32(SYSC_L2C_ENABLE) & SYSC_L2C_ENABLE_PFENABLE_BIT; +} + + +/* Return whether the L2 cache is enabled */ +static inline int meta_l2c_is_enabled(void) +{ + int en; + + /* + * There is no need to lock at the moment, as the enable bit is never + * intermediately changed, so we will never see an intermediate result. + */ + en = _meta_l2c_is_enabled(); + + return en; +} + +/* + * Ensure the L2 cache is disabled. + * Return whether the L2 was previously disabled. + */ +int meta_l2c_disable(void); + +/* + * Ensure the L2 cache is enabled. + * Return whether the L2 was previously enabled. + */ +int meta_l2c_enable(void); + +/* Return whether the L2 cache prefetch is enabled */ +static inline int meta_l2c_pf_is_enabled(void) +{ + return l2c_pfenable; +} + +/* + * Set whether the L2 cache prefetch is enabled. + * Return whether the L2 prefetch was previously enabled. + */ +int meta_l2c_pf_enable(int pfenable); + +/* + * Flush the L2 cache. + * Return 1 if the L2 is disabled. + */ +int meta_l2c_flush(void); + +/* + * Write back all dirty cache lines in the L2 cache. + * Return 1 if the L2 is disabled or there isn't any writeback. + */ +static inline int meta_l2c_writeback(void) +{ + unsigned long flags; + int en; + + /* no need to purge if it's not a writeback cache */ + if (!meta_l2c_is_writeback()) + return 1; + + /* + * Purge only works if the L2 is enabled, and involves reading back to + * detect completion, so keep this operation atomic with other threads. + */ + __global_lock1(flags); + en = meta_l2c_is_enabled(); + if (likely(en)) { + wr_fence(); + _meta_l2c_purge(); + } + __global_unlock1(flags); + + return !en; +} + +#else /* CONFIG_METAG_L2C */ + +#define meta_l2c_config() 0 +#define meta_l2c_is_present() 0 +#define meta_l2c_is_writeback() 0 +#define meta_l2c_is_unified() 0 +#define meta_l2c_size() 0 +#define meta_l2c_ways() 0 +#define meta_l2c_linesize() 0 +#define meta_l2c_revision() 0 + +#define meta_l2c_is_enabled() 0 +#define _meta_l2c_pf_is_enabled() 0 +#define meta_l2c_pf_is_enabled() 0 +#define meta_l2c_disable() 1 +#define meta_l2c_enable() 0 +#define meta_l2c_pf_enable(X) 0 +static inline int meta_l2c_flush(void) +{ + return 1; +} +static inline int meta_l2c_writeback(void) +{ + return 1; +} + +#endif /* CONFIG_METAG_L2C */ + +#endif /* _METAG_L2CACHE_H */ diff --git a/arch/metag/include/asm/linkage.h b/arch/metag/include/asm/linkage.h new file mode 100644 index 000000000000..73bf25ba4e18 --- /dev/null +++ b/arch/metag/include/asm/linkage.h @@ -0,0 +1,7 @@ +#ifndef __ASM_LINKAGE_H +#define __ASM_LINKAGE_H + +#define __ALIGN .p2align 2 +#define __ALIGN_STR ".p2align 2" + +#endif diff --git a/arch/metag/include/asm/mach/arch.h b/arch/metag/include/asm/mach/arch.h new file mode 100644 index 000000000000..12c5664fea6e --- /dev/null +++ b/arch/metag/include/asm/mach/arch.h @@ -0,0 +1,86 @@ +/* + * arch/metag/include/asm/mach/arch.h + * + * Copyright (C) 2012 Imagination Technologies Ltd. + * + * based on the ARM version: + * Copyright (C) 2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _METAG_MACH_ARCH_H_ +#define _METAG_MACH_ARCH_H_ + +#include <linux/stddef.h> + +#include <asm/clock.h> + +/** + * struct machine_desc - Describes a board controlled by a Meta. + * @name: Board/SoC name. + * @dt_compat: Array of device tree 'compatible' strings. + * @clocks: Clock callbacks. + * + * @nr_irqs: Maximum number of IRQs. + * If 0, defaults to NR_IRQS in asm-generic/irq.h. + * + * @init_early: Early init callback. + * @init_irq: IRQ init callback for setting up IRQ controllers. + * @init_machine: Arch init callback for setting up devices. + * @init_late: Late init callback. + * + * This structure is provided by each board which can be controlled by a Meta. + * It is chosen by matching the compatible strings in the device tree provided + * by the bootloader with the strings in @dt_compat, and sets up any aspects of + * the machine that aren't configured with device tree (yet). + */ +struct machine_desc { + const char *name; + const char **dt_compat; + struct meta_clock_desc *clocks; + + unsigned int nr_irqs; + + void (*init_early)(void); + void (*init_irq)(void); + void (*init_machine)(void); + void (*init_late)(void); +}; + +/* + * Current machine - only accessible during boot. + */ +extern struct machine_desc *machine_desc; + +/* + * Machine type table - also only accessible during boot + */ +extern struct machine_desc __arch_info_begin[], __arch_info_end[]; +#define for_each_machine_desc(p) \ + for (p = __arch_info_begin; p < __arch_info_end; p++) + +static inline struct machine_desc *default_machine_desc(void) +{ + /* the default machine is the last one linked in */ + if (__arch_info_end - 1 < __arch_info_begin) + return NULL; + return __arch_info_end - 1; +} + +/* + * Set of macros to define architecture features. This is built into + * a table by the linker. + */ +#define MACHINE_START(_type, _name) \ +static const struct machine_desc __mach_desc_##_type \ +__used \ +__attribute__((__section__(".arch.info.init"))) = { \ + .name = _name, + +#define MACHINE_END \ +}; + +#endif /* _METAG_MACH_ARCH_H_ */ diff --git a/arch/metag/include/asm/metag_isa.h b/arch/metag/include/asm/metag_isa.h new file mode 100644 index 000000000000..c8aa2ae3899f --- /dev/null +++ b/arch/metag/include/asm/metag_isa.h @@ -0,0 +1,81 @@ +/* + * asm/metag_isa.h + * + * Copyright (C) 2000-2007, 2012 Imagination Technologies. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + * + * Various defines for Meta instruction set. + */ + +#ifndef _ASM_METAG_ISA_H_ +#define _ASM_METAG_ISA_H_ + + +/* L1 cache layout */ + +/* Data cache line size as bytes and shift */ +#define DCACHE_LINE_BYTES 64 +#define DCACHE_LINE_S 6 + +/* Number of ways in the data cache */ +#define DCACHE_WAYS 4 + +/* Instruction cache line size as bytes and shift */ +#define ICACHE_LINE_BYTES 64 +#define ICACHE_LINE_S 6 + +/* Number of ways in the instruction cache */ +#define ICACHE_WAYS 4 + + +/* + * CACHEWD/CACHEWL instructions use the bottom 8 bits of the data presented to + * control the operation actually achieved. + */ +/* Use of these two bits should be discouraged since the bits dont have + * consistent meanings + */ +#define CACHEW_ICACHE_BIT 0x01 +#define CACHEW_TLBFLUSH_BIT 0x02 + +#define CACHEW_FLUSH_L1D_L2 0x0 +#define CACHEW_INVALIDATE_L1I 0x1 +#define CACHEW_INVALIDATE_L1DTLB 0x2 +#define CACHEW_INVALIDATE_L1ITLB 0x3 +#define CACHEW_WRITEBACK_L1D_L2 0x4 +#define CACHEW_INVALIDATE_L1D 0x8 +#define CACHEW_INVALIDATE_L1D_L2 0xC + +/* + * CACHERD/CACHERL instructions use bits 3:5 of the address presented to + * control the operation achieved and hence the specific result. + */ +#define CACHER_ADDR_BITS 0xFFFFFFC0 +#define CACHER_OPER_BITS 0x00000030 +#define CACHER_OPER_S 4 +#define CACHER_OPER_LINPHY 0 +#define CACHER_ICACHE_BIT 0x00000008 +#define CACHER_ICACHE_S 3 + +/* + * CACHERD/CACHERL LINPHY Oper result is one/two 32-bit words + * + * If CRLINPHY0_VAL_BIT (Bit 0) set then, + * Lower 32-bits corresponds to MMCU_ENTRY_* above. + * Upper 32-bits corresponds to CRLINPHY1_* values below (if requested). + * else + * Lower 32-bits corresponds to CRLINPHY0_* values below. + * Upper 32-bits undefined. + */ +#define CRLINPHY0_VAL_BIT 0x00000001 +#define CRLINPHY0_FIRST_BIT 0x00000004 /* Set if VAL=0 due to first level */ + +#define CRLINPHY1_READ_BIT 0x00000001 /* Set if reads permitted */ +#define CRLINPHY1_SINGLE_BIT 0x00000004 /* Set if TLB does not cache entry */ +#define CRLINPHY1_PAGEMSK_BITS 0x0000FFF0 /* Set to ((2^n-1)>>12) value */ +#define CRLINPHY1_PAGEMSK_S 4 + +#endif /* _ASM_METAG_ISA_H_ */ diff --git a/arch/metag/include/asm/metag_mem.h b/arch/metag/include/asm/metag_mem.h new file mode 100644 index 000000000000..3f7b54d8ccac --- /dev/null +++ b/arch/metag/include/asm/metag_mem.h @@ -0,0 +1,1106 @@ +/* + * asm/metag_mem.h + * + * Copyright (C) 2000-2007, 2012 Imagination Technologies. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + * + * Various defines for Meta (memory-mapped) registers. + */ + +#ifndef _ASM_METAG_MEM_H_ +#define _ASM_METAG_MEM_H_ + +/***************************************************************************** + * META MEMORY MAP LINEAR ADDRESS VALUES + ****************************************************************************/ +/* + * COMMON MEMORY MAP + * ----------------- + */ + +#define LINSYSTEM_BASE 0x00200000 +#define LINSYSTEM_LIMIT 0x07FFFFFF + +/* Linear cache flush now implemented via DCACHE instruction. These defines + related to a special region that used to exist for achieving cache flushes. + */ +#define LINSYSLFLUSH_S 0 + +#define LINSYSRES0_BASE 0x00200000 +#define LINSYSRES0_LIMIT 0x01FFFFFF + +#define LINSYSCUSTOM_BASE 0x02000000 +#define LINSYSCUSTOM_LIMIT 0x02FFFFFF + +#define LINSYSEXPAND_BASE 0x03000000 +#define LINSYSEXPAND_LIMIT 0x03FFFFFF + +#define LINSYSEVENT_BASE 0x04000000 +#define LINSYSEVENT_WR_ATOMIC_UNLOCK 0x04000000 +#define LINSYSEVENT_WR_ATOMIC_LOCK 0x04000040 +#define LINSYSEVENT_WR_CACHE_DISABLE 0x04000080 +#define LINSYSEVENT_WR_CACHE_ENABLE 0x040000C0 +#define LINSYSEVENT_WR_COMBINE_FLUSH 0x04000100 +#define LINSYSEVENT_WR_FENCE 0x04000140 +#define LINSYSEVENT_LIMIT 0x04000FFF + +#define LINSYSCFLUSH_BASE 0x04400000 +#define LINSYSCFLUSH_DCACHE_LINE 0x04400000 +#define LINSYSCFLUSH_ICACHE_LINE 0x04500000 +#define LINSYSCFLUSH_MMCU 0x04700000 +#ifndef METAC_1_2 +#define LINSYSCFLUSH_TxMMCU_BASE 0x04700020 +#define LINSYSCFLUSH_TxMMCU_STRIDE 0x00000008 +#endif +#define LINSYSCFLUSH_ADDR_BITS 0x000FFFFF +#define LINSYSCFLUSH_ADDR_S 0 +#define LINSYSCFLUSH_LIMIT 0x047FFFFF + +#define LINSYSCTRL_BASE 0x04800000 +#define LINSYSCTRL_LIMIT 0x04FFFFFF + +#define LINSYSMTABLE_BASE 0x05000000 +#define LINSYSMTABLE_LIMIT 0x05FFFFFF + +#define LINSYSDIRECT_BASE 0x06000000 +#define LINSYSDIRECT_LIMIT 0x07FFFFFF + +#define LINLOCAL_BASE 0x08000000 +#define LINLOCAL_LIMIT 0x7FFFFFFF + +#define LINCORE_BASE 0x80000000 +#define LINCORE_LIMIT 0x87FFFFFF + +#define LINCORE_CODE_BASE 0x80000000 +#define LINCORE_CODE_LIMIT 0x81FFFFFF + +#define LINCORE_DATA_BASE 0x82000000 +#define LINCORE_DATA_LIMIT 0x83FFFFFF + + +/* The core can support locked icache lines in this region */ +#define LINCORE_ICACHE_BASE 0x84000000 +#define LINCORE_ICACHE_LIMIT 0x85FFFFFF + +/* The core can support locked dcache lines in this region */ +#define LINCORE_DCACHE_BASE 0x86000000 +#define LINCORE_DCACHE_LIMIT 0x87FFFFFF + +#define LINGLOBAL_BASE 0x88000000 +#define LINGLOBAL_LIMIT 0xFFFDFFFF + +/* + * CHIP Core Register Map + * ---------------------- + */ +#define CORE_HWBASE 0x04800000 +#define PRIV_HWBASE 0x04810000 +#define TRIG_HWBASE 0x04820000 +#define SYSC_HWBASE 0x04830000 + +/***************************************************************************** + * INTER-THREAD KICK REGISTERS FOR SOFTWARE EVENT GENERATION + ****************************************************************************/ +/* + * These values define memory mapped registers that can be used to supply + * kicks to threads that service arbitrary software events. + */ + +#define T0KICK 0x04800800 /* Background kick 0 */ +#define TXXKICK_MAX 0xFFFF /* Maximum kicks */ +#define TnXKICK_STRIDE 0x00001000 /* Thread scale value */ +#define TnXKICK_STRIDE_S 12 +#define T0KICKI 0x04800808 /* Interrupt kick 0 */ +#define TXIKICK_OFFSET 0x00000008 /* Int level offset value */ +#define T1KICK 0x04801800 /* Background kick 1 */ +#define T1KICKI 0x04801808 /* Interrupt kick 1 */ +#define T2KICK 0x04802800 /* Background kick 2 */ +#define T2KICKI 0x04802808 /* Interrupt kick 2 */ +#define T3KICK 0x04803800 /* Background kick 3 */ +#define T3KICKI 0x04803808 /* Interrupt kick 3 */ + +/***************************************************************************** + * GLOBAL REGISTER ACCESS RESOURCES + ****************************************************************************/ +/* + * These values define memory mapped registers that allow access to the + * internal state of all threads in order to allow global set-up of thread + * state and external handling of thread events, errors, or debugging. + * + * The actual unit and register index values needed to access individul + * registers are chip specific see - METAC_TXUXX_VALUES in metac_x_y.h. + * However two C array initialisers TXUXX_MASKS and TGUXX_MASKS will always be + * defined to allow arbitrary loading, display, and saving of all valid + * register states without detailed knowledge of their purpose - TXUXX sets + * bits for all valid registers and TGUXX sets bits for the sub-set which are + * global. + */ + +#define T0UCTREG0 0x04800000 /* Access to all CT regs */ +#define TnUCTRX_STRIDE 0x00001000 /* Thread scale value */ +#define TXUCTREGn_STRIDE 0x00000008 /* Register scale value */ + +#define TXUXXRXDT 0x0480FFF0 /* Data to/from any threads reg */ +#define TXUXXRXRQ 0x0480FFF8 +#define TXUXXRXRQ_DREADY_BIT 0x80000000 /* Poll for done */ +#define TXUXXRXRQ_DSPEXT_BIT 0x00020000 /* Addr DSP Regs */ +#define TXUXXRXRQ_RDnWR_BIT 0x00010000 /* Set for read */ +#define TXUXXRXRQ_TX_BITS 0x00003000 /* Thread number */ +#define TXUXXRXRQ_TX_S 12 +#define TXUXXRXRQ_RX_BITS 0x000001F0 /* Register num */ +#define TXUXXRXRQ_RX_S 4 +#define TXUXXRXRQ_DSPRARD0 0 /* DSP RAM A Read Pointer 0 */ +#define TXUXXRXRQ_DSPRARD1 1 /* DSP RAM A Read Pointer 1 */ +#define TXUXXRXRQ_DSPRAWR0 2 /* DSP RAM A Write Pointer 0 */ +#define TXUXXRXRQ_DSPRAWR2 3 /* DSP RAM A Write Pointer 1 */ +#define TXUXXRXRQ_DSPRBRD0 4 /* DSP RAM B Read Pointer 0 */ +#define TXUXXRXRQ_DSPRBRD1 5 /* DSP RAM B Read Pointer 1 */ +#define TXUXXRXRQ_DSPRBWR0 6 /* DSP RAM B Write Pointer 0 */ +#define TXUXXRXRQ_DSPRBWR1 7 /* DSP RAM B Write Pointer 1 */ +#define TXUXXRXRQ_DSPRARINC0 8 /* DSP RAM A Read Increment 0 */ +#define TXUXXRXRQ_DSPRARINC1 9 /* DSP RAM A Read Increment 1 */ +#define TXUXXRXRQ_DSPRAWINC0 10 /* DSP RAM A Write Increment 0 */ +#define TXUXXRXRQ_DSPRAWINC1 11 /* DSP RAM A Write Increment 1 */ +#define TXUXXRXRQ_DSPRBRINC0 12 /* DSP RAM B Read Increment 0 */ +#define TXUXXRXRQ_DSPRBRINC1 13 /* DSP RAM B Read Increment 1 */ +#define TXUXXRXRQ_DSPRBWINC0 14 /* DSP RAM B Write Increment 0 */ +#define TXUXXRXRQ_DSPRBWINC1 15 /* DSP RAM B Write Increment 1 */ + +#define TXUXXRXRQ_ACC0L0 16 /* Accumulator 0 bottom 32-bits */ +#define TXUXXRXRQ_ACC1L0 17 /* Accumulator 1 bottom 32-bits */ +#define TXUXXRXRQ_ACC2L0 18 /* Accumulator 2 bottom 32-bits */ +#define TXUXXRXRQ_ACC3L0 19 /* Accumulator 3 bottom 32-bits */ +#define TXUXXRXRQ_ACC0HI 20 /* Accumulator 0 top 8-bits */ +#define TXUXXRXRQ_ACC1HI 21 /* Accumulator 1 top 8-bits */ +#define TXUXXRXRQ_ACC2HI 22 /* Accumulator 2 top 8-bits */ +#define TXUXXRXRQ_ACC3HI 23 /* Accumulator 3 top 8-bits */ +#define TXUXXRXRQ_UXX_BITS 0x0000000F /* Unit number */ +#define TXUXXRXRQ_UXX_S 0 + +/***************************************************************************** + * PRIVILEGE CONTROL VALUES FOR MEMORY MAPPED RESOURCES + ****************************************************************************/ +/* + * These values define memory mapped registers that give control over and + * the privilege required to access other memory mapped resources. These + * registers themselves always require privilege to update them. + */ + +#define TXPRIVREG_STRIDE 0x8 /* Delta between per-thread regs */ +#define TXPRIVREG_STRIDE_S 3 + +/* + * Each bit 0 to 15 defines privilege required to access internal register + * regions 0x04800000 to 0x048FFFFF in 64k chunks + */ +#define T0PIOREG 0x04810100 +#define T1PIOREG 0x04810108 +#define T2PIOREG 0x04810110 +#define T3PIOREG 0x04810118 + +/* + * Each bit 0 to 31 defines privilege required to use the pair of + * system events implemented as writee in the regions 0x04000000 to + * 0x04000FFF in 2*64 byte chunks. + */ +#define T0PSYREG 0x04810180 +#define T1PSYREG 0x04810188 +#define T2PSYREG 0x04810190 +#define T3PSYREG 0x04810198 + +/* + * CHIP PRIV CONTROLS + * ------------------ + */ + +/* The TXPIOREG register holds a bit mask directly mappable to + corresponding addresses in the range 0x04800000 to 049FFFFF */ +#define TXPIOREG_ADDR_BITS 0x1F0000 /* Up to 32x64K bytes */ +#define TXPIOREG_ADDR_S 16 + +/* Hence based on the _HWBASE values ... */ +#define TXPIOREG_CORE_BIT (1<<((0x04800000>>16)&0x1F)) +#define TXPIOREG_PRIV_BIT (1<<((0x04810000>>16)&0x1F)) +#define TXPIOREG_TRIG_BIT (1<<((0x04820000>>16)&0x1F)) +#define TXPIOREG_SYSC_BIT (1<<((0x04830000>>16)&0x1F)) + +#define TXPIOREG_WRC_BIT 0x00080000 /* Wr combiner reg priv */ +#define TXPIOREG_LOCALBUS_RW_BIT 0x00040000 /* Local bus rd/wr priv */ +#define TXPIOREG_SYSREGBUS_RD_BIT 0x00020000 /* Sys reg bus write priv */ +#define TXPIOREG_SYSREGBUS_WR_BIT 0x00010000 /* Sys reg bus read priv */ + +/* CORE region privilege controls */ +#define T0PRIVCORE 0x04800828 +#define TXPRIVCORE_TXBKICK_BIT 0x001 /* Background kick priv */ +#define TXPRIVCORE_TXIKICK_BIT 0x002 /* Interrupt kick priv */ +#define TXPRIVCORE_TXAMAREGX_BIT 0x004 /* TXAMAREG4|5|6 priv */ +#define TnPRIVCORE_STRIDE 0x00001000 + +#define T0PRIVSYSR 0x04810000 +#define TnPRIVSYSR_STRIDE 0x00000008 +#define TnPRIVSYSR_STRIDE_S 3 +#define TXPRIVSYSR_CFLUSH_BIT 0x01 +#define TXPRIVSYSR_MTABLE_BIT 0x02 +#define TXPRIVSYSR_DIRECT_BIT 0x04 +#ifdef METAC_1_2 +#define TXPRIVSYSR_ALL_BITS 0x07 +#else +#define TXPRIVSYSR_CORE_BIT 0x08 +#define TXPRIVSYSR_CORECODE_BIT 0x10 +#define TXPRIVSYSR_ALL_BITS 0x1F +#endif +#define T1PRIVSYSR 0x04810008 +#define T2PRIVSYSR 0x04810010 +#define T3PRIVSYSR 0x04810018 + +/***************************************************************************** + * H/W TRIGGER STATE/LEVEL REGISTERS AND H/W TRIGGER VECTORS + ****************************************************************************/ +/* + * These values define memory mapped registers that give control over and + * the state of hardware trigger sources both external to the META processor + * and internal to it. + */ + +#define HWSTATMETA 0x04820000 /* Hardware status/clear META trig */ +#define HWSTATMETA_T0HALT_BITS 0xF +#define HWSTATMETA_T0HALT_S 0 +#define HWSTATMETA_T0BHALT_BIT 0x1 /* Background HALT */ +#define HWSTATMETA_T0IHALT_BIT 0x2 /* Interrupt HALT */ +#define HWSTATMETA_T0PHALT_BIT 0x4 /* PF/RO Memory HALT */ +#define HWSTATMETA_T0AMATR_BIT 0x8 /* AMA trigger */ +#define HWSTATMETA_TnINT_S 4 /* Shift by (thread*4) */ +#define HWSTATEXT 0x04820010 /* H/W status/clear external trigs 0-31 */ +#define HWSTATEXT2 0x04820018 /* H/W status/clear external trigs 32-63 */ +#define HWSTATEXT4 0x04820020 /* H/W status/clear external trigs 64-95 */ +#define HWSTATEXT6 0x04820028 /* H/W status/clear external trigs 96-128 */ +#define HWLEVELEXT 0x04820030 /* Edge/Level type of external trigs 0-31 */ +#define HWLEVELEXT2 0x04820038 /* Edge/Level type of external trigs 32-63 */ +#define HWLEVELEXT4 0x04820040 /* Edge/Level type of external trigs 64-95 */ +#define HWLEVELEXT6 0x04820048 /* Edge/Level type of external trigs 96-128 */ +#define HWLEVELEXT_XXX_LEVEL 1 /* Level sense logic in HWSTATEXTn */ +#define HWLEVELEXT_XXX_EDGE 0 +#define HWMASKEXT 0x04820050 /* Enable/disable of external trigs 0-31 */ +#define HWMASKEXT2 0x04820058 /* Enable/disable of external trigs 32-63 */ +#define HWMASKEXT4 0x04820060 /* Enable/disable of external trigs 64-95 */ +#define HWMASKEXT6 0x04820068 /* Enable/disable of external trigs 96-128 */ +#define T0VECINT_BHALT 0x04820500 /* Background HALT trigger vector */ +#define TXVECXXX_BITS 0xF /* Per-trigger vector vals 0,1,4-15 */ +#define TXVECXXX_S 0 +#define T0VECINT_IHALT 0x04820508 /* Interrupt HALT */ +#define T0VECINT_PHALT 0x04820510 /* PF/RO memory fault */ +#define T0VECINT_AMATR 0x04820518 /* AMA trigger */ +#define TnVECINT_STRIDE 0x00000020 /* Per thread stride */ +#define HWVEC0EXT 0x04820700 /* Vectors for external triggers 0-31 */ +#define HWVEC20EXT 0x04821700 /* Vectors for external triggers 32-63 */ +#define HWVEC40EXT 0x04822700 /* Vectors for external triggers 64-95 */ +#define HWVEC60EXT 0x04823700 /* Vectors for external triggers 96-127 */ +#define HWVECnEXT_STRIDE 0x00000008 /* Per trigger stride */ +#define HWVECnEXT_DEBUG 0x1 /* Redirect trigger to debug i/f */ + +/* + * CORE HWCODE-BREAKPOINT REGISTERS/VALUES + * --------------------------------------- + */ +#define CODEB0ADDR 0x0480FF00 /* Address specifier */ +#define CODEBXADDR_MATCHX_BITS 0xFFFFFFFC +#define CODEBXADDR_MATCHX_S 2 +#define CODEB0CTRL 0x0480FF08 /* Control */ +#define CODEBXCTRL_MATEN_BIT 0x80000000 /* Match 'Enable' */ +#define CODEBXCTRL_MATTXEN_BIT 0x10000000 /* Match threadn enable */ +#define CODEBXCTRL_HITC_BITS 0x00FF0000 /* Hit counter */ +#define CODEBXCTRL_HITC_S 16 +#define CODEBXHITC_NEXT 0xFF /* Next 'hit' will trigger */ +#define CODEBXHITC_HIT1 0x00 /* No 'hits' after trigger */ +#define CODEBXCTRL_MMASK_BITS 0x0000FFFC /* Mask ADDR_MATCH bits */ +#define CODEBXCTRL_MMASK_S 2 +#define CODEBXCTRL_MATLTX_BITS 0x00000003 /* Match threadn LOCAL addr */ +#define CODEBXCTRL_MATLTX_S 0 /* Match threadn LOCAL addr */ +#define CODEBnXXXX_STRIDE 0x00000010 /* Stride between CODEB reg sets */ +#define CODEBnXXXX_STRIDE_S 4 +#define CODEBnXXXX_LIMIT 3 /* Sets 0-3 */ + +/* + * CORE DATA-WATCHPOINT REGISTERS/VALUES + * ------------------------------------- + */ +#define DATAW0ADDR 0x0480FF40 /* Address specifier */ +#define DATAWXADDR_MATCHR_BITS 0xFFFFFFF8 +#define DATAWXADDR_MATCHR_S 3 +#define DATAWXADDR_MATCHW_BITS 0xFFFFFFFF +#define DATAWXADDR_MATCHW_S 0 +#define DATAW0CTRL 0x0480FF48 /* Control */ +#define DATAWXCTRL_MATRD_BIT 0x80000000 /* Match 'Read' */ +#ifndef METAC_1_2 +#define DATAWXCTRL_MATNOTTX_BIT 0x20000000 /* Invert threadn enable */ +#endif +#define DATAWXCTRL_MATWR_BIT 0x40000000 /* Match 'Write' */ +#define DATAWXCTRL_MATTXEN_BIT 0x10000000 /* Match threadn enable */ +#define DATAWXCTRL_WRSIZE_BITS 0x0F000000 /* Write Match Size */ +#define DATAWXCTRL_WRSIZE_S 24 +#define DATAWWRSIZE_ANY 0 /* Any size transaction matches */ +#define DATAWWRSIZE_8BIT 1 /* Specific sizes ... */ +#define DATAWWRSIZE_16BIT 2 +#define DATAWWRSIZE_32BIT 3 +#define DATAWWRSIZE_64BIT 4 +#define DATAWXCTRL_HITC_BITS 0x00FF0000 /* Hit counter */ +#define DATAWXCTRL_HITC_S 16 +#define DATAWXHITC_NEXT 0xFF /* Next 'hit' will trigger */ +#define DATAWXHITC_HIT1 0x00 /* No 'hits' after trigger */ +#define DATAWXCTRL_MMASK_BITS 0x0000FFF8 /* Mask ADDR_MATCH bits */ +#define DATAWXCTRL_MMASK_S 3 +#define DATAWXCTRL_MATLTX_BITS 0x00000003 /* Match threadn LOCAL addr */ +#define DATAWXCTRL_MATLTX_S 0 /* Match threadn LOCAL addr */ +#define DATAW0DMATCH0 0x0480FF50 /* Write match data */ +#define DATAW0DMATCH1 0x0480FF58 +#define DATAW0DMASK0 0x0480FF60 /* Write match data mask */ +#define DATAW0DMASK1 0x0480FF68 +#define DATAWnXXXX_STRIDE 0x00000040 /* Stride between DATAW reg sets */ +#define DATAWnXXXX_STRIDE_S 6 +#define DATAWnXXXX_LIMIT 1 /* Sets 0,1 */ + +/* + * CHIP Automatic Mips Allocation control registers + * ------------------------------------------------ + */ + +/* CORE memory mapped AMA registers */ +#define T0AMAREG4 0x04800810 +#define TXAMAREG4_POOLSIZE_BITS 0x3FFFFF00 +#define TXAMAREG4_POOLSIZE_S 8 +#define TXAMAREG4_AVALUE_BITS 0x000000FF +#define TXAMAREG4_AVALUE_S 0 +#define T0AMAREG5 0x04800818 +#define TXAMAREG5_POOLC_BITS 0x07FFFFFF +#define TXAMAREG5_POOLC_S 0 +#define T0AMAREG6 0x04800820 +#define TXAMAREG6_DLINEDEF_BITS 0x00FFFFF0 +#define TXAMAREG6_DLINEDEF_S 0 +#define TnAMAREGX_STRIDE 0x00001000 + +/* + * Memory Management Control Unit Table Entries + * -------------------------------------------- + */ +#define MMCU_ENTRY_S 4 /* -> Entry size */ +#define MMCU_ENTRY_ADDR_BITS 0xFFFFF000 /* Physical address */ +#define MMCU_ENTRY_ADDR_S 12 /* -> Page size */ +#define MMCU_ENTRY_CWIN_BITS 0x000000C0 /* Caching 'window' selection */ +#define MMCU_ENTRY_CWIN_S 6 +#define MMCU_CWIN_UNCACHED 0 /* May not be memory etc. */ +#define MMCU_CWIN_BURST 1 /* Cached but LRU unset */ +#define MMCU_CWIN_C1SET 2 /* Cached in 1 set only */ +#define MMCU_CWIN_CACHED 3 /* Fully cached */ +#define MMCU_ENTRY_CACHE_BIT 0x00000080 /* Set for cached region */ +#define MMCU_ECACHE1_FULL_BIT 0x00000040 /* Use all the sets */ +#define MMCU_ECACHE0_BURST_BIT 0x00000040 /* Match bursts */ +#define MMCU_ENTRY_SYS_BIT 0x00000010 /* Sys-coherent access required */ +#define MMCU_ENTRY_WRC_BIT 0x00000008 /* Write combining allowed */ +#define MMCU_ENTRY_PRIV_BIT 0x00000004 /* Privilege required */ +#define MMCU_ENTRY_WR_BIT 0x00000002 /* Writes allowed */ +#define MMCU_ENTRY_VAL_BIT 0x00000001 /* Entry is valid */ + +#ifdef METAC_2_1 +/* + * Extended first-level/top table entries have extra/larger fields in later + * cores as bits 11:0 previously had no effect in such table entries. + */ +#define MMCU_E1ENT_ADDR_BITS 0xFFFFFFC0 /* Physical address */ +#define MMCU_E1ENT_ADDR_S 6 /* -> resolution < page size */ +#define MMCU_E1ENT_PGSZ_BITS 0x0000001E /* Page size for 2nd level */ +#define MMCU_E1ENT_PGSZ_S 1 +#define MMCU_E1ENT_PGSZ0_POWER 12 /* PgSz 0 -> 4K */ +#define MMCU_E1ENT_PGSZ_MAX 10 /* PgSz 10 -> 4M maximum */ +#define MMCU_E1ENT_MINIM_BIT 0x00000020 +#endif /* METAC_2_1 */ + +/* MMCU control register in SYSC region */ +#define MMCU_TABLE_PHYS_ADDR 0x04830010 +#define MMCU_TABLE_PHYS_ADDR_BITS 0xFFFFFFFC +#ifdef METAC_2_1 +#define MMCU_TABLE_PHYS_EXTEND 0x00000001 /* See below */ +#endif +#define MMCU_DCACHE_CTRL_ADDR 0x04830018 +#define MMCU_xCACHE_CTRL_ENABLE_BIT 0x00000001 +#define MMCU_xCACHE_CTRL_PARTITION_BIT 0x00000000 /* See xCPART below */ +#define MMCU_ICACHE_CTRL_ADDR 0x04830020 + +#ifdef METAC_2_1 + +/* + * Allow direct access to physical memory used to implement MMU table. + * + * Each is based on a corresponding MMCU_TnLOCAL_TABLE_PHYSn or similar + * MMCU_TnGLOBAL_TABLE_PHYSn register pair (see next). + */ +#define LINSYSMEMT0L_BASE 0x05000000 +#define LINSYSMEMT0L_LIMIT 0x051FFFFF +#define LINSYSMEMTnX_STRIDE 0x00200000 /* 2MB Local per thread */ +#define LINSYSMEMTnX_STRIDE_S 21 +#define LINSYSMEMTXG_OFFSET 0x00800000 /* +2MB Global per thread */ +#define LINSYSMEMTXG_OFFSET_S 23 +#define LINSYSMEMT1L_BASE 0x05200000 +#define LINSYSMEMT1L_LIMIT 0x053FFFFF +#define LINSYSMEMT2L_BASE 0x05400000 +#define LINSYSMEMT2L_LIMIT 0x055FFFFF +#define LINSYSMEMT3L_BASE 0x05600000 +#define LINSYSMEMT3L_LIMIT 0x057FFFFF +#define LINSYSMEMT0G_BASE 0x05800000 +#define LINSYSMEMT0G_LIMIT 0x059FFFFF +#define LINSYSMEMT1G_BASE 0x05A00000 +#define LINSYSMEMT1G_LIMIT 0x05BFFFFF +#define LINSYSMEMT2G_BASE 0x05C00000 +#define LINSYSMEMT2G_LIMIT 0x05DFFFFF +#define LINSYSMEMT3G_BASE 0x05E00000 +#define LINSYSMEMT3G_LIMIT 0x05FFFFFF + +/* + * Extended MMU table functionality allows a sparse or flat table to be + * described much more efficiently than before. + */ +#define MMCU_T0LOCAL_TABLE_PHYS0 0x04830700 +#define MMCU_TnX_TABLE_PHYSX_STRIDE 0x20 /* Offset per thread */ +#define MMCU_TnX_TABLE_PHYSX_STRIDE_S 5 +#define MMCU_TXG_TABLE_PHYSX_OFFSET 0x10 /* Global versus local */ +#define MMCU_TXG_TABLE_PHYSX_OFFSET_S 4 +#define MMCU_TBLPHYS0_DCCTRL_BITS 0x000000DF /* DC controls */ +#define MMCU_TBLPHYS0_ENTLB_BIT 0x00000020 /* Cache in TLB */ +#define MMCU_TBLPHYS0_TBLSZ_BITS 0x00000F00 /* Area supported */ +#define MMCU_TBLPHYS0_TBLSZ_S 8 +#define MMCU_TBLPHYS0_TBLSZ0_POWER 22 /* 0 -> 4M */ +#define MMCU_TBLPHYS0_TBLSZ_MAX 9 /* 9 -> 2G */ +#define MMCU_TBLPHYS0_LINBASE_BITS 0xFFC00000 /* Linear base */ +#define MMCU_TBLPHYS0_LINBASE_S 22 + +#define MMCU_T0LOCAL_TABLE_PHYS1 0x04830708 +#define MMCU_TBLPHYS1_ADDR_BITS 0xFFFFFFFC /* Physical base */ +#define MMCU_TBLPHYS1_ADDR_S 2 + +#define MMCU_T0GLOBAL_TABLE_PHYS0 0x04830710 +#define MMCU_T0GLOBAL_TABLE_PHYS1 0x04830718 +#define MMCU_T1LOCAL_TABLE_PHYS0 0x04830720 +#define MMCU_T1LOCAL_TABLE_PHYS1 0x04830728 +#define MMCU_T1GLOBAL_TABLE_PHYS0 0x04830730 +#define MMCU_T1GLOBAL_TABLE_PHYS1 0x04830738 +#define MMCU_T2LOCAL_TABLE_PHYS0 0x04830740 +#define MMCU_T2LOCAL_TABLE_PHYS1 0x04830748 +#define MMCU_T2GLOBAL_TABLE_PHYS0 0x04830750 +#define MMCU_T2GLOBAL_TABLE_PHYS1 0x04830758 +#define MMCU_T3LOCAL_TABLE_PHYS0 0x04830760 +#define MMCU_T3LOCAL_TABLE_PHYS1 0x04830768 +#define MMCU_T3GLOBAL_TABLE_PHYS0 0x04830770 +#define MMCU_T3GLOBAL_TABLE_PHYS1 0x04830778 + +#define MMCU_T0EBWCCTRL 0x04830640 +#define MMCU_TnEBWCCTRL_BITS 0x00000007 +#define MMCU_TnEBWCCTRL_S 0 +#define MMCU_TnEBWCCCTRL_DISABLE_ALL 0 +#define MMCU_TnEBWCCCTRL_ABIT25 1 +#define MMCU_TnEBWCCCTRL_ABIT26 2 +#define MMCU_TnEBWCCCTRL_ABIT27 3 +#define MMCU_TnEBWCCCTRL_ABIT28 4 +#define MMCU_TnEBWCCCTRL_ABIT29 5 +#define MMCU_TnEBWCCCTRL_ABIT30 6 +#define MMCU_TnEBWCCCTRL_ENABLE_ALL 7 +#define MMCU_TnEBWCCTRL_STRIDE 8 + +#endif /* METAC_2_1 */ + + +/* Registers within the SYSC register region */ +#define METAC_ID 0x04830000 +#define METAC_ID_MAJOR_BITS 0xFF000000 +#define METAC_ID_MAJOR_S 24 +#define METAC_ID_MINOR_BITS 0x00FF0000 +#define METAC_ID_MINOR_S 16 +#define METAC_ID_REV_BITS 0x0000FF00 +#define METAC_ID_REV_S 8 +#define METAC_ID_MAINT_BITS 0x000000FF +#define METAC_ID_MAINT_S 0 + +#ifdef METAC_2_1 +/* Use of this section is strongly deprecated */ +#define METAC_ID2 0x04830008 +#define METAC_ID2_DESIGNER_BITS 0xFFFF0000 /* Modified by customer */ +#define METAC_ID2_DESIGNER_S 16 +#define METAC_ID2_MINOR2_BITS 0x00000F00 /* 3rd digit of prod rev */ +#define METAC_ID2_MINOR2_S 8 +#define METAC_ID2_CONFIG_BITS 0x000000FF /* Wrapper configuration */ +#define METAC_ID2_CONFIG_S 0 + +/* Primary core identification and configuration information */ +#define METAC_CORE_ID 0x04831000 +#define METAC_COREID_GROUP_BITS 0xFF000000 +#define METAC_COREID_GROUP_S 24 +#define METAC_COREID_GROUP_METAG 0x14 +#define METAC_COREID_ID_BITS 0x00FF0000 +#define METAC_COREID_ID_S 16 +#define METAC_COREID_ID_W32 0x10 /* >= for 32-bit pipeline */ +#define METAC_COREID_CONFIG_BITS 0x0000FFFF +#define METAC_COREID_CONFIG_S 0 +#define METAC_COREID_CFGCACHE_BITS 0x0007 +#define METAC_COREID_CFGCACHE_S 0 +#define METAC_COREID_CFGCACHE_NOM 0 +#define METAC_COREID_CFGCACHE_TYPE0 1 +#define METAC_COREID_CFGCACHE_NOMMU 1 /* Alias for TYPE0 */ +#define METAC_COREID_CFGCACHE_NOCACHE 2 +#define METAC_COREID_CFGCACHE_PRIVNOMMU 3 +#define METAC_COREID_CFGDSP_BITS 0x0038 +#define METAC_COREID_CFGDSP_S 3 +#define METAC_COREID_CFGDSP_NOM 0 +#define METAC_COREID_CFGDSP_MIN 1 +#define METAC_COREID_NOFPACC_BIT 0x0040 /* Set if no FPU accum */ +#define METAC_COREID_CFGFPU_BITS 0x0180 +#define METAC_COREID_CFGFPU_S 7 +#define METAC_COREID_CFGFPU_NOM 0 +#define METAC_COREID_CFGFPU_SNGL 1 +#define METAC_COREID_CFGFPU_DBL 2 +#define METAC_COREID_NOAMA_BIT 0x0200 /* Set if no AMA present */ +#define METAC_COREID_NOCOH_BIT 0x0400 /* Set if no Gbl coherency */ + +/* Core revision information */ +#define METAC_CORE_REV 0x04831008 +#define METAC_COREREV_DESIGN_BITS 0xFF000000 +#define METAC_COREREV_DESIGN_S 24 +#define METAC_COREREV_MAJOR_BITS 0x00FF0000 +#define METAC_COREREV_MAJOR_S 16 +#define METAC_COREREV_MINOR_BITS 0x0000FF00 +#define METAC_COREREV_MINOR_S 8 +#define METAC_COREREV_MAINT_BITS 0x000000FF +#define METAC_COREREV_MAINT_S 0 + +/* Configuration information control outside the core */ +#define METAC_CORE_DESIGNER1 0x04831010 /* Arbitrary value */ +#define METAC_CORE_DESIGNER2 0x04831018 /* Arbitrary value */ + +/* Configuration information covering presence/number of various features */ +#define METAC_CORE_CONFIG2 0x04831020 +#define METAC_CORECFG2_COREDBGTYPE_BITS 0x60000000 /* Core debug type */ +#define METAC_CORECFG2_COREDBGTYPE_S 29 +#define METAC_CORECFG2_DCSMALL_BIT 0x04000000 /* Data cache small */ +#define METAC_CORECFG2_ICSMALL_BIT 0x02000000 /* Inst cache small */ +#define METAC_CORECFG2_DCSZNP_BITS 0x01C00000 /* Data cache size np */ +#define METAC_CORECFG2_DCSZNP_S 22 +#define METAC_CORECFG2_ICSZNP_BITS 0x00380000 /* Inst cache size np */ +#define METAC_CORECFG2_ICSZNP_S 19 +#define METAC_CORECFG2_DCSZ_BITS 0x00070000 /* Data cache size */ +#define METAC_CORECFG2_DCSZ_S 16 +#define METAC_CORECFG2_xCSZ_4K 0 /* Allocated values */ +#define METAC_CORECFG2_xCSZ_8K 1 +#define METAC_CORECFG2_xCSZ_16K 2 +#define METAC_CORECFG2_xCSZ_32K 3 +#define METAC_CORECFG2_xCSZ_64K 4 +#define METAC_CORE_C2ICSZ_BITS 0x0000E000 /* Inst cache size */ +#define METAC_CORE_C2ICSZ_S 13 +#define METAC_CORE_GBLACC_BITS 0x00001800 /* Number of Global Acc */ +#define METAC_CORE_GBLACC_S 11 +#define METAC_CORE_GBLDXR_BITS 0x00000700 /* 0 -> 0, R -> 2^(R-1) */ +#define METAC_CORE_GBLDXR_S 8 +#define METAC_CORE_GBLAXR_BITS 0x000000E0 /* 0 -> 0, R -> 2^(R-1) */ +#define METAC_CORE_GBLAXR_S 5 +#define METAC_CORE_RTTRACE_BIT 0x00000010 +#define METAC_CORE_WATCHN_BITS 0x0000000C /* 0 -> 0, N -> 2^N */ +#define METAC_CORE_WATCHN_S 2 +#define METAC_CORE_BREAKN_BITS 0x00000003 /* 0 -> 0, N -> 2^N */ +#define METAC_CORE_BREAKN_S 0 + +/* Configuration information covering presence/number of various features */ +#define METAC_CORE_CONFIG3 0x04831028 +#define METAC_CORECFG3_L2C_REV_ID_BITS 0x000F0000 /* Revision of L2 cache */ +#define METAC_CORECFG3_L2C_REV_ID_S 16 +#define METAC_CORECFG3_L2C_LINE_SIZE_BITS 0x00003000 /* L2 line size */ +#define METAC_CORECFG3_L2C_LINE_SIZE_S 12 +#define METAC_CORECFG3_L2C_LINE_SIZE_64B 0x0 /* 64 bytes */ +#define METAC_CORECFG3_L2C_NUM_WAYS_BITS 0x00000F00 /* L2 number of ways (2^n) */ +#define METAC_CORECFG3_L2C_NUM_WAYS_S 8 +#define METAC_CORECFG3_L2C_SIZE_BITS 0x000000F0 /* L2 size (2^n) */ +#define METAC_CORECFG3_L2C_SIZE_S 4 +#define METAC_CORECFG3_L2C_UNIFIED_BIT 0x00000004 /* Unified cache: */ +#define METAC_CORECFG3_L2C_UNIFIED_S 2 +#define METAC_CORECFG3_L2C_UNIFIED_UNIFIED 1 /* - Unified D/I cache */ +#define METAC_CORECFG3_L2C_UNIFIED_SEPARATE 0 /* - Separate D/I cache */ +#define METAC_CORECFG3_L2C_MODE_BIT 0x00000002 /* Cache Mode: */ +#define METAC_CORECFG3_L2C_MODE_S 1 +#define METAC_CORECFG3_L2C_MODE_WRITE_BACK 1 /* - Write back */ +#define METAC_CORECFG3_L2C_MODE_WRITE_THROUGH 0 /* - Write through */ +#define METAC_CORECFG3_L2C_HAVE_L2C_BIT 0x00000001 /* Have L2C */ +#define METAC_CORECFG3_L2C_HAVE_L2C_S 0 + +#endif /* METAC_2_1 */ + +#define SYSC_CACHE_MMU_CONFIG 0x04830028 +#ifdef METAC_2_1 +#define SYSC_CMMUCFG_DCSKEWABLE_BIT 0x00000040 +#define SYSC_CMMUCFG_ICSKEWABLE_BIT 0x00000020 +#define SYSC_CMMUCFG_DCSKEWOFF_BIT 0x00000010 /* Skew association override */ +#define SYSC_CMMUCFG_ICSKEWOFF_BIT 0x00000008 /* -> default 0 on if present */ +#define SYSC_CMMUCFG_MODE_BITS 0x00000007 /* Access to old state */ +#define SYSC_CMMUCFG_MODE_S 0 +#define SYSC_CMMUCFG_ON 0x7 +#define SYSC_CMMUCFG_EBYPASS 0x6 /* Enhanced by-pass mode */ +#define SYSC_CMMUCFG_EBYPASSIC 0x4 /* EB just inst cache */ +#define SYSC_CMMUCFG_EBYPASSDC 0x2 /* EB just data cache */ +#endif /* METAC_2_1 */ +/* Old definitions, Keep them for now */ +#define SYSC_CMMUCFG_MMU_ON_BIT 0x1 +#define SYSC_CMMUCFG_DC_ON_BIT 0x2 +#define SYSC_CMMUCFG_IC_ON_BIT 0x4 + +#define SYSC_JTAG_THREAD 0x04830030 +#define SYSC_JTAG_TX_BITS 0x00000003 /* Read only bits! */ +#define SYSC_JTAG_TX_S 0 +#define SYSC_JTAG_PRIV_BIT 0x00000004 +#ifdef METAC_2_1 +#define SYSC_JTAG_SLAVETX_BITS 0x00000018 +#define SYSC_JTAG_SLAVETX_S 3 +#endif /* METAC_2_1 */ + +#define SYSC_DCACHE_FLUSH 0x04830038 +#define SYSC_ICACHE_FLUSH 0x04830040 +#define SYSC_xCACHE_FLUSH_INIT 0x1 +#define MMCU_DIRECTMAP0_ADDR 0x04830080 /* LINSYSDIRECT_BASE -> */ +#define MMCU_DIRECTMAPn_STRIDE 0x00000010 /* 4 Region settings */ +#define MMCU_DIRECTMAPn_S 4 +#define MMCU_DIRECTMAPn_ADDR_BITS 0xFF800000 +#define MMCU_DIRECTMAPn_ADDR_S 23 +#define MMCU_DIRECTMAPn_ADDR_SCALE 0x00800000 /* 8M Regions */ +#ifdef METAC_2_1 +/* + * These fields in the above registers provide MMCU_ENTRY_* values + * for each direct mapped region to enable optimisation of these areas. + * (LSB similar to VALID must be set for enhancments to be active) + */ +#define MMCU_DIRECTMAPn_ENHANCE_BIT 0x00000001 /* 0 = no optim */ +#define MMCU_DIRECTMAPn_DCCTRL_BITS 0x000000DF /* Get DC Ctrl */ +#define MMCU_DIRECTMAPn_DCCTRL_S 0 +#define MMCU_DIRECTMAPn_ICCTRL_BITS 0x0000C000 /* Get IC Ctrl */ +#define MMCU_DIRECTMAPn_ICCTRL_S 8 +#define MMCU_DIRECTMAPn_ENTLB_BIT 0x00000020 /* Cache in TLB */ +#define MMCU_DIRECTMAPn_ICCWIN_BITS 0x0000C000 /* Get IC Win Bits */ +#define MMCU_DIRECTMAPn_ICCWIN_S 14 +#endif /* METAC_2_1 */ + +#define MMCU_DIRECTMAP1_ADDR 0x04830090 +#define MMCU_DIRECTMAP2_ADDR 0x048300a0 +#define MMCU_DIRECTMAP3_ADDR 0x048300b0 + +/* + * These bits partion each threads use of data cache or instruction cache + * resource by modifying the top 4 bits of the address within the cache + * storage area. + */ +#define SYSC_DCPART0 0x04830200 +#define SYSC_xCPARTn_STRIDE 0x00000008 +#define SYSC_xCPARTL_AND_BITS 0x0000000F /* Masks top 4 bits */ +#define SYSC_xCPARTL_AND_S 0 +#define SYSC_xCPARTG_AND_BITS 0x00000F00 /* Masks top 4 bits */ +#define SYSC_xCPARTG_AND_S 8 +#define SYSC_xCPARTL_OR_BITS 0x000F0000 /* Ors into top 4 bits */ +#define SYSC_xCPARTL_OR_S 16 +#define SYSC_xCPARTG_OR_BITS 0x0F000000 /* Ors into top 4 bits */ +#define SYSC_xCPARTG_OR_S 24 +#define SYSC_CWRMODE_BIT 0x80000000 /* Write cache mode bit */ + +#define SYSC_DCPART1 0x04830208 +#define SYSC_DCPART2 0x04830210 +#define SYSC_DCPART3 0x04830218 +#define SYSC_ICPART0 0x04830220 +#define SYSC_ICPART1 0x04830228 +#define SYSC_ICPART2 0x04830230 +#define SYSC_ICPART3 0x04830238 + +/* + * META Core Memory and Cache Update registers + */ +#define SYSC_MCMDATAX 0x04830300 /* 32-bit read/write data register */ +#define SYSC_MCMDATAT 0x04830308 /* Read or write data triggers oper */ +#define SYSC_MCMGCTRL 0x04830310 /* Control register */ +#define SYSC_MCMGCTRL_READ_BIT 0x00000001 /* Set to issue 1st read */ +#define SYSC_MCMGCTRL_AINC_BIT 0x00000002 /* Set for auto-increment */ +#define SYSC_MCMGCTRL_ADDR_BITS 0x000FFFFC /* Address or index */ +#define SYSC_MCMGCTRL_ADDR_S 2 +#define SYSC_MCMGCTRL_ID_BITS 0x0FF00000 /* Internal memory block Id */ +#define SYSC_MCMGCTRL_ID_S 20 +#define SYSC_MCMGID_NODEV 0xFF /* No Device Selected */ +#define SYSC_MCMGID_DSPRAM0A 0x04 /* DSP RAM D0 block A access */ +#define SYSC_MCMGID_DSPRAM0B 0x05 /* DSP RAM D0 block B access */ +#define SYSC_MCMGID_DSPRAM1A 0x06 /* DSP RAM D1 block A access */ +#define SYSC_MCMGID_DSPRAM1B 0x07 /* DSP RAM D1 block B access */ +#define SYSC_MCMGID_DCACHEL 0x08 /* DCACHE lines (64-bytes/line) */ +#ifdef METAC_2_1 +#define SYSC_MCMGID_DCACHETLB 0x09 /* DCACHE TLB ( Read Only ) */ +#endif /* METAC_2_1 */ +#define SYSC_MCMGID_DCACHET 0x0A /* DCACHE tags (32-bits/line) */ +#define SYSC_MCMGID_DCACHELRU 0x0B /* DCACHE LRU (8-bits/line) */ +#define SYSC_MCMGID_ICACHEL 0x0C /* ICACHE lines (64-bytes/line */ +#ifdef METAC_2_1 +#define SYSC_MCMGID_ICACHETLB 0x0D /* ICACHE TLB (Read Only ) */ +#endif /* METAC_2_1 */ +#define SYSC_MCMGID_ICACHET 0x0E /* ICACHE Tags (32-bits/line) */ +#define SYSC_MCMGID_ICACHELRU 0x0F /* ICACHE LRU (8-bits/line ) */ +#define SYSC_MCMGID_COREIRAM0 0x10 /* Core code mem id 0 */ +#define SYSC_MCMGID_COREIRAMn 0x17 +#define SYSC_MCMGID_COREDRAM0 0x18 /* Core data mem id 0 */ +#define SYSC_MCMGID_COREDRAMn 0x1F +#ifdef METAC_2_1 +#define SYSC_MCMGID_DCACHEST 0x20 /* DCACHE ST ( Read Only ) */ +#define SYSC_MCMGID_ICACHEST 0x21 /* ICACHE ST ( Read Only ) */ +#define SYSC_MCMGID_DCACHETLBLRU 0x22 /* DCACHE TLB LRU ( Read Only )*/ +#define SYSC_MCMGID_ICACHETLBLRU 0x23 /* ICACHE TLB LRU( Read Only ) */ +#define SYSC_MCMGID_DCACHESTLRU 0x24 /* DCACHE ST LRU ( Read Only ) */ +#define SYSC_MCMGID_ICACHESTLRU 0x25 /* ICACHE ST LRU ( Read Only ) */ +#define SYSC_MCMGID_DEBUGTLB 0x26 /* DEBUG TLB ( Read Only ) */ +#define SYSC_MCMGID_DEBUGST 0x27 /* DEBUG ST ( Read Only ) */ +#define SYSC_MCMGID_L2CACHEL 0x30 /* L2 Cache Lines (64-bytes/line) */ +#define SYSC_MCMGID_L2CACHET 0x31 /* L2 Cache Tags (32-bits/line) */ +#define SYSC_MCMGID_COPROX0 0x70 /* Coprocessor port id 0 */ +#define SYSC_MCMGID_COPROXn 0x77 +#endif /* METAC_2_1 */ +#define SYSC_MCMGCTRL_TR31_BIT 0x80000000 /* Trigger 31 on completion */ +#define SYSC_MCMSTATUS 0x04830318 /* Status read only */ +#define SYSC_MCMSTATUS_IDLE_BIT 0x00000001 + +/* META System Events */ +#define SYSC_SYS_EVENT 0x04830400 +#define SYSC_SYSEVT_ATOMIC_BIT 0x00000001 +#define SYSC_SYSEVT_CACHEX_BIT 0x00000002 +#define SYSC_ATOMIC_LOCK 0x04830408 +#define SYSC_ATOMIC_STATE_TX_BITS 0x0000000F +#define SYSC_ATOMIC_STATE_TX_S 0 +#ifdef METAC_1_2 +#define SYSC_ATOMIC_STATE_DX_BITS 0x000000F0 +#define SYSC_ATOMIC_STATE_DX_S 4 +#else /* METAC_1_2 */ +#define SYSC_ATOMIC_SOURCE_BIT 0x00000010 +#endif /* !METAC_1_2 */ + + +#ifdef METAC_2_1 + +/* These definitions replace the EXPAND_TIMER_DIV register defines which are to + * be deprecated. + */ +#define SYSC_TIMER_DIV 0x04830140 +#define SYSC_TIMDIV_BITS 0x000000FF +#define SYSC_TIMDIV_S 0 + +/* META Enhanced by-pass control for local and global region */ +#define MMCU_LOCAL_EBCTRL 0x04830600 +#define MMCU_GLOBAL_EBCTRL 0x04830608 +#define MMCU_EBCTRL_SINGLE_BIT 0x00000020 /* TLB Uncached */ +/* + * These fields in the above registers provide MMCU_ENTRY_* values + * for each direct mapped region to enable optimisation of these areas. + */ +#define MMCU_EBCTRL_DCCTRL_BITS 0x000000C0 /* Get DC Ctrl */ +#define MMCU_EBCTRL_DCCTRL_S 0 +#define MMCU_EBCTRL_ICCTRL_BITS 0x0000C000 /* Get DC Ctrl */ +#define MMCU_EBCTRL_ICCTRL_S 8 + +/* META Cached Core Mode Registers */ +#define MMCU_T0CCM_ICCTRL 0x04830680 /* Core cached code control */ +#define MMCU_TnCCM_xxCTRL_STRIDE 8 +#define MMCU_TnCCM_xxCTRL_STRIDE_S 3 +#define MMCU_T1CCM_ICCTRL 0x04830688 +#define MMCU_T2CCM_ICCTRL 0x04830690 +#define MMCU_T3CCM_ICCTRL 0x04830698 +#define MMCU_T0CCM_DCCTRL 0x048306C0 /* Core cached data control */ +#define MMCU_T1CCM_DCCTRL 0x048306C8 +#define MMCU_T2CCM_DCCTRL 0x048306D0 +#define MMCU_T3CCM_DCCTRL 0x048306D8 +#define MMCU_TnCCM_ENABLE_BIT 0x00000001 +#define MMCU_TnCCM_WIN3_BIT 0x00000002 +#define MMCU_TnCCM_DCWRITE_BIT 0x00000004 /* In DCCTRL only */ +#define MMCU_TnCCM_REGSZ_BITS 0x00000F00 +#define MMCU_TnCCM_REGSZ_S 8 +#define MMCU_TnCCM_REGSZ0_POWER 12 /* RegSz 0 -> 4K */ +#define MMCU_TnCCM_REGSZ_MAXBYTES 0x00080000 /* 512K max */ +#define MMCU_TnCCM_ADDR_BITS 0xFFFFF000 +#define MMCU_TnCCM_ADDR_S 12 + +#endif /* METAC_2_1 */ + +/* + * Hardware performance counter registers + * -------------------------------------- + */ +#ifdef METAC_2_1 +/* Two Performance Counter Internal Core Events Control registers */ +#define PERF_ICORE0 0x0480FFD0 +#define PERF_ICORE1 0x0480FFD8 +#define PERFI_CTRL_BITS 0x0000000F +#define PERFI_CTRL_S 0 +#define PERFI_CAH_DMISS 0x0 /* Dcache Misses in cache (TLB Hit) */ +#define PERFI_CAH_IMISS 0x1 /* Icache Misses in cache (TLB Hit) */ +#define PERFI_TLB_DMISS 0x2 /* Dcache Misses in per-thread TLB */ +#define PERFI_TLB_IMISS 0x3 /* Icache Misses in per-thread TLB */ +#define PERFI_TLB_DWRHITS 0x4 /* DC Write-Hits in per-thread TLB */ +#define PERFI_TLB_DWRMISS 0x5 /* DC Write-Miss in per-thread TLB */ +#define PERFI_CAH_DLFETCH 0x8 /* DC Read cache line fetch */ +#define PERFI_CAH_ILFETCH 0x9 /* DC Read cache line fetch */ +#define PERFI_CAH_DWFETCH 0xA /* DC Read cache word fetch */ +#define PERFI_CAH_IWFETCH 0xB /* DC Read cache word fetch */ +#endif /* METAC_2_1 */ + +/* Two memory-mapped hardware performance counter registers */ +#define PERF_COUNT0 0x0480FFE0 +#define PERF_COUNT1 0x0480FFE8 + +/* Fields in PERF_COUNTn registers */ +#define PERF_COUNT_BITS 0x00ffffff /* Event count value */ + +#define PERF_THREAD_BITS 0x0f000000 /* Thread mask selects threads */ +#define PERF_THREAD_S 24 + +#define PERF_CTRL_BITS 0xf0000000 /* Event filter control */ +#define PERF_CTRL_S 28 + +#define PERFCTRL_SUPER 0 /* Superthread cycles */ +#define PERFCTRL_REWIND 1 /* Rewinds due to Dcache Misses */ +#ifdef METAC_2_1 +#define PERFCTRL_SUPREW 2 /* Rewinds of superthreaded cycles (no mask) */ + +#define PERFCTRL_CYCLES 3 /* Counts all cycles (no mask) */ + +#define PERFCTRL_PREDBC 4 /* Conditional branch predictions */ +#define PERFCTRL_MISPBC 5 /* Conditional branch mispredictions */ +#define PERFCTRL_PREDRT 6 /* Return predictions */ +#define PERFCTRL_MISPRT 7 /* Return mispredictions */ +#endif /* METAC_2_1 */ + +#define PERFCTRL_DHITS 8 /* Dcache Hits */ +#define PERFCTRL_IHITS 9 /* Icache Hits */ +#define PERFCTRL_IMISS 10 /* Icache Misses in cache or TLB */ +#ifdef METAC_2_1 +#define PERFCTRL_DCSTALL 11 /* Dcache+TLB o/p delayed (per-thread) */ +#define PERFCTRL_ICSTALL 12 /* Icache+TLB o/p delayed (per-thread) */ + +#define PERFCTRL_INT 13 /* Internal core delailed events (see next) */ +#define PERFCTRL_EXT 15 /* External source in core periphery */ +#endif /* METAC_2_1 */ + +#ifdef METAC_2_1 +/* These definitions replace the EXPAND_PERFCHANx register defines which are to + * be deprecated. + */ +#define PERF_CHAN0 0x04830150 +#define PERF_CHAN1 0x04830158 +#define PERF_CHAN_BITS 0x0000000F +#define PERF_CHAN_S 0 +#define PERFCHAN_WRC_WRBURST 0x0 /* Write combiner write burst */ +#define PERFCHAN_WRC_WRITE 0x1 /* Write combiner write */ +#define PERFCHAN_WRC_RDBURST 0x2 /* Write combiner read burst */ +#define PERFCHAN_WRC_READ 0x3 /* Write combiner read */ +#define PERFCHAN_PREARB_DELAY 0x4 /* Pre-arbiter delay cycle */ + /* Cross-bar hold-off cycle: */ +#define PERFCHAN_XBAR_HOLDWRAP 0x5 /* wrapper register */ +#define PERFCHAN_XBAR_HOLDSBUS 0x6 /* system bus (ATP only) */ +#define PERFCHAN_XBAR_HOLDCREG 0x9 /* core registers */ +#define PERFCHAN_L2C_MISS 0x6 /* L2 Cache miss */ +#define PERFCHAN_L2C_HIT 0x7 /* L2 Cache hit */ +#define PERFCHAN_L2C_WRITEBACK 0x8 /* L2 Cache writeback */ + /* Admission delay cycle: */ +#define PERFCHAN_INPUT_CREG 0xB /* core registers */ +#define PERFCHAN_INPUT_INTR 0xC /* internal ram */ +#define PERFCHAN_INPUT_WRC 0xD /* write combiners(memory) */ + +/* Should following be removed as not in TRM anywhere? */ +#define PERFCHAN_XBAR_HOLDINTR 0x8 /* internal ram */ +#define PERFCHAN_INPUT_SBUS 0xA /* register port */ +/* End of remove section. */ + +#define PERFCHAN_MAINARB_DELAY 0xF /* Main arbiter delay cycle */ + +#endif /* METAC_2_1 */ + +#ifdef METAC_2_1 +/* + * Write combiner registers + * ------------------------ + * + * These replace the EXPAND_T0WRCOMBINE register defines, which will be + * deprecated. + */ +#define WRCOMB_CONFIG0 0x04830100 +#define WRCOMB_LFFEn_BIT 0x00004000 /* Enable auto line full flush */ +#define WRCOMB_ENABLE_BIT 0x00002000 /* Enable write combiner */ +#define WRCOMB_TIMEOUT_ENABLE_BIT 0x00001000 /* Timeout flush enable */ +#define WRCOMB_TIMEOUT_COUNT_BITS 0x000003FF +#define WRCOMB_TIMEOUT_COUNT_S 0 +#define WRCOMB_CONFIG4 0x04830180 +#define WRCOMB_PARTALLOC_BITS 0x000000C0 +#define WRCOMB_PARTALLOC_S 64 +#define WRCOMB_PARTSIZE_BITS 0x00000030 +#define WRCOMB_PARTSIZE_S 4 +#define WRCOMB_PARTOFFSET_BITS 0x0000000F +#define WRCOMB_PARTOFFSET_S 0 +#define WRCOMB_CONFIG_STRIDE 8 +#endif /* METAC_2_1 */ + +#ifdef METAC_2_1 +/* + * Thread arbiter registers + * ------------------------ + * + * These replace the EXPAND_T0ARBITER register defines, which will be + * deprecated. + */ +#define ARBITER_ARBCONFIG0 0x04830120 +#define ARBCFG_BPRIORITY_BIT 0x02000000 +#define ARBCFG_IPRIORITY_BIT 0x01000000 +#define ARBCFG_PAGE_BITS 0x00FF0000 +#define ARBCFG_PAGE_S 16 +#define ARBCFG_BBASE_BITS 0x0000FF00 +#define ARGCFG_BBASE_S 8 +#define ARBCFG_IBASE_BITS 0x000000FF +#define ARBCFG_IBASE_S 0 +#define ARBITER_TTECONFIG0 0x04820160 +#define ARBTTE_IUPPER_BITS 0xFF000000 +#define ARBTTE_IUPPER_S 24 +#define ARBTTE_ILOWER_BITS 0x00FF0000 +#define ARBTTE_ILOWER_S 16 +#define ARBTTE_BUPPER_BITS 0x0000FF00 +#define ARBTTE_BUPPER_S 8 +#define ARBTTE_BLOWER_BITS 0x000000FF +#define ARBTTE_BLOWER_S 0 +#define ARBITER_STRIDE 8 +#endif /* METAC_2_1 */ + +/* + * Expansion area registers + * -------------------------------------- + */ + +/* These defines are to be deprecated. See above instead. */ +#define EXPAND_T0WRCOMBINE 0x03000000 +#ifdef METAC_2_1 +#define EXPWRC_LFFEn_BIT 0x00004000 /* Enable auto line full flush */ +#endif /* METAC_2_1 */ +#define EXPWRC_ENABLE_BIT 0x00002000 /* Enable write combiner */ +#define EXPWRC_TIMEOUT_ENABLE_BIT 0x00001000 /* Timeout flush enable */ +#define EXPWRC_TIMEOUT_COUNT_BITS 0x000003FF +#define EXPWRC_TIMEOUT_COUNT_S 0 +#define EXPAND_TnWRCOMBINE_STRIDE 0x00000008 + +/* These defines are to be deprecated. See above instead. */ +#define EXPAND_T0ARBITER 0x03000020 +#define EXPARB_BPRIORITY_BIT 0x02000000 +#define EXPARB_IPRIORITY_BIT 0x01000000 +#define EXPARB_PAGE_BITS 0x00FF0000 +#define EXPARB_PAGE_S 16 +#define EXPARB_BBASE_BITS 0x0000FF00 +#define EXPARB_BBASE_S 8 +#define EXPARB_IBASE_BITS 0x000000FF +#define EXPARB_IBASE_S 0 +#define EXPAND_TnARBITER_STRIDE 0x00000008 + +/* These definitions are to be deprecated. See above instead. */ +#define EXPAND_TIMER_DIV 0x03000040 +#define EXPTIM_DIV_BITS 0x000000FF +#define EXPTIM_DIV_S 0 + +/* These definitions are to be deprecated. See above instead. */ +#define EXPAND_PERFCHAN0 0x03000050 +#define EXPAND_PERFCHAN1 0x03000058 +#define EXPPERF_CTRL_BITS 0x0000000F +#define EXPPERF_CTRL_S 0 +#define EXPPERF_WRC_WRBURST 0x0 /* Write combiner write burst */ +#define EXPPERF_WRC_WRITE 0x1 /* Write combiner write */ +#define EXPPERF_WRC_RDBURST 0x2 /* Write combiner read burst */ +#define EXPPERF_WRC_READ 0x3 /* Write combiner read */ +#define EXPPERF_PREARB_DELAY 0x4 /* Pre-arbiter delay cycle */ + /* Cross-bar hold-off cycle: */ +#define EXPPERF_XBAR_HOLDWRAP 0x5 /* wrapper register */ +#define EXPPERF_XBAR_HOLDSBUS 0x6 /* system bus */ +#ifdef METAC_1_2 +#define EXPPERF_XBAR_HOLDLBUS 0x7 /* local bus */ +#else /* METAC_1_2 */ +#define EXPPERF_XBAR_HOLDINTR 0x8 /* internal ram */ +#define EXPPERF_XBAR_HOLDCREG 0x9 /* core registers */ + /* Admission delay cycle: */ +#define EXPPERF_INPUT_SBUS 0xA /* register port */ +#define EXPPERF_INPUT_CREG 0xB /* core registers */ +#define EXPPERF_INPUT_INTR 0xC /* internal ram */ +#define EXPPERF_INPUT_WRC 0xD /* write combiners(memory) */ +#endif /* !METAC_1_2 */ +#define EXPPERF_MAINARB_DELAY 0xF /* Main arbiter delay cycle */ + +/* + * Debug port registers + * -------------------------------------- + */ + +/* Data Exchange Register */ +#define DBGPORT_MDBGDATAX 0x0 + +/* Data Transfer register */ +#define DBGPORT_MDBGDATAT 0x4 + +/* Control Register 0 */ +#define DBGPORT_MDBGCTRL0 0x8 +#define DBGPORT_MDBGCTRL0_ADDR_BITS 0xFFFFFFFC +#define DBGPORT_MDBGCTRL0_ADDR_S 2 +#define DBGPORT_MDBGCTRL0_AUTOINCR_BIT 0x00000002 +#define DBGPORT_MDBGCTRL0_RD_BIT 0x00000001 + +/* Control Register 1 */ +#define DBGPORT_MDBGCTRL1 0xC +#ifdef METAC_2_1 +#define DBGPORT_MDBGCTRL1_DEFERRTHREAD_BITS 0xC0000000 +#define DBGPORT_MDBGCTRL1_DEFERRTHREAD_S 30 +#endif /* METAC_2_1 */ +#define DBGPORT_MDBGCTRL1_LOCK2_INTERLOCK_BIT 0x20000000 +#define DBGPORT_MDBGCTRL1_ATOMIC_INTERLOCK_BIT 0x10000000 +#define DBGPORT_MDBGCTRL1_TRIGSTATUS_BIT 0x08000000 +#define DBGPORT_MDBGCTRL1_GBLPORT_IDLE_BIT 0x04000000 +#define DBGPORT_MDBGCTRL1_COREMEM_IDLE_BIT 0x02000000 +#define DBGPORT_MDBGCTRL1_READY_BIT 0x01000000 +#ifdef METAC_2_1 +#define DBGPORT_MDBGCTRL1_DEFERRID_BITS 0x00E00000 +#define DBGPORT_MDBGCTRL1_DEFERRID_S 21 +#define DBGPORT_MDBGCTRL1_DEFERR_BIT 0x00100000 +#endif /* METAC_2_1 */ +#define DBGPORT_MDBGCTRL1_WR_ACTIVE_BIT 0x00040000 +#define DBGPORT_MDBGCTRL1_COND_LOCK2_BIT 0x00020000 +#define DBGPORT_MDBGCTRL1_LOCK2_BIT 0x00010000 +#define DBGPORT_MDBGCTRL1_DIAGNOSE_BIT 0x00008000 +#define DBGPORT_MDBGCTRL1_FORCEDIAG_BIT 0x00004000 +#define DBGPORT_MDBGCTRL1_MEMFAULT_BITS 0x00003000 +#define DBGPORT_MDBGCTRL1_MEMFAULT_S 12 +#define DBGPORT_MDBGCTRL1_TRIGGER_BIT 0x00000100 +#ifdef METAC_2_1 +#define DBGPORT_MDBGCTRL1_INTSPECIAL_BIT 0x00000080 +#define DBGPORT_MDBGCTRL1_INTRUSIVE_BIT 0x00000040 +#endif /* METAC_2_1 */ +#define DBGPORT_MDBGCTRL1_THREAD_BITS 0x00000030 /* Thread mask selects threads */ +#define DBGPORT_MDBGCTRL1_THREAD_S 4 +#define DBGPORT_MDBGCTRL1_TRANS_SIZE_BITS 0x0000000C +#define DBGPORT_MDBGCTRL1_TRANS_SIZE_S 2 +#define DBGPORT_MDBGCTRL1_TRANS_SIZE_32_BIT 0x00000000 +#define DBGPORT_MDBGCTRL1_TRANS_SIZE_16_BIT 0x00000004 +#define DBGPORT_MDBGCTRL1_TRANS_SIZE_8_BIT 0x00000008 +#define DBGPORT_MDBGCTRL1_BYTE_ROUND_BITS 0x00000003 +#define DBGPORT_MDBGCTRL1_BYTE_ROUND_S 0 +#define DBGPORT_MDBGCTRL1_BYTE_ROUND_8_BIT 0x00000001 +#define DBGPORT_MDBGCTRL1_BYTE_ROUND_16_BIT 0x00000002 + + +/* L2 Cache registers */ +#define SYSC_L2C_INIT 0x048300C0 +#define SYSC_L2C_INIT_INIT 1 +#define SYSC_L2C_INIT_IN_PROGRESS 0 +#define SYSC_L2C_INIT_COMPLETE 1 + +#define SYSC_L2C_ENABLE 0x048300D0 +#define SYSC_L2C_ENABLE_ENABLE_BIT 0x00000001 +#define SYSC_L2C_ENABLE_PFENABLE_BIT 0x00000002 + +#define SYSC_L2C_PURGE 0x048300C8 +#define SYSC_L2C_PURGE_PURGE 1 +#define SYSC_L2C_PURGE_IN_PROGRESS 0 +#define SYSC_L2C_PURGE_COMPLETE 1 + +#endif /* _ASM_METAG_MEM_H_ */ diff --git a/arch/metag/include/asm/metag_regs.h b/arch/metag/include/asm/metag_regs.h new file mode 100644 index 000000000000..acf4b8e6e9d1 --- /dev/null +++ b/arch/metag/include/asm/metag_regs.h @@ -0,0 +1,1184 @@ +/* + * asm/metag_regs.h + * + * Copyright (C) 2000-2007, 2012 Imagination Technologies. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + * + * Various defines for Meta core (non memory-mapped) registers. + */ + +#ifndef _ASM_METAG_REGS_H_ +#define _ASM_METAG_REGS_H_ + +/* + * CHIP Unit Identifiers and Valid/Global register number masks + * ------------------------------------------------------------ + */ +#define TXUCT_ID 0x0 /* Control unit regs */ +#ifdef METAC_1_2 +#define TXUCT_MASK 0xFF0FFFFF /* Valid regs 0..31 */ +#else +#define TXUCT_MASK 0xFF1FFFFF /* Valid regs 0..31 */ +#endif +#define TGUCT_MASK 0x00000000 /* No global regs */ +#define TXUD0_ID 0x1 /* Data unit regs */ +#define TXUD1_ID 0x2 +#define TXUDX_MASK 0xFFFFFFFF /* Valid regs 0..31 */ +#define TGUDX_MASK 0xFFFF0000 /* Global regs for base inst */ +#define TXUDXDSP_MASK 0x0F0FFFFF /* Valid DSP regs */ +#define TGUDXDSP_MASK 0x0E0E0000 /* Global DSP ACC regs */ +#define TXUA0_ID 0x3 /* Address unit regs */ +#define TXUA1_ID 0x4 +#define TXUAX_MASK 0x0000FFFF /* Valid regs 0-15 */ +#define TGUAX_MASK 0x0000FF00 /* Global regs 8-15 */ +#define TXUPC_ID 0x5 /* PC registers */ +#define TXUPC_MASK 0x00000003 /* Valid regs 0- 1 */ +#define TGUPC_MASK 0x00000000 /* No global regs */ +#define TXUPORT_ID 0x6 /* Ports are not registers */ +#define TXUTR_ID 0x7 +#define TXUTR_MASK 0x0000005F /* Valid regs 0-3,4,6 */ +#define TGUTR_MASK 0x00000000 /* No global regs */ +#ifdef METAC_2_1 +#define TXUTT_ID 0x8 +#define TXUTT_MASK 0x0000000F /* Valid regs 0-3 */ +#define TGUTT_MASK 0x00000010 /* Global reg 4 */ +#define TXUFP_ID 0x9 /* FPU regs */ +#define TXUFP_MASK 0x0000FFFF /* Valid regs 0-15 */ +#define TGUFP_MASK 0x00000000 /* No global regs */ +#endif /* METAC_2_1 */ + +#ifdef METAC_1_2 +#define TXUXX_MASKS { TXUCT_MASK, TXUDX_MASK, TXUDX_MASK, TXUAX_MASK, \ + TXUAX_MASK, TXUPC_MASK, 0, TXUTR_MASK, \ + 0, 0, 0, 0, 0, 0, 0, 0 } +#define TGUXX_MASKS { TGUCT_MASK, TGUDX_MASK, TGUDX_MASK, TGUAX_MASK, \ + TGUAX_MASK, TGUPC_MASK, 0, TGUTR_MASK, \ + 0, 0, 0, 0, 0, 0, 0, 0 } +#else /* METAC_1_2 */ +#define TXUXX_MASKS { TXUCT_MASK, TXUDX_MASK, TXUDX_MASK, TXUAX_MASK, \ + TXUAX_MASK, TXUPC_MASK, 0, TXUTR_MASK, \ + TXUTT_MASK, TXUFP_MASK, 0, 0, \ + 0, 0, 0, 0 } +#define TGUXX_MASKS { TGUCT_MASK, TGUDX_MASK, TGUDX_MASK, TGUAX_MASK, \ + TGUAX_MASK, TGUPC_MASK, 0, TGUTR_MASK, \ + TGUTT_MASK, TGUFP_MASK, 0, 0, \ + 0, 0, 0, 0 } +#endif /* !METAC_1_2 */ + +#define TXUXXDSP_MASKS { 0, TXUDXDSP_MASK, TXUDXDSP_MASK, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0 } +#define TGUXXDSP_MASKS { 0, TGUDXDSP_MASK, TGUDXDSP_MASK, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0 } + +/* ------------------------------------------------------------------------- +; DATA AND ADDRESS UNIT REGISTERS +; -----------------------------------------------------------------------*/ +/* + Thread local D0 registers + */ +/* D0.0 ; Holds 32-bit result, can be used as scratch */ +#define D0Re0 D0.0 +/* D0.1 ; Used to pass Arg6_32 */ +#define D0Ar6 D0.1 +/* D0.2 ; Used to pass Arg4_32 */ +#define D0Ar4 D0.2 +/* D0.3 ; Used to pass Arg2_32 to a called routine (see D1.3 below) */ +#define D0Ar2 D0.3 +/* D0.4 ; Can be used as scratch; used to save A0FrP in entry sequences */ +#define D0FrT D0.4 +/* D0.5 ; C compiler assumes preservation, save with D1.5 if used */ +/* D0.6 ; C compiler assumes preservation, save with D1.6 if used */ +/* D0.7 ; C compiler assumes preservation, save with D1.7 if used */ +/* D0.8 ; Use of D0.8 and above is not encouraged */ +/* D0.9 */ +/* D0.10 */ +/* D0.11 */ +/* D0.12 */ +/* D0.13 */ +/* D0.14 */ +/* D0.15 */ +/* + Thread local D1 registers + */ +/* D1.0 ; Holds top 32-bits of 64-bit result, can be used as scratch */ +#define D1Re0 D1.0 +/* D1.1 ; Used to pass Arg5_32 */ +#define D1Ar5 D1.1 +/* D1.2 ; Used to pass Arg3_32 */ +#define D1Ar3 D1.2 +/* D1.3 ; Used to pass Arg1_32 (first 32-bit argument) to a called routine */ +#define D1Ar1 D1.3 +/* D1.4 ; Used for Return Pointer, save during entry with A0FrP (via D0.4) */ +#define D1RtP D1.4 +/* D1.5 ; C compiler assumes preservation, save if used */ +/* D1.6 ; C compiler assumes preservation, save if used */ +/* D1.7 ; C compiler assumes preservation, save if used */ +/* D1.8 ; Use of D1.8 and above is not encouraged */ +/* D1.9 */ +/* D1.10 */ +/* D1.11 */ +/* D1.12 */ +/* D1.13 */ +/* D1.14 */ +/* D1.15 */ +/* + Thread local A0 registers + */ +/* A0.0 ; Primary stack pointer */ +#define A0StP A0.0 +/* A0.1 ; Used as local frame pointer in C, save if used (via D0.4) */ +#define A0FrP A0.1 +/* A0.2 */ +/* A0.3 */ +/* A0.4 ; Use of A0.4 and above is not encouraged */ +/* A0.5 */ +/* A0.6 */ +/* A0.7 */ +/* + Thread local A1 registers + */ +/* A1.0 ; Global static chain pointer - do not modify */ +#define A1GbP A1.0 +/* A1.1 ; Local static chain pointer in C, can be used as scratch */ +#define A1LbP A1.1 +/* A1.2 */ +/* A1.3 */ +/* A1.4 ; Use of A1.4 and above is not encouraged */ +/* A1.5 */ +/* A1.6 */ +/* A1.7 */ +#ifdef METAC_2_1 +/* Renameable registers for use with Fast Interrupts */ +/* The interrupt stack pointer (usually a global register) */ +#define A0IStP A0IReg +/* The interrupt global pointer (usually a global register) */ +#define A1IGbP A1IReg +#endif +/* + Further registers may be globally allocated via linkage/loading tools, + normally they are not used. + */ +/*------------------------------------------------------------------------- +; STACK STRUCTURE and CALLING CONVENTION +; -----------------------------------------------------------------------*/ +/* +; Calling convention indicates that the following is the state of the +; stack frame at the start of a routine- +; +; Arg9_32 [A0StP+#-12] +; Arg8_32 [A0StP+#- 8] +; Arg7_32 [A0StP+#- 4] +; A0StP-> +; +; Registers D1.3, D0.3, ..., to D0.1 are used to pass Arg1_32 to Arg6_32 +; respectively. If a routine needs to store them on the stack in order +; to make sub-calls or because of the general complexity of the routine it +; is best to dump these registers immediately at the start of a routine +; using a MSETL or SETL instruction- +; +; MSETL [A0StP],D0Ar6,D0Ar4,D0Ar2; Only dump argments expected +;or SETL [A0StP+#8++],D0Ar2 ; Up to two 32-bit args expected +; +; For non-leaf routines it is always necessary to save and restore at least +; the return address value D1RtP on the stack. Also by convention if the +; frame is saved then a new A0FrP value must be set-up. So for non-leaf +; routines at this point both these registers must be saved onto the stack +; using a SETL instruction and the new A0FrP value is then set-up- +; +; MOV D0FrT,A0FrP +; ADD A0FrP,A0StP,#0 +; SETL [A0StP+#8++],D0FrT,D1RtP +; +; Registers D0.5, D1.5, to D1.7 are assumed to be preserved across calls so +; a SETL or MSETL instruction can be used to save the current state +; of these registers if they are modified by the current routine- +; +; MSETL [A0StP],D0.5,D0.6,D0.7 ; Only save registers modified +;or SETL [A0StP+#8++],D0.5 ; Only D0.5 and/or D1.5 modified +; +; All of the above sequences can be combined into one maximal case- +; +; MOV D0FrT,A0FrP ; Save and calculate new frame pointer +; ADD A0FrP,A0StP,#(ARS) +; MSETL [A0StP],D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7 +; +; Having completed the above sequence the only remaining task on routine +; entry is to reserve any local and outgoing argment storage space on the +; stack. This instruction may be omitted if the size of this region is zero- +; +; ADD A0StP,A0StP,#(LCS) +; +; LCS is the first example use of one of a number of standard local defined +; values that can be created to make assembler code more readable and +; potentially more robust- +; +; #define ARS 0x18 ; Register arg bytes saved on stack +; #define FRS 0x20 ; Frame save area size in bytes +; #define LCS 0x00 ; Locals and Outgoing arg size +; #define ARO (LCS+FRS) ; Stack offset to access args +; +; All of the above defines should be undefined (#undef) at the end of each +; routine to avoid accidental use in the next routine. +; +; Given all of the above the following stack structure is expected during +; the body of a routine if all args passed in registers are saved during +; entry- +; +; ; 'Incoming args area' +; Arg10_32 [A0StP+#-((10*4)+ARO)] Arg9_32 [A0StP+#-(( 9*4)+ARO)] +; Arg8_32 [A0StP+#-(( 8*4)+ARO)] Arg7_32 [A0StP+#-(( 7*4)+ARO)] +;--- Call point +; D0Ar6= Arg6_32 [A0StP+#-(( 6*4)+ARO)] D1Ar5=Arg5_32 [A0StP+#-(( 5*4)+ARO)] +; D0Ar4= Arg4_32 [A0StP+#-(( 4*4)+ARO)] D1Ar3=Arg3_32 [A0StP+#-(( 3*4)+ARO)] +; D0Ar2= Arg2_32 [A0StP+#-(( 2*4)+ARO)] D1Ar2=Arg1_32 [A0StP+#-(( 1*4)+ARO)] +; ; 'Frame area' +; A0FrP-> D0FrT, D1RtP, +; D0.5, D1.5, +; D0.6, D1.6, +; D0.7, D1.7, +; ; 'Locals area' +; Loc0_32 [A0StP+# (( 0*4)-LCS)], Loc1_32 [A0StP+# (( 1*4)-LCS)] +; .... other locals +; Locn_32 [A0StP+# (( n*4)-LCS)] +; ; 'Outgoing args area' +; Outm_32 [A0StP+#- ( m*4)] .... other outgoing args +; Out8_32 [A0StP+#- ( 1*4)] Out7_32 [A0StP+#- ( 1*4)] +; A0StP-> (Out1_32-Out6_32 in regs D1Ar1-D0Ar6) +; +; The exit sequence for a non-leaf routine can use the frame pointer created +; in the entry sequence to optimise the recovery of the full state- +; +; MGETL D0FrT,D0.5,D0.6,D0.7,[A0FrP] +; SUB A0StP,A0FrP,#(ARS+FRS) +; MOV A0FrP,D0FrT +; MOV PC,D1RtP +; +; Having described the most complex non-leaf case above, it is worth noting +; that if a routine is a leaf and does not use any of the caller-preserved +; state. The routine can be implemented as- +; +; ADD A0StP,A0StP,#LCS +; .... body of routine +; SUB A0StP,A0StP,#LCS +; MOV PC,D1RtP +; +; The stack adjustments can also be omitted if no local storage is required. +; +; Another exit sequence structure is more applicable if for a leaf routine +; with no local frame pointer saved/generated in which the call saved +; registers need to be saved and restored- +; +; MSETL [A0StP],D0.5,D0.6,D0.7 ; Hence FRS is 0x18, ARS is 0x00 +; ADD A0StP,A0StP,#LCS +; .... body of routine +; GETL D0.5,D1.5,[A0StP+#((0*8)-(FRS+LCS))] +; GETL D0.6,D1.6,[A0StP+#((1*8)-(FRS+LCS))] +; GETL D0.7,D1.7,[A0StP+#((2*8)-(FRS+LCS))] +; SUB A0StP,A0StP,#(ARS+FRS+LCS) +; MOV PC,D1RtP +; +; Lastly, to support profiling assembler code should use a fixed entry/exit +; sequence if the trigger define _GMON_ASM is defined- +; +; #ifndef _GMON_ASM +; ... optimised entry code +; #else +; ; Profiling entry case +; MOV D0FrT,A0FrP ; Save and calculate new frame pointer +; ADD A0FrP,A0StP,#(ARS) +; MSETL [A0StP],...,D0FrT,... or SETL [A0FrP],D0FrT,D1RtP +; CALLR D0FrT,_mcount_wrapper +; #endif +; ... body of routine +; #ifndef _GMON_ASM +; ... optimised exit code +; #else +; ; Profiling exit case +; MGETL D0FrT,...,[A0FrP] or GETL D0FrT,D1RtP,[A0FrP++] +; SUB A0StP,A0FrP,#(ARS+FRS) +; MOV A0FrP,D0FrT +; MOV PC,D1RtP +; #endif + + +; ------------------------------------------------------------------------- +; CONTROL UNIT REGISTERS +; ------------------------------------------------------------------------- +; +; See the assembler guide, hardware documentation, or the field values +; defined below for some details of the use of these registers. +*/ +#define TXENABLE CT.0 /* Need to define bit-field values in these */ +#define TXMODE CT.1 +#define TXSTATUS CT.2 /* DEFAULT 0x00020000 */ +#define TXRPT CT.3 +#define TXTIMER CT.4 +#define TXL1START CT.5 +#define TXL1END CT.6 +#define TXL1COUNT CT.7 +#define TXL2START CT.8 +#define TXL2END CT.9 +#define TXL2COUNT CT.10 +#define TXBPOBITS CT.11 +#define TXMRSIZE CT.12 +#define TXTIMERI CT.13 +#define TXDRCTRL CT.14 /* DEFAULT 0x0XXXF0F0 */ +#define TXDRSIZE CT.15 +#define TXCATCH0 CT.16 +#define TXCATCH1 CT.17 +#define TXCATCH2 CT.18 +#define TXCATCH3 CT.19 + +#ifdef METAC_2_1 +#define TXDEFR CT.20 +#define TXCPRS CT.21 +#endif + +#define TXINTERN0 CT.23 +#define TXAMAREG0 CT.24 +#define TXAMAREG1 CT.25 +#define TXAMAREG2 CT.26 +#define TXAMAREG3 CT.27 +#define TXDIVTIME CT.28 /* DEFAULT 0x00000001 */ +#define TXPRIVEXT CT.29 /* DEFAULT 0x003B0000 */ +#define TXTACTCYC CT.30 +#define TXIDLECYC CT.31 + +/***************************************************************************** + * CONTROL UNIT REGISTER BITS + ****************************************************************************/ +/* + * The following registers and where appropriate the sub-fields of those + * registers are defined for pervasive use in controlling program flow. + */ + +/* + * TXENABLE register fields - only the thread id is routinely useful + */ +#define TXENABLE_REGNUM 0 +#define TXENABLE_THREAD_BITS 0x00000700 +#define TXENABLE_THREAD_S 8 +#define TXENABLE_REV_STEP_BITS 0x000000F0 +#define TXENABLE_REV_STEP_S 4 + +/* + * TXMODE register - controls extensions of the instruction set + */ +#define TXMODE_REGNUM 1 +#define TXMODE_DEFAULT 0 /* All fields default to zero */ + +/* + * TXSTATUS register - contains a couple of stable bits that can be used + * to determine the privilege processing level and interrupt + * processing level of the current thread. + */ +#define TXSTATUS_REGNUM 2 +#define TXSTATUS_PSTAT_BIT 0x00020000 /* -> Privilege active */ +#define TXSTATUS_PSTAT_S 17 +#define TXSTATUS_ISTAT_BIT 0x00010000 /* -> In interrupt state */ +#define TXSTATUS_ISTAT_S 16 + +/* + * These are all relatively boring registers, mostly full 32-bit + */ +#define TXRPT_REGNUM 3 /* Repeat counter for XFR... instructions */ +#define TXTIMER_REGNUM 4 /* Timer-- causes timer trigger on overflow */ +#define TXL1START_REGNUM 5 /* Hardware Loop 1 Start-PC/End-PC/Count */ +#define TXL1END_REGNUM 6 +#define TXL1COUNT_REGNUM 7 +#define TXL2START_REGNUM 8 /* Hardware Loop 2 Start-PC/End-PC/Count */ +#define TXL2END_REGNUM 9 +#define TXL2COUNT_REGNUM 10 +#define TXBPOBITS_REGNUM 11 /* Branch predict override bits - tune perf */ +#define TXTIMERI_REGNUM 13 /* Timer-- time based interrupt trigger */ + +/* + * TXDIVTIME register is routinely read to calculate the time-base for + * the TXTIMER register. + */ +#define TXDIVTIME_REGNUM 28 +#define TXDIVTIME_DIV_BITS 0x000000FF +#define TXDIVTIME_DIV_S 0 +#define TXDIVTIME_DIV_MIN 0x00000001 /* Maximum resolution */ +#define TXDIVTIME_DIV_MAX 0x00000100 /* 1/1 -> 1/256 resolution */ +#define TXDIVTIME_BASE_HZ 1000000 /* Timers run at 1Mhz @1/1 */ + +/* + * TXPRIVEXT register can be consulted to decide if write access to a + * part of the threads register set is not permitted when in + * unprivileged mode (PSTAT == 0). + */ +#define TXPRIVEXT_REGNUM 29 +#define TXPRIVEXT_COPRO_BITS 0xFF000000 /* Co-processor 0-7 */ +#define TXPRIVEXT_COPRO_S 24 +#ifndef METAC_1_2 +#define TXPRIVEXT_TXTIMER_BIT 0x00080000 /* TXTIMER priv */ +#define TXPRIVEXT_TRACE_BIT 0x00040000 /* TTEXEC|TTCTRL|GTEXEC */ +#endif +#define TXPRIVEXT_TXTRIGGER_BIT 0x00020000 /* TXSTAT|TXMASK|TXPOLL */ +#define TXPRIVEXT_TXGBLCREG_BIT 0x00010000 /* Global common regs */ +#define TXPRIVEXT_CBPRIV_BIT 0x00008000 /* Mem i/f dump priv */ +#define TXPRIVEXT_ILOCK_BIT 0x00004000 /* LOCK inst priv */ +#define TXPRIVEXT_TXITACCYC_BIT 0x00002000 /* TXIDLECYC|TXTACTCYC */ +#define TXPRIVEXT_TXDIVTIME_BIT 0x00001000 /* TXDIVTIME priv */ +#define TXPRIVEXT_TXAMAREGX_BIT 0x00000800 /* TXAMAREGX priv */ +#define TXPRIVEXT_TXTIMERI_BIT 0x00000400 /* TXTIMERI priv */ +#define TXPRIVEXT_TXSTATUS_BIT 0x00000200 /* TXSTATUS priv */ +#define TXPRIVEXT_TXDISABLE_BIT 0x00000100 /* TXENABLE priv */ +#ifndef METAC_1_2 +#define TXPRIVEXT_MINIMON_BIT 0x00000080 /* Enable Minim features */ +#define TXPRIVEXT_OLDBCCON_BIT 0x00000020 /* Restore Static predictions */ +#define TXPRIVEXT_ALIGNREW_BIT 0x00000010 /* Align & precise checks */ +#endif +#define TXPRIVEXT_KEEPPRI_BIT 0x00000008 /* Use AMA_Priority if ISTAT=1*/ +#define TXPRIVEXT_TXTOGGLEI_BIT 0x00000001 /* TX.....I priv */ + +/* + * TXTACTCYC register - counts instructions issued for this thread + */ +#define TXTACTCYC_REGNUM 30 +#define TXTACTCYC_COUNT_MASK 0x00FFFFFF + +/* + * TXIDLECYC register - counts idle cycles + */ +#define TXIDLECYC_REGNUM 31 +#define TXIDLECYC_COUNT_MASK 0x00FFFFFF + +/***************************************************************************** + * DSP EXTENSIONS + ****************************************************************************/ +/* + * The following values relate to fields and controls that only a program + * using the DSP extensions of the META instruction set need to know. + */ + + +#ifndef METAC_1_2 +/* + * Allow co-processor hardware to replace the read pipeline data source in + * appropriate cases. + */ +#define TXMODE_RDCPEN_BIT 0x00800000 +#endif + +/* + * Address unit addressing modes + */ +#define TXMODE_A1ADDR_BITS 0x00007000 +#define TXMODE_A1ADDR_S 12 +#define TXMODE_A0ADDR_BITS 0x00000700 +#define TXMODE_A0ADDR_S 8 +#define TXMODE_AXADDR_MODULO 3 +#define TXMODE_AXADDR_REVB 4 +#define TXMODE_AXADDR_REVW 5 +#define TXMODE_AXADDR_REVD 6 +#define TXMODE_AXADDR_REVL 7 + +/* + * Data unit OverScale select (default 0 -> normal, 1 -> top 16 bits) + */ +#define TXMODE_DXOVERSCALE_BIT 0x00000080 + +/* + * Data unit MX mode select (default 0 -> MX16, 1 -> MX8) + */ +#define TXMODE_M8_BIT 0x00000040 + +/* + * Data unit accumulator saturation point (default -> 40 bit accumulator) + */ +#define TXMODE_DXACCSAT_BIT 0x00000020 /* Set for 32-bit accumulator */ + +/* + * Data unit accumulator saturation enable (default 0 -> no saturation) + */ +#define TXMODE_DXSAT_BIT 0x00000010 + +/* + * Data unit master rounding control (default 0 -> normal, 1 -> convergent) + */ +#define TXMODE_DXROUNDING_BIT 0x00000008 + +/* + * Data unit product shift for fractional arithmetic (default off) + */ +#define TXMODE_DXPRODSHIFT_BIT 0x00000004 + +/* + * Select the arithmetic mode (multiply mostly) for both data units + */ +#define TXMODE_DXARITH_BITS 0x00000003 +#define TXMODE_DXARITH_32 3 +#define TXMODE_DXARITH_32H 2 +#define TXMODE_DXARITH_S16 1 +#define TXMODE_DXARITH_16 0 + +/* + * TXMRSIZE register value only relevant when DSP modulo addressing active + */ +#define TXMRSIZE_REGNUM 12 +#define TXMRSIZE_MIN 0x0002 /* 0, 1 -> normal addressing logic */ +#define TXMRSIZE_MAX 0xFFFF + +/* + * TXDRCTRL register can be used to detect the actaul size of the DSP RAM + * partitions allocated to this thread. + */ +#define TXDRCTRL_REGNUM 14 +#define TXDRCTRL_SINESIZE_BITS 0x0F000000 +#define TXDRCTRL_SINESIZE_S 24 +#define TXDRCTRL_RAMSZPOW_BITS 0x001F0000 /* Limit = (1<<RAMSZPOW)-1 */ +#define TXDRCTRL_RAMSZPOW_S 16 +#define TXDRCTRL_D1RSZAND_BITS 0x0000F000 /* Mask top 4 bits - D1 */ +#define TXDRCTRL_D1RSZAND_S 12 +#define TXDRCTRL_D0RSZAND_BITS 0x000000F0 /* Mask top 4 bits - D0 */ +#define TXDRCTRL_D0RSZAND_S 4 +/* Given extracted RAMSZPOW and DnRSZAND fields this returns the size */ +#define TXDRCTRL_DXSIZE(Pow, AndBits) \ + ((((~(AndBits)) & 0x0f) + 1) << ((Pow)-4)) + +/* + * TXDRSIZE register provides modulo addressing options for each DSP RAM + */ +#define TXDRSIZE_REGNUM 15 +#define TXDRSIZE_R1MOD_BITS 0xFFFF0000 +#define TXDRSIZE_R1MOD_S 16 +#define TXDRSIZE_R0MOD_BITS 0x0000FFFF +#define TXDRSIZE_R0MOD_S 0 + +#define TXDRSIZE_RBRAD_SCALE_BITS 0x70000000 +#define TXDRSIZE_RBRAD_SCALE_S 28 +#define TXDRSIZE_RBMODSIZE_BITS 0x0FFF0000 +#define TXDRSIZE_RBMODSIZE_S 16 +#define TXDRSIZE_RARAD_SCALE_BITS 0x00007000 +#define TXDRSIZE_RARAD_SCALE_S 12 +#define TXDRSIZE_RAMODSIZE_BITS 0x00000FFF +#define TXDRSIZE_RAMODSIZE_S 0 + +/***************************************************************************** + * DEFERRED and BUS ERROR EXTENSION + ****************************************************************************/ + +/* + * TXDEFR register - Deferred exception control + */ +#define TXDEFR_REGNUM 20 +#define TXDEFR_DEFAULT 0 /* All fields default to zero */ + +/* + * Bus error state is a multi-bit positive/negative event notification from + * the bus infrastructure. + */ +#define TXDEFR_BUS_ERR_BIT 0x80000000 /* Set if error (LSB STATE) */ +#define TXDEFR_BUS_ERRI_BIT 0x40000000 /* Fetch returned error */ +#define TXDEFR_BUS_STATE_BITS 0x3F000000 /* Bus event/state data */ +#define TXDEFR_BUS_STATE_S 24 +#define TXDEFR_BUS_TRIG_BIT 0x00800000 /* Set when bus error seen */ + +/* + * Bus events are collected by background code in a deferred manner unless + * selected to trigger an extended interrupt HALT trigger when they occur. + */ +#define TXDEFR_BUS_ICTRL_BIT 0x00000080 /* Enable interrupt trigger */ + +/* + * CHIP Automatic Mips Allocation control registers + * ------------------------------------------------ + */ + +/* CT Bank AMA Registers */ +#define TXAMAREG0_REGNUM 24 +#ifdef METAC_1_2 +#define TXAMAREG0_CTRL_BITS 0x07000000 +#else /* METAC_1_2 */ +#define TXAMAREG0_RCOFF_BIT 0x08000000 +#define TXAMAREG0_DLINEHLT_BIT 0x04000000 +#define TXAMAREG0_DLINEDIS_BIT 0x02000000 +#define TXAMAREG0_CYCSTRICT_BIT 0x01000000 +#define TXAMAREG0_CTRL_BITS (TXAMAREG0_RCOFF_BIT | \ + TXAMAREG0_DLINEHLT_BIT | \ + TXAMAREG0_DLINEDIS_BIT | \ + TXAMAREG0_CYCSTRICT_BIT) +#endif /* !METAC_1_2 */ +#define TXAMAREG0_CTRL_S 24 +#define TXAMAREG0_MDM_BIT 0x00400000 +#define TXAMAREG0_MPF_BIT 0x00200000 +#define TXAMAREG0_MPE_BIT 0x00100000 +#define TXAMAREG0_MASK_BITS (TXAMAREG0_MDM_BIT | \ + TXAMAREG0_MPF_BIT | \ + TXAMAREG0_MPE_BIT) +#define TXAMAREG0_MASK_S 20 +#define TXAMAREG0_SDM_BIT 0x00040000 +#define TXAMAREG0_SPF_BIT 0x00020000 +#define TXAMAREG0_SPE_BIT 0x00010000 +#define TXAMAREG0_STATUS_BITS (TXAMAREG0_SDM_BIT | \ + TXAMAREG0_SPF_BIT | \ + TXAMAREG0_SPE_BIT) +#define TXAMAREG0_STATUS_S 16 +#define TXAMAREG0_PRIORITY_BITS 0x0000FF00 +#define TXAMAREG0_PRIORITY_S 8 +#define TXAMAREG0_BVALUE_BITS 0x000000FF +#define TXAMAREG0_BVALUE_S 0 + +#define TXAMAREG1_REGNUM 25 +#define TXAMAREG1_DELAYC_BITS 0x07FFFFFF +#define TXAMAREG1_DELAYC_S 0 + +#define TXAMAREG2_REGNUM 26 +#ifdef METAC_1_2 +#define TXAMAREG2_DLINEC_BITS 0x00FFFFFF +#define TXAMAREG2_DLINEC_S 0 +#else /* METAC_1_2 */ +#define TXAMAREG2_IRQPRIORITY_BIT 0xFF000000 +#define TXAMAREG2_IRQPRIORITY_S 24 +#define TXAMAREG2_DLINEC_BITS 0x00FFFFF0 +#define TXAMAREG2_DLINEC_S 4 +#endif /* !METAC_1_2 */ + +#define TXAMAREG3_REGNUM 27 +#define TXAMAREG2_AMABLOCK_BIT 0x00080000 +#define TXAMAREG2_AMAC_BITS 0x0000FFFF +#define TXAMAREG2_AMAC_S 0 + +/***************************************************************************** + * FPU EXTENSIONS + ****************************************************************************/ +/* + * The following registers only exist in FPU enabled cores. + */ + +/* + * TXMODE register - FPU rounding mode control/status fields + */ +#define TXMODE_FPURMODE_BITS 0x00030000 +#define TXMODE_FPURMODE_S 16 +#define TXMODE_FPURMODEWRITE_BIT 0x00040000 /* Set to change FPURMODE */ + +/* + * TXDEFR register - FPU exception handling/state is a significant source + * of deferrable errors. Run-time S/W can move handling to interrupt level + * using DEFR instruction to collect state. + */ +#define TXDEFR_FPE_FE_BITS 0x003F0000 /* Set by FPU_FE events */ +#define TXDEFR_FPE_FE_S 16 + +#define TXDEFR_FPE_INEXACT_FE_BIT 0x010000 +#define TXDEFR_FPE_UNDERFLOW_FE_BIT 0x020000 +#define TXDEFR_FPE_OVERFLOW_FE_BIT 0x040000 +#define TXDEFR_FPE_DIVBYZERO_FE_BIT 0x080000 +#define TXDEFR_FPE_INVALID_FE_BIT 0x100000 +#define TXDEFR_FPE_DENORMAL_FE_BIT 0x200000 + +#define TXDEFR_FPE_ICTRL_BITS 0x000003F /* Route to interrupts */ +#define TXDEFR_FPE_ICTRL_S 0 + +#define TXDEFR_FPE_INEXACT_ICTRL_BIT 0x01 +#define TXDEFR_FPE_UNDERFLOW_ICTRL_BIT 0x02 +#define TXDEFR_FPE_OVERFLOW_ICTRL_BIT 0x04 +#define TXDEFR_FPE_DIVBYZERO_ICTRL_BIT 0x08 +#define TXDEFR_FPE_INVALID_ICTRL_BIT 0x10 +#define TXDEFR_FPE_DENORMAL_ICTRL_BIT 0x20 + +/* + * DETAILED FPU RELATED VALUES + * --------------------------- + */ + +/* + * Rounding mode field in TXMODE can hold a number of logical values + */ +#define METAG_FPURMODE_TONEAREST 0x0 /* Default */ +#define METAG_FPURMODE_TOWARDZERO 0x1 +#define METAG_FPURMODE_UPWARD 0x2 +#define METAG_FPURMODE_DOWNWARD 0x3 + +/* + * In order to set the TXMODE register field that controls the rounding mode + * an extra bit must be set in the value written versus that read in order + * to gate writes to the rounding mode field. This allows other non-FPU code + * to modify TXMODE without knowledge of the FPU units presence and not + * influence the FPU rounding mode. This macro adds the required bit so new + * rounding modes are accepted. + */ +#define TXMODE_FPURMODE_SET(FPURMode) \ + (TXMODE_FPURMODEWRITE_BIT + ((FPURMode)<<TXMODE_FPURMODE_S)) + +/* + * To successfully restore TXMODE to zero at the end of the function the + * following value (rather than zero) must be used. + */ +#define TXMODE_FPURMODE_RESET (TXMODE_FPURMODEWRITE_BIT) + +/* + * In TXSTATUS a special bit exists to indicate if FPU H/W has been accessed + * since it was last reset. + */ +#define TXSTATUS_FPACTIVE_BIT 0x01000000 + +/* + * Exception state (see TXDEFR_FPU_FE_*) and enabling (for interrupt + * level processing (see TXDEFR_FPU_ICTRL_*) are controlled by similar + * bit mask locations within each field. + */ +#define METAG_FPU_FE_INEXACT 0x01 +#define METAG_FPU_FE_UNDERFLOW 0x02 +#define METAG_FPU_FE_OVERFLOW 0x04 +#define METAG_FPU_FE_DIVBYZERO 0x08 +#define METAG_FPU_FE_INVALID 0x10 +#define METAG_FPU_FE_DENORMAL 0x20 +#define METAG_FPU_FE_ALL_EXCEPT (METAG_FPU_FE_INEXACT | \ + METAG_FPU_FE_UNDERFLOW | \ + METAG_FPU_FE_OVERFLOW | \ + METAG_FPU_FE_DIVBYZERO | \ + METAG_FPU_FE_INVALID | \ + METAG_FPU_FE_DENORMAL) + +/***************************************************************************** + * THREAD CONTROL, ERROR, OR INTERRUPT STATE EXTENSIONS + ****************************************************************************/ +/* + * The following values are only relevant to code that externally controls + * threads, handles errors/interrupts, and/or set-up interrupt/error handlers + * for subsequent use. + */ + +/* + * TXENABLE register fields - only ENABLE_BIT is potentially read/write + */ +#define TXENABLE_MAJOR_REV_BITS 0xFF000000 +#define TXENABLE_MAJOR_REV_S 24 +#define TXENABLE_MINOR_REV_BITS 0x00FF0000 +#define TXENABLE_MINOR_REV_S 16 +#define TXENABLE_CLASS_BITS 0x0000F000 +#define TXENABLE_CLASS_S 12 +#define TXENABLE_CLASS_DSP 0x0 /* -> DSP Thread */ +#define TXENABLE_CLASS_LDSP 0x8 /* -> DSP LITE Thread */ +#define TXENABLE_CLASS_GP 0xC /* -> General Purpose Thread */ +#define TXENABLE_CLASSALT_LFPU 0x2 /* Set to indicate LITE FPU */ +#define TXENABLE_CLASSALT_FPUR8 0x1 /* Set to indicate 8xFPU regs */ +#define TXENABLE_MTXARCH_BIT 0x00000800 +#define TXENABLE_STEP_REV_BITS 0x000000F0 +#define TXENABLE_STEP_REV_S 4 +#define TXENABLE_STOPPED_BIT 0x00000004 /* TXOFF due to ENABLE->0 */ +#define TXENABLE_OFF_BIT 0x00000002 /* Thread is in off state */ +#define TXENABLE_ENABLE_BIT 0x00000001 /* Set if running */ + +/* + * TXSTATUS register - used by external/internal interrupt/error handler + */ +#define TXSTATUS_CB1MARKER_BIT 0x00800000 /* -> int level mem state */ +#define TXSTATUS_CBMARKER_BIT 0x00400000 /* -> mem i/f state dumped */ +#define TXSTATUS_MEM_FAULT_BITS 0x00300000 +#define TXSTATUS_MEM_FAULT_S 20 +#define TXSTATUS_MEMFAULT_NONE 0x0 /* -> No memory fault */ +#define TXSTATUS_MEMFAULT_GEN 0x1 /* -> General fault */ +#define TXSTATUS_MEMFAULT_PF 0x2 /* -> Page fault */ +#define TXSTATUS_MEMFAULT_RO 0x3 /* -> Read only fault */ +#define TXSTATUS_MAJOR_HALT_BITS 0x000C0000 +#define TXSTATUS_MAJOR_HALT_S 18 +#define TXSTATUS_MAJHALT_TRAP 0x0 /* -> SWITCH inst used */ +#define TXSTATUS_MAJHALT_INST 0x1 /* -> Unknown inst or fetch */ +#define TXSTATUS_MAJHALT_PRIV 0x2 /* -> Internal privilege */ +#define TXSTATUS_MAJHALT_MEM 0x3 /* -> Memory i/f fault */ +#define TXSTATUS_L_STEP_BITS 0x00000800 /* -> Progress of L oper */ +#define TXSTATUS_LSM_STEP_BITS 0x00000700 /* -> Progress of L/S mult */ +#define TXSTATUS_LSM_STEP_S 8 +#define TXSTATUS_FLAG_BITS 0x0000001F /* -> All the flags */ +#define TXSTATUS_SCC_BIT 0x00000010 /* -> Split-16 flags ... */ +#define TXSTATUS_SCF_LZ_BIT 0x00000008 /* -> Split-16 Low Z flag */ +#define TXSTATUS_SCF_HZ_BIT 0x00000004 /* -> Split-16 High Z flag */ +#define TXSTATUS_SCF_HC_BIT 0x00000002 /* -> Split-16 High C flag */ +#define TXSTATUS_SCF_LC_BIT 0x00000001 /* -> Split-16 Low C flag */ +#define TXSTATUS_CF_Z_BIT 0x00000008 /* -> Condition Z flag */ +#define TXSTATUS_CF_N_BIT 0x00000004 /* -> Condition N flag */ +#define TXSTATUS_CF_O_BIT 0x00000002 /* -> Condition O flag */ +#define TXSTATUS_CF_C_BIT 0x00000001 /* -> Condition C flag */ + +/* + * TXCATCH0-3 register contents may store information on a memory operation + * that has failed if the bit TXSTATUS_CBMARKER_BIT is set. + */ +#define TXCATCH0_REGNUM 16 +#define TXCATCH1_REGNUM 17 +#define TXCATCH1_ADDR_BITS 0xFFFFFFFF /* TXCATCH1 is Addr 0-31 */ +#define TXCATCH1_ADDR_S 0 +#define TXCATCH2_REGNUM 18 +#define TXCATCH2_DATA0_BITS 0xFFFFFFFF /* TXCATCH2 is Data 0-31 */ +#define TXCATCH2_DATA0_S 0 +#define TXCATCH3_REGNUM 19 +#define TXCATCH3_DATA1_BITS 0xFFFFFFFF /* TXCATCH3 is Data 32-63 */ +#define TXCATCH3_DATA1_S 0 + +/* + * Detailed catch state information + * -------------------------------- + */ + +/* Contents of TXCATCH0 register */ +#define TXCATCH0_LDRXX_BITS 0xF8000000 /* Load destination reg 0-31 */ +#define TXCATCH0_LDRXX_S 27 +#define TXCATCH0_LDDST_BITS 0x07FF0000 /* Load destination bits */ +#define TXCATCH0_LDDST_S 16 +#define TXCATCH0_LDDST_D1DSP 0x400 /* One bit set if it's a LOAD */ +#define TXCATCH0_LDDST_D0DSP 0x200 +#define TXCATCH0_LDDST_TMPLT 0x100 +#define TXCATCH0_LDDST_TR 0x080 +#ifdef METAC_2_1 +#define TXCATCH0_LDDST_FPU 0x040 +#endif +#define TXCATCH0_LDDST_PC 0x020 +#define TXCATCH0_LDDST_A1 0x010 +#define TXCATCH0_LDDST_A0 0x008 +#define TXCATCH0_LDDST_D1 0x004 +#define TXCATCH0_LDDST_D0 0x002 +#define TXCATCH0_LDDST_CT 0x001 +#ifdef METAC_2_1 +#define TXCATCH0_WATCHSTOP_BIT 0x00004000 /* Set if Data Watch set fault */ +#endif +#define TXCATCH0_WATCHS_BIT 0x00004000 /* Set if Data Watch set fault */ +#define TXCATCH0_WATCH1_BIT 0x00002000 /* Set if Data Watch 1 matches */ +#define TXCATCH0_WATCH0_BIT 0x00001000 /* Set if Data Watch 0 matches */ +#define TXCATCH0_FAULT_BITS 0x00000C00 /* See TXSTATUS_MEMFAULT_* */ +#define TXCATCH0_FAULT_S 10 +#define TXCATCH0_PRIV_BIT 0x00000200 /* Privilege of transaction */ +#define TXCATCH0_READ_BIT 0x00000100 /* Set for Read or Load cases */ + +#ifdef METAC_2_1 +/* LNKGET Marker bit in TXCATCH0 */ +#define TXCATCH0_LNKGET_MARKER_BIT 0x00000008 +#define TXCATCH0_PREPROC_BIT 0x00000004 +#endif + +/* Loads are indicated by one of the LDDST bits being set */ +#define TXCATCH0_LDM16_BIT 0x00000004 /* Load M16 flag */ +#define TXCATCH0_LDL2L1_BITS 0x00000003 /* Load data size L2,L1 */ +#define TXCATCH0_LDL2L1_S 0 + +/* Reads are indicated by the READ bit being set without LDDST bits */ +#define TXCATCH0_RAXX_BITS 0x0000001F /* RAXX issue port for read */ +#define TXCATCH0_RAXX_S 0 + +/* Write operations are all that remain if READ bit is not set */ +#define TXCATCH0_WMASK_BITS 0x000000FF /* Write byte lane mask */ +#define TXCATCH0_WMASK_S 0 + +#ifdef METAC_2_1 + +/* When a FPU exception is signalled then FPUSPEC == FPUSPEC_TAG */ +#define TXCATCH0_FPURDREG_BITS 0xF8000000 +#define TXCATCH0_FPURDREG_S 27 +#define TXCATCH0_FPUR1REG_BITS 0x07C00000 +#define TXCATCH0_FPUR1REG_S 22 +#define TXCATCH0_FPUSPEC_BITS 0x000F0000 +#define TXCATCH0_FPUSPEC_S 16 +#define TXCATCH0_FPUSPEC_TAG 0xF +#define TXCATCH0_FPUINSTA_BIT 0x00001000 +#define TXCATCH0_FPUINSTQ_BIT 0x00000800 +#define TXCATCH0_FPUINSTZ_BIT 0x00000400 +#define TXCATCH0_FPUINSTN_BIT 0x00000200 +#define TXCATCH0_FPUINSTO3O_BIT 0x00000100 +#define TXCATCH0_FPUWIDTH_BITS 0x000000C0 +#define TXCATCH0_FPUWIDTH_S 6 +#define TXCATCH0_FPUWIDTH_FLOAT 0 +#define TXCATCH0_FPUWIDTH_DOUBLE 1 +#define TXCATCH0_FPUWIDTH_PAIRED 2 +#define TXCATCH0_FPUOPENC_BITS 0x0000003F +#define TXCATCH0_FPUOPENC_S 0 +#define TXCATCH0_FPUOPENC_ADD 0 /* rop1=Rs1, rop3=Rs2 */ +#define TXCATCH0_FPUOPENC_SUB 1 /* rop1=Rs1, rop3=Rs2 */ +#define TXCATCH0_FPUOPENC_MUL 2 /* rop1=Rs1, rop2=Rs2 */ +#define TXCATCH0_FPUOPENC_ATOI 3 /* rop3=Rs */ +#define TXCATCH0_FPUOPENC_ATOX 4 /* rop3=Rs, uses #Imm */ +#define TXCATCH0_FPUOPENC_ITOA 5 /* rop3=Rs */ +#define TXCATCH0_FPUOPENC_XTOA 6 /* rop3=Rs, uses #Imm */ +#define TXCATCH0_FPUOPENC_ATOH 7 /* rop2=Rs */ +#define TXCATCH0_FPUOPENC_HTOA 8 /* rop2=Rs */ +#define TXCATCH0_FPUOPENC_DTOF 9 /* rop3=Rs */ +#define TXCATCH0_FPUOPENC_FTOD 10 /* rop3=Rs */ +#define TXCATCH0_FPUOPENC_DTOL 11 /* rop3=Rs */ +#define TXCATCH0_FPUOPENC_LTOD 12 /* rop3=Rs */ +#define TXCATCH0_FPUOPENC_DTOXL 13 /* rop3=Rs, uses #imm */ +#define TXCATCH0_FPUOPENC_XLTOD 14 /* rop3=Rs, uses #imm */ +#define TXCATCH0_FPUOPENC_CMP 15 /* rop1=Rs1, rop2=Rs2 */ +#define TXCATCH0_FPUOPENC_MIN 16 /* rop1=Rs1, rop2=Rs2 */ +#define TXCATCH0_FPUOPENC_MAX 17 /* rop1=Rs1, rop2=Rs2 */ +#define TXCATCH0_FPUOPENC_ADDRE 18 /* rop1=Rs1, rop3=Rs2 */ +#define TXCATCH0_FPUOPENC_SUBRE 19 /* rop1=Rs1, rop3=Rs2 */ +#define TXCATCH0_FPUOPENC_MULRE 20 /* rop1=Rs1, rop2=Rs2 */ +#define TXCATCH0_FPUOPENC_MXA 21 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/ +#define TXCATCH0_FPUOPENC_MXAS 22 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/ +#define TXCATCH0_FPUOPENC_MAR 23 /* rop1=Rs1, rop2=Rs2 */ +#define TXCATCH0_FPUOPENC_MARS 24 /* rop1=Rs1, rop2=Rs2 */ +#define TXCATCH0_FPUOPENC_MUZ 25 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/ +#define TXCATCH0_FPUOPENC_MUZS 26 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/ +#define TXCATCH0_FPUOPENC_RCP 27 /* rop2=Rs */ +#define TXCATCH0_FPUOPENC_RSQ 28 /* rop2=Rs */ + +/* For floating point exceptions TXCATCH1 is used to carry extra data */ +#define TXCATCH1_FPUR2REG_BITS 0xF8000000 +#define TXCATCH1_FPUR2REG_S 27 +#define TXCATCH1_FPUR3REG_BITS 0x07C00000 /* Undefined if O3O set */ +#define TXCATCH1_FPUR3REG_S 22 +#define TXCATCH1_FPUIMM16_BITS 0x0000FFFF +#define TXCATCH1_FPUIMM16_S 0 + +#endif /* METAC_2_1 */ + +/* + * TXDIVTIME register used to hold the partial base address of memory i/f + * state dump area. Now deprecated. + */ +#define TXDIVTIME_CBBASE_MASK 0x03FFFE00 +#define TXDIVTIME_CBBASE_LINBASE 0x80000000 +#define TXDIVTIME_CBBASE_LINBOFF 0x00000000 /* BGnd state */ +#define TXDIVTIME_CBBASE_LINIOFF 0x00000100 /* Int state */ + +/* + * TXDIVTIME register used to indicate if the read pipeline was dirty when a + * thread was interrupted, halted, or generated an exception. It is invalid + * to attempt to issue a further pipeline read address while the read + * pipeline is in the dirty state. + */ +#define TXDIVTIME_RPDIRTY_BIT 0x80000000 + +/* + * Further bits in the TXDIVTIME register allow interrupt handling code to + * short-cut the discovery the most significant bit last read from TXSTATI. + * + * This is the bit number of the trigger line that a low level interrupt + * handler should acknowledge and then perhaps the index of a corresponding + * handler function. + */ +#define TXDIVTIME_IRQENC_BITS 0x0F000000 +#define TXDIVTIME_IRQENC_S 24 + +/* + * If TXDIVTIME_RPVALID_BIT is set the read pipeline contained significant + * information when the thread was interrupted|halted|exceptioned. Each slot + * containing data is indicated by a one bit in the corresponding + * TXDIVTIME_RPMASK_BITS bit (least significance bit relates to first + * location in read pipeline - most likely to have the 1 state). Empty slots + * contain zeroes with no interlock applied on reads if RPDIRTY is currently + * set with RPMASK itself being read-only state. + */ +#define TXDIVTIME_RPMASK_BITS 0x003F0000 /* -> Full (1) Empty (0) */ +#define TXDIVTIME_RPMASK_S 16 + +/* + * TXPRIVEXT register can be used to single step thread execution and + * enforce synchronous memory i/f address checking for debugging purposes. + */ +#define TXPRIVEXT_TXSTEP_BIT 0x00000004 +#define TXPRIVEXT_MEMCHECK_BIT 0x00000002 + +/* + * TXINTERNx registers holds internal state information for H/W debugging only + */ +#define TXINTERN0_REGNUM 23 +#define TXINTERN0_LOCK2_BITS 0xF0000000 +#define TXINTERN0_LOCK2_S 28 +#define TXINTERN0_LOCK1_BITS 0x0F000000 +#define TXINTERN0_LOCK1_S 24 +#define TXINTERN0_TIFDF_BITS 0x0000F000 +#define TXINTERN0_TIFDF_S 12 +#define TXINTERN0_TIFIB_BITS 0x00000F00 +#define TXINTERN0_TIFIB_S 8 +#define TXINTERN0_TIFAF_BITS 0x000000F0 +#define TXINTERN0_TIFAF_S 4 +#define TXINTERN0_MSTATE_BITS 0x0000000F +#define TXINTERN0_MSTATE_S 0 + +/* + * TXSTAT, TXMASK, TXPOLL, TXSTATI, TXMASKI, TXPOLLI registers from trigger + * bank all have similar contents (upper kick count bits not in MASK regs) + */ +#define TXSTAT_REGNUM 0 +#define TXSTAT_TIMER_BIT 0x00000001 +#define TXSTAT_TIMER_S 0 +#define TXSTAT_KICK_BIT 0x00000002 +#define TXSTAT_KICK_S 1 +#define TXSTAT_DEFER_BIT 0x00000008 +#define TXSTAT_DEFER_S 3 +#define TXSTAT_EXTTRIG_BITS 0x0000FFF0 +#define TXSTAT_EXTTRIG_S 4 +#define TXSTAT_FPE_BITS 0x003F0000 +#define TXSTAT_FPE_S 16 +#define TXSTAT_FPE_DENORMAL_BIT 0x00200000 +#define TXSTAT_FPE_DENORMAL_S 21 +#define TXSTAT_FPE_INVALID_BIT 0x00100000 +#define TXSTAT_FPE_INVALID_S 20 +#define TXSTAT_FPE_DIVBYZERO_BIT 0x00080000 +#define TXSTAT_FPE_DIVBYZERO_S 19 +#define TXSTAT_FPE_OVERFLOW_BIT 0x00040000 +#define TXSTAT_FPE_OVERFLOW_S 18 +#define TXSTAT_FPE_UNDERFLOW_BIT 0x00020000 +#define TXSTAT_FPE_UNDERFLOW_S 17 +#define TXSTAT_FPE_INEXACT_BIT 0x00010000 +#define TXSTAT_FPE_INEXACT_S 16 +#define TXSTAT_BUSERR_BIT 0x00800000 /* Set if bus error/ack state */ +#define TXSTAT_BUSERR_S 23 +#define TXSTAT_BUSSTATE_BITS 0xFF000000 /* Read only */ +#define TXSTAT_BUSSTATE_S 24 +#define TXSTAT_KICKCNT_BITS 0xFFFF0000 +#define TXSTAT_KICKCNT_S 16 +#define TXMASK_REGNUM 1 +#define TXSTATI_REGNUM 2 +#define TXSTATI_BGNDHALT_BIT 0x00000004 +#define TXMASKI_REGNUM 3 +#define TXPOLL_REGNUM 4 +#define TXPOLLI_REGNUM 6 + +/* + * TXDRCTRL register can be used to partition the DSP RAM space available to + * this thread at startup. This is achieved by offsetting the region allocated + * to each thread. + */ +#define TXDRCTRL_D1PARTOR_BITS 0x00000F00 /* OR's into top 4 bits */ +#define TXDRCTRL_D1PARTOR_S 8 +#define TXDRCTRL_D0PARTOR_BITS 0x0000000F /* OR's into top 4 bits */ +#define TXDRCTRL_D0PARTOR_S 0 +/* Given extracted Pow and Or fields this is threads base within DSP RAM */ +#define TXDRCTRL_DXBASE(Pow, Or) ((Or)<<((Pow)-4)) + +/***************************************************************************** + * RUN TIME TRACE CONTROL REGISTERS + ****************************************************************************/ +/* + * The following values are only relevant to code that implements run-time + * trace features within the META Core + */ +#define TTEXEC TT.0 +#define TTCTRL TT.1 +#define TTMARK TT.2 +#define TTREC TT.3 +#define GTEXEC TT.4 + +#define TTEXEC_REGNUM 0 +#define TTEXEC_EXTTRIGAND_BITS 0x7F000000 +#define TTEXEC_EXTTRIGAND_S 24 +#define TTEXEC_EXTTRIGEN_BIT 0x00008000 +#define TTEXEC_EXTTRIGMATCH_BITS 0x00007F00 +#define TTEXEC_EXTTRIGMATCH_S 8 +#define TTEXEC_TCMODE_BITS 0x00000003 +#define TTEXEC_TCMODE_S 0 + +#define TTCTRL_REGNUM 1 +#define TTCTRL_TRACETT_BITS 0x00008000 +#define TTCTRL_TRACETT_S 15 +#define TTCTRL_TRACEALL_BITS 0x00002000 +#define TTCTRL_TRACEALL_S 13 +#ifdef METAC_2_1 +#define TTCTRL_TRACEALLTAG_BITS 0x00000400 +#define TTCTRL_TRACEALLTAG_S 10 +#endif /* METAC_2_1 */ +#define TTCTRL_TRACETAG_BITS 0x00000200 +#define TTCTRL_TRACETAG_S 9 +#define TTCTRL_TRACETTPC_BITS 0x00000080 +#define TTCTRL_TRACETTPC_S 7 +#define TTCTRL_TRACEMPC_BITS 0x00000020 +#define TTCTRL_TRACEMPC_S 5 +#define TTCTRL_TRACEEN_BITS 0x00000008 +#define TTCTRL_TRACEEN_S 3 +#define TTCTRL_TRACEEN1_BITS 0x00000004 +#define TTCTRL_TRACEEN1_S 2 +#define TTCTRL_TRACEPC_BITS 0x00000002 +#define TTCTRL_TRACEPC_S 1 + +#ifdef METAC_2_1 +#define TTMARK_REGNUM 2 +#define TTMARK_BITS 0xFFFFFFFF +#define TTMARK_S 0x0 + +#define TTREC_REGNUM 3 +#define TTREC_BITS 0xFFFFFFFFFFFFFFFF +#define TTREC_S 0x0 +#endif /* METAC_2_1 */ + +#define GTEXEC_REGNUM 4 +#define GTEXEC_DCRUN_BITS 0x80000000 +#define GTEXEC_DCRUN_S 31 +#define GTEXEC_ICMODE_BITS 0x0C000000 +#define GTEXEC_ICMODE_S 26 +#define GTEXEC_TCMODE_BITS 0x03000000 +#define GTEXEC_TCMODE_S 24 +#define GTEXEC_PERF1CMODE_BITS 0x00040000 +#define GTEXEC_PERF1CMODE_S 18 +#define GTEXEC_PERF0CMODE_BITS 0x00010000 +#define GTEXEC_PERF0CMODE_S 16 +#define GTEXEC_REFMSEL_BITS 0x0000F000 +#define GTEXEC_REFMSEL_S 12 +#define GTEXEC_METRICTH_BITS 0x000003FF +#define GTEXEC_METRICTH_S 0 + +#ifdef METAC_2_1 +/* + * Clock Control registers + * ----------------------- + */ +#define TXCLKCTRL_REGNUM 22 + +/* + * Default setting is with clocks always on (DEFON), turning all clocks off + * can only be done from external devices (OFF), enabling automatic clock + * gating will allow clocks to stop as units fall idle. + */ +#define TXCLKCTRL_ALL_OFF 0x02222222 +#define TXCLKCTRL_ALL_DEFON 0x01111111 +#define TXCLKCTRL_ALL_AUTO 0x02222222 + +/* + * Individual fields control caches, floating point and main data/addr units + */ +#define TXCLKCTRL_CLOCKIC_BITS 0x03000000 +#define TXCLKCTRL_CLOCKIC_S 24 +#define TXCLKCTRL_CLOCKDC_BITS 0x00300000 +#define TXCLKCTRL_CLOCKDC_S 20 +#define TXCLKCTRL_CLOCKFP_BITS 0x00030000 +#define TXCLKCTRL_CLOCKFP_S 16 +#define TXCLKCTRL_CLOCKD1_BITS 0x00003000 +#define TXCLKCTRL_CLOCKD1_S 12 +#define TXCLKCTRL_CLOCKD0_BITS 0x00000300 +#define TXCLKCTRL_CLOCKD0_S 8 +#define TXCLKCTRL_CLOCKA1_BITS 0x00000030 +#define TXCLKCTRL_CLOCKA1_S 4 +#define TXCLKCTRL_CLOCKA0_BITS 0x00000003 +#define TXCLKCTRL_CLOCKA0_S 0 + +/* + * Individual settings for each field are common + */ +#define TXCLKCTRL_CLOCKxx_OFF 0 +#define TXCLKCTRL_CLOCKxx_DEFON 1 +#define TXCLKCTRL_CLOCKxx_AUTO 2 + +#endif /* METAC_2_1 */ + +#ifdef METAC_2_1 +/* + * Fast interrupt new bits + * ------------------------------------ + */ +#define TXSTATUS_IPTOGGLE_BIT 0x80000000 /* Prev PToggle of TXPRIVEXT */ +#define TXSTATUS_ISTATE_BIT 0x40000000 /* IState bit */ +#define TXSTATUS_IWAIT_BIT 0x20000000 /* wait indefinitely in decision step*/ +#define TXSTATUS_IEXCEPT_BIT 0x10000000 /* Indicate an exception occured */ +#define TXSTATUS_IRPCOUNT_BITS 0x0E000000 /* Number of 'dirty' date entries*/ +#define TXSTATUS_IRPCOUNT_S 25 +#define TXSTATUS_IRQSTAT_BITS 0x0000F000 /* IRQEnc bits, trigger or interrupts */ +#define TXSTATUS_IRQSTAT_S 12 +#define TXSTATUS_LNKSETOK_BIT 0x00000020 /* LNKSetOK bit, successful LNKSET */ + +/* New fields in TXDE for fast interrupt system */ +#define TXDIVTIME_IACTIVE_BIT 0x00008000 /* Enable new interrupt system */ +#define TXDIVTIME_INONEST_BIT 0x00004000 /* Gate nested interrupt */ +#define TXDIVTIME_IREGIDXGATE_BIT 0x00002000 /* gate of the IRegIdex field */ +#define TXDIVTIME_IREGIDX_BITS 0x00001E00 /* Index of A0.0/1 replaces */ +#define TXDIVTIME_IREGIDX_S 9 +#define TXDIVTIME_NOST_BIT 0x00000100 /* disable superthreading bit */ +#endif + +#endif /* _ASM_METAG_REGS_H_ */ diff --git a/arch/metag/include/asm/mman.h b/arch/metag/include/asm/mman.h new file mode 100644 index 000000000000..17999dba9275 --- /dev/null +++ b/arch/metag/include/asm/mman.h @@ -0,0 +1,11 @@ +#ifndef __METAG_MMAN_H__ +#define __METAG_MMAN_H__ + +#include <uapi/asm/mman.h> + +#ifndef __ASSEMBLY__ +#define arch_mmap_check metag_mmap_check +int metag_mmap_check(unsigned long addr, unsigned long len, + unsigned long flags); +#endif +#endif /* __METAG_MMAN_H__ */ diff --git a/arch/metag/include/asm/mmu.h b/arch/metag/include/asm/mmu.h new file mode 100644 index 000000000000..9c321147c0b4 --- /dev/null +++ b/arch/metag/include/asm/mmu.h @@ -0,0 +1,77 @@ +#ifndef __MMU_H +#define __MMU_H + +#ifdef CONFIG_METAG_USER_TCM +#include <linux/list.h> +#endif + +#ifdef CONFIG_HUGETLB_PAGE +#include <asm/page.h> +#endif + +typedef struct { + /* Software pgd base pointer used for Meta 1.x MMU. */ + unsigned long pgd_base; +#ifdef CONFIG_METAG_USER_TCM + struct list_head tcm; +#endif +#ifdef CONFIG_HUGETLB_PAGE +#if HPAGE_SHIFT < HUGEPT_SHIFT + /* last partially filled huge page table address */ + unsigned long part_huge; +#endif +#endif +} mm_context_t; + +/* Given a virtual address, return the pte for the top level 4meg entry + * that maps that address. + * Returns 0 (an empty pte) if that range is not mapped. + */ +unsigned long mmu_read_first_level_page(unsigned long vaddr); + +/* Given a linear (virtual) address, return the second level 4k pte + * that maps that address. Returns 0 if the address is not mapped. + */ +unsigned long mmu_read_second_level_page(unsigned long vaddr); + +/* Get the virtual base address of the MMU */ +unsigned long mmu_get_base(void); + +/* Initialize the MMU. */ +void mmu_init(unsigned long mem_end); + +#ifdef CONFIG_METAG_META21_MMU +/* + * For cpu "cpu" calculate and return the address of the + * MMCU_TnLOCAL_TABLE_PHYS0 if running in local-space or + * MMCU_TnGLOBAL_TABLE_PHYS0 if running in global-space. + */ +static inline unsigned long mmu_phys0_addr(unsigned int cpu) +{ + unsigned long phys0; + + phys0 = (MMCU_T0LOCAL_TABLE_PHYS0 + + (MMCU_TnX_TABLE_PHYSX_STRIDE * cpu)) + + (MMCU_TXG_TABLE_PHYSX_OFFSET * is_global_space(PAGE_OFFSET)); + + return phys0; +} + +/* + * For cpu "cpu" calculate and return the address of the + * MMCU_TnLOCAL_TABLE_PHYS1 if running in local-space or + * MMCU_TnGLOBAL_TABLE_PHYS1 if running in global-space. + */ +static inline unsigned long mmu_phys1_addr(unsigned int cpu) +{ + unsigned long phys1; + + phys1 = (MMCU_T0LOCAL_TABLE_PHYS1 + + (MMCU_TnX_TABLE_PHYSX_STRIDE * cpu)) + + (MMCU_TXG_TABLE_PHYSX_OFFSET * is_global_space(PAGE_OFFSET)); + + return phys1; +} +#endif /* CONFIG_METAG_META21_MMU */ + +#endif diff --git a/arch/metag/include/asm/mmu_context.h b/arch/metag/include/asm/mmu_context.h new file mode 100644 index 000000000000..ae2a71b5e0be --- /dev/null +++ b/arch/metag/include/asm/mmu_context.h @@ -0,0 +1,113 @@ +#ifndef __METAG_MMU_CONTEXT_H +#define __METAG_MMU_CONTEXT_H + +#include <asm-generic/mm_hooks.h> + +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> + +#include <linux/io.h> + +static inline void enter_lazy_tlb(struct mm_struct *mm, + struct task_struct *tsk) +{ +} + +static inline int init_new_context(struct task_struct *tsk, + struct mm_struct *mm) +{ +#ifndef CONFIG_METAG_META21_MMU + /* We use context to store a pointer to the page holding the + * pgd of a process while it is running. While a process is not + * running the pgd and context fields should be equal. + */ + mm->context.pgd_base = (unsigned long) mm->pgd; +#endif +#ifdef CONFIG_METAG_USER_TCM + INIT_LIST_HEAD(&mm->context.tcm); +#endif + return 0; +} + +#ifdef CONFIG_METAG_USER_TCM + +#include <linux/slab.h> +#include <asm/tcm.h> + +static inline void destroy_context(struct mm_struct *mm) +{ + struct tcm_allocation *pos, *n; + + list_for_each_entry_safe(pos, n, &mm->context.tcm, list) { + tcm_free(pos->tag, pos->addr, pos->size); + list_del(&pos->list); + kfree(pos); + } +} +#else +#define destroy_context(mm) do { } while (0) +#endif + +#ifdef CONFIG_METAG_META21_MMU +static inline void load_pgd(pgd_t *pgd, int thread) +{ + unsigned long phys0 = mmu_phys0_addr(thread); + unsigned long phys1 = mmu_phys1_addr(thread); + + /* + * 0x900 2Gb address space + * The permission bits apply to MMU table region which gives a 2MB + * window into physical memory. We especially don't want userland to be + * able to access this. + */ + metag_out32(0x900 | _PAGE_CACHEABLE | _PAGE_PRIV | _PAGE_WRITE | + _PAGE_PRESENT, phys0); + /* Set new MMU base address */ + metag_out32(__pa(pgd) & MMCU_TBLPHYS1_ADDR_BITS, phys1); +} +#endif + +static inline void switch_mmu(struct mm_struct *prev, struct mm_struct *next) +{ +#ifdef CONFIG_METAG_META21_MMU + load_pgd(next->pgd, hard_processor_id()); +#else + unsigned int i; + + /* prev->context == prev->pgd in the case where we are initially + switching from the init task to the first process. */ + if (prev->context.pgd_base != (unsigned long) prev->pgd) { + for (i = FIRST_USER_PGD_NR; i < USER_PTRS_PER_PGD; i++) + ((pgd_t *) prev->context.pgd_base)[i] = prev->pgd[i]; + } else + prev->pgd = (pgd_t *)mmu_get_base(); + + next->pgd = prev->pgd; + prev->pgd = (pgd_t *) prev->context.pgd_base; + + for (i = FIRST_USER_PGD_NR; i < USER_PTRS_PER_PGD; i++) + next->pgd[i] = ((pgd_t *) next->context.pgd_base)[i]; + + flush_cache_all(); +#endif + flush_tlb_all(); +} + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + if (prev != next) + switch_mmu(prev, next); +} + +static inline void activate_mm(struct mm_struct *prev_mm, + struct mm_struct *next_mm) +{ + switch_mmu(prev_mm, next_mm); +} + +#define deactivate_mm(tsk, mm) do { } while (0) + +#endif diff --git a/arch/metag/include/asm/mmzone.h b/arch/metag/include/asm/mmzone.h new file mode 100644 index 000000000000..9c88a9c65f59 --- /dev/null +++ b/arch/metag/include/asm/mmzone.h @@ -0,0 +1,42 @@ +#ifndef __ASM_METAG_MMZONE_H +#define __ASM_METAG_MMZONE_H + +#ifdef CONFIG_NEED_MULTIPLE_NODES +#include <linux/numa.h> + +extern struct pglist_data *node_data[]; +#define NODE_DATA(nid) (node_data[nid]) + +static inline int pfn_to_nid(unsigned long pfn) +{ + int nid; + + for (nid = 0; nid < MAX_NUMNODES; nid++) + if (pfn >= node_start_pfn(nid) && pfn <= node_end_pfn(nid)) + break; + + return nid; +} + +static inline struct pglist_data *pfn_to_pgdat(unsigned long pfn) +{ + return NODE_DATA(pfn_to_nid(pfn)); +} + +/* arch/metag/mm/numa.c */ +void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end); +#else +static inline void +setup_bootmem_node(int nid, unsigned long start, unsigned long end) +{ +} +#endif /* CONFIG_NEED_MULTIPLE_NODES */ + +#ifdef CONFIG_NUMA +/* SoC specific mem init */ +void __init soc_mem_setup(void); +#else +static inline void __init soc_mem_setup(void) {}; +#endif + +#endif /* __ASM_METAG_MMZONE_H */ diff --git a/arch/metag/include/asm/module.h b/arch/metag/include/asm/module.h new file mode 100644 index 000000000000..e47e60941b2b --- /dev/null +++ b/arch/metag/include/asm/module.h @@ -0,0 +1,37 @@ +#ifndef _ASM_METAG_MODULE_H +#define _ASM_METAG_MODULE_H + +#include <asm-generic/module.h> + +struct metag_plt_entry { + /* Indirect jump instruction sequence. */ + unsigned long tramp[2]; +}; + +struct mod_arch_specific { + /* Indices of PLT sections within module. */ + unsigned int core_plt_section, init_plt_section; +}; + +#if defined CONFIG_METAG_META12 +#define MODULE_PROC_FAMILY "META 1.2 " +#elif defined CONFIG_METAG_META21 +#define MODULE_PROC_FAMILY "META 2.1 " +#else +#define MODULE_PROC_FAMILY "" +#endif + +#ifdef CONFIG_4KSTACKS +#define MODULE_STACKSIZE "4KSTACKS " +#else +#define MODULE_STACKSIZE "" +#endif + +#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE + +#ifdef MODULE +asm(".section .plt,\"ax\",@progbits; .balign 8; .previous"); +asm(".section .init.plt,\"ax\",@progbits; .balign 8; .previous"); +#endif + +#endif /* _ASM_METAG_MODULE_H */ diff --git a/arch/metag/include/asm/page.h b/arch/metag/include/asm/page.h new file mode 100644 index 000000000000..1e8e281b8bb7 --- /dev/null +++ b/arch/metag/include/asm/page.h @@ -0,0 +1,128 @@ +#ifndef _METAG_PAGE_H +#define _METAG_PAGE_H + +#include <linux/const.h> + +#include <asm/metag_mem.h> + +/* PAGE_SHIFT determines the page size */ +#if defined(CONFIG_PAGE_SIZE_4K) +#define PAGE_SHIFT 12 +#elif defined(CONFIG_PAGE_SIZE_8K) +#define PAGE_SHIFT 13 +#elif defined(CONFIG_PAGE_SIZE_16K) +#define PAGE_SHIFT 14 +#endif + +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K) +# define HPAGE_SHIFT 13 +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K) +# define HPAGE_SHIFT 14 +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K) +# define HPAGE_SHIFT 15 +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K) +# define HPAGE_SHIFT 16 +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K) +# define HPAGE_SHIFT 17 +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K) +# define HPAGE_SHIFT 18 +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) +# define HPAGE_SHIFT 19 +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M) +# define HPAGE_SHIFT 20 +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M) +# define HPAGE_SHIFT 21 +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M) +# define HPAGE_SHIFT 22 +#endif + +#ifdef CONFIG_HUGETLB_PAGE +# define HPAGE_SIZE (1UL << HPAGE_SHIFT) +# define HPAGE_MASK (~(HPAGE_SIZE-1)) +# define HUGETLB_PAGE_ORDER (HPAGE_SHIFT-PAGE_SHIFT) +/* + * We define our own hugetlb_get_unmapped_area so we don't corrupt 2nd level + * page tables with normal pages in them. + */ +# define HUGEPT_SHIFT (22) +# define HUGEPT_ALIGN (1 << HUGEPT_SHIFT) +# define HUGEPT_MASK (HUGEPT_ALIGN - 1) +# define ALIGN_HUGEPT(x) ALIGN(x, HUGEPT_ALIGN) +# define HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#endif + +#ifndef __ASSEMBLY__ + +/* On the Meta, we would like to know if the address (heap) we have is + * in local or global space. + */ +#define is_global_space(addr) ((addr) > 0x7fffffff) +#define is_local_space(addr) (!is_global_space(addr)) + +extern void clear_page(void *to); +extern void copy_page(void *to, void *from); + +#define clear_user_page(page, vaddr, pg) clear_page(page) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +/* + * These are used to make use of C type-checking.. + */ +typedef struct { unsigned long pte; } pte_t; +typedef struct { unsigned long pgd; } pgd_t; +typedef struct { unsigned long pgprot; } pgprot_t; +typedef struct page *pgtable_t; + +#define pte_val(x) ((x).pte) +#define pgd_val(x) ((x).pgd) +#define pgprot_val(x) ((x).pgprot) + +#define __pte(x) ((pte_t) { (x) }) +#define __pgd(x) ((pgd_t) { (x) }) +#define __pgprot(x) ((pgprot_t) { (x) }) + +/* The kernel must now ALWAYS live at either 0xC0000000 or 0x40000000 - that + * being either global or local space. + */ +#define PAGE_OFFSET (CONFIG_PAGE_OFFSET) + +#if PAGE_OFFSET >= LINGLOBAL_BASE +#define META_MEMORY_BASE LINGLOBAL_BASE +#define META_MEMORY_LIMIT LINGLOBAL_LIMIT +#else +#define META_MEMORY_BASE LINLOCAL_BASE +#define META_MEMORY_LIMIT LINLOCAL_LIMIT +#endif + +/* Offset between physical and virtual mapping of kernel memory. */ +extern unsigned int meta_memoffset; + +#define __pa(x) ((unsigned long)(((unsigned long)(x)) - meta_memoffset)) +#define __va(x) ((void *)((unsigned long)(((unsigned long)(x)) + meta_memoffset))) + +extern unsigned long pfn_base; +#define ARCH_PFN_OFFSET (pfn_base) +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define page_to_virt(page) __va(page_to_pfn(page) << PAGE_SHIFT) +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) +#ifdef CONFIG_FLATMEM +extern unsigned long max_pfn; +extern unsigned long min_low_pfn; +#define pfn_valid(pfn) ((pfn) >= min_low_pfn && (pfn) < max_pfn) +#endif + +#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) + +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#include <asm-generic/memory_model.h> +#include <asm-generic/getorder.h> + +#endif /* __ASSMEBLY__ */ + +#endif /* _METAG_PAGE_H */ diff --git a/arch/metag/include/asm/perf_event.h b/arch/metag/include/asm/perf_event.h new file mode 100644 index 000000000000..105bbff0149f --- /dev/null +++ b/arch/metag/include/asm/perf_event.h @@ -0,0 +1,4 @@ +#ifndef __ASM_METAG_PERF_EVENT_H +#define __ASM_METAG_PERF_EVENT_H + +#endif /* __ASM_METAG_PERF_EVENT_H */ diff --git a/arch/metag/include/asm/pgalloc.h b/arch/metag/include/asm/pgalloc.h new file mode 100644 index 000000000000..275d9285141c --- /dev/null +++ b/arch/metag/include/asm/pgalloc.h @@ -0,0 +1,79 @@ +#ifndef _METAG_PGALLOC_H +#define _METAG_PGALLOC_H + +#include <linux/threads.h> +#include <linux/mm.h> + +#define pmd_populate_kernel(mm, pmd, pte) \ + set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte))) + +#define pmd_populate(mm, pmd, pte) \ + set_pmd(pmd, __pmd(_PAGE_TABLE | page_to_phys(pte))) + +#define pmd_pgtable(pmd) pmd_page(pmd) + +/* + * Allocate and free page tables. + */ +#ifdef CONFIG_METAG_META21_MMU +static inline void pgd_ctor(pgd_t *pgd) +{ + memcpy(pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); +} +#else +#define pgd_ctor(x) do { } while (0) +#endif + +static inline pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL); + if (pgd) + pgd_ctor(pgd); + return pgd; +} + +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + free_page((unsigned long)pgd); +} + +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, + unsigned long address) +{ + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | + __GFP_ZERO); + return pte; +} + +static inline pgtable_t pte_alloc_one(struct mm_struct *mm, + unsigned long address) +{ + struct page *pte; + pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0); + if (pte) + pgtable_page_ctor(pte); + return pte; +} + +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +{ + free_page((unsigned long)pte); +} + +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) +{ + pgtable_page_dtor(pte); + __free_page(pte); +} + +#define __pte_free_tlb(tlb, pte, addr) \ + do { \ + pgtable_page_dtor(pte); \ + tlb_remove_page((tlb), (pte)); \ + } while (0) + +#define check_pgt_cache() do { } while (0) + +#endif diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h new file mode 100644 index 000000000000..1cd13d595198 --- /dev/null +++ b/arch/metag/include/asm/pgtable.h @@ -0,0 +1,370 @@ +/* + * Macros and functions to manipulate Meta page tables. + */ + +#ifndef _METAG_PGTABLE_H +#define _METAG_PGTABLE_H + +#include <asm-generic/pgtable-nopmd.h> + +/* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */ +#if PAGE_OFFSET >= LINGLOBAL_BASE +#define CONSISTENT_START 0xF7000000 +#define CONSISTENT_END 0xF73FFFFF +#define VMALLOC_START 0xF8000000 +#define VMALLOC_END 0xFFFEFFFF +#else +#define CONSISTENT_START 0x77000000 +#define CONSISTENT_END 0x773FFFFF +#define VMALLOC_START 0x78000000 +#define VMALLOC_END 0x7FFFFFFF +#endif + +/* + * Definitions for MMU descriptors + * + * These are the hardware bits in the MMCU pte entries. + * Derived from the Meta toolkit headers. + */ +#define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT +#define _PAGE_WRITE MMCU_ENTRY_WR_BIT +#define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT +/* Write combine bit - this can cause writes to occur out of order */ +#define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT +/* Sys coherent bit - this bit is never used by Linux */ +#define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT +#define _PAGE_ALWAYS_ZERO_1 0x020 +#define _PAGE_CACHE_CTRL0 0x040 +#define _PAGE_CACHE_CTRL1 0x080 +#define _PAGE_ALWAYS_ZERO_2 0x100 +#define _PAGE_ALWAYS_ZERO_3 0x200 +#define _PAGE_ALWAYS_ZERO_4 0x400 +#define _PAGE_ALWAYS_ZERO_5 0x800 + +/* These are software bits that we stuff into the gaps in the hardware + * pte entries that are not used. Note, these DO get stored in the actual + * hardware, but the hardware just does not use them. + */ +#define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1 +#define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2 +#define _PAGE_FILE _PAGE_ALWAYS_ZERO_3 + +/* Pages owned, and protected by, the kernel. */ +#define _PAGE_KERNEL _PAGE_PRIV + +/* No cacheing of this page */ +#define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S) +/* burst cacheing - good for data streaming */ +#define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S) +/* One cache way per thread */ +#define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S) +/* Full on cacheing */ +#define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S) + +#define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE) + +/* which bits are used for cache control ... */ +#define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \ + _PAGE_WR_COMBINE) + +/* This is a mask of the bits that pte_modify is allowed to change. */ +#define _PAGE_CHG_MASK (PAGE_MASK) + +#define _PAGE_SZ_SHIFT 1 +#define _PAGE_SZ_4K (0x0) +#define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT) + +#if defined(CONFIG_PAGE_SIZE_4K) +#define _PAGE_SZ (_PAGE_SZ_4K) +#elif defined(CONFIG_PAGE_SIZE_8K) +#define _PAGE_SZ (_PAGE_SZ_8K) +#elif defined(CONFIG_PAGE_SIZE_16K) +#define _PAGE_SZ (_PAGE_SZ_16K) +#endif +#define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT) + +#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K) +# define _PAGE_SZHUGE (_PAGE_SZ_8K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K) +# define _PAGE_SZHUGE (_PAGE_SZ_16K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K) +# define _PAGE_SZHUGE (_PAGE_SZ_32K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K) +# define _PAGE_SZHUGE (_PAGE_SZ_64K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K) +# define _PAGE_SZHUGE (_PAGE_SZ_128K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K) +# define _PAGE_SZHUGE (_PAGE_SZ_256K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) +# define _PAGE_SZHUGE (_PAGE_SZ_512K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M) +# define _PAGE_SZHUGE (_PAGE_SZ_1M) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M) +# define _PAGE_SZHUGE (_PAGE_SZ_2M) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M) +# define _PAGE_SZHUGE (_PAGE_SZ_4M) +#endif + +/* + * The Linux memory management assumes a three-level page table setup. On + * Meta, we use that, but "fold" the mid level into the top-level page + * table. + */ + +/* PGDIR_SHIFT determines the size of the area a second-level page table can + * map. This is always 4MB. + */ + +#define PGDIR_SHIFT 22 +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* + * Entries per page directory level: we use a two-level, so + * we don't really have any PMD directory physically. First level tables + * always map 2Gb (local or global) at a granularity of 4MB, second-level + * tables map 4MB with a granularity between 4MB and 4kB (between 1 and + * 1024 entries). + */ +#define PTRS_PER_PTE (PGDIR_SIZE/PAGE_SIZE) +#define HPTRS_PER_PTE (PGDIR_SIZE/HPAGE_SIZE) +#define PTRS_PER_PGD 512 + +#define USER_PTRS_PER_PGD 256 +#define FIRST_USER_ADDRESS META_MEMORY_BASE +#define FIRST_USER_PGD_NR pgd_index(FIRST_USER_ADDRESS) + +#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \ + _PAGE_CACHEABLE) + +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_WRITE | \ + _PAGE_ACCESSED | _PAGE_CACHEABLE) +#define PAGE_SHARED_C PAGE_SHARED +#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \ + _PAGE_CACHEABLE) +#define PAGE_COPY_C PAGE_COPY + +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \ + _PAGE_CACHEABLE) +#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_DIRTY | \ + _PAGE_ACCESSED | _PAGE_WRITE | \ + _PAGE_CACHEABLE | _PAGE_KERNEL) + +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY +#define __P100 PAGE_READONLY +#define __P101 PAGE_READONLY +#define __P110 PAGE_COPY_C +#define __P111 PAGE_COPY_C + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_READONLY +#define __S101 PAGE_READONLY +#define __S110 PAGE_SHARED_C +#define __S111 PAGE_SHARED_C + +#ifndef __ASSEMBLY__ + +#include <asm/page.h> + +/* zero page used for uninitialized stuff */ +extern unsigned long empty_zero_page; +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + +/* Certain architectures need to do special things when pte's + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval)) +#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval) + +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) + +#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) + +#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) + +#define pte_none(x) (!pte_val(x)) +#define pte_present(x) (pte_val(x) & _PAGE_PRESENT) +#define pte_clear(mm, addr, xp) do { pte_val(*(xp)) = 0; } while (0) + +#define pmd_none(x) (!pmd_val(x)) +#define pmd_bad(x) ((pmd_val(x) & ~(PAGE_MASK | _PAGE_SZ_MASK)) \ + != (_PAGE_TABLE & ~_PAGE_SZ_MASK)) +#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) +#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0) + +#define pte_page(x) pfn_to_page(pte_pfn(x)) + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ + +static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } +static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } +static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } +static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } +static inline int pte_special(pte_t pte) { return 0; } + +static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= (~_PAGE_WRITE); return pte; } +static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } +static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } +static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; } +static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; } +static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } +static inline pte_t pte_mkhuge(pte_t pte) { return pte; } + +/* + * Macro and implementation to make a page protection as uncacheable. + */ +#define pgprot_writecombine(prot) \ + __pgprot(pgprot_val(prot) & ~(_PAGE_CACHE_CTRL1 | _PAGE_CACHE_CTRL0)) + +#define pgprot_noncached(prot) \ + __pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE) + + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ + +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); + return pte; +} + +static inline unsigned long pmd_page_vaddr(pmd_t pmd) +{ + unsigned long paddr = pmd_val(pmd) & PAGE_MASK; + if (!paddr) + return 0; + return (unsigned long)__va(paddr); +} + +#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) +#define pmd_page_shift(pmd) (12 + ((pmd_val(pmd) & _PAGE_SZ_MASK) \ + >> _PAGE_SZ_SHIFT)) +#define pmd_num_ptrs(pmd) (PGDIR_SIZE >> pmd_page_shift(pmd)) + +/* + * Each pgd is only 2k, mapping 2Gb (local or global). If we're in global + * space drop the top bit before indexing the pgd. + */ +#if PAGE_OFFSET >= LINGLOBAL_BASE +#define pgd_index(address) ((((address) & ~0x80000000) >> PGDIR_SHIFT) \ + & (PTRS_PER_PGD-1)) +#else +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) +#endif + +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) + +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) + +/* Find an entry in the second-level page table.. */ +#if !defined(CONFIG_HUGETLB_PAGE) + /* all pages are of size (1 << PAGE_SHIFT), so no need to read 1st level pt */ +# define pte_index(pmd, address) \ + (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#else + /* some pages are huge, so read 1st level pt to find out */ +# define pte_index(pmd, address) \ + (((address) >> pmd_page_shift(pmd)) & (pmd_num_ptrs(pmd) - 1)) +#endif +#define pte_offset_kernel(dir, address) \ + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(*(dir), address)) +#define pte_offset_map(dir, address) pte_offset_kernel(dir, address) +#define pte_offset_map_nested(dir, address) pte_offset_kernel(dir, address) + +#define pte_unmap(pte) do { } while (0) +#define pte_unmap_nested(pte) do { } while (0) + +#define pte_ERROR(e) \ + pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) +#define pgd_ERROR(e) \ + pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + +/* + * Meta doesn't have any external MMU info: the kernel page + * tables contain all the necessary information. + */ +static inline void update_mmu_cache(struct vm_area_struct *vma, + unsigned long address, pte_t *pte) +{ +} + +/* + * Encode and decode a swap entry (must be !pte_none(e) && !pte_present(e)) + * Since PAGE_PRESENT is bit 1, we can use the bits above that. + */ +#define __swp_type(x) (((x).val >> 1) & 0xff) +#define __swp_offset(x) ((x).val >> 10) +#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | \ + ((offset) << 10) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +#define PTE_FILE_MAX_BITS 22 +#define pte_to_pgoff(x) (pte_val(x) >> 10) +#define pgoff_to_pte(x) __pte(((x) << 10) | _PAGE_FILE) + +#define kern_addr_valid(addr) (1) + +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + +/* + * No page table caches to initialise + */ +#define pgtable_cache_init() do { } while (0) + +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +void paging_init(unsigned long mem_end); + +#ifdef CONFIG_METAG_META12 +/* This is a workaround for an issue in Meta 1 cores. These cores cache + * invalid entries in the TLB so we always need to flush whenever we add + * a new pte. Unfortunately we can only flush the whole TLB not shoot down + * single entries so this is sub-optimal. This implementation ensures that + * we will get a flush at the second attempt, so we may still get repeated + * faults, we just don't overflow the kernel stack handling them. + */ +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \ +({ \ + int __changed = !pte_same(*(__ptep), __entry); \ + if (__changed) { \ + set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \ + } \ + flush_tlb_page(__vma, __address); \ + __changed; \ +}) +#endif + +#include <asm-generic/pgtable.h> + +#endif /* __ASSEMBLY__ */ +#endif /* _METAG_PGTABLE_H */ diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h new file mode 100644 index 000000000000..9b029a7911c3 --- /dev/null +++ b/arch/metag/include/asm/processor.h @@ -0,0 +1,202 @@ +/* + * Copyright (C) 2005,2006,2007,2008 Imagination Technologies + */ + +#ifndef __ASM_METAG_PROCESSOR_H +#define __ASM_METAG_PROCESSOR_H + +#include <linux/atomic.h> + +#include <asm/page.h> +#include <asm/ptrace.h> +#include <asm/metag_regs.h> + +/* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). + */ +#define current_text_addr() ({ __label__ _l; _l: &&_l; }) + +/* The task stops where the kernel starts */ +#define TASK_SIZE PAGE_OFFSET +/* Add an extra page of padding at the top of the stack for the guard page. */ +#define STACK_TOP (TASK_SIZE - PAGE_SIZE) +#define STACK_TOP_MAX STACK_TOP + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE META_MEMORY_BASE + +typedef struct { + unsigned long seg; +} mm_segment_t; + +#ifdef CONFIG_METAG_FPU +struct meta_fpu_context { + TBICTXEXTFPU fpstate; + union { + struct { + TBICTXEXTBB4 fx8_15; + TBICTXEXTFPACC fpacc; + } fx8_15; + struct { + TBICTXEXTFPACC fpacc; + TBICTXEXTBB4 unused; + } nofx8_15; + } extfpstate; + bool needs_restore; +}; +#else +struct meta_fpu_context {}; +#endif + +#ifdef CONFIG_METAG_DSP +struct meta_ext_context { + struct { + TBIEXTCTX ctx; + TBICTXEXTBB8 bb8; + TBIDUAL ax[TBICTXEXTAXX_BYTES / sizeof(TBIDUAL)]; + TBICTXEXTHL2 hl2; + TBICTXEXTTDPR ext; + TBICTXEXTRP6 rp; + } regs; + + /* DSPRAM A and B save areas. */ + void *ram[2]; + + /* ECH encoded size of DSPRAM save areas. */ + unsigned int ram_sz[2]; +}; +#else +struct meta_ext_context {}; +#endif + +struct thread_struct { + PTBICTX kernel_context; + /* A copy of the user process Sig.SaveMask. */ + unsigned int user_flags; + struct meta_fpu_context *fpu_context; + void __user *tls_ptr; + unsigned short int_depth; + unsigned short txdefr_failure; + struct meta_ext_context *dsp_context; +}; + +#define INIT_THREAD { \ + NULL, /* kernel_context */ \ + 0, /* user_flags */ \ + NULL, /* fpu_context */ \ + NULL, /* tls_ptr */ \ + 1, /* int_depth - we start in kernel */ \ + 0, /* txdefr_failure */ \ + NULL, /* dsp_context */ \ +} + +/* Needed to make #define as we are referencing 'current', that is not visible + * yet. + * + * Stack layout is as below. + + argc argument counter (integer) + argv[0] program name (pointer) + argv[1...N] program args (pointers) + argv[argc-1] end of args (integer) + NULL + env[0...N] environment variables (pointers) + NULL + + */ +#define start_thread(regs, pc, usp) do { \ + unsigned int *argc = (unsigned int *) bprm->exec; \ + set_fs(USER_DS); \ + current->thread.int_depth = 1; \ + /* Force this process down to user land */ \ + regs->ctx.SaveMask = TBICTX_PRIV_BIT; \ + regs->ctx.CurrPC = pc; \ + regs->ctx.AX[0].U0 = usp; \ + regs->ctx.DX[3].U1 = *((int *)argc); /* argc */ \ + regs->ctx.DX[3].U0 = (int)((int *)argc + 1); /* argv */ \ + regs->ctx.DX[2].U1 = (int)((int *)argc + \ + regs->ctx.DX[3].U1 + 2); /* envp */ \ + regs->ctx.DX[2].U0 = 0; /* rtld_fini */ \ +} while (0) + +/* Forward declaration, a strange C thing */ +struct task_struct; + +/* Free all resources held by a thread. */ +static inline void release_thread(struct task_struct *dead_task) +{ +} + +#define copy_segments(tsk, mm) do { } while (0) +#define release_segments(mm) do { } while (0) + +extern void exit_thread(void); + +/* + * Return saved PC of a blocked thread. + */ +#define thread_saved_pc(tsk) \ + ((unsigned long)(tsk)->thread.kernel_context->CurrPC) +#define thread_saved_sp(tsk) \ + ((unsigned long)(tsk)->thread.kernel_context->AX[0].U0) +#define thread_saved_fp(tsk) \ + ((unsigned long)(tsk)->thread.kernel_context->AX[1].U0) + +unsigned long get_wchan(struct task_struct *p); + +#define KSTK_EIP(tsk) ((tsk)->thread.kernel_context->CurrPC) +#define KSTK_ESP(tsk) ((tsk)->thread.kernel_context->AX[0].U0) + +#define user_stack_pointer(regs) ((regs)->ctx.AX[0].U0) + +#define cpu_relax() barrier() + +extern void setup_priv(void); + +static inline unsigned int hard_processor_id(void) +{ + unsigned int id; + + asm volatile ("MOV %0, TXENABLE\n" + "AND %0, %0, %1\n" + "LSR %0, %0, %2\n" + : "=&d" (id) + : "I" (TXENABLE_THREAD_BITS), + "K" (TXENABLE_THREAD_S) + ); + + return id; +} + +#define OP3_EXIT 0 + +#define HALT_OK 0 +#define HALT_PANIC -1 + +/* + * Halt (stop) the hardware thread. This instruction sequence is the + * standard way to cause a Meta hardware thread to exit. The exit code + * is pushed onto the stack which is interpreted by the debug adapter. + */ +static inline void hard_processor_halt(int exit_code) +{ + asm volatile ("MOV D1Ar1, %0\n" + "MOV D0Ar6, %1\n" + "MSETL [A0StP],D0Ar6,D0Ar4,D0Ar2\n" + "1:\n" + "SWITCH #0xC30006\n" + "B 1b\n" + : : "r" (exit_code), "K" (OP3_EXIT)); +} + +/* Set these hooks to call SoC specific code to restart/halt/power off. */ +extern void (*soc_restart)(char *cmd); +extern void (*soc_halt)(void); + +extern void show_trace(struct task_struct *tsk, unsigned long *sp, + struct pt_regs *regs); + +#endif diff --git a/arch/metag/include/asm/prom.h b/arch/metag/include/asm/prom.h new file mode 100644 index 000000000000..d2aa35d2228e --- /dev/null +++ b/arch/metag/include/asm/prom.h @@ -0,0 +1,23 @@ +/* + * arch/metag/include/asm/prom.h + * + * Copyright (C) 2012 Imagination Technologies Ltd. + * + * Based on ARM version: + * Copyright (C) 2009 Canonical Ltd. <jeremy.kerr@canonical.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#ifndef __ASM_METAG_PROM_H +#define __ASM_METAG_PROM_H + +#include <asm/setup.h> +#define HAVE_ARCH_DEVTREE_FIXUPS + +extern struct machine_desc *setup_machine_fdt(void *dt); +extern void copy_fdt(void); + +#endif /* __ASM_METAG_PROM_H */ diff --git a/arch/metag/include/asm/ptrace.h b/arch/metag/include/asm/ptrace.h new file mode 100644 index 000000000000..fcabc18daf25 --- /dev/null +++ b/arch/metag/include/asm/ptrace.h @@ -0,0 +1,60 @@ +#ifndef _METAG_PTRACE_H +#define _METAG_PTRACE_H + +#include <linux/compiler.h> +#include <uapi/asm/ptrace.h> +#include <asm/tbx.h> + +#ifndef __ASSEMBLY__ + +/* this struct defines the way the registers are stored on the + stack during a system call. */ + +struct pt_regs { + TBICTX ctx; + TBICTXEXTCB0 extcb0[5]; +}; + +#define user_mode(regs) (((regs)->ctx.SaveMask & TBICTX_PRIV_BIT) > 0) + +#define instruction_pointer(regs) ((unsigned long)(regs)->ctx.CurrPC) +#define profile_pc(regs) instruction_pointer(regs) + +#define task_pt_regs(task) \ + ((struct pt_regs *)(task_stack_page(task) + \ + sizeof(struct thread_info))) + +#define current_pt_regs() \ + ((struct pt_regs *)((char *)current_thread_info() + \ + sizeof(struct thread_info))) + +int syscall_trace_enter(struct pt_regs *regs); +void syscall_trace_leave(struct pt_regs *regs); + +/* copy a struct user_gp_regs out to user */ +int metag_gp_regs_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf); +/* copy a struct user_gp_regs in from user */ +int metag_gp_regs_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +/* copy a struct user_cb_regs out to user */ +int metag_cb_regs_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf); +/* copy a struct user_cb_regs in from user */ +int metag_cb_regs_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +/* copy a struct user_rp_state out to user */ +int metag_rp_state_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf); +/* copy a struct user_rp_state in from user */ +int metag_rp_state_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); + +#endif /* __ASSEMBLY__ */ +#endif /* _METAG_PTRACE_H */ diff --git a/arch/metag/include/asm/setup.h b/arch/metag/include/asm/setup.h new file mode 100644 index 000000000000..e13083b15dd0 --- /dev/null +++ b/arch/metag/include/asm/setup.h @@ -0,0 +1,8 @@ +#ifndef _ASM_METAG_SETUP_H +#define _ASM_METAG_SETUP_H + +#include <uapi/asm/setup.h> + +void per_cpu_trap_init(unsigned long); +extern void __init dump_machine_table(void); +#endif /* _ASM_METAG_SETUP_H */ diff --git a/arch/metag/include/asm/smp.h b/arch/metag/include/asm/smp.h new file mode 100644 index 000000000000..e0373f81a117 --- /dev/null +++ b/arch/metag/include/asm/smp.h @@ -0,0 +1,29 @@ +#ifndef __ASM_SMP_H +#define __ASM_SMP_H + +#include <linux/cpumask.h> + +#define raw_smp_processor_id() (current_thread_info()->cpu) + +enum ipi_msg_type { + IPI_CALL_FUNC, + IPI_CALL_FUNC_SINGLE, + IPI_RESCHEDULE, +}; + +extern void arch_send_call_function_single_ipi(int cpu); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); +#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask + +asmlinkage void secondary_start_kernel(void); + +extern void secondary_startup(void); + +#ifdef CONFIG_HOTPLUG_CPU +extern void __cpu_die(unsigned int cpu); +extern int __cpu_disable(void); +extern void cpu_die(void); +#endif + +extern void smp_init_cpus(void); +#endif /* __ASM_SMP_H */ diff --git a/arch/metag/include/asm/sparsemem.h b/arch/metag/include/asm/sparsemem.h new file mode 100644 index 000000000000..03fe255d697a --- /dev/null +++ b/arch/metag/include/asm/sparsemem.h @@ -0,0 +1,13 @@ +#ifndef __ASM_METAG_SPARSEMEM_H +#define __ASM_METAG_SPARSEMEM_H + +/* + * SECTION_SIZE_BITS 2^N: how big each section will be + * MAX_PHYSADDR_BITS 2^N: how much physical address space we have + * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space + */ +#define SECTION_SIZE_BITS 26 +#define MAX_PHYSADDR_BITS 32 +#define MAX_PHYSMEM_BITS 32 + +#endif /* __ASM_METAG_SPARSEMEM_H */ diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h new file mode 100644 index 000000000000..86a7cf3d1386 --- /dev/null +++ b/arch/metag/include/asm/spinlock.h @@ -0,0 +1,22 @@ +#ifndef __ASM_SPINLOCK_H +#define __ASM_SPINLOCK_H + +#ifdef CONFIG_METAG_ATOMICITY_LOCK1 +#include <asm/spinlock_lock1.h> +#else +#include <asm/spinlock_lnkget.h> +#endif + +#define arch_spin_unlock_wait(lock) \ + do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) + +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) + +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() + +#endif /* __ASM_SPINLOCK_H */ diff --git a/arch/metag/include/asm/spinlock_lnkget.h b/arch/metag/include/asm/spinlock_lnkget.h new file mode 100644 index 000000000000..ad8436feed8d --- /dev/null +++ b/arch/metag/include/asm/spinlock_lnkget.h @@ -0,0 +1,249 @@ +#ifndef __ASM_SPINLOCK_LNKGET_H +#define __ASM_SPINLOCK_LNKGET_H + +/* + * None of these asm statements clobber memory as LNKSET writes around + * the cache so the memory it modifies cannot safely be read by any means + * other than these accessors. + */ + +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + int ret; + + asm volatile ("LNKGETD %0, [%1]\n" + "TST %0, #1\n" + "MOV %0, #1\n" + "XORZ %0, %0, %0\n" + : "=&d" (ret) + : "da" (&lock->lock) + : "cc"); + return ret; +} + +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + int tmp; + + asm volatile ("1: LNKGETD %0,[%1]\n" + " TST %0, #1\n" + " ADD %0, %0, #1\n" + " LNKSETDZ [%1], %0\n" + " BNZ 1b\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" + : "=&d" (tmp) + : "da" (&lock->lock) + : "cc"); + + smp_mb(); +} + +/* Returns 0 if failed to acquire lock */ +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + int tmp; + + asm volatile (" LNKGETD %0,[%1]\n" + " TST %0, #1\n" + " ADD %0, %0, #1\n" + " LNKSETDZ [%1], %0\n" + " BNZ 1f\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " MOV %0, #1\n" + "1: XORNZ %0, %0, %0\n" + : "=&d" (tmp) + : "da" (&lock->lock) + : "cc"); + + smp_mb(); + + return tmp; +} + +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + smp_mb(); + + asm volatile (" SETD [%0], %1\n" + : + : "da" (&lock->lock), "da" (0) + : "memory"); +} + +/* + * RWLOCKS + * + * + * Write locks are easy - we just set bit 31. When unlocking, we can + * just write zero since the lock is exclusively held. + */ + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + int tmp; + + asm volatile ("1: LNKGETD %0,[%1]\n" + " CMP %0, #0\n" + " ADD %0, %0, %2\n" + " LNKSETDZ [%1], %0\n" + " BNZ 1b\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" + : "=&d" (tmp) + : "da" (&rw->lock), "bd" (0x80000000) + : "cc"); + + smp_mb(); +} + +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + int tmp; + + asm volatile (" LNKGETD %0,[%1]\n" + " CMP %0, #0\n" + " ADD %0, %0, %2\n" + " LNKSETDZ [%1], %0\n" + " BNZ 1f\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " MOV %0,#1\n" + "1: XORNZ %0, %0, %0\n" + : "=&d" (tmp) + : "da" (&rw->lock), "bd" (0x80000000) + : "cc"); + + smp_mb(); + + return tmp; +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + smp_mb(); + + asm volatile (" SETD [%0], %1\n" + : + : "da" (&rw->lock), "da" (0) + : "memory"); +} + +/* write_can_lock - would write_trylock() succeed? */ +static inline int arch_write_can_lock(arch_rwlock_t *rw) +{ + int ret; + + asm volatile ("LNKGETD %0, [%1]\n" + "CMP %0, #0\n" + "MOV %0, #1\n" + "XORNZ %0, %0, %0\n" + : "=&d" (ret) + : "da" (&rw->lock) + : "cc"); + return ret; +} + +/* + * Read locks are a bit more hairy: + * - Exclusively load the lock value. + * - Increment it. + * - Store new lock value if positive, and we still own this location. + * If the value is negative, we've already failed. + * - If we failed to store the value, we want a negative result. + * - If we failed, try again. + * Unlocking is similarly hairy. We may have multiple read locks + * currently active. However, we know we won't have any write + * locks. + */ +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + int tmp; + + asm volatile ("1: LNKGETD %0,[%1]\n" + " ADDS %0, %0, #1\n" + " LNKSETDPL [%1], %0\n" + " BMI 1b\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" + : "=&d" (tmp) + : "da" (&rw->lock) + : "cc"); + + smp_mb(); +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + int tmp; + + smp_mb(); + + asm volatile ("1: LNKGETD %0,[%1]\n" + " SUB %0, %0, #1\n" + " LNKSETD [%1], %0\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" + : "=&d" (tmp) + : "da" (&rw->lock) + : "cc", "memory"); +} + +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + int tmp; + + asm volatile (" LNKGETD %0,[%1]\n" + " ADDS %0, %0, #1\n" + " LNKSETDPL [%1], %0\n" + " BMI 1f\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " MOV %0,#1\n" + " BZ 2f\n" + "1: MOV %0,#0\n" + "2:\n" + : "=&d" (tmp) + : "da" (&rw->lock) + : "cc"); + + smp_mb(); + + return tmp; +} + +/* read_can_lock - would read_trylock() succeed? */ +static inline int arch_read_can_lock(arch_rwlock_t *rw) +{ + int tmp; + + asm volatile ("LNKGETD %0, [%1]\n" + "CMP %0, %2\n" + "MOV %0, #1\n" + "XORZ %0, %0, %0\n" + : "=&d" (tmp) + : "da" (&rw->lock), "bd" (0x80000000) + : "cc"); + return tmp; +} + +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) + +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() + +#endif /* __ASM_SPINLOCK_LNKGET_H */ diff --git a/arch/metag/include/asm/spinlock_lock1.h b/arch/metag/include/asm/spinlock_lock1.h new file mode 100644 index 000000000000..c630444cffe9 --- /dev/null +++ b/arch/metag/include/asm/spinlock_lock1.h @@ -0,0 +1,184 @@ +#ifndef __ASM_SPINLOCK_LOCK1_H +#define __ASM_SPINLOCK_LOCK1_H + +#include <asm/bug.h> +#include <asm/global_lock.h> + +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + int ret; + + barrier(); + ret = lock->lock; + WARN_ON(ret != 0 && ret != 1); + return ret; +} + +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + unsigned int we_won = 0; + unsigned long flags; + +again: + __global_lock1(flags); + if (lock->lock == 0) { + fence(); + lock->lock = 1; + we_won = 1; + } + __global_unlock1(flags); + if (we_won == 0) + goto again; + WARN_ON(lock->lock != 1); +} + +/* Returns 0 if failed to acquire lock */ +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + unsigned long flags; + unsigned int ret; + + __global_lock1(flags); + ret = lock->lock; + if (ret == 0) { + fence(); + lock->lock = 1; + } + __global_unlock1(flags); + return (ret == 0); +} + +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + barrier(); + WARN_ON(!lock->lock); + lock->lock = 0; +} + +/* + * RWLOCKS + * + * + * Write locks are easy - we just set bit 31. When unlocking, we can + * just write zero since the lock is exclusively held. + */ + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + unsigned long flags; + unsigned int we_won = 0; + +again: + __global_lock1(flags); + if (rw->lock == 0) { + fence(); + rw->lock = 0x80000000; + we_won = 1; + } + __global_unlock1(flags); + if (we_won == 0) + goto again; + WARN_ON(rw->lock != 0x80000000); +} + +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + unsigned long flags; + unsigned int ret; + + __global_lock1(flags); + ret = rw->lock; + if (ret == 0) { + fence(); + rw->lock = 0x80000000; + } + __global_unlock1(flags); + + return (ret == 0); +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + barrier(); + WARN_ON(rw->lock != 0x80000000); + rw->lock = 0; +} + +/* write_can_lock - would write_trylock() succeed? */ +static inline int arch_write_can_lock(arch_rwlock_t *rw) +{ + unsigned int ret; + + barrier(); + ret = rw->lock; + return (ret == 0); +} + +/* + * Read locks are a bit more hairy: + * - Exclusively load the lock value. + * - Increment it. + * - Store new lock value if positive, and we still own this location. + * If the value is negative, we've already failed. + * - If we failed to store the value, we want a negative result. + * - If we failed, try again. + * Unlocking is similarly hairy. We may have multiple read locks + * currently active. However, we know we won't have any write + * locks. + */ +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + unsigned long flags; + unsigned int we_won = 0, ret; + +again: + __global_lock1(flags); + ret = rw->lock; + if (ret < 0x80000000) { + fence(); + rw->lock = ret + 1; + we_won = 1; + } + __global_unlock1(flags); + if (!we_won) + goto again; +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + unsigned long flags; + unsigned int ret; + + __global_lock1(flags); + fence(); + ret = rw->lock--; + __global_unlock1(flags); + WARN_ON(ret == 0); +} + +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + unsigned long flags; + unsigned int ret; + + __global_lock1(flags); + ret = rw->lock; + if (ret < 0x80000000) { + fence(); + rw->lock = ret + 1; + } + __global_unlock1(flags); + return (ret < 0x80000000); +} + +/* read_can_lock - would read_trylock() succeed? */ +static inline int arch_read_can_lock(arch_rwlock_t *rw) +{ + unsigned int ret; + + barrier(); + ret = rw->lock; + return (ret < 0x80000000); +} + +#endif /* __ASM_SPINLOCK_LOCK1_H */ diff --git a/arch/metag/include/asm/spinlock_types.h b/arch/metag/include/asm/spinlock_types.h new file mode 100644 index 000000000000..b76391405fea --- /dev/null +++ b/arch/metag/include/asm/spinlock_types.h @@ -0,0 +1,20 @@ +#ifndef _ASM_METAG_SPINLOCK_TYPES_H +#define _ASM_METAG_SPINLOCK_TYPES_H + +#ifndef __LINUX_SPINLOCK_TYPES_H +# error "please don't include this file directly" +#endif + +typedef struct { + volatile unsigned int lock; +} arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } + +typedef struct { + volatile unsigned int lock; +} arch_rwlock_t; + +#define __ARCH_RW_LOCK_UNLOCKED { 0 } + +#endif /* _ASM_METAG_SPINLOCK_TYPES_H */ diff --git a/arch/metag/include/asm/stacktrace.h b/arch/metag/include/asm/stacktrace.h new file mode 100644 index 000000000000..2830a0fe7ac9 --- /dev/null +++ b/arch/metag/include/asm/stacktrace.h @@ -0,0 +1,20 @@ +#ifndef __ASM_STACKTRACE_H +#define __ASM_STACKTRACE_H + +struct stackframe { + unsigned long fp; + unsigned long sp; + unsigned long lr; + unsigned long pc; +}; + +struct metag_frame { + unsigned long fp; + unsigned long lr; +}; + +extern int unwind_frame(struct stackframe *frame); +extern void walk_stackframe(struct stackframe *frame, + int (*fn)(struct stackframe *, void *), void *data); + +#endif /* __ASM_STACKTRACE_H */ diff --git a/arch/metag/include/asm/string.h b/arch/metag/include/asm/string.h new file mode 100644 index 000000000000..53e3806eee04 --- /dev/null +++ b/arch/metag/include/asm/string.h @@ -0,0 +1,13 @@ +#ifndef _METAG_STRING_H_ +#define _METAG_STRING_H_ + +#define __HAVE_ARCH_MEMSET +extern void *memset(void *__s, int __c, size_t __count); + +#define __HAVE_ARCH_MEMCPY +void *memcpy(void *__to, __const__ void *__from, size_t __n); + +#define __HAVE_ARCH_MEMMOVE +extern void *memmove(void *__dest, __const__ void *__src, size_t __n); + +#endif /* _METAG_STRING_H_ */ diff --git a/arch/metag/include/asm/switch.h b/arch/metag/include/asm/switch.h new file mode 100644 index 000000000000..1fd6a587c844 --- /dev/null +++ b/arch/metag/include/asm/switch.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2012 Imagination Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _ASM_METAG_SWITCH_H +#define _ASM_METAG_SWITCH_H + +/* metag SWITCH codes */ +#define __METAG_SW_PERM_BREAK 0x400002 /* compiled in breakpoint */ +#define __METAG_SW_SYS_LEGACY 0x440000 /* legacy system calls */ +#define __METAG_SW_SYS 0x440001 /* system calls */ + +/* metag SWITCH instruction encoding */ +#define __METAG_SW_ENCODING(TYPE) (0xaf000000 | (__METAG_SW_##TYPE)) + +#endif /* _ASM_METAG_SWITCH_H */ diff --git a/arch/metag/include/asm/syscall.h b/arch/metag/include/asm/syscall.h new file mode 100644 index 000000000000..24fc97939f77 --- /dev/null +++ b/arch/metag/include/asm/syscall.h @@ -0,0 +1,104 @@ +/* + * Access to user system call parameters and results + * + * Copyright (C) 2008 Imagination Technologies Ltd. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * See asm-generic/syscall.h for descriptions of what we must do here. + */ + +#ifndef _ASM_METAG_SYSCALL_H +#define _ASM_METAG_SYSCALL_H + +#include <linux/sched.h> +#include <linux/err.h> +#include <linux/uaccess.h> + +#include <asm/switch.h> + +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + unsigned long insn; + + /* + * FIXME there's no way to find out how we got here other than to + * examine the memory at the PC to see if it is a syscall + * SWITCH instruction. + */ + if (get_user(insn, (unsigned long *)(regs->ctx.CurrPC - 4))) + return -1; + + if (insn == __METAG_SW_ENCODING(SYS)) + return regs->ctx.DX[0].U1; + else + return -1L; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + /* do nothing */ +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + unsigned long error = regs->ctx.DX[0].U0; + return IS_ERR_VALUE(error) ? error : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->ctx.DX[0].U0; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + regs->ctx.DX[0].U0 = (long) error ?: val; +} + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + unsigned int reg, j; + BUG_ON(i + n > 6); + + for (j = i, reg = 6 - i; j < (i + n); j++, reg--) { + if (reg % 2) + args[j] = regs->ctx.DX[(reg + 1) / 2].U0; + else + args[j] = regs->ctx.DX[reg / 2].U1; + } +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ + unsigned int reg; + BUG_ON(i + n > 6); + + for (reg = 6 - i; i < (i + n); i++, reg--) { + if (reg % 2) + regs->ctx.DX[(reg + 1) / 2].U0 = args[i]; + else + regs->ctx.DX[reg / 2].U1 = args[i]; + } +} + +#define NR_syscalls __NR_syscalls + +/* generic syscall table */ +extern const void *sys_call_table[]; + +#endif /* _ASM_METAG_SYSCALL_H */ diff --git a/arch/metag/include/asm/syscalls.h b/arch/metag/include/asm/syscalls.h new file mode 100644 index 000000000000..a02b95556522 --- /dev/null +++ b/arch/metag/include/asm/syscalls.h @@ -0,0 +1,39 @@ +#ifndef _ASM_METAG_SYSCALLS_H +#define _ASM_METAG_SYSCALLS_H + +#include <linux/compiler.h> +#include <linux/linkage.h> +#include <linux/types.h> +#include <linux/signal.h> + +/* kernel/signal.c */ +#define sys_rt_sigreturn sys_rt_sigreturn +asmlinkage long sys_rt_sigreturn(void); + +#include <asm-generic/syscalls.h> + +/* kernel/sys_metag.c */ +asmlinkage int sys_metag_setglobalbit(char __user *, int); +asmlinkage void sys_metag_set_fpu_flags(unsigned int); +asmlinkage int sys_metag_set_tls(void __user *); +asmlinkage void *sys_metag_get_tls(void); + +asmlinkage long sys_truncate64_metag(const char __user *, unsigned long, + unsigned long); +asmlinkage long sys_ftruncate64_metag(unsigned int, unsigned long, + unsigned long); +asmlinkage long sys_fadvise64_64_metag(int, unsigned long, unsigned long, + unsigned long, unsigned long, int); +asmlinkage long sys_readahead_metag(int, unsigned long, unsigned long, size_t); +asmlinkage ssize_t sys_pread64_metag(unsigned long, char __user *, size_t, + unsigned long, unsigned long); +asmlinkage ssize_t sys_pwrite64_metag(unsigned long, char __user *, size_t, + unsigned long, unsigned long); +asmlinkage long sys_sync_file_range_metag(int, unsigned long, unsigned long, + unsigned long, unsigned long, + unsigned int); + +int do_work_pending(struct pt_regs *regs, unsigned int thread_flags, + int syscall); + +#endif /* _ASM_METAG_SYSCALLS_H */ diff --git a/arch/metag/include/asm/tbx.h b/arch/metag/include/asm/tbx.h new file mode 100644 index 000000000000..287b36ff8ad1 --- /dev/null +++ b/arch/metag/include/asm/tbx.h @@ -0,0 +1,1425 @@ +/* + * asm/tbx.h + * + * Copyright (C) 2000-2012 Imagination Technologies. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + * + * Thread binary interface header + */ + +#ifndef _ASM_METAG_TBX_H_ +#define _ASM_METAG_TBX_H_ + +/* for CACHEW_* values */ +#include <asm/metag_isa.h> +/* for LINSYSEVENT_* addresses */ +#include <asm/metag_mem.h> + +#ifdef TBI_1_4 +#ifndef TBI_MUTEXES_1_4 +#define TBI_MUTEXES_1_4 +#endif +#ifndef TBI_SEMAPHORES_1_4 +#define TBI_SEMAPHORES_1_4 +#endif +#ifndef TBI_ASYNC_SWITCH_1_4 +#define TBI_ASYNC_SWITCH_1_4 +#endif +#ifndef TBI_FASTINT_1_4 +#define TBI_FASTINT_1_4 +#endif +#endif + + +/* Id values in the TBI system describe a segment using an arbitrary + integer value and flags in the bottom 8 bits, the SIGPOLL value is + used in cases where control over blocking or polling behaviour is + needed. */ +#define TBID_SIGPOLL_BIT 0x02 /* Set bit in an Id value to poll vs block */ +/* Extended segment identifiers use strings in the string table */ +#define TBID_IS_SEGSTR( Id ) (((Id) & (TBID_SEGTYPE_BITS>>1)) == 0) + +/* Segment identifiers contain the following related bit-fields */ +#define TBID_SEGTYPE_BITS 0x0F /* One of the predefined segment types */ +#define TBID_SEGTYPE_S 0 +#define TBID_SEGSCOPE_BITS 0x30 /* Indicates the scope of the segment */ +#define TBID_SEGSCOPE_S 4 +#define TBID_SEGGADDR_BITS 0xC0 /* Indicates access possible via pGAddr */ +#define TBID_SEGGADDR_S 6 + +/* Segments of memory can only really contain a few types of data */ +#define TBID_SEGTYPE_TEXT 0x02 /* Code segment */ +#define TBID_SEGTYPE_DATA 0x04 /* Data segment */ +#define TBID_SEGTYPE_STACK 0x06 /* Stack segment */ +#define TBID_SEGTYPE_HEAP 0x0A /* Heap segment */ +#define TBID_SEGTYPE_ROOT 0x0C /* Root block segments */ +#define TBID_SEGTYPE_STRING 0x0E /* String table segment */ + +/* Segments have one of three possible scopes */ +#define TBID_SEGSCOPE_INIT 0 /* Temporary area for initialisation phase */ +#define TBID_SEGSCOPE_LOCAL 1 /* Private to this thread */ +#define TBID_SEGSCOPE_GLOBAL 2 /* Shared globally throughout the system */ +#define TBID_SEGSCOPE_SHARED 3 /* Limited sharing between local/global */ + +/* For segment specifier a further field in two of the remaining bits + indicates the usefulness of the pGAddr field in the segment descriptor + descriptor. */ +#define TBID_SEGGADDR_NULL 0 /* pGAddr is NULL -> SEGSCOPE_(LOCAL|INIT) */ +#define TBID_SEGGADDR_READ 1 /* Only read via pGAddr */ +#define TBID_SEGGADDR_WRITE 2 /* Full access via pGAddr */ +#define TBID_SEGGADDR_EXEC 3 /* Only execute via pGAddr */ + +/* The following values are common to both segment and signal Id value and + live in the top 8 bits of the Id values. */ + +/* The ISTAT bit indicates if segments are related to interrupt vs + background level interfaces a thread can still handle all triggers at + either level, but can also split these up if it wants to. */ +#define TBID_ISTAT_BIT 0x01000000 +#define TBID_ISTAT_S 24 + +/* Privilege needed to access a segment is indicated by the next bit. + + This bit is set to mirror the current privilege level when starting a + search for a segment - setting it yourself toggles the automatically + generated state which is only useful to emulate unprivileged behaviour + or access unprivileged areas of memory while at privileged level. */ +#define TBID_PSTAT_BIT 0x02000000 +#define TBID_PSTAT_S 25 + +/* The top six bits of a signal/segment specifier identifies a thread within + the system. This represents a segments owner. */ +#define TBID_THREAD_BITS 0xFC000000 +#define TBID_THREAD_S 26 + +/* Special thread id values */ +#define TBID_THREAD_NULL (-32) /* Never matches any thread/segment id used */ +#define TBID_THREAD_GLOBAL (-31) /* Things global to all threads */ +#define TBID_THREAD_HOST ( -1) /* Host interface */ +#define TBID_THREAD_EXTIO (TBID_THREAD_HOST) /* Host based ExtIO i/f */ + +/* Virtual Id's are used for external thread interface structures or the + above special Id's */ +#define TBID_IS_VIRTTHREAD( Id ) ((Id) < 0) + +/* Real Id's are used for actual hardware threads that are local */ +#define TBID_IS_REALTHREAD( Id ) ((Id) >= 0) + +/* Generate a segment Id given Thread, Scope, and Type */ +#define TBID_SEG( Thread, Scope, Type ) (\ + ((Thread)<<TBID_THREAD_S) + ((Scope)<<TBID_SEGSCOPE_S) + (Type)) + +/* Generate a signal Id given Thread and SigNum */ +#define TBID_SIG( Thread, SigNum ) (\ + ((Thread)<<TBID_THREAD_S) + ((SigNum)<<TBID_SIGNUM_S) + TBID_SIGNAL_BIT) + +/* Generate an Id that solely represents a thread - useful for cache ops */ +#define TBID_THD( Thread ) ((Thread)<<TBID_THREAD_S) +#define TBID_THD_NULL ((TBID_THREAD_NULL) <<TBID_THREAD_S) +#define TBID_THD_GLOBAL ((TBID_THREAD_GLOBAL)<<TBID_THREAD_S) + +/* Common exception handler (see TBID_SIGNUM_XXF below) receives hardware + generated fault codes TBIXXF_SIGNUM_xxF in it's SigNum parameter */ +#define TBIXXF_SIGNUM_IIF 0x01 /* General instruction fault */ +#define TBIXXF_SIGNUM_PGF 0x02 /* Privilege general fault */ +#define TBIXXF_SIGNUM_DHF 0x03 /* Data access watchpoint HIT */ +#define TBIXXF_SIGNUM_IGF 0x05 /* Code fetch general read failure */ +#define TBIXXF_SIGNUM_DGF 0x07 /* Data access general read/write fault */ +#define TBIXXF_SIGNUM_IPF 0x09 /* Code fetch page fault */ +#define TBIXXF_SIGNUM_DPF 0x0B /* Data access page fault */ +#define TBIXXF_SIGNUM_IHF 0x0D /* Instruction breakpoint HIT */ +#define TBIXXF_SIGNUM_DWF 0x0F /* Data access read-only fault */ + +/* Hardware signals communicate events between processing levels within a + single thread all the _xxF cases are exceptions and are routed via a + common exception handler, _SWx are software trap events and kicks including + __TBISignal generated kicks, and finally _TRx are hardware triggers */ +#define TBID_SIGNUM_SW0 0x00 /* SWITCH GROUP 0 - Per thread user */ +#define TBID_SIGNUM_SW1 0x01 /* SWITCH GROUP 1 - Per thread system */ +#define TBID_SIGNUM_SW2 0x02 /* SWITCH GROUP 2 - Internal global request */ +#define TBID_SIGNUM_SW3 0x03 /* SWITCH GROUP 3 - External global request */ +#ifdef TBI_1_4 +#define TBID_SIGNUM_FPE 0x04 /* Deferred exception - Any IEEE 754 exception */ +#define TBID_SIGNUM_FPD 0x05 /* Deferred exception - Denormal exception */ +/* Reserved 0x6 for a reserved deferred exception */ +#define TBID_SIGNUM_BUS 0x07 /* Deferred exception - Bus Error */ +/* Reserved 0x08-0x09 */ +#else +/* Reserved 0x04-0x09 */ +#endif +#define TBID_SIGNUM_SWS 0x0A /* KICK received with SigMask != 0 */ +#define TBID_SIGNUM_SWK 0x0B /* KICK received with SigMask == 0 */ +/* Reserved 0x0C-0x0F */ +#define TBID_SIGNUM_TRT 0x10 /* Timer trigger */ +#define TBID_SIGNUM_LWK 0x11 /* Low level kick (handler provided by TBI) */ +#define TBID_SIGNUM_XXF 0x12 /* Fault handler - receives ALL _xxF sigs */ +#ifdef TBI_1_4 +#define TBID_SIGNUM_DFR 0x13 /* Deferred Exception handler */ +#else +#define TBID_SIGNUM_FPE 0x13 /* FPE Exception handler */ +#endif +/* External trigger one group 0x14 to 0x17 - per thread */ +#define TBID_SIGNUM_TR1(Thread) (0x14+(Thread)) +#define TBID_SIGNUM_T10 0x14 +#define TBID_SIGNUM_T11 0x15 +#define TBID_SIGNUM_T12 0x16 +#define TBID_SIGNUM_T13 0x17 +/* External trigger two group 0x18 to 0x1b - per thread */ +#define TBID_SIGNUM_TR2(Thread) (0x18+(Thread)) +#define TBID_SIGNUM_T20 0x18 +#define TBID_SIGNUM_T21 0x19 +#define TBID_SIGNUM_T22 0x1A +#define TBID_SIGNUM_T23 0x1B +#define TBID_SIGNUM_TR3 0x1C /* External trigger N-4 (global) */ +#define TBID_SIGNUM_TR4 0x1D /* External trigger N-3 (global) */ +#define TBID_SIGNUM_TR5 0x1E /* External trigger N-2 (global) */ +#define TBID_SIGNUM_TR6 0x1F /* External trigger N-1 (global) */ +#define TBID_SIGNUM_MAX 0x1F + +/* Return the trigger register(TXMASK[I]/TXSTAT[I]) bits related to + each hardware signal, sometimes this is a many-to-one relationship. */ +#define TBI_TRIG_BIT(SigNum) (\ + ((SigNum) >= TBID_SIGNUM_TRT) ? 1<<((SigNum)-TBID_SIGNUM_TRT) :\ + ( ((SigNum) == TBID_SIGNUM_SWS) || \ + ((SigNum) == TBID_SIGNUM_SWK) ) ? \ + TXSTAT_KICK_BIT : TXSTATI_BGNDHALT_BIT ) + +/* Return the hardware trigger vector number for entries in the + HWVEC0EXT table that will generate the required internal trigger. */ +#define TBI_TRIG_VEC(SigNum) (\ + ((SigNum) >= TBID_SIGNUM_T10) ? ((SigNum)-TBID_SIGNUM_TRT) : -1) + +/* Default trigger masks for each thread at background/interrupt level */ +#define TBI_TRIGS_INIT( Thread ) (\ + TXSTAT_KICK_BIT + TBI_TRIG_BIT(TBID_SIGNUM_TR1(Thread)) ) +#define TBI_INTS_INIT( Thread ) (\ + TXSTAT_KICK_BIT + TXSTATI_BGNDHALT_BIT \ + + TBI_TRIG_BIT(TBID_SIGNUM_TR2(Thread)) ) + +#ifndef __ASSEMBLY__ +/* A spin-lock location is a zero-initialised location in memory */ +typedef volatile int TBISPIN, *PTBISPIN; + +/* A kick location is a hardware location you can write to + * in order to cause a kick + */ +typedef volatile int *PTBIKICK; + +#if defined(METAC_1_0) || defined(METAC_1_1) +/* Macro to perform a kick */ +#define TBI_KICK( pKick ) do { pKick[0] = 1; } while (0) +#else +/* #define METAG_LIN_VALUES before including machine.h if required */ +#ifdef LINSYSEVENT_WR_COMBINE_FLUSH +/* Macro to perform a kick - write combiners must be flushed */ +#define TBI_KICK( pKick ) do {\ + volatile int *pFlush = (volatile int *) LINSYSEVENT_WR_COMBINE_FLUSH; \ + pFlush[0] = 0; \ + pKick[0] = 1; } while (0) +#endif +#endif /* if defined(METAC_1_0) || defined(METAC_1_1) */ +#endif /* ifndef __ASSEMBLY__ */ + +#ifndef __ASSEMBLY__ +/* 64-bit dual unit state value */ +typedef struct _tbidual_tag_ { + /* 32-bit value from a pair of registers in data or address units */ + int U0, U1; +} TBIDUAL, *PTBIDUAL; +#endif /* ifndef __ASSEMBLY__ */ + +/* Byte offsets of fields within TBIDUAL */ +#define TBIDUAL_U0 (0) +#define TBIDUAL_U1 (4) + +#define TBIDUAL_BYTES (8) + +#define TBICTX_CRIT_BIT 0x0001 /* ASync state saved in TBICTX */ +#define TBICTX_SOFT_BIT 0x0002 /* Sync state saved in TBICTX (other bits 0) */ +#ifdef TBI_FASTINT_1_4 +#define TBICTX_FINT_BIT 0x0004 /* Using Fast Interrupts */ +#endif +#define TBICTX_FPAC_BIT 0x0010 /* FPU state in TBICTX, FPU active on entry */ +#define TBICTX_XMCC_BIT 0x0020 /* Bit to identify a MECC task */ +#define TBICTX_CBUF_BIT 0x0040 /* Hardware catch buffer flag from TXSTATUS */ +#define TBICTX_CBRP_BIT 0x0080 /* Read pipeline dirty from TXDIVTIME */ +#define TBICTX_XDX8_BIT 0x0100 /* Saved DX.8 to DX.15 too */ +#define TBICTX_XAXX_BIT 0x0200 /* Save remaining AX registers to AX.7 */ +#define TBICTX_XHL2_BIT 0x0400 /* Saved hardware loop registers too */ +#define TBICTX_XTDP_BIT 0x0800 /* Saved DSP registers too */ +#define TBICTX_XEXT_BIT 0x1000 /* Set if TBICTX.Ext.Ctx contains extended + state save area, otherwise TBICTX.Ext.AX2 + just holds normal A0.2 and A1.2 states */ +#define TBICTX_WAIT_BIT 0x2000 /* Causes wait for trigger - sticky toggle */ +#define TBICTX_XCBF_BIT 0x4000 /* Catch buffer or RD extracted into TBICTX */ +#define TBICTX_PRIV_BIT 0x8000 /* Set if system uses 'privileged' model */ + +#ifdef METAC_1_0 +#define TBICTX_XAX3_BIT 0x0200 /* Saved AX.5 to AX.7 for XAXX */ +#define TBICTX_AX_REGS 5 /* Ax.0 to Ax.4 are core GP regs on CHORUS */ +#else +#define TBICTX_XAX4_BIT 0x0200 /* Saved AX.4 to AX.7 for XAXX */ +#define TBICTX_AX_REGS 4 /* Default is Ax.0 to Ax.3 */ +#endif + +#ifdef TBI_1_4 +#define TBICTX_CFGFPU_FX16_BIT 0x00010000 /* Save FX.8 to FX.15 too */ + +/* The METAC_CORE_ID_CONFIG field indicates omitted DSP resources */ +#define METAC_COREID_CFGXCTX_MASK( Value ) (\ + ( (((Value & METAC_COREID_CFGDSP_BITS)>> \ + METAC_COREID_CFGDSP_S ) == METAC_COREID_CFGDSP_MIN) ? \ + ~(TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+ \ + TBICTX_XAXX_BIT+TBICTX_XDX8_BIT ) : ~0U ) ) +#endif + +/* Extended context state provides a standardised method for registering the + arguments required by __TBICtxSave to save the additional register states + currently in use by non general purpose code. The state of the __TBIExtCtx + variable in the static space of the thread forms an extension of the base + context of the thread. + + If ( __TBIExtCtx.Ctx.SaveMask == 0 ) then pExt is assumed to be NULL and + the empty state of __TBIExtCtx is represented by the fact that + TBICTX.SaveMask does not have the bit TBICTX_XEXT_BIT set. + + If ( __TBIExtCtx.Ctx.SaveMask != 0 ) then pExt should point at a suitably + sized extended context save area (usually at the end of the stack space + allocated by the current routine). This space should allow for the + displaced state of A0.2 and A1.2 to be saved along with the other extended + states indicated via __TBIExtCtx.Ctx.SaveMask. */ +#ifndef __ASSEMBLY__ +typedef union _tbiextctx_tag_ { + long long Val; + TBIDUAL AX2; + struct _tbiextctxext_tag { +#ifdef TBI_1_4 + short DspramSizes; /* DSPRAM sizes. Encoding varies between + TBICtxAlloc and the ECH scheme. */ +#else + short Reserved0; +#endif + short SaveMask; /* Flag bits for state saved */ + PTBIDUAL pExt; /* AX[2] state saved first plus Xxxx state */ + + } Ctx; + +} TBIEXTCTX, *PTBIEXTCTX; + +/* Automatic registration of extended context save for __TBINestInts */ +extern TBIEXTCTX __TBIExtCtx; +#endif /* ifndef __ASSEMBLY__ */ + +/* Byte offsets of fields within TBIEXTCTX */ +#define TBIEXTCTX_AX2 (0) +#define TBIEXTCTX_Ctx (0) +#define TBIEXTCTX_Ctx_SaveMask (TBIEXTCTX_Ctx + 2) +#define TBIEXTCTX_Ctx_pExt (TBIEXTCTX_Ctx + 2 + 2) + +/* Extended context data size calculation constants */ +#define TBICTXEXT_BYTES (8) +#define TBICTXEXTBB8_BYTES (8*8) +#define TBICTXEXTAX3_BYTES (3*8) +#define TBICTXEXTAX4_BYTES (4*8) +#ifdef METAC_1_0 +#define TBICTXEXTAXX_BYTES TBICTXEXTAX3_BYTES +#else +#define TBICTXEXTAXX_BYTES TBICTXEXTAX4_BYTES +#endif +#define TBICTXEXTHL2_BYTES (3*8) +#define TBICTXEXTTDR_BYTES (27*8) +#define TBICTXEXTTDP_BYTES TBICTXEXTTDR_BYTES + +#ifdef TBI_1_4 +#define TBICTXEXTFX8_BYTES (4*8) +#define TBICTXEXTFPAC_BYTES (1*4 + 2*2 + 4*8) +#define TBICTXEXTFACF_BYTES (3*8) +#endif + +/* Maximum flag bits to be set via the TBICTX_EXTSET macro */ +#define TBICTXEXT_MAXBITS (TBICTX_XEXT_BIT| \ + TBICTX_XDX8_BIT|TBICTX_XAXX_BIT|\ + TBICTX_XHL2_BIT|TBICTX_XTDP_BIT ) + +/* Maximum size of the extended context save area for current variant */ +#define TBICTXEXT_MAXBYTES (TBICTXEXT_BYTES+TBICTXEXTBB8_BYTES+\ + TBICTXEXTAXX_BYTES+TBICTXEXTHL2_BYTES+\ + TBICTXEXTTDP_BYTES ) + +#ifdef TBI_FASTINT_1_4 +/* Maximum flag bits to be set via the TBICTX_EXTSET macro */ +#define TBICTX2EXT_MAXBITS (TBICTX_XDX8_BIT|TBICTX_XAXX_BIT|\ + TBICTX_XHL2_BIT|TBICTX_XTDP_BIT ) + +/* Maximum size of the extended context save area for current variant */ +#define TBICTX2EXT_MAXBYTES (TBICTXEXTBB8_BYTES+TBICTXEXTAXX_BYTES\ + +TBICTXEXTHL2_BYTES+TBICTXEXTTDP_BYTES ) +#endif + +/* Specify extended resources being used by current routine, code must be + assembler generated to utilise extended resources- + + MOV D0xxx,A0StP ; Perform alloca - routine should + ADD A0StP,A0StP,#SaveSize ; setup/use A0FrP to access locals + MOVT D1xxx,#SaveMask ; TBICTX_XEXT_BIT MUST be set + SETL [A1GbP+#OG(___TBIExtCtx)],D0xxx,D1xxx + + NB: OG(___TBIExtCtx) is a special case supported for SETL/GETL operations + on 64-bit sizes structures only, other accesses must be based on use + of OGA(___TBIExtCtx). + + At exit of routine- + + MOV D0xxx,#0 ; Clear extended context save state + MOV D1xxx,#0 + SETL [A1GbP+#OG(___TBIExtCtx)],D0xxx,D1xxx + SUB A0StP,A0StP,#SaveSize ; If original A0StP required + + NB: Both the setting and clearing of the whole __TBIExtCtx MUST be done + atomically in one 64-bit write operation. + + For simple interrupt handling only via __TBINestInts there should be no + impact of the __TBIExtCtx system. If pre-emptive scheduling is being + performed however (assuming __TBINestInts has already been called earlier + on) then the following logic will correctly call __TBICtxSave if required + and clear out the currently selected background task- + + if ( __TBIExtCtx.Ctx.SaveMask & TBICTX_XEXT_BIT ) + { + / * Store extended states in pCtx * / + State.Sig.SaveMask |= __TBIExtCtx.Ctx.SaveMask; + + (void) __TBICtxSave( State, (void *) __TBIExtCtx.Ctx.pExt ); + __TBIExtCtx.Val = 0; + } + + and when restoring task states call __TBICtxRestore- + + / * Restore state from pCtx * / + State.Sig.pCtx = pCtx; + State.Sig.SaveMask = pCtx->SaveMask; + + if ( State.Sig.SaveMask & TBICTX_XEXT_BIT ) + { + / * Restore extended states from pCtx * / + __TBIExtCtx.Val = pCtx->Ext.Val; + + (void) __TBICtxRestore( State, (void *) __TBIExtCtx.Ctx.pExt ); + } + + */ + +/* Critical thread state save area */ +#ifndef __ASSEMBLY__ +typedef struct _tbictx_tag_ { + /* TXSTATUS_FLAG_BITS and TXSTATUS_LSM_STEP_BITS from TXSTATUS */ + short Flags; + /* Mask indicates any extended context state saved; 0 -> Never run */ + short SaveMask; + /* Saved PC value */ + int CurrPC; + /* Saved critical register states */ + TBIDUAL DX[8]; + /* Background control register states - for cores without catch buffer + base in DIVTIME the TXSTATUS bits RPVALID and RPMASK are stored with + the real state TXDIVTIME in CurrDIVTIME */ + int CurrRPT, CurrBPOBITS, CurrMODE, CurrDIVTIME; + /* Saved AX register states */ + TBIDUAL AX[2]; + TBIEXTCTX Ext; + TBIDUAL AX3[TBICTX_AX_REGS-3]; + + /* Any CBUF state to be restored by a handler return must be stored here. + Other extended state can be stored anywhere - see __TBICtxSave and + __TBICtxRestore. */ + +} TBICTX, *PTBICTX; + +#ifdef TBI_FASTINT_1_4 +typedef struct _tbictx2_tag_ { + TBIDUAL AX[2]; /* AU.0, AU.1 */ + TBIDUAL DX[2]; /* DU.0, DU.4 */ + int CurrMODE; + int CurrRPT; + int CurrSTATUS; + void *CurrPC; /* PC in PC address space */ +} TBICTX2, *PTBICTX2; +/* TBICTX2 is followed by: + * TBICTXEXTCB0 if TXSTATUS.CBMarker + * TBIDUAL * TXSTATUS.IRPCount if TXSTATUS.IRPCount > 0 + * TBICTXGP if using __TBIStdRootIntHandler or __TBIStdCtxSwitchRootIntHandler + */ + +typedef struct _tbictxgp_tag_ { + short DspramSizes; + short SaveMask; + void *pExt; + TBIDUAL DX[6]; /* DU.1-DU.3, DU.5-DU.7 */ + TBIDUAL AX[2]; /* AU.2-AU.3 */ +} TBICTXGP, *PTBICTXGP; + +#define TBICTXGP_DspramSizes (0) +#define TBICTXGP_SaveMask (TBICTXGP_DspramSizes + 2) +#define TBICTXGP_MAX_BYTES (2 + 2 + 4 + 8*(6+2)) + +#endif +#endif /* ifndef __ASSEMBLY__ */ + +/* Byte offsets of fields within TBICTX */ +#define TBICTX_Flags (0) +#define TBICTX_SaveMask (2) +#define TBICTX_CurrPC (4) +#define TBICTX_DX (2 + 2 + 4) +#define TBICTX_CurrRPT (2 + 2 + 4 + 8 * 8) +#define TBICTX_CurrMODE (2 + 2 + 4 + 8 * 8 + 4 + 4) +#define TBICTX_AX (2 + 2 + 4 + 8 * 8 + 4 + 4 + 4 + 4) +#define TBICTX_Ext (2 + 2 + 4 + 8 * 8 + 4 + 4 + 4 + 4 + 2 * 8) +#define TBICTX_Ext_AX2 (TBICTX_Ext + TBIEXTCTX_AX2) +#define TBICTX_Ext_AX2_U0 (TBICTX_Ext + TBIEXTCTX_AX2 + TBIDUAL_U0) +#define TBICTX_Ext_AX2_U1 (TBICTX_Ext + TBIEXTCTX_AX2 + TBIDUAL_U1) +#define TBICTX_Ext_Ctx_pExt (TBICTX_Ext + TBIEXTCTX_Ctx_pExt) +#define TBICTX_Ext_Ctx_SaveMask (TBICTX_Ext + TBIEXTCTX_Ctx_SaveMask) + +#ifdef TBI_FASTINT_1_4 +#define TBICTX2_BYTES (8 * 2 + 8 * 2 + 4 + 4 + 4 + 4) +#define TBICTXEXTCB0_BYTES (4 + 4 + 8) + +#define TBICTX2_CRIT_MAX_BYTES (TBICTX2_BYTES + TBICTXEXTCB0_BYTES + 6 * TBIDUAL_BYTES) +#define TBI_SWITCH_NEXT_PC(PC, EXTRA) ((PC) + (EXTRA & 1) ? 8 : 4) +#endif + +#ifndef __ASSEMBLY__ +/* Extended thread state save areas - catch buffer state element */ +typedef struct _tbictxextcb0_tag_ { + /* Flags data and address value - see METAC_CATCH_VALUES in machine.h */ + unsigned long CBFlags, CBAddr; + /* 64-bit data */ + TBIDUAL CBData; + +} TBICTXEXTCB0, *PTBICTXEXTCB0; + +/* Read pipeline state saved on later cores after single catch buffer slot */ +typedef struct _tbictxextrp6_tag_ { + /* RPMask is TXSTATUS_RPMASK_BITS only, reserved is undefined */ + unsigned long RPMask, Reserved0; + TBIDUAL CBData[6]; + +} TBICTXEXTRP6, *PTBICTXEXTRP6; + +/* Extended thread state save areas - 8 DU register pairs */ +typedef struct _tbictxextbb8_tag_ { + /* Remaining Data unit registers in 64-bit pairs */ + TBIDUAL UX[8]; + +} TBICTXEXTBB8, *PTBICTXEXTBB8; + +/* Extended thread state save areas - 3 AU register pairs */ +typedef struct _tbictxextbb3_tag_ { + /* Remaining Address unit registers in 64-bit pairs */ + TBIDUAL UX[3]; + +} TBICTXEXTBB3, *PTBICTXEXTBB3; + +/* Extended thread state save areas - 4 AU register pairs or 4 FX pairs */ +typedef struct _tbictxextbb4_tag_ { + /* Remaining Address unit or FPU registers in 64-bit pairs */ + TBIDUAL UX[4]; + +} TBICTXEXTBB4, *PTBICTXEXTBB4; + +/* Extended thread state save areas - Hardware loop states (max 2) */ +typedef struct _tbictxexthl2_tag_ { + /* Hardware looping register states */ + TBIDUAL Start, End, Count; + +} TBICTXEXTHL2, *PTBICTXEXTHL2; + +/* Extended thread state save areas - DSP register states */ +typedef struct _tbictxexttdp_tag_ { + /* DSP 32-bit accumulator register state (Bits 31:0 of ACX.0) */ + TBIDUAL Acc32[1]; + /* DSP > 32-bit accumulator bits 63:32 of ACX.0 (zero-extended) */ + TBIDUAL Acc64[1]; + /* Twiddle register state, and three phase increment states */ + TBIDUAL PReg[4]; + /* Modulo region size, padded to 64-bits */ + int CurrMRSIZE, Reserved0; + +} TBICTXEXTTDP, *PTBICTXEXTTDP; + +/* Extended thread state save areas - DSP register states including DSP RAM */ +typedef struct _tbictxexttdpr_tag_ { + /* DSP 32-bit accumulator register state (Bits 31:0 of ACX.0) */ + TBIDUAL Acc32[1]; + /* DSP 40-bit accumulator register state (Bits 39:8 of ACX.0) */ + TBIDUAL Acc40[1]; + /* DSP RAM Pointers */ + TBIDUAL RP0[2], WP0[2], RP1[2], WP1[2]; + /* DSP RAM Increments */ + TBIDUAL RPI0[2], WPI0[2], RPI1[2], WPI1[2]; + /* Template registers */ + unsigned long Tmplt[16]; + /* Modulo address region size and DSP RAM module region sizes */ + int CurrMRSIZE, CurrDRSIZE; + +} TBICTXEXTTDPR, *PTBICTXEXTTDPR; + +#ifdef TBI_1_4 +/* The METAC_ID_CORE register state is a marker for the FPU + state that is then stored after this core header structure. */ +#define TBICTXEXTFPU_CONFIG_MASK ( (METAC_COREID_NOFPACC_BIT+ \ + METAC_COREID_CFGFPU_BITS ) << \ + METAC_COREID_CONFIG_BITS ) + +/* Recorded FPU exception state from TXDEFR in DefrFpu */ +#define TBICTXEXTFPU_DEFRFPU_MASK (TXDEFR_FPU_FE_BITS) + +/* Extended thread state save areas - FPU register states */ +typedef struct _tbictxextfpu_tag_ { + /* Stored METAC_CORE_ID CONFIG */ + int CfgFpu; + /* Stored deferred TXDEFR bits related to FPU + * + * This is encoded as follows in order to fit into 16-bits: + * DefrFPU:15 - 14 <= 0 + * :13 - 8 <= TXDEFR:21-16 + * : 7 - 6 <= 0 + * : 5 - 0 <= TXDEFR:5-0 + */ + short DefrFpu; + + /* TXMODE bits related to FPU */ + short ModeFpu; + + /* FPU Even/Odd register states */ + TBIDUAL FX[4]; + + /* if CfgFpu & TBICTX_CFGFPU_FX16_BIT -> 1 then TBICTXEXTBB4 holds FX.8-15 */ + /* if CfgFpu & TBICTX_CFGFPU_NOACF_BIT -> 0 then TBICTXEXTFPACC holds state */ +} TBICTXEXTFPU, *PTBICTXEXTFPU; + +/* Extended thread state save areas - FPU accumulator state */ +typedef struct _tbictxextfpacc_tag_ { + /* FPU accumulator register state - three 64-bit parts */ + TBIDUAL FAcc32[3]; + +} TBICTXEXTFPACC, *PTBICTXEXTFPACC; +#endif + +/* Prototype TBI structure */ +struct _tbi_tag_ ; + +/* A 64-bit return value used commonly in the TBI APIs */ +typedef union _tbires_tag_ { + /* Save and load this value to get/set the whole result quickly */ + long long Val; + + /* Parameter of a fnSigs or __TBICtx* call */ + struct _tbires_sig_tag_ { + /* TXMASK[I] bits zeroed upto and including current trigger level */ + unsigned short TrigMask; + /* Control bits for handlers - see PTBIAPIFN documentation below */ + unsigned short SaveMask; + /* Pointer to the base register context save area of the thread */ + PTBICTX pCtx; + } Sig; + + /* Result of TBIThrdPrivId call */ + struct _tbires_thrdprivid_tag_ { + /* Basic thread identifier; just TBID_THREAD_BITS */ + int Id; + /* None thread number bits; TBID_ISTAT_BIT+TBID_PSTAT_BIT */ + int Priv; + } Thrd; + + /* Parameter and Result of a __TBISwitch call */ + struct _tbires_switch_tag_ { + /* Parameter passed across context switch */ + void *pPara; + /* Thread context of other Thread includng restore flags */ + PTBICTX pCtx; + } Switch; + + /* For extended S/W events only */ + struct _tbires_ccb_tag_ { + void *pCCB; + int COff; + } CCB; + + struct _tbires_tlb_tag_ { + int Leaf; /* TLB Leaf data */ + int Flags; /* TLB Flags */ + } Tlb; + +#ifdef TBI_FASTINT_1_4 + struct _tbires_intr_tag_ { + short TrigMask; + short SaveMask; + PTBICTX2 pCtx; + } Intr; +#endif + +} TBIRES, *PTBIRES; +#endif /* ifndef __ASSEMBLY__ */ + +#ifndef __ASSEMBLY__ +/* Prototype for all signal handler functions, called via ___TBISyncTrigger or + ___TBIASyncTrigger. + + State.Sig.TrigMask will indicate the bits set within TXMASKI at + the time of the handler call that have all been cleared to prevent + nested interrupt occuring immediately. + + State.Sig.SaveMask is a bit-mask which will be set to Zero when a trigger + occurs at background level and TBICTX_CRIT_BIT and optionally + TBICTX_CBUF_BIT when a trigger occurs at interrupt level. + + TBICTX_CBUF_BIT reflects the state of TXSTATUS_CBMARKER_BIT for + the interrupted background thread. + + State.Sig.pCtx will point at a TBICTX structure generated to hold the + critical state of the interrupted thread at interrupt level and + should be set to NULL when called at background level. + + Triggers will indicate the status of TXSTAT or TXSTATI sampled by the + code that called the handler. + + InstOrSWSId is defined firstly as 'Inst' if the SigNum is TBID_SIGNUM_SWx + and hold the actual SWITCH instruction detected, secondly if SigNum + is TBID_SIGNUM_SWS the 'SWSId' is defined to hold the Id of the + software signal detected, in other cases the value of this + parameter is undefined. + + pTBI points at the PTBI structure related to the thread and processing + level involved. + + TBIRES return value at both processing levels is similar in terms of any + changes that the handler makes. By default the State argument value + passed in should be returned. + + Sig.TrigMask value is bits to OR back into TXMASKI when the handler + completes to enable currently disabled interrupts. + + Sig.SaveMask value is ignored. + + Sig.pCtx is ignored. + + */ +typedef TBIRES (*PTBIAPIFN)( TBIRES State, int SigNum, + int Triggers, int InstOrSWSId, + volatile struct _tbi_tag_ *pTBI ); +#endif /* ifndef __ASSEMBLY__ */ + +#ifndef __ASSEMBLY__ +/* The global memory map is described by a list of segment descriptors */ +typedef volatile struct _tbiseg_tag_ { + volatile struct _tbiseg_tag_ *pLink; + int Id; /* Id of the segment */ + TBISPIN Lock; /* Spin-lock for struct (normally 0) */ + unsigned int Bytes; /* Size of region in bytes */ + void *pGAddr; /* Base addr of region in global space */ + void *pLAddr; /* Base addr of region in local space */ + int Data[2]; /* Segment specific data (may be extended) */ + +} TBISEG, *PTBISEG; +#endif /* ifndef __ASSEMBLY__ */ + +/* Offsets of fields in TBISEG structure */ +#define TBISEG_pLink ( 0) +#define TBISEG_Id ( 4) +#define TBISEG_Lock ( 8) +#define TBISEG_Bytes (12) +#define TBISEG_pGAddr (16) +#define TBISEG_pLAddr (20) +#define TBISEG_Data (24) + +#ifndef __ASSEMBLY__ +typedef volatile struct _tbi_tag_ { + int SigMask; /* Bits set to represent S/W events */ + PTBIKICK pKick; /* Kick addr for S/W events */ + void *pCCB; /* Extended S/W events */ + PTBISEG pSeg; /* Related segment structure */ + PTBIAPIFN fnSigs[TBID_SIGNUM_MAX+1];/* Signal handler API table */ +} *PTBI, TBI; +#endif /* ifndef __ASSEMBLY__ */ + +/* Byte offsets of fields within TBI */ +#define TBI_SigMask (0) +#define TBI_pKick (4) +#define TBI_pCCB (8) +#define TBI_pSeg (12) +#define TBI_fnSigs (16) + +#ifdef TBI_1_4 +#ifndef __ASSEMBLY__ +/* This handler should be used for TBID_SIGNUM_DFR */ +extern TBIRES __TBIHandleDFR ( TBIRES State, int SigNum, + int Triggers, int InstOrSWSId, + volatile struct _tbi_tag_ *pTBI ); +#endif +#endif + +/* String table entry - special values */ +#define METAG_TBI_STRS (0x5300) /* Tag : If entry is valid */ +#define METAG_TBI_STRE (0x4500) /* Tag : If entry is end of table */ +#define METAG_TBI_STRG (0x4700) /* Tag : If entry is a gap */ +#define METAG_TBI_STRX (0x5A00) /* TransLen : If no translation present */ + +#ifndef __ASSEMBLY__ +typedef volatile struct _tbistr_tag_ { + short Bytes; /* Length of entry in Bytes */ + short Tag; /* Normally METAG_TBI_STRS(0x5300) */ + short Len; /* Length of the string entry (incl null) */ + short TransLen; /* Normally METAG_TBI_STRX(0x5A00) */ + char String[8]; /* Zero terminated (may-be bigger) */ + +} TBISTR, *PTBISTR; +#endif /* ifndef __ASSEMBLY__ */ + +/* Cache size information - available as fields of Data[1] of global heap + segment */ +#define METAG_TBI_ICACHE_SIZE_S 0 /* see comments below */ +#define METAG_TBI_ICACHE_SIZE_BITS 0x0000000F +#define METAG_TBI_ICACHE_FILL_S 4 +#define METAG_TBI_ICACHE_FILL_BITS 0x000000F0 +#define METAG_TBI_DCACHE_SIZE_S 8 +#define METAG_TBI_DCACHE_SIZE_BITS 0x00000F00 +#define METAG_TBI_DCACHE_FILL_S 12 +#define METAG_TBI_DCACHE_FILL_BITS 0x0000F000 + +/* METAG_TBI_xCACHE_SIZE + Describes the physical cache size rounded up to the next power of 2 + relative to a 16K (2^14) cache. These sizes are encoded as a signed addend + to this base power of 2, for example + 4K -> 2^12 -> -2 (i.e. 12-14) + 8K -> 2^13 -> -1 + 16K -> 2^14 -> 0 + 32K -> 2^15 -> +1 + 64K -> 2^16 -> +2 + 128K -> 2^17 -> +3 + + METAG_TBI_xCACHE_FILL + Describes the physical cache size within the power of 2 area given by + the value above. For example a 10K cache may be represented as having + nearest size 16K with a fill of 10 sixteenths. This is encoded as the + number of unused 1/16ths, for example + 0000 -> 0 -> 16/16 + 0001 -> 1 -> 15/16 + 0010 -> 2 -> 14/16 + ... + 1111 -> 15 -> 1/16 + */ + +#define METAG_TBI_CACHE_SIZE_BASE_LOG2 14 + +/* Each declaration made by this macro generates a TBISTR entry */ +#ifndef __ASSEMBLY__ +#define TBISTR_DECL( Name, Str ) \ + __attribute__ ((__section__ (".tbistr") )) const char Name[] = #Str +#endif + +/* META timer values - see below for Timer support routines */ +#define TBI_TIMERWAIT_MIN (-16) /* Minimum 'recommended' period */ +#define TBI_TIMERWAIT_MAX (-0x7FFFFFFF) /* Maximum 'recommended' period */ + +#ifndef __ASSEMBLY__ +/* These macros allow direct access from C to any register known to the + assembler or defined in machine.h. Example candidates are TXTACTCYC, + TXIDLECYC, and TXPRIVEXT. Note that when higher level macros and routines + like the timer and trigger handling features below these should be used in + preference to this direct low-level access mechanism. */ +#define TBI_GETREG( Reg ) __extension__ ({\ + int __GRValue; \ + __asm__ volatile ("MOV\t%0," #Reg "\t/* (*TBI_GETREG OK) */" : \ + "=r" (__GRValue) ); \ + __GRValue; }) + +#define TBI_SETREG( Reg, Value ) do {\ + int __SRValue = Value; \ + __asm__ volatile ("MOV\t" #Reg ",%0\t/* (*TBI_SETREG OK) */" : \ + : "r" (__SRValue) ); } while (0) + +#define TBI_SWAPREG( Reg, Value ) do {\ + int __XRValue = (Value); \ + __asm__ volatile ("SWAP\t" #Reg ",%0\t/* (*TBI_SWAPREG OK) */" : \ + "=r" (__XRValue) : "0" (__XRValue) ); \ + Value = __XRValue; } while (0) + +/* Obtain and/or release global critical section lock given that interrupts + are already disabled and/or should remain disabled. */ +#define TBI_NOINTSCRITON do {\ + __asm__ volatile ("LOCK1\t\t/* (*TBI_NOINTSCRITON OK) */");} while (0) +#define TBI_NOINTSCRITOFF do {\ + __asm__ volatile ("LOCK0\t\t/* (*TBI_NOINTSCRITOFF OK) */");} while (0) +/* Optimised in-lining versions of the above macros */ + +#define TBI_LOCK( TrigState ) do {\ + int __TRValue; \ + int __ALOCKHI = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000; \ + __asm__ volatile ("MOV %0,#0\t\t/* (*TBI_LOCK ... */\n\t" \ + "SWAP\t%0,TXMASKI\t/* ... */\n\t" \ + "LOCK2\t\t/* ... */\n\t" \ + "SETD\t[%1+#0x40],D1RtP /* ... OK) */" : \ + "=r&" (__TRValue) : "u" (__ALOCKHI) ); \ + TrigState = __TRValue; } while (0) +#define TBI_CRITON( TrigState ) do {\ + int __TRValue; \ + __asm__ volatile ("MOV %0,#0\t\t/* (*TBI_CRITON ... */\n\t" \ + "SWAP\t%0,TXMASKI\t/* ... */\n\t" \ + "LOCK1\t\t/* ... OK) */" : \ + "=r" (__TRValue) ); \ + TrigState = __TRValue; } while (0) + +#define TBI_INTSX( TrigState ) do {\ + int __TRValue = TrigState; \ + __asm__ volatile ("SWAP\t%0,TXMASKI\t/* (*TBI_INTSX OK) */" : \ + "=r" (__TRValue) : "0" (__TRValue) ); \ + TrigState = __TRValue; } while (0) + +#define TBI_UNLOCK( TrigState ) do {\ + int __TRValue = TrigState; \ + int __ALOCKHI = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000; \ + __asm__ volatile ("SETD\t[%1+#0x00],D1RtP\t/* (*TBI_UNLOCK ... */\n\t" \ + "LOCK0\t\t/* ... */\n\t" \ + "MOV\tTXMASKI,%0\t/* ... OK) */" : \ + : "r" (__TRValue), "u" (__ALOCKHI) ); } while (0) + +#define TBI_CRITOFF( TrigState ) do {\ + int __TRValue = TrigState; \ + __asm__ volatile ("LOCK0\t\t/* (*TBI_CRITOFF ... */\n\t" \ + "MOV\tTXMASKI,%0\t/* ... OK) */" : \ + : "r" (__TRValue) ); } while (0) + +#define TBI_TRIGSX( SrcDst ) do { TBI_SWAPREG( TXMASK, SrcDst );} while (0) + +/* Composite macros to perform logic ops on INTS or TRIGS masks */ +#define TBI_INTSOR( Bits ) do {\ + int __TT = 0; TBI_INTSX(__TT); \ + __TT |= (Bits); TBI_INTSX(__TT); } while (0) + +#define TBI_INTSAND( Bits ) do {\ + int __TT = 0; TBI_INTSX(__TT); \ + __TT &= (Bits); TBI_INTSX(__TT); } while (0) + +#ifdef TBI_1_4 +#define TBI_DEFRICTRLSOR( Bits ) do {\ + int __TT = TBI_GETREG( CT.20 ); \ + __TT |= (Bits); TBI_SETREG( CT.20, __TT); } while (0) + +#define TBI_DEFRICTRLSAND( Bits ) do {\ + int __TT = TBI_GETREG( TXDEFR ); \ + __TT &= (Bits); TBI_SETREG( CT.20, __TT); } while (0) +#endif + +#define TBI_TRIGSOR( Bits ) do {\ + int __TT = TBI_GETREG( TXMASK ); \ + __TT |= (Bits); TBI_SETREG( TXMASK, __TT); } while (0) + +#define TBI_TRIGSAND( Bits ) do {\ + int __TT = TBI_GETREG( TXMASK ); \ + __TT &= (Bits); TBI_SETREG( TXMASK, __TT); } while (0) + +/* Macros to disable and re-enable interrupts using TBI_INTSX, deliberate + traps and exceptions can still be handled within the critical section. */ +#define TBI_STOPINTS( Value ) do {\ + int __TT = TBI_GETREG( TXMASKI ); \ + __TT &= TXSTATI_BGNDHALT_BIT; TBI_INTSX( __TT ); \ + Value = __TT; } while (0) +#define TBI_RESTINTS( Value ) do {\ + int __TT = Value; TBI_INTSX( __TT ); } while (0) + +/* Return pointer to segment list at current privilege level */ +PTBISEG __TBISegList( void ); + +/* Search the segment list for a match given Id, pStart can be NULL */ +PTBISEG __TBIFindSeg( PTBISEG pStart, int Id ); + +/* Prepare a new segment structure using space from within another */ +PTBISEG __TBINewSeg( PTBISEG pFromSeg, int Id, unsigned int Bytes ); + +/* Prepare a new segment using any global or local heap segments available */ +PTBISEG __TBIMakeNewSeg( int Id, unsigned int Bytes ); + +/* Insert a new segment into the segment list so __TBIFindSeg can locate it */ +void __TBIAddSeg( PTBISEG pSeg ); +#define __TBIADDSEG_DEF /* Some versions failed to define this */ + +/* Return Id of current thread; TBID_ISTAT_BIT+TBID_THREAD_BITS */ +int __TBIThreadId( void ); + +/* Return TBIRES.Thrd data for current thread */ +TBIRES __TBIThrdPrivId( void ); + +/* Return pointer to current threads TBI root block. + Id implies whether Int or Background root block is required */ +PTBI __TBI( int Id ); + +/* Try to set Mask bit using the spin-lock protocol, return 0 if fails and + new state if succeeds */ +int __TBIPoll( PTBISPIN pLock, int Mask ); + +/* Set Mask bits via the spin-lock protocol in *pLock, return new state */ +int __TBISpin( PTBISPIN pLock, int Mask ); + +/* Default handler set up for all TBI.fnSigs entries during initialisation */ +TBIRES __TBIUnExpXXX( TBIRES State, int SigNum, + int Triggers, int Inst, PTBI pTBI ); + +/* Call this routine to service triggers at background processing level. The + TBID_POLL_BIT of the Id parameter value will be used to indicate that the + routine should return if no triggers need to be serviced initially. If this + bit is not set the routine will block until one trigger handler is serviced + and then behave like the poll case servicing any remaining triggers + actually outstanding before returning. Normally the State parameter should + be simply initialised to zero and the result should be ignored, other + values/options are for internal use only. */ +TBIRES __TBISyncTrigger( TBIRES State, int Id ); + +/* Call this routine to enable processing of triggers by signal handlers at + interrupt level. The State parameter value passed is returned by this + routine. The State.Sig.TrigMask field also specifies the initial + state of the interrupt mask register TXMASKI to be setup by the call. + The other parts of the State parameter are ignored unless the PRIV bit is + set in the SaveMask field. In this case the State.Sig.pCtx field specifies + the base of the stack to which the interrupt system should switch into + as it saves the state of the previously executing code. In the case the + thread will be unprivileged as it continues execution at the return + point of this routine and it's future state will be effectively never + trusted to be valid. */ +TBIRES __TBIASyncTrigger( TBIRES State ); + +/* Call this to swap soft threads executing at the background processing level. + The TBIRES returned to the new thread will be the same as the NextThread + value specified to the call. The NextThread.Switch.pCtx value specifies + which thread context to restore and the NextThread.Switch.Para value can + hold an arbitrary expression to be passed between the threads. The saved + state of the previous thread will be stored in a TBICTX descriptor created + on it's stack and the address of this will be stored into the *rpSaveCtx + location specified. */ +TBIRES __TBISwitch( TBIRES NextThread, PTBICTX *rpSaveCtx ); + +/* Call this to initialise a stack frame ready for further use, up to four + 32-bit arguments may be specified after the fixed args to be passed via + the new stack pStack to the routine specified via fnMain. If the + main-line routine ever returns the thread will operate as if main itself + had returned and terminate with the return code given. */ +typedef int (*PTBIMAINFN)( TBIRES Arg /*, <= 4 additional 32-bit args */ ); +PTBICTX __TBISwitchInit( void *pStack, PTBIMAINFN fnMain, ... ); + +/* Call this to resume a thread from a saved synchronous TBICTX state. + The TBIRES returned to the new thread will be the same as the NextThread + value specified to the call. The NextThread.Switch.pCtx value specifies + which thread context to restore and the NextThread.Switch.Para value can + hold an arbitrary expression to be passed between the threads. The context + of the calling thread is lost and this routine never returns to the + caller. The TrigsMask value supplied is ored into TXMASKI to enable + interrupts after the context of the new thread is established. */ +void __TBISyncResume( TBIRES NextThread, int TrigsMask ); + +/* Call these routines to save and restore the extended states of + scheduled tasks. */ +void *__TBICtxSave( TBIRES State, void *pExt ); +void *__TBICtxRestore( TBIRES State, void *pExt ); + +#ifdef TBI_1_4 +#ifdef TBI_FASTINT_1_4 +/* Call these routines to copy the GP state to a separate buffer + * Only necessary for context switching. + */ +PTBICTXGP __TBICtx2SaveCrit( PTBICTX2 pCurrentCtx, PTBICTX2 pSaveCtx ); +void *__TBICtx2SaveGP( PTBICTXGP pCurrentCtxGP, PTBICTXGP pSaveCtxGP ); + +/* Call these routines to save and restore the extended states of + scheduled tasks. */ +void *__TBICtx2Save( PTBICTXGP pCtxGP, short SaveMask, void *pExt ); +void *__TBICtx2Restore( PTBICTX2 pCtx, short SaveMask, void *pExt ); +#endif + +/* If FPAC flag is set then significant FPU context exists. Call these routine + to save and restore it */ +void *__TBICtxFPUSave( TBIRES State, void *pExt ); +void *__TBICtxFPURestore( TBIRES State, void *pExt ); + +#ifdef TBI_FASTINT_1_4 +extern void *__TBICtx2FPUSave (PTBICTXGP, short, void*); +extern void *__TBICtx2FPURestore (PTBICTXGP, short, void*); +#endif +#endif + +#ifdef TBI_1_4 +/* Call these routines to save and restore DSPRAM. */ +void *__TBIDspramSaveA (short DspramSizes, void *pExt); +void *__TBIDspramSaveB (short DspramSizes, void *pExt); +void *__TBIDspramRestoreA (short DspramSizes, void *pExt); +void *__TBIDspramRestoreB (short DspramSizes, void *pExt); +#endif + +/* This routine should be used at the entrypoint of interrupt handlers to + re-enable higher priority interrupts and/or save state from the previously + executing background code. State is a TBIRES.Sig parameter with NoNestMask + indicating the triggers (if any) that should remain disabled and SaveMask + CBUF bit indicating the if the hardware catch buffer is dirty. Optionally + any number of extended state bits X??? including XCBF can be specified to + force a nested state save call to __TBICtxSave before the current routine + continues. (In the latter case __TBICtxRestore should be called to restore + any extended states before the background thread of execution is resumed) + + By default (no X??? bits specified in SaveMask) this routine performs a + sub-call to __TBICtxSave with the pExt and State parameters specified IF + some triggers could be serviced while the current interrupt handler + executes and the hardware catch buffer is actually dirty. In this case + this routine provides the XCBF bit in State.Sig.SaveMask to force the + __TBICtxSave to extract the current catch state. + + The NoNestMask parameter should normally indicate that the same or lower + triggers than those provoking the current handler call should not be + serviced in nested calls, zero may be specified if all possible interrupts + are to be allowed. + + The TBIRES.Sig value returned will be similar to the State parameter + specified with the XCBF bit ORed into it's SaveMask if a context save was + required and fewer bits set in it's TrigMask corresponding to the same/lower + priority interrupt triggers still not enabled. */ +TBIRES __TBINestInts( TBIRES State, void *pExt, int NoNestMask ); + +/* This routine causes the TBICTX structure specified in State.Sig.pCtx to + be restored. This implies that execution will not return to the caller. + The State.Sig.TrigMask field will be restored during the context switch + such that any immediately occuring interrupts occur in the context of the + newly specified task. The State.Sig.SaveMask parameter is ignored. */ +void __TBIASyncResume( TBIRES State ); + +/* Call this routine to enable fastest possible processing of one or more + interrupt triggers via a unified signal handler. The handler concerned + must simple return after servicing the related hardware. + The State.Sig.TrigMask parameter indicates the interrupt triggers to be + enabled and the Thin.Thin.fnHandler specifies the routine to call and + the whole Thin parameter value will be passed to this routine unaltered as + it's first parameter. */ +void __TBIASyncThin( TBIRES State, TBIRES Thin ); + +/* Do this before performing your own direct spin-lock access - use TBI_LOCK */ +int __TBILock( void ); + +/* Do this after performing your own direct spin-lock access - use TBI_UNLOCK */ +void __TBIUnlock( int TrigState ); + +/* Obtain and release global critical section lock - only stops execution + of interrupts on this thread and similar critical section code on other + local threads - use TBI_CRITON or TBI_CRITOFF */ +int __TBICritOn( void ); +void __TBICritOff( int TrigState ); + +/* Change INTS (TXMASKI) - return old state - use TBI_INTSX */ +int __TBIIntsX( int NewMask ); + +/* Change TRIGS (TXMASK) - return old state - use TBI_TRIGSX */ +int __TBITrigsX( int NewMask ); + +/* This function initialises a timer for first use, only the TBID_ISTAT_BIT + of the Id parameter is used to indicate which timer is to be modified. The + Wait value should either be zero to disable the timer concerned or be in + the recommended TBI_TIMERWAIT_* range to specify the delay required before + the first timer trigger occurs. + + The TBID_ISTAT_BIT of the Id parameter similar effects all other timer + support functions (see below). */ +void __TBITimerCtrl( int Id, int Wait ); + +/* This routine returns a 64-bit time stamp value that is initialised to zero + via a __TBITimerCtrl timer enabling call. */ +long long __TBITimeStamp( int Id ); + +/* To manage a periodic timer each period elapsed should be subracted from + the current timer value to attempt to set up the next timer trigger. The + Wait parameter should be a value in the recommended TBI_TIMERWAIT_* range. + The return value is the new aggregate value that the timer was updated to, + if this is less than zero then a timer trigger is guaranteed to be + generated after the number of ticks implied, if a positive result is + returned either itterative or step-wise corrective action must be taken to + resynchronise the timer and hence provoke a future timer trigger. */ +int __TBITimerAdd( int Id, int Wait ); + +/* String table search function, pStart is first entry to check or NULL, + pStr is string data to search for and MatchLen is either length of string + to compare for an exact match or negative length to compare for partial + match. */ +const TBISTR *__TBIFindStr( const TBISTR *pStart, + const char *pStr, int MatchLen ); + +/* String table translate function, pStr is text to translate and Len is + it's length. Value returned may not be a string pointer if the + translation value is really some other type, 64-bit alignment of the return + pointer is guaranteed so almost any type including a structure could be + located with this routine. */ +const void *__TBITransStr( const char *pStr, int Len ); + + + +/* Arbitrary physical memory access windows, use different Channels to avoid + conflict/thrashing within a single piece of code. */ +void *__TBIPhysAccess( int Channel, int PhysAddr, int Bytes ); +void __TBIPhysRelease( int Channel, void *pLinAddr ); + +#ifdef METAC_1_0 +/* Data cache function nullified because data cache is off */ +#define TBIDCACHE_FLUSH( pAddr ) +#define TBIDCACHE_PRELOAD( Type, pAddr ) ((Type) (pAddr)) +#define TBIDCACHE_REFRESH( Type, pAddr ) ((Type) (pAddr)) +#endif +#ifdef METAC_1_1 +/* To flush a single cache line from the data cache using a linear address */ +#define TBIDCACHE_FLUSH( pAddr ) ((volatile char *) \ + (((unsigned int) (pAddr))>>LINSYSLFLUSH_S))[0] = 0 + +extern void * __builtin_dcache_preload (void *); + +/* Try to ensure that the data at the address concerned is in the cache */ +#define TBIDCACHE_PRELOAD( Type, Addr ) \ + ((Type) __builtin_dcache_preload ((void *)(Addr))) + +extern void * __builtin_dcache_refresh (void *); + +/* Flush any old version of data from address and re-load a new copy */ +#define TBIDCACHE_REFRESH( Type, Addr ) __extension__ ({ \ + Type __addr = (Type)(Addr); \ + (void)__builtin_dcache_refresh ((void *)(((unsigned int)(__addr))>>6)); \ + __addr; }) + +#endif +#ifndef METAC_1_0 +#ifndef METAC_1_1 +/* Support for DCACHE builtin */ +extern void __builtin_dcache_flush (void *); + +/* To flush a single cache line from the data cache using a linear address */ +#define TBIDCACHE_FLUSH( Addr ) \ + __builtin_dcache_flush ((void *)(Addr)) + +extern void * __builtin_dcache_preload (void *); + +/* Try to ensure that the data at the address concerned is in the cache */ +#define TBIDCACHE_PRELOAD( Type, Addr ) \ + ((Type) __builtin_dcache_preload ((void *)(Addr))) + +extern void * __builtin_dcache_refresh (void *); + +/* Flush any old version of data from address and re-load a new copy */ +#define TBIDCACHE_REFRESH( Type, Addr ) \ + ((Type) __builtin_dcache_refresh ((void *)(Addr))) + +#endif +#endif + +/* Flush the MMCU cache */ +#define TBIMCACHE_FLUSH() { ((volatile int *) LINSYSCFLUSH_MMCU)[0] = 0; } + +#ifdef METAC_2_1 +/* Obtain the MMU table entry for the specified address */ +#define TBIMTABLE_LEAFDATA(ADDR) TBIXCACHE_RD((int)(ADDR) & (-1<<6)) + +#ifndef __ASSEMBLY__ +/* Obtain the full MMU table entry for the specified address */ +#define TBIMTABLE_DATA(ADDR) __extension__ ({ TBIRES __p; \ + __p.Val = TBIXCACHE_RL((int)(ADDR) & (-1<<6)); \ + __p; }) +#endif +#endif + +/* Combine a physical base address, and a linear address + * Internal use only + */ +#define _TBIMTABLE_LIN2PHYS(PHYS, LIN, LMASK) (void*)(((int)(PHYS)&0xFFFFF000)\ + +((int)(LIN)&(LMASK))) + +/* Convert a linear to a physical address */ +#define TBIMTABLE_LIN2PHYS(LEAFDATA, ADDR) \ + (((LEAFDATA) & CRLINPHY0_VAL_BIT) \ + ? _TBIMTABLE_LIN2PHYS(LEAFDATA, ADDR, 0x00000FFF) \ + : 0) + +/* Debug support - using external debugger or host */ +void __TBIDumpSegListEntries( void ); +void __TBILogF( const char *pFmt, ... ); +void __TBIAssert( const char *pFile, int LineNum, const char *pExp ); +void __TBICont( const char *pMsg, ... ); /* TBIAssert -> 'wait for continue' */ + +/* Array of signal name data for debug messages */ +extern const char __TBISigNames[]; +#endif /* ifndef __ASSEMBLY__ */ + + + +/* Scale of sub-strings in the __TBISigNames string list */ +#define TBI_SIGNAME_SCALE 4 +#define TBI_SIGNAME_SCALE_S 2 + +#define TBI_1_3 + +#ifdef TBI_1_3 + +#ifndef __ASSEMBLY__ +#define TBIXCACHE_RD(ADDR) __extension__ ({\ + void * __Addr = (void *)(ADDR); \ + int __Data; \ + __asm__ volatile ( "CACHERD\t%0,[%1+#0]" : \ + "=r" (__Data) : "r" (__Addr) ); \ + __Data; }) + +#define TBIXCACHE_RL(ADDR) __extension__ ({\ + void * __Addr = (void *)(ADDR); \ + long long __Data; \ + __asm__ volatile ( "CACHERL\t%0,%t0,[%1+#0]" : \ + "=d" (__Data) : "r" (__Addr) ); \ + __Data; }) + +#define TBIXCACHE_WD(ADDR, DATA) do {\ + void * __Addr = (void *)(ADDR); \ + int __Data = DATA; \ + __asm__ volatile ( "CACHEWD\t[%0+#0],%1" : \ + : "r" (__Addr), "r" (__Data) ); } while(0) + +#define TBIXCACHE_WL(ADDR, DATA) do {\ + void * __Addr = (void *)(ADDR); \ + long long __Data = DATA; \ + __asm__ volatile ( "CACHEWL\t[%0+#0],%1,%t1" : \ + : "r" (__Addr), "r" (__Data) ); } while(0) + +#ifdef TBI_4_0 + +#define TBICACHE_FLUSH_L1D_L2(ADDR) \ + TBIXCACHE_WD(ADDR, CACHEW_FLUSH_L1D_L2) +#define TBICACHE_WRITEBACK_L1D_L2(ADDR) \ + TBIXCACHE_WD(ADDR, CACHEW_WRITEBACK_L1D_L2) +#define TBICACHE_INVALIDATE_L1D(ADDR) \ + TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1D) +#define TBICACHE_INVALIDATE_L1D_L2(ADDR) \ + TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1D_L2) +#define TBICACHE_INVALIDATE_L1DTLB(ADDR) \ + TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1DTLB) +#define TBICACHE_INVALIDATE_L1I(ADDR) \ + TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1I) +#define TBICACHE_INVALIDATE_L1ITLB(ADDR) \ + TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1ITLB) + +#endif /* TBI_4_0 */ +#endif /* ifndef __ASSEMBLY__ */ + +/* + * Calculate linear PC value from real PC and Minim mode control, the LSB of + * the result returned indicates if address compression has occured. + */ +#ifndef __ASSEMBLY__ +#define METAG_LINPC( PCVal ) (\ + ( (TBI_GETREG(TXPRIVEXT) & TXPRIVEXT_MINIMON_BIT) != 0 ) ? ( \ + ( ((PCVal) & 0x00900000) == 0x00900000 ) ? \ + (((PCVal) & 0xFFE00000) + (((PCVal) & 0x001FFFFC)>>1) + 1) : \ + ( ((PCVal) & 0x00800000) == 0x00000000 ) ? \ + (((PCVal) & 0xFF800000) + (((PCVal) & 0x007FFFFC)>>1) + 1) : \ + (PCVal) ) \ + : (PCVal) ) +#define METAG_LINPC_X2BIT 0x00000001 /* Make (Size>>1) if compressed */ + +/* Convert an arbitrary Linear address into a valid Minim PC or return 0 */ +#define METAG_PCMINIM( LinVal ) (\ + (((LinVal) & 0x00980000) == 0x00880000) ? \ + (((LinVal) & 0xFFE00000) + (((LinVal) & 0x000FFFFE)<<1)) : \ + (((LinVal) & 0x00C00000) == 0x00000000) ? \ + (((LinVal) & 0xFF800000) + (((LinVal) & 0x003FFFFE)<<1)) : 0 ) + +/* Reverse a METAG_LINPC conversion step to return the original PCVal */ +#define METAG_PCLIN( LinVal ) ( 0xFFFFFFFC & (\ + ( (LinVal & METAG_LINPC_X2BIT) != 0 ) ? METAG_PCMINIM( LinVal ) : \ + (LinVal) )) + +/* + * Flush the MMCU Table cache privately for each thread. On cores that do not + * support per-thread flushing it will flush all threads mapping data. + */ +#define TBIMCACHE_TFLUSH(Thread) do {\ + ((volatile int *)( LINSYSCFLUSH_TxMMCU_BASE + \ + (LINSYSCFLUSH_TxMMCU_STRIDE*(Thread)) ))[0] = 0; \ + } while(0) + +/* + * To flush a single linear-matched cache line from the code cache. In + * cases where Minim is possible the METAC_LINPC operation must be used + * to pre-process the address being flushed. + */ +#define TBIICACHE_FLUSH( pAddr ) TBIXCACHE_WD (pAddr, CACHEW_ICACHE_BIT) + +/* To flush a single linear-matched mapping from code/data MMU table cache */ +#define TBIMCACHE_AFLUSH( pAddr, SegType ) \ + TBIXCACHE_WD(pAddr, CACHEW_TLBFLUSH_BIT + ( \ + ((SegType) == TBID_SEGTYPE_TEXT) ? CACHEW_ICACHE_BIT : 0 )) + +/* + * To flush translation data corresponding to a range of addresses without + * using TBITCACHE_FLUSH to flush all of this threads translation data. It + * is necessary to know what stride (>= 4K) must be used to flush a specific + * region. + * + * For example direct mapped regions use the maximum page size (512K) which may + * mean that only one flush is needed to cover the sub-set of the direct + * mapped area used since it was setup. + * + * The function returns the stride on which flushes should be performed. + * + * If 0 is returned then the region is not subject to MMU caching, if -1 is + * returned then this indicates that only TBIMCACHE_TFLUSH can be used to + * flush the region concerned rather than TBIMCACHE_AFLUSH which this + * function is designed to support. + */ +int __TBIMMUCacheStride( const void *pStart, int Bytes ); + +/* + * This function will use the above lower level functions to achieve a MMU + * table data flush in an optimal a fashion as possible. On a system that + * supports linear address based caching this function will also call the + * code or data cache flush functions to maintain address/data coherency. + * + * SegType should be TBID_SEGTYPE_TEXT if the address range is for code or + * any other value such as TBID_SEGTYPE_DATA for data. If an area is + * used in both ways then call this function twice; once for each. + */ +void __TBIMMUCacheFlush( const void *pStart, int Bytes, int SegType ); + +/* + * Cached Core mode setup and flush functions allow one code and one data + * region of the corresponding global or local cache partion size to be + * locked into the corresponding cache memory. This prevents normal LRU + * logic discarding the code or data and avoids write-thru bandwidth in + * data areas. Code mappings are selected by specifying TBID_SEGTYPE_TEXT + * for SegType, otherwise data mappings are created. + * + * Mode supplied should always contain the VALID bit and WINx selection data. + * Data areas will be mapped read-only if the WRITE bit is not added. + * + * The address returned by the Opt function will either be the same as that + * passed in (if optimisation cannot be supported) or the base of the new core + * cached region in linear address space. The returned address must be passed + * into the End function to remove the mapping when required. If a non-core + * cached memory address is passed into it the End function has no effect. + * Note that the region accessed MUST be flushed from the appropriate cache + * before the End function is called to deliver correct operation. + */ +void *__TBICoreCacheOpt( const void *pStart, int Bytes, int SegType, int Mode ); +void __TBICoreCacheEnd( const void *pOpt, int Bytes, int SegType ); + +/* + * Optimise physical access channel and flush side effects before releasing + * the channel. If pStart is NULL the whole region must be flushed and this is + * done automatically by the channel release function if optimisation is + * enabled. Flushing the specific region that may have been accessed before + * release should optimises this process. On physically cached systems we do + * not flush the code/data caches only the MMU table data needs flushing. + */ +void __TBIPhysOptim( int Channel, int IMode, int DMode ); +void __TBIPhysFlush( int Channel, const void *pStart, int Bytes ); +#endif +#endif /* ifdef TBI_1_3 */ + +#endif /* _ASM_METAG_TBX_H_ */ diff --git a/arch/metag/include/asm/tcm.h b/arch/metag/include/asm/tcm.h new file mode 100644 index 000000000000..7711c317b1d2 --- /dev/null +++ b/arch/metag/include/asm/tcm.h @@ -0,0 +1,30 @@ +#ifndef __ASM_TCM_H__ +#define __ASM_TCM_H__ + +#include <linux/ioport.h> +#include <linux/list.h> + +struct tcm_allocation { + struct list_head list; + unsigned int tag; + unsigned long addr; + unsigned long size; +}; + +/* + * TCM memory region descriptor. + */ +struct tcm_region { + unsigned int tag; + struct resource res; +}; + +#define TCM_INVALID_TAG 0xffffffff + +unsigned long tcm_alloc(unsigned int tag, size_t len); +void tcm_free(unsigned int tag, unsigned long addr, size_t len); +unsigned int tcm_lookup_tag(unsigned long p); + +int tcm_add_region(struct tcm_region *reg); + +#endif diff --git a/arch/metag/include/asm/thread_info.h b/arch/metag/include/asm/thread_info.h new file mode 100644 index 000000000000..0ecd34d8b5f6 --- /dev/null +++ b/arch/metag/include/asm/thread_info.h @@ -0,0 +1,155 @@ +/* thread_info.h: Meta low-level thread information + * + * Copyright (C) 2002 David Howells (dhowells@redhat.com) + * - Incorporating suggestions made by Linus Torvalds and Dave Miller + * + * Meta port by Imagination Technologies + */ + +#ifndef _ASM_THREAD_INFO_H +#define _ASM_THREAD_INFO_H + +#include <linux/compiler.h> +#include <asm/page.h> + +#ifndef __ASSEMBLY__ +#include <asm/processor.h> +#endif + +/* + * low level task data that entry.S needs immediate access to + * - this struct should fit entirely inside of one cache line + * - this struct shares the supervisor stack pages + * - if the contents of this structure are changed, the assembly constants must + * also be changed + */ +#ifndef __ASSEMBLY__ + +/* This must be 8 byte aligned so we can ensure stack alignment. */ +struct thread_info { + struct task_struct *task; /* main task structure */ + struct exec_domain *exec_domain; /* execution domain */ + unsigned long flags; /* low level flags */ + unsigned long status; /* thread-synchronous flags */ + u32 cpu; /* current CPU */ + int preempt_count; /* 0 => preemptable, <0 => BUG */ + + mm_segment_t addr_limit; /* thread address space */ + struct restart_block restart_block; + + u8 supervisor_stack[0]; +}; + +#else /* !__ASSEMBLY__ */ + +#include <generated/asm-offsets.h> + +#endif + +#define PREEMPT_ACTIVE 0x10000000 + +#ifdef CONFIG_4KSTACKS +#define THREAD_SHIFT 12 +#else +#define THREAD_SHIFT 13 +#endif + +#if THREAD_SHIFT >= PAGE_SHIFT +#define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) +#else +#define THREAD_SIZE_ORDER 0 +#endif + +#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) + +#define STACK_WARN (THREAD_SIZE/8) +/* + * macros/functions for gaining access to the thread information structure + */ +#ifndef __ASSEMBLY__ + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = INIT_PREEMPT_COUNT, \ + .addr_limit = KERNEL_DS, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + +/* how to get the current stack pointer from C */ +register unsigned long current_stack_pointer asm("A0StP") __used; + +/* how to get the thread information struct from C */ +static inline struct thread_info *current_thread_info(void) +{ + return (struct thread_info *)(current_stack_pointer & + ~(THREAD_SIZE - 1)); +} + +#define __HAVE_ARCH_KSTACK_END +static inline int kstack_end(void *addr) +{ + return addr == (void *) (((unsigned long) addr & ~(THREAD_SIZE - 1)) + + sizeof(struct thread_info)); +} + +#endif + +/* + * thread information flags + * - these are process state flags that various assembly files may need to + * access + * - pending work-to-be-done flags are in LSW + * - other flags in MSW + */ +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_SIGPENDING 1 /* signal pending */ +#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_SINGLESTEP 3 /* restore singlestep on return to user + mode */ +#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ +#define TIF_SECCOMP 5 /* secure computing */ +#define TIF_RESTORE_SIGMASK 6 /* restore signal mask in do_signal() */ +#define TIF_NOTIFY_RESUME 7 /* callback before returning to user */ +#define TIF_POLLING_NRFLAG 8 /* true if poll_idle() is polling + TIF_NEED_RESCHED */ +#define TIF_MEMDIE 9 /* is terminating due to OOM killer */ +#define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint instrumentation */ + + +#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) +#define _TIF_SIGPENDING (1<<TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) +#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP) +#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1<<TIF_SECCOMP) +#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) +#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) +#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) + +/* work to do in syscall trace */ +#define _TIF_WORK_SYSCALL_MASK (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | \ + _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) + +/* work to do on any return to u-space */ +#define _TIF_ALLWORK_MASK (_TIF_SYSCALL_TRACE | _TIF_SIGPENDING | \ + _TIF_NEED_RESCHED | _TIF_SYSCALL_AUDIT | \ + _TIF_SINGLESTEP | _TIF_RESTORE_SIGMASK | \ + _TIF_NOTIFY_RESUME) + +/* work to do on interrupt/exception return */ +#define _TIF_WORK_MASK (_TIF_ALLWORK_MASK & ~(_TIF_SYSCALL_TRACE | \ + _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP)) + +#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) + +#endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/metag/include/asm/tlb.h b/arch/metag/include/asm/tlb.h new file mode 100644 index 000000000000..048282f1cc1f --- /dev/null +++ b/arch/metag/include/asm/tlb.h @@ -0,0 +1,36 @@ +#ifndef __ASM_METAG_TLB_H +#define __ASM_METAG_TLB_H + +#include <asm/cacheflush.h> +#include <asm/page.h> + +/* Note, read http://lkml.org/lkml/2004/1/15/6 */ + +#ifdef CONFIG_METAG_META12 + +#define tlb_start_vma(tlb, vma) \ + do { \ + if (!tlb->fullmm) \ + flush_cache_range(vma, vma->vm_start, vma->vm_end); \ + } while (0) + +#define tlb_end_vma(tlb, vma) \ + do { \ + if (!tlb->fullmm) \ + flush_tlb_range(vma, vma->vm_start, vma->vm_end); \ + } while (0) + + +#else + +#define tlb_start_vma(tlb, vma) do { } while (0) +#define tlb_end_vma(tlb, vma) do { } while (0) + +#endif + +#define __tlb_remove_tlb_entry(tlb, pte, addr) do { } while (0) +#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) + +#include <asm-generic/tlb.h> + +#endif diff --git a/arch/metag/include/asm/tlbflush.h b/arch/metag/include/asm/tlbflush.h new file mode 100644 index 000000000000..566acf918a64 --- /dev/null +++ b/arch/metag/include/asm/tlbflush.h @@ -0,0 +1,77 @@ +#ifndef __ASM_METAG_TLBFLUSH_H +#define __ASM_METAG_TLBFLUSH_H + +#include <linux/io.h> +#include <linux/sched.h> +#include <asm/metag_mem.h> +#include <asm/pgalloc.h> + +/* + * TLB flushing: + * + * - flush_tlb() flushes the current mm struct TLBs + * - flush_tlb_all() flushes all processes TLBs + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(mm, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages + * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables + * + * FIXME: Meta 2 can flush single TLB entries. + * + */ + +#if defined(CONFIG_METAG_META21) && !defined(CONFIG_SMP) +static inline void __flush_tlb(void) +{ + /* flush TLB entries for just the current hardware thread */ + int thread = hard_processor_id(); + metag_out32(0, (LINSYSCFLUSH_TxMMCU_BASE + + LINSYSCFLUSH_TxMMCU_STRIDE * thread)); +} +#else +static inline void __flush_tlb(void) +{ + /* flush TLB entries for all hardware threads */ + metag_out32(0, LINSYSCFLUSH_MMCU); +} +#endif /* defined(CONFIG_METAG_META21) && !defined(CONFIG_SMP) */ + +#define flush_tlb() __flush_tlb() + +#define flush_tlb_all() __flush_tlb() + +#define local_flush_tlb_all() __flush_tlb() + +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + if (mm == current->active_mm) + __flush_tlb(); +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long addr) +{ + flush_tlb_mm(vma->vm_mm); +} + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + flush_tlb_mm(vma->vm_mm); +} + +static inline void flush_tlb_pgtables(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + flush_tlb_mm(mm); +} + +static inline void flush_tlb_kernel_range(unsigned long start, + unsigned long end) +{ + flush_tlb_all(); +} + +#endif /* __ASM_METAG_TLBFLUSH_H */ + diff --git a/arch/metag/include/asm/topology.h b/arch/metag/include/asm/topology.h new file mode 100644 index 000000000000..23f5118f58db --- /dev/null +++ b/arch/metag/include/asm/topology.h @@ -0,0 +1,53 @@ +#ifndef _ASM_METAG_TOPOLOGY_H +#define _ASM_METAG_TOPOLOGY_H + +#ifdef CONFIG_NUMA + +/* sched_domains SD_NODE_INIT for Meta machines */ +#define SD_NODE_INIT (struct sched_domain) { \ + .parent = NULL, \ + .child = NULL, \ + .groups = NULL, \ + .min_interval = 8, \ + .max_interval = 32, \ + .busy_factor = 32, \ + .imbalance_pct = 125, \ + .cache_nice_tries = 2, \ + .busy_idx = 3, \ + .idle_idx = 2, \ + .newidle_idx = 0, \ + .wake_idx = 0, \ + .forkexec_idx = 0, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_FORK \ + | SD_BALANCE_EXEC \ + | SD_BALANCE_NEWIDLE \ + | SD_SERIALIZE, \ + .last_balance = jiffies, \ + .balance_interval = 1, \ + .nr_balance_failed = 0, \ +} + +#define cpu_to_node(cpu) ((void)(cpu), 0) +#define parent_node(node) ((void)(node), 0) + +#define cpumask_of_node(node) ((void)node, cpu_online_mask) + +#define pcibus_to_node(bus) ((void)(bus), -1) +#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ + cpu_all_mask : \ + cpumask_of_node(pcibus_to_node(bus))) + +#endif + +#define mc_capable() (1) + +const struct cpumask *cpu_coregroup_mask(unsigned int cpu); + +extern cpumask_t cpu_core_map[NR_CPUS]; + +#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) + +#include <asm-generic/topology.h> + +#endif /* _ASM_METAG_TOPOLOGY_H */ diff --git a/arch/metag/include/asm/traps.h b/arch/metag/include/asm/traps.h new file mode 100644 index 000000000000..ac808740bd84 --- /dev/null +++ b/arch/metag/include/asm/traps.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2005,2008 Imagination Technologies + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#ifndef _METAG_TBIVECTORS_H +#define _METAG_TBIVECTORS_H + +#ifndef __ASSEMBLY__ + +#include <asm/tbx.h> + +typedef TBIRES (*kick_irq_func_t)(TBIRES, int, int, int, PTBI, int *); + +extern TBIRES kick_handler(TBIRES, int, int, int, PTBI); +struct kick_irq_handler { + struct list_head list; + kick_irq_func_t func; +}; + +extern void kick_register_func(struct kick_irq_handler *); +extern void kick_unregister_func(struct kick_irq_handler *); + +extern void head_end(TBIRES, unsigned long); +extern void restart_critical_section(TBIRES State); +extern TBIRES tail_end_sys(TBIRES, int, int *); +static inline TBIRES tail_end(TBIRES state) +{ + return tail_end_sys(state, -1, NULL); +} + +DECLARE_PER_CPU(PTBI, pTBI); +extern PTBI pTBI_get(unsigned int); + +extern int ret_from_fork(TBIRES arg); + +extern int do_page_fault(struct pt_regs *regs, unsigned long address, + unsigned int write_access, unsigned int trapno); + +extern TBIRES __TBIUnExpXXX(TBIRES State, int SigNum, int Triggers, int Inst, + PTBI pTBI); + +#endif + +#endif /* _METAG_TBIVECTORS_H */ diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h new file mode 100644 index 000000000000..0748b0a97986 --- /dev/null +++ b/arch/metag/include/asm/uaccess.h @@ -0,0 +1,241 @@ +#ifndef __METAG_UACCESS_H +#define __METAG_UACCESS_H + +/* + * User space memory access functions + */ +#include <linux/sched.h> + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * For historical reasons, these macros are grossly misnamed. + */ + +#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) + +#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) +#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) + +#define get_ds() (KERNEL_DS) +#define get_fs() (current_thread_info()->addr_limit) +#define set_fs(x) (current_thread_info()->addr_limit = (x)) + +#define segment_eq(a, b) ((a).seg == (b).seg) + +#define __kernel_ok (segment_eq(get_fs(), KERNEL_DS)) +/* + * Explicitly allow NULL pointers here. Parts of the kernel such + * as readv/writev use access_ok to validate pointers, but want + * to allow NULL pointers for various reasons. NULL pointers are + * safe to allow through because the first page is not mappable on + * Meta. + * + * We also wish to avoid letting user code access the system area + * and the kernel half of the address space. + */ +#define __user_bad(addr, size) (((addr) > 0 && (addr) < META_MEMORY_BASE) || \ + ((addr) > PAGE_OFFSET && \ + (addr) < LINCORE_BASE)) + +static inline int __access_ok(unsigned long addr, unsigned long size) +{ + return __kernel_ok || !__user_bad(addr, size); +} + +#define access_ok(type, addr, size) __access_ok((unsigned long)(addr), \ + (unsigned long)(size)) + +static inline int verify_area(int type, const void *addr, unsigned long size) +{ + return access_ok(type, addr, size) ? 0 : -EFAULT; +} + +/* + * The exception table consists of pairs of addresses: the first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ +struct exception_table_entry { + unsigned long insn, fixup; +}; + +extern int fixup_exception(struct pt_regs *regs); + +/* + * These are the main single-value transfer routines. They automatically + * use the right size if we just have the right pointer type. + */ + +#define put_user(x, ptr) \ + __put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) +#define __put_user(x, ptr) \ + __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) + +extern void __put_user_bad(void); + +#define __put_user_nocheck(x, ptr, size) \ +({ \ + long __pu_err; \ + __put_user_size((x), (ptr), (size), __pu_err); \ + __pu_err; \ +}) + +#define __put_user_check(x, ptr, size) \ +({ \ + long __pu_err = -EFAULT; \ + __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + if (access_ok(VERIFY_WRITE, __pu_addr, size)) \ + __put_user_size((x), __pu_addr, (size), __pu_err); \ + __pu_err; \ +}) + +extern long __put_user_asm_b(unsigned int x, void __user *addr); +extern long __put_user_asm_w(unsigned int x, void __user *addr); +extern long __put_user_asm_d(unsigned int x, void __user *addr); +extern long __put_user_asm_l(unsigned long long x, void __user *addr); + +#define __put_user_size(x, ptr, size, retval) \ +do { \ + retval = 0; \ + switch (size) { \ + case 1: \ + retval = __put_user_asm_b((unsigned int)x, ptr); break; \ + case 2: \ + retval = __put_user_asm_w((unsigned int)x, ptr); break; \ + case 4: \ + retval = __put_user_asm_d((unsigned int)x, ptr); break; \ + case 8: \ + retval = __put_user_asm_l((unsigned long long)x, ptr); break; \ + default: \ + __put_user_bad(); \ + } \ +} while (0) + +#define get_user(x, ptr) \ + __get_user_check((x), (ptr), sizeof(*(ptr))) +#define __get_user(x, ptr) \ + __get_user_nocheck((x), (ptr), sizeof(*(ptr))) + +extern long __get_user_bad(void); + +#define __get_user_nocheck(x, ptr, size) \ +({ \ + long __gu_err, __gu_val; \ + __get_user_size(__gu_val, (ptr), (size), __gu_err); \ + (x) = (__typeof__(*(ptr)))__gu_val; \ + __gu_err; \ +}) + +#define __get_user_check(x, ptr, size) \ +({ \ + long __gu_err = -EFAULT, __gu_val = 0; \ + const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + if (access_ok(VERIFY_READ, __gu_addr, size)) \ + __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + (x) = (__typeof__(*(ptr)))__gu_val; \ + __gu_err; \ +}) + +extern unsigned char __get_user_asm_b(const void __user *addr, long *err); +extern unsigned short __get_user_asm_w(const void __user *addr, long *err); +extern unsigned int __get_user_asm_d(const void __user *addr, long *err); + +#define __get_user_size(x, ptr, size, retval) \ +do { \ + retval = 0; \ + switch (size) { \ + case 1: \ + x = __get_user_asm_b(ptr, &retval); break; \ + case 2: \ + x = __get_user_asm_w(ptr, &retval); break; \ + case 4: \ + x = __get_user_asm_d(ptr, &retval); break; \ + default: \ + (x) = __get_user_bad(); \ + } \ +} while (0) + +/* + * Copy a null terminated string from userspace. + * + * Must return: + * -EFAULT for an exception + * count if we hit the buffer limit + * bytes copied if we hit a null byte + * (without the null byte) + */ + +extern long __must_check __strncpy_from_user(char *dst, const char __user *src, + long count); + +#define strncpy_from_user(dst, src, count) __strncpy_from_user(dst, src, count) + +/* + * Return the size of a string (including the ending 0) + * + * Return 0 on exception, a value greater than N if too long + */ +extern long __must_check strnlen_user(const char __user *src, long count); + +#define strlen_user(str) strnlen_user(str, 32767) + +extern unsigned long __must_check __copy_user_zeroing(void *to, + const void __user *from, + unsigned long n); + +static inline unsigned long +copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if (access_ok(VERIFY_READ, from, n)) + return __copy_user_zeroing(to, from, n); + return n; +} + +#define __copy_from_user(to, from, n) __copy_user_zeroing(to, from, n) +#define __copy_from_user_inatomic __copy_from_user + +extern unsigned long __must_check __copy_user(void __user *to, + const void *from, + unsigned long n); + +static inline unsigned long copy_to_user(void __user *to, const void *from, + unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + return __copy_user(to, from, n); + return n; +} + +#define __copy_to_user(to, from, n) __copy_user(to, from, n) +#define __copy_to_user_inatomic __copy_to_user + +/* + * Zero Userspace + */ + +extern unsigned long __must_check __do_clear_user(void __user *to, + unsigned long n); + +static inline unsigned long clear_user(void __user *to, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + return __do_clear_user(to, n); + return n; +} + +#define __clear_user(to, n) __do_clear_user(to, n) + +#endif /* _METAG_UACCESS_H */ diff --git a/arch/metag/include/asm/unistd.h b/arch/metag/include/asm/unistd.h new file mode 100644 index 000000000000..32955a18fb32 --- /dev/null +++ b/arch/metag/include/asm/unistd.h @@ -0,0 +1,12 @@ +/* + * Copyright (C) 2012 Imagination Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <uapi/asm/unistd.h> + +#define __ARCH_WANT_SYS_CLONE diff --git a/arch/metag/include/asm/user_gateway.h b/arch/metag/include/asm/user_gateway.h new file mode 100644 index 000000000000..e404c09e3b74 --- /dev/null +++ b/arch/metag/include/asm/user_gateway.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2010 Imagination Technologies + */ + +#ifndef __ASM_METAG_USER_GATEWAY_H +#define __ASM_METAG_USER_GATEWAY_H + +#include <asm/page.h> + +/* Page of kernel code accessible to userspace. */ +#define USER_GATEWAY_PAGE 0x6ffff000 +/* Offset of TLS pointer array in gateway page. */ +#define USER_GATEWAY_TLS 0x100 + +#ifndef __ASSEMBLY__ + +extern char __user_gateway_start; +extern char __user_gateway_end; + +/* Kernel mapping of the gateway page. */ +extern void *gateway_page; + +static inline void set_gateway_tls(void __user *tls_ptr) +{ + void **gateway_tls = (void **)(gateway_page + USER_GATEWAY_TLS + + hard_processor_id() * 4); + + *gateway_tls = (__force void *)tls_ptr; +#ifdef CONFIG_METAG_META12 + /* Avoid cache aliases on virtually tagged cache. */ + __builtin_dcache_flush((void *)USER_GATEWAY_PAGE + USER_GATEWAY_TLS + + hard_processor_id() * sizeof(void *)); +#endif +} + +extern int __kuser_get_tls(void); +extern char *__kuser_get_tls_end[]; + +extern int __kuser_cmpxchg(int, int, unsigned long *); +extern char *__kuser_cmpxchg_end[]; + +#endif + +#endif diff --git a/arch/metag/include/uapi/asm/Kbuild b/arch/metag/include/uapi/asm/Kbuild new file mode 100644 index 000000000000..876c71f866de --- /dev/null +++ b/arch/metag/include/uapi/asm/Kbuild @@ -0,0 +1,13 @@ +# UAPI Header export list +include include/uapi/asm-generic/Kbuild.asm + +header-y += byteorder.h +header-y += ptrace.h +header-y += resource.h +header-y += sigcontext.h +header-y += siginfo.h +header-y += swab.h +header-y += unistd.h + +generic-y += mman.h +generic-y += setup.h diff --git a/arch/metag/include/uapi/asm/byteorder.h b/arch/metag/include/uapi/asm/byteorder.h new file mode 100644 index 000000000000..9558416d578b --- /dev/null +++ b/arch/metag/include/uapi/asm/byteorder.h @@ -0,0 +1 @@ +#include <linux/byteorder/little_endian.h> diff --git a/arch/metag/include/uapi/asm/ptrace.h b/arch/metag/include/uapi/asm/ptrace.h new file mode 100644 index 000000000000..45d97809d33e --- /dev/null +++ b/arch/metag/include/uapi/asm/ptrace.h @@ -0,0 +1,113 @@ +#ifndef _UAPI_METAG_PTRACE_H +#define _UAPI_METAG_PTRACE_H + +#ifndef __ASSEMBLY__ + +/* + * These are the layouts of the regsets returned by the GETREGSET ptrace call + */ + +/* user_gp_regs::status */ + +/* CBMarker bit (indicates catch state / catch replay) */ +#define USER_GP_REGS_STATUS_CATCH_BIT (1 << 22) +#define USER_GP_REGS_STATUS_CATCH_S 22 +/* LSM_STEP field (load/store multiple step) */ +#define USER_GP_REGS_STATUS_LSM_STEP_BITS (0x7 << 8) +#define USER_GP_REGS_STATUS_LSM_STEP_S 8 +/* SCC bit (indicates split 16x16 condition flags) */ +#define USER_GP_REGS_STATUS_SCC_BIT (1 << 4) +#define USER_GP_REGS_STATUS_SCC_S 4 + +/* normal condition flags */ +/* CF_Z bit (Zero flag) */ +#define USER_GP_REGS_STATUS_CF_Z_BIT (1 << 3) +#define USER_GP_REGS_STATUS_CF_Z_S 3 +/* CF_N bit (Negative flag) */ +#define USER_GP_REGS_STATUS_CF_N_BIT (1 << 2) +#define USER_GP_REGS_STATUS_CF_N_S 2 +/* CF_V bit (oVerflow flag) */ +#define USER_GP_REGS_STATUS_CF_V_BIT (1 << 1) +#define USER_GP_REGS_STATUS_CF_V_S 1 +/* CF_C bit (Carry flag) */ +#define USER_GP_REGS_STATUS_CF_C_BIT (1 << 0) +#define USER_GP_REGS_STATUS_CF_C_S 0 + +/* split 16x16 condition flags */ +/* SCF_LZ bit (Low Zero flag) */ +#define USER_GP_REGS_STATUS_SCF_LZ_BIT (1 << 3) +#define USER_GP_REGS_STATUS_SCF_LZ_S 3 +/* SCF_HZ bit (High Zero flag) */ +#define USER_GP_REGS_STATUS_SCF_HZ_BIT (1 << 2) +#define USER_GP_REGS_STATUS_SCF_HZ_S 2 +/* SCF_HC bit (High Carry flag) */ +#define USER_GP_REGS_STATUS_SCF_HC_BIT (1 << 1) +#define USER_GP_REGS_STATUS_SCF_HC_S 1 +/* SCF_LC bit (Low Carry flag) */ +#define USER_GP_REGS_STATUS_SCF_LC_BIT (1 << 0) +#define USER_GP_REGS_STATUS_SCF_LC_S 0 + +/** + * struct user_gp_regs - User general purpose registers + * @dx: GP data unit regs (dx[reg][unit] = D{unit:0-1}.{reg:0-7}) + * @ax: GP address unit regs (ax[reg][unit] = A{unit:0-1}.{reg:0-3}) + * @pc: PC register + * @status: TXSTATUS register (condition flags, LSM_STEP etc) + * @rpt: TXRPT registers (branch repeat counter) + * @bpobits: TXBPOBITS register ("branch prediction other" bits) + * @mode: TXMODE register + * @_pad1: Reserved padding to make sizeof obviously 64bit aligned + * + * This is the user-visible general purpose register state structure. + * + * It can be accessed through PTRACE_GETREGSET with NT_PRSTATUS. + * + * It is also used in the signal context. + */ +struct user_gp_regs { + unsigned long dx[8][2]; + unsigned long ax[4][2]; + unsigned long pc; + unsigned long status; + unsigned long rpt; + unsigned long bpobits; + unsigned long mode; + unsigned long _pad1; +}; + +/** + * struct user_cb_regs - User catch buffer registers + * @flags: TXCATCH0 register (fault flags) + * @addr: TXCATCH1 register (fault address) + * @data: TXCATCH2 and TXCATCH3 registers (low and high data word) + * + * This is the user-visible catch buffer register state structure containing + * information about a failed memory access, and allowing the access to be + * modified and replayed. + * + * It can be accessed through PTRACE_GETREGSET with NT_METAG_CBUF. + */ +struct user_cb_regs { + unsigned long flags; + unsigned long addr; + unsigned long long data; +}; + +/** + * struct user_rp_state - User read pipeline state + * @entries: Read pipeline entries + * @mask: Mask of valid pipeline entries (RPMask from TXDIVTIME register) + * + * This is the user-visible read pipeline state structure containing the entries + * currently in the read pipeline and the mask of valid entries. + * + * It can be accessed through PTRACE_GETREGSET with NT_METAG_RPIPE. + */ +struct user_rp_state { + unsigned long long entries[6]; + unsigned long mask; +}; + +#endif /* __ASSEMBLY__ */ + +#endif /* _UAPI_METAG_PTRACE_H */ diff --git a/arch/metag/include/uapi/asm/resource.h b/arch/metag/include/uapi/asm/resource.h new file mode 100644 index 000000000000..526d23cc3054 --- /dev/null +++ b/arch/metag/include/uapi/asm/resource.h @@ -0,0 +1,7 @@ +#ifndef _UAPI_METAG_RESOURCE_H +#define _UAPI_METAG_RESOURCE_H + +#define _STK_LIM_MAX (1 << 28) +#include <asm-generic/resource.h> + +#endif /* _UAPI_METAG_RESOURCE_H */ diff --git a/arch/metag/include/uapi/asm/sigcontext.h b/arch/metag/include/uapi/asm/sigcontext.h new file mode 100644 index 000000000000..ef79a910c1c4 --- /dev/null +++ b/arch/metag/include/uapi/asm/sigcontext.h @@ -0,0 +1,31 @@ +#ifndef _ASM_METAG_SIGCONTEXT_H +#define _ASM_METAG_SIGCONTEXT_H + +#include <asm/ptrace.h> + +/* + * In a sigcontext structure we need to store the active state of the + * user process so that it does not get trashed when we call the signal + * handler. That not really the same as a user context that we are + * going to store on syscall etc. + */ +struct sigcontext { + struct user_gp_regs regs; /* needs to be first */ + + /* + * Catch registers describing a memory fault. + * If USER_GP_REGS_STATUS_CATCH_BIT is set in regs.status then catch + * buffers have been saved and will be replayed on sigreturn. + * Clear that bit to discard the catch state instead of replaying it. + */ + struct user_cb_regs cb; + + /* + * Read pipeline state. This will get restored on sigreturn. + */ + struct user_rp_state rp; + + unsigned long oldmask; +}; + +#endif diff --git a/arch/metag/include/uapi/asm/siginfo.h b/arch/metag/include/uapi/asm/siginfo.h new file mode 100644 index 000000000000..b2e0c8b62aef --- /dev/null +++ b/arch/metag/include/uapi/asm/siginfo.h @@ -0,0 +1,8 @@ +#ifndef _METAG_SIGINFO_H +#define _METAG_SIGINFO_H + +#define __ARCH_SI_TRAPNO + +#include <asm-generic/siginfo.h> + +#endif diff --git a/arch/metag/include/uapi/asm/swab.h b/arch/metag/include/uapi/asm/swab.h new file mode 100644 index 000000000000..1076b3a6387a --- /dev/null +++ b/arch/metag/include/uapi/asm/swab.h @@ -0,0 +1,26 @@ +#ifndef __ASM_METAG_SWAB_H +#define __ASM_METAG_SWAB_H + +#include <linux/compiler.h> +#include <linux/types.h> +#include <asm-generic/swab.h> + +static inline __attribute_const__ __u16 __arch_swab16(__u16 x) +{ + return __builtin_metag_bswaps(x); +} +#define __arch_swab16 __arch_swab16 + +static inline __attribute_const__ __u32 __arch_swab32(__u32 x) +{ + return __builtin_metag_bswap(x); +} +#define __arch_swab32 __arch_swab32 + +static inline __attribute_const__ __u64 __arch_swab64(__u64 x) +{ + return __builtin_metag_bswapll(x); +} +#define __arch_swab64 __arch_swab64 + +#endif /* __ASM_METAG_SWAB_H */ diff --git a/arch/metag/include/uapi/asm/unistd.h b/arch/metag/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..b80b8e899d22 --- /dev/null +++ b/arch/metag/include/uapi/asm/unistd.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2012 Imagination Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +/* Use the standard ABI for syscalls. */ +#include <asm-generic/unistd.h> + +/* metag-specific syscalls. */ +#define __NR_metag_setglobalbit (__NR_arch_specific_syscall + 1) +__SYSCALL(__NR_metag_setglobalbit, sys_metag_setglobalbit) +#define __NR_metag_set_fpu_flags (__NR_arch_specific_syscall + 2) +__SYSCALL(__NR_metag_set_fpu_flags, sys_metag_set_fpu_flags) +#define __NR_metag_set_tls (__NR_arch_specific_syscall + 3) +__SYSCALL(__NR_metag_set_tls, sys_metag_set_tls) +#define __NR_metag_get_tls (__NR_arch_specific_syscall + 4) +__SYSCALL(__NR_metag_get_tls, sys_metag_get_tls) diff --git a/arch/metag/kernel/.gitignore b/arch/metag/kernel/.gitignore new file mode 100644 index 000000000000..c5f676c3c224 --- /dev/null +++ b/arch/metag/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds diff --git a/arch/metag/kernel/Makefile b/arch/metag/kernel/Makefile new file mode 100644 index 000000000000..d7675f4a5df8 --- /dev/null +++ b/arch/metag/kernel/Makefile @@ -0,0 +1,39 @@ +# +# Makefile for the Linux/Meta kernel. +# + +extra-y += head.o +extra-y += vmlinux.lds + +obj-y += cachepart.o +obj-y += clock.o +obj-y += core_reg.o +obj-y += devtree.o +obj-y += dma.o +obj-y += irq.o +obj-y += kick.o +obj-y += machines.o +obj-y += process.o +obj-y += ptrace.o +obj-y += setup.o +obj-y += signal.o +obj-y += stacktrace.o +obj-y += sys_metag.o +obj-y += tbiunexp.o +obj-y += time.o +obj-y += topology.o +obj-y += traps.o +obj-y += user_gateway.o + +obj-$(CONFIG_PERF_EVENTS) += perf/ + +obj-$(CONFIG_METAG_COREMEM) += coremem.o +obj-$(CONFIG_METAG_DA) += da.o +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_TRACER) += ftrace_stub.o +obj-$(CONFIG_MODULES) += metag_ksyms.o +obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_METAG_SUSPEND_MEM) += suspend.o +obj-$(CONFIG_METAG_USER_TCM) += tcm.o diff --git a/arch/metag/kernel/asm-offsets.c b/arch/metag/kernel/asm-offsets.c new file mode 100644 index 000000000000..bfc9205f9647 --- /dev/null +++ b/arch/metag/kernel/asm-offsets.c @@ -0,0 +1,14 @@ +/* + * This program is used to generate definitions needed by + * assembly language modules. + * + */ + +#include <linux/kbuild.h> +#include <linux/thread_info.h> + +int main(void) +{ + DEFINE(THREAD_INFO_SIZE, sizeof(struct thread_info)); + return 0; +} diff --git a/arch/metag/kernel/cachepart.c b/arch/metag/kernel/cachepart.c new file mode 100644 index 000000000000..3a589dfb966b --- /dev/null +++ b/arch/metag/kernel/cachepart.c @@ -0,0 +1,124 @@ +/* + * Meta cache partition manipulation. + * + * Copyright 2010 Imagination Technologies Ltd. + */ + +#include <linux/kernel.h> +#include <linux/io.h> +#include <linux/errno.h> +#include <asm/processor.h> +#include <asm/cachepart.h> +#include <asm/metag_isa.h> +#include <asm/metag_mem.h> + +#define SYSC_DCPART(n) (SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n)) +#define SYSC_ICPART(n) (SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n)) + +#define CACHE_ASSOCIATIVITY 4 /* 4 way set-assosiative */ +#define ICACHE 0 +#define DCACHE 1 + +/* The CORE_CONFIG2 register is not available on Meta 1 */ +#ifdef CONFIG_METAG_META21 +unsigned int get_dcache_size(void) +{ + unsigned int config2 = metag_in32(METAC_CORE_CONFIG2); + return 0x1000 << ((config2 & METAC_CORECFG2_DCSZ_BITS) + >> METAC_CORECFG2_DCSZ_S); +} + +unsigned int get_icache_size(void) +{ + unsigned int config2 = metag_in32(METAC_CORE_CONFIG2); + return 0x1000 << ((config2 & METAC_CORE_C2ICSZ_BITS) + >> METAC_CORE_C2ICSZ_S); +} + +unsigned int get_global_dcache_size(void) +{ + unsigned int cpart = metag_in32(SYSC_DCPART(hard_processor_id())); + unsigned int temp = cpart & SYSC_xCPARTG_AND_BITS; + return (get_dcache_size() * ((temp >> SYSC_xCPARTG_AND_S) + 1)) >> 4; +} + +unsigned int get_global_icache_size(void) +{ + unsigned int cpart = metag_in32(SYSC_ICPART(hard_processor_id())); + unsigned int temp = cpart & SYSC_xCPARTG_AND_BITS; + return (get_icache_size() * ((temp >> SYSC_xCPARTG_AND_S) + 1)) >> 4; +} + +static unsigned int get_thread_cache_size(unsigned int cache, int thread_id) +{ + unsigned int cache_size; + unsigned int t_cache_part; + unsigned int isEnabled; + unsigned int offset = 0; + isEnabled = (cache == DCACHE ? metag_in32(MMCU_DCACHE_CTRL_ADDR) & 0x1 : + metag_in32(MMCU_ICACHE_CTRL_ADDR) & 0x1); + if (!isEnabled) + return 0; +#if PAGE_OFFSET >= LINGLOBAL_BASE + /* Checking for global cache */ + cache_size = (cache == DCACHE ? get_global_dache_size() : + get_global_icache_size()); + offset = 8; +#else + cache_size = (cache == DCACHE ? get_dcache_size() : + get_icache_size()); +#endif + t_cache_part = (cache == DCACHE ? + (metag_in32(SYSC_DCPART(thread_id)) >> offset) & 0xF : + (metag_in32(SYSC_ICPART(thread_id)) >> offset) & 0xF); + switch (t_cache_part) { + case 0xF: + return cache_size; + case 0x7: + return cache_size / 2; + case 0x3: + return cache_size / 4; + case 0x1: + return cache_size / 8; + case 0: + return cache_size / 16; + } + return -1; +} + +void check_for_cache_aliasing(int thread_id) +{ + unsigned int thread_cache_size; + unsigned int cache_type; + for (cache_type = ICACHE; cache_type <= DCACHE; cache_type++) { + thread_cache_size = + get_thread_cache_size(cache_type, thread_id); + if (thread_cache_size < 0) + pr_emerg("Can't read %s cache size", \ + cache_type ? "DCACHE" : "ICACHE"); + else if (thread_cache_size == 0) + /* Cache is off. No need to check for aliasing */ + continue; + if (thread_cache_size / CACHE_ASSOCIATIVITY > PAGE_SIZE) { + pr_emerg("Cache aliasing detected in %s on Thread %d", + cache_type ? "DCACHE" : "ICACHE", thread_id); + pr_warn("Total %s size: %u bytes", + cache_type ? "DCACHE" : "ICACHE ", + cache_type ? get_dcache_size() + : get_icache_size()); + pr_warn("Thread %s size: %d bytes", + cache_type ? "CACHE" : "ICACHE", + thread_cache_size); + pr_warn("Page Size: %lu bytes", PAGE_SIZE); + } + } +} + +#else + +void check_for_cache_aliasing(int thread_id) +{ + return; +} + +#endif diff --git a/arch/metag/kernel/clock.c b/arch/metag/kernel/clock.c new file mode 100644 index 000000000000..defc84056f18 --- /dev/null +++ b/arch/metag/kernel/clock.c @@ -0,0 +1,53 @@ +/* + * arch/metag/kernel/clock.c + * + * Copyright (C) 2012 Imagination Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/delay.h> +#include <linux/io.h> + +#include <asm/param.h> +#include <asm/clock.h> + +struct meta_clock_desc _meta_clock; + +/* Default machine get_core_freq callback. */ +static unsigned long get_core_freq_default(void) +{ +#ifdef CONFIG_METAG_META21 + /* + * Meta 2 cores divide down the core clock for the Meta timers, so we + * can estimate the core clock from the divider. + */ + return (metag_in32(EXPAND_TIMER_DIV) + 1) * 1000000; +#else + /* + * On Meta 1 we don't know the core clock, but assuming the Meta timer + * is correct it can be estimated based on loops_per_jiffy. + */ + return (loops_per_jiffy * HZ * 5) >> 1; +#endif +} + +/** + * setup_meta_clocks() - Set up the Meta clock. + * @desc: Clock descriptor usually provided by machine description + * + * Ensures all callbacks are valid. + */ +void __init setup_meta_clocks(struct meta_clock_desc *desc) +{ + /* copy callbacks */ + if (desc) + _meta_clock = *desc; + + /* set fallback functions */ + if (!_meta_clock.get_core_freq) + _meta_clock.get_core_freq = get_core_freq_default; +} + diff --git a/arch/metag/kernel/core_reg.c b/arch/metag/kernel/core_reg.c new file mode 100644 index 000000000000..671cce8c34f2 --- /dev/null +++ b/arch/metag/kernel/core_reg.c @@ -0,0 +1,117 @@ +/* + * Support for reading and writing Meta core internal registers. + * + * Copyright (C) 2011 Imagination Technologies Ltd. + * + */ + +#include <linux/delay.h> +#include <linux/export.h> + +#include <asm/core_reg.h> +#include <asm/global_lock.h> +#include <asm/hwthread.h> +#include <asm/io.h> +#include <asm/metag_mem.h> +#include <asm/metag_regs.h> + +#define UNIT_BIT_MASK TXUXXRXRQ_UXX_BITS +#define REG_BIT_MASK TXUXXRXRQ_RX_BITS +#define THREAD_BIT_MASK TXUXXRXRQ_TX_BITS + +#define UNIT_SHIFTS TXUXXRXRQ_UXX_S +#define REG_SHIFTS TXUXXRXRQ_RX_S +#define THREAD_SHIFTS TXUXXRXRQ_TX_S + +#define UNIT_VAL(x) (((x) << UNIT_SHIFTS) & UNIT_BIT_MASK) +#define REG_VAL(x) (((x) << REG_SHIFTS) & REG_BIT_MASK) +#define THREAD_VAL(x) (((x) << THREAD_SHIFTS) & THREAD_BIT_MASK) + +/* + * core_reg_write() - modify the content of a register in a core unit. + * @unit: The unit to be modified. + * @reg: Register number within the unit. + * @thread: The thread we want to access. + * @val: The new value to write. + * + * Check asm/metag_regs.h for a list/defines of supported units (ie: TXUPC_ID, + * TXUTR_ID, etc), and regnums within the units (ie: TXMASKI_REGNUM, + * TXPOLLI_REGNUM, etc). + */ +void core_reg_write(int unit, int reg, int thread, unsigned int val) +{ + unsigned long flags; + + /* TXUCT_ID has its own memory mapped registers */ + if (unit == TXUCT_ID) { + void __iomem *cu_reg = __CU_addr(thread, reg); + metag_out32(val, cu_reg); + return; + } + + __global_lock2(flags); + + /* wait for ready */ + while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT)) + udelay(10); + + /* set the value to write */ + metag_out32(val, TXUXXRXDT); + + /* set the register to write */ + val = UNIT_VAL(unit) | REG_VAL(reg) | THREAD_VAL(thread); + metag_out32(val, TXUXXRXRQ); + + /* wait for finish */ + while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT)) + udelay(10); + + __global_unlock2(flags); +} +EXPORT_SYMBOL(core_reg_write); + +/* + * core_reg_read() - read the content of a register in a core unit. + * @unit: The unit to be modified. + * @reg: Register number within the unit. + * @thread: The thread we want to access. + * + * Check asm/metag_regs.h for a list/defines of supported units (ie: TXUPC_ID, + * TXUTR_ID, etc), and regnums within the units (ie: TXMASKI_REGNUM, + * TXPOLLI_REGNUM, etc). + */ +unsigned int core_reg_read(int unit, int reg, int thread) +{ + unsigned long flags; + unsigned int val; + + /* TXUCT_ID has its own memory mapped registers */ + if (unit == TXUCT_ID) { + void __iomem *cu_reg = __CU_addr(thread, reg); + val = metag_in32(cu_reg); + return val; + } + + __global_lock2(flags); + + /* wait for ready */ + while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT)) + udelay(10); + + /* set the register to read */ + val = (UNIT_VAL(unit) | REG_VAL(reg) | THREAD_VAL(thread) | + TXUXXRXRQ_RDnWR_BIT); + metag_out32(val, TXUXXRXRQ); + + /* wait for finish */ + while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT)) + udelay(10); + + /* read the register value */ + val = metag_in32(TXUXXRXDT); + + __global_unlock2(flags); + + return val; +} +EXPORT_SYMBOL(core_reg_read); diff --git a/arch/metag/kernel/da.c b/arch/metag/kernel/da.c new file mode 100644 index 000000000000..52aabb658fde --- /dev/null +++ b/arch/metag/kernel/da.c @@ -0,0 +1,23 @@ +/* + * Meta DA JTAG debugger control. + * + * Copyright 2012 Imagination Technologies Ltd. + */ + + +#include <linux/io.h> +#include <linux/kernel.h> +#include <asm/da.h> +#include <asm/metag_mem.h> + +bool _metag_da_present; + +int __init metag_da_probe(void) +{ + _metag_da_present = (metag_in32(T0VECINT_BHALT) == 1); + if (_metag_da_present) + pr_info("DA present\n"); + else + pr_info("DA not present\n"); + return 0; +} diff --git a/arch/metag/kernel/devtree.c b/arch/metag/kernel/devtree.c new file mode 100644 index 000000000000..7cd02529636e --- /dev/null +++ b/arch/metag/kernel/devtree.c @@ -0,0 +1,114 @@ +/* + * linux/arch/metag/kernel/devtree.c + * + * Copyright (C) 2012 Imagination Technologies Ltd. + * + * Based on ARM version: + * Copyright (C) 2009 Canonical Ltd. <jeremy.kerr@canonical.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/export.h> +#include <linux/types.h> +#include <linux/bootmem.h> +#include <linux/memblock.h> +#include <linux/of.h> +#include <linux/of_fdt.h> + +#include <asm/setup.h> +#include <asm/page.h> +#include <asm/mach/arch.h> + +void __init early_init_dt_add_memory_arch(u64 base, u64 size) +{ + pr_err("%s(%llx, %llx)\n", + __func__, base, size); +} + +void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) +{ + return alloc_bootmem_align(size, align); +} + +/** + * setup_machine_fdt - Machine setup when an dtb was passed to the kernel + * @dt: virtual address pointer to dt blob + * + * If a dtb was passed to the kernel, then use it to choose the correct + * machine_desc and to setup the system. + */ +struct machine_desc * __init setup_machine_fdt(void *dt) +{ + struct boot_param_header *devtree = dt; + struct machine_desc *mdesc, *mdesc_best = NULL; + unsigned int score, mdesc_score = ~1; + unsigned long dt_root; + const char *model; + + /* check device tree validity */ + if (be32_to_cpu(devtree->magic) != OF_DT_HEADER) + return NULL; + + /* Search the mdescs for the 'best' compatible value match */ + initial_boot_params = devtree; + dt_root = of_get_flat_dt_root(); + + for_each_machine_desc(mdesc) { + score = of_flat_dt_match(dt_root, mdesc->dt_compat); + if (score > 0 && score < mdesc_score) { + mdesc_best = mdesc; + mdesc_score = score; + } + } + if (!mdesc_best) { + const char *prop; + long size; + + pr_err("\nError: unrecognized/unsupported device tree compatible list:\n[ "); + + prop = of_get_flat_dt_prop(dt_root, "compatible", &size); + if (prop) { + while (size > 0) { + printk("'%s' ", prop); + size -= strlen(prop) + 1; + prop += strlen(prop) + 1; + } + } + printk("]\n\n"); + + dump_machine_table(); /* does not return */ + } + + model = of_get_flat_dt_prop(dt_root, "model", NULL); + if (!model) + model = of_get_flat_dt_prop(dt_root, "compatible", NULL); + if (!model) + model = "<unknown>"; + pr_info("Machine: %s, model: %s\n", mdesc_best->name, model); + + /* Retrieve various information from the /chosen node */ + of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line); + + return mdesc_best; +} + +/** + * copy_fdt - Copy device tree into non-init memory. + * + * We must copy the flattened device tree blob into non-init memory because the + * unflattened device tree will reference the strings in it directly. + */ +void __init copy_fdt(void) +{ + void *alloc = early_init_dt_alloc_memory_arch( + be32_to_cpu(initial_boot_params->totalsize), 0x40); + if (alloc) { + memcpy(alloc, initial_boot_params, + be32_to_cpu(initial_boot_params->totalsize)); + initial_boot_params = alloc; + } +} diff --git a/arch/metag/kernel/dma.c b/arch/metag/kernel/dma.c new file mode 100644 index 000000000000..8c00dedadc54 --- /dev/null +++ b/arch/metag/kernel/dma.c @@ -0,0 +1,507 @@ +/* + * Meta version derived from arch/powerpc/lib/dma-noncoherent.c + * Copyright (C) 2008 Imagination Technologies Ltd. + * + * PowerPC version derived from arch/arm/mm/consistent.c + * Copyright (C) 2001 Dan Malek (dmalek@jlc.net) + * + * Copyright (C) 2000 Russell King + * + * Consistent memory allocators. Used for DMA devices that want to + * share uncached memory with the processor core. The function return + * is the virtual address and 'dma_handle' is the physical address. + * Mostly stolen from the ARM port, with some changes for PowerPC. + * -- Dan + * + * Reorganized to get rid of the arch-specific consistent_* functions + * and provide non-coherent implementations for the DMA API. -Matt + * + * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent() + * implementation. This is pulled straight from ARM and barely + * modified. -Matt + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/export.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/highmem.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> + +#include <asm/tlbflush.h> +#include <asm/mmu.h> + +#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_START) \ + >> PAGE_SHIFT) + +static u64 get_coherent_dma_mask(struct device *dev) +{ + u64 mask = ~0ULL; + + if (dev) { + mask = dev->coherent_dma_mask; + + /* + * Sanity check the DMA mask - it must be non-zero, and + * must be able to be satisfied by a DMA allocation. + */ + if (mask == 0) { + dev_warn(dev, "coherent DMA mask is unset\n"); + return 0; + } + } + + return mask; +} +/* + * This is the page table (2MB) covering uncached, DMA consistent allocations + */ +static pte_t *consistent_pte; +static DEFINE_SPINLOCK(consistent_lock); + +/* + * VM region handling support. + * + * This should become something generic, handling VM region allocations for + * vmalloc and similar (ioremap, module space, etc). + * + * I envisage vmalloc()'s supporting vm_struct becoming: + * + * struct vm_struct { + * struct metag_vm_region region; + * unsigned long flags; + * struct page **pages; + * unsigned int nr_pages; + * unsigned long phys_addr; + * }; + * + * get_vm_area() would then call metag_vm_region_alloc with an appropriate + * struct metag_vm_region head (eg): + * + * struct metag_vm_region vmalloc_head = { + * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), + * .vm_start = VMALLOC_START, + * .vm_end = VMALLOC_END, + * }; + * + * However, vmalloc_head.vm_start is variable (typically, it is dependent on + * the amount of RAM found at boot time.) I would imagine that get_vm_area() + * would have to initialise this each time prior to calling + * metag_vm_region_alloc(). + */ +struct metag_vm_region { + struct list_head vm_list; + unsigned long vm_start; + unsigned long vm_end; + struct page *vm_pages; + int vm_active; +}; + +static struct metag_vm_region consistent_head = { + .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), + .vm_start = CONSISTENT_START, + .vm_end = CONSISTENT_END, +}; + +static struct metag_vm_region *metag_vm_region_alloc(struct metag_vm_region + *head, size_t size, + gfp_t gfp) +{ + unsigned long addr = head->vm_start, end = head->vm_end - size; + unsigned long flags; + struct metag_vm_region *c, *new; + + new = kmalloc(sizeof(struct metag_vm_region), gfp); + if (!new) + goto out; + + spin_lock_irqsave(&consistent_lock, flags); + + list_for_each_entry(c, &head->vm_list, vm_list) { + if ((addr + size) < addr) + goto nospc; + if ((addr + size) <= c->vm_start) + goto found; + addr = c->vm_end; + if (addr > end) + goto nospc; + } + +found: + /* + * Insert this entry _before_ the one we found. + */ + list_add_tail(&new->vm_list, &c->vm_list); + new->vm_start = addr; + new->vm_end = addr + size; + new->vm_active = 1; + + spin_unlock_irqrestore(&consistent_lock, flags); + return new; + +nospc: + spin_unlock_irqrestore(&consistent_lock, flags); + kfree(new); +out: + return NULL; +} + +static struct metag_vm_region *metag_vm_region_find(struct metag_vm_region + *head, unsigned long addr) +{ + struct metag_vm_region *c; + + list_for_each_entry(c, &head->vm_list, vm_list) { + if (c->vm_active && c->vm_start == addr) + goto out; + } + c = NULL; +out: + return c; +} + +/* + * Allocate DMA-coherent memory space and return both the kernel remapped + * virtual and bus address for that space. + */ +void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp) +{ + struct page *page; + struct metag_vm_region *c; + unsigned long order; + u64 mask = get_coherent_dma_mask(dev); + u64 limit; + + if (!consistent_pte) { + pr_err("%s: not initialised\n", __func__); + dump_stack(); + return NULL; + } + + if (!mask) + goto no_page; + size = PAGE_ALIGN(size); + limit = (mask + 1) & ~mask; + if ((limit && size >= limit) + || size >= (CONSISTENT_END - CONSISTENT_START)) { + pr_warn("coherent allocation too big (requested %#x mask %#Lx)\n", + size, mask); + return NULL; + } + + order = get_order(size); + + if (mask != 0xffffffff) + gfp |= GFP_DMA; + + page = alloc_pages(gfp, order); + if (!page) + goto no_page; + + /* + * Invalidate any data that might be lurking in the + * kernel direct-mapped region for device DMA. + */ + { + void *kaddr = page_address(page); + memset(kaddr, 0, size); + flush_dcache_region(kaddr, size); + } + + /* + * Allocate a virtual address in the consistent mapping region. + */ + c = metag_vm_region_alloc(&consistent_head, size, + gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); + if (c) { + unsigned long vaddr = c->vm_start; + pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr); + struct page *end = page + (1 << order); + + c->vm_pages = page; + split_page(page, order); + + /* + * Set the "dma handle" + */ + *handle = page_to_bus(page); + + do { + BUG_ON(!pte_none(*pte)); + + SetPageReserved(page); + set_pte_at(&init_mm, vaddr, + pte, mk_pte(page, + pgprot_writecombine + (PAGE_KERNEL))); + page++; + pte++; + vaddr += PAGE_SIZE; + } while (size -= PAGE_SIZE); + + /* + * Free the otherwise unused pages. + */ + while (page < end) { + __free_page(page); + page++; + } + + return (void *)c->vm_start; + } + + if (page) + __free_pages(page, order); +no_page: + return NULL; +} +EXPORT_SYMBOL(dma_alloc_coherent); + +/* + * free a page as defined by the above mapping. + */ +void dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + struct metag_vm_region *c; + unsigned long flags, addr; + pte_t *ptep; + + size = PAGE_ALIGN(size); + + spin_lock_irqsave(&consistent_lock, flags); + + c = metag_vm_region_find(&consistent_head, (unsigned long)vaddr); + if (!c) + goto no_area; + + c->vm_active = 0; + if ((c->vm_end - c->vm_start) != size) { + pr_err("%s: freeing wrong coherent size (%ld != %d)\n", + __func__, c->vm_end - c->vm_start, size); + dump_stack(); + size = c->vm_end - c->vm_start; + } + + ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start); + addr = c->vm_start; + do { + pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); + unsigned long pfn; + + ptep++; + addr += PAGE_SIZE; + + if (!pte_none(pte) && pte_present(pte)) { + pfn = pte_pfn(pte); + + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + ClearPageReserved(page); + + __free_page(page); + continue; + } + } + + pr_crit("%s: bad page in kernel page table\n", + __func__); + } while (size -= PAGE_SIZE); + + flush_tlb_kernel_range(c->vm_start, c->vm_end); + + list_del(&c->vm_list); + + spin_unlock_irqrestore(&consistent_lock, flags); + + kfree(c); + return; + +no_area: + spin_unlock_irqrestore(&consistent_lock, flags); + pr_err("%s: trying to free invalid coherent area: %p\n", + __func__, vaddr); + dump_stack(); +} +EXPORT_SYMBOL(dma_free_coherent); + + +static int dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size) +{ + int ret = -ENXIO; + + unsigned long flags, user_size, kern_size; + struct metag_vm_region *c; + + user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + + spin_lock_irqsave(&consistent_lock, flags); + c = metag_vm_region_find(&consistent_head, (unsigned long)cpu_addr); + spin_unlock_irqrestore(&consistent_lock, flags); + + if (c) { + unsigned long off = vma->vm_pgoff; + + kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT; + + if (off < kern_size && + user_size <= (kern_size - off)) { + ret = remap_pfn_range(vma, vma->vm_start, + page_to_pfn(c->vm_pages) + off, + user_size << PAGE_SHIFT, + vma->vm_page_prot); + } + } + + + return ret; +} + +int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size) +{ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + return dma_mmap(dev, vma, cpu_addr, dma_addr, size); +} +EXPORT_SYMBOL(dma_mmap_coherent); + +int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size) +{ + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + return dma_mmap(dev, vma, cpu_addr, dma_addr, size); +} +EXPORT_SYMBOL(dma_mmap_writecombine); + + + + +/* + * Initialise the consistent memory allocation. + */ +static int __init dma_alloc_init(void) +{ + pgd_t *pgd, *pgd_k; + pud_t *pud, *pud_k; + pmd_t *pmd, *pmd_k; + pte_t *pte; + int ret = 0; + + do { + int offset = pgd_index(CONSISTENT_START); + pgd = pgd_offset(&init_mm, CONSISTENT_START); + pud = pud_alloc(&init_mm, pgd, CONSISTENT_START); + pmd = pmd_alloc(&init_mm, pud, CONSISTENT_START); + if (!pmd) { + pr_err("%s: no pmd tables\n", __func__); + ret = -ENOMEM; + break; + } + WARN_ON(!pmd_none(*pmd)); + + pte = pte_alloc_kernel(pmd, CONSISTENT_START); + if (!pte) { + pr_err("%s: no pte tables\n", __func__); + ret = -ENOMEM; + break; + } + + pgd_k = ((pgd_t *) mmu_get_base()) + offset; + pud_k = pud_offset(pgd_k, CONSISTENT_START); + pmd_k = pmd_offset(pud_k, CONSISTENT_START); + set_pmd(pmd_k, *pmd); + + consistent_pte = pte; + } while (0); + + return ret; +} +early_initcall(dma_alloc_init); + +/* + * make an area consistent to devices. + */ +void dma_sync_for_device(void *vaddr, size_t size, int dma_direction) +{ + /* + * Ensure any writes get through the write combiner. This is necessary + * even with DMA_FROM_DEVICE, or the write may dirty the cache after + * we've invalidated it and get written back during the DMA. + */ + + barrier(); + + switch (dma_direction) { + case DMA_BIDIRECTIONAL: + /* + * Writeback to ensure the device can see our latest changes and + * so that we have no dirty lines, and invalidate the cache + * lines too in preparation for receiving the buffer back + * (dma_sync_for_cpu) later. + */ + flush_dcache_region(vaddr, size); + break; + case DMA_TO_DEVICE: + /* + * Writeback to ensure the device can see our latest changes. + * There's no need to invalidate as the device shouldn't write + * to the buffer. + */ + writeback_dcache_region(vaddr, size); + break; + case DMA_FROM_DEVICE: + /* + * Invalidate to ensure we have no dirty lines that could get + * written back during the DMA. It's also safe to flush + * (writeback) here if necessary. + */ + invalidate_dcache_region(vaddr, size); + break; + case DMA_NONE: + BUG(); + } + + wmb(); +} +EXPORT_SYMBOL(dma_sync_for_device); + +/* + * make an area consistent to the core. + */ +void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction) +{ + /* + * Hardware L2 cache prefetch doesn't occur across 4K physical + * boundaries, however according to Documentation/DMA-API-HOWTO.txt + * kmalloc'd memory is DMA'able, so accesses in nearby memory could + * trigger a cache fill in the DMA buffer. + * + * This should never cause dirty lines, so a flush or invalidate should + * be safe to allow us to see data from the device. + */ + if (_meta_l2c_pf_is_enabled()) { + switch (dma_direction) { + case DMA_BIDIRECTIONAL: + case DMA_FROM_DEVICE: + invalidate_dcache_region(vaddr, size); + break; + case DMA_TO_DEVICE: + /* The device shouldn't have written to the buffer */ + break; + case DMA_NONE: + BUG(); + } + } + + rmb(); +} +EXPORT_SYMBOL(dma_sync_for_cpu); diff --git a/arch/metag/kernel/ftrace.c b/arch/metag/kernel/ftrace.c new file mode 100644 index 000000000000..a774f321643f --- /dev/null +++ b/arch/metag/kernel/ftrace.c @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2008 Imagination Technologies Ltd. + * Licensed under the GPL + * + * Dynamic ftrace support. + */ + +#include <linux/ftrace.h> +#include <linux/io.h> +#include <linux/uaccess.h> + +#include <asm/cacheflush.h> + +#define D04_MOVT_TEMPLATE 0x02200005 +#define D04_CALL_TEMPLATE 0xAC200005 +#define D1RTP_MOVT_TEMPLATE 0x03200005 +#define D1RTP_CALL_TEMPLATE 0xAC200006 + +static const unsigned long NOP[2] = {0xa0fffffe, 0xa0fffffe}; +static unsigned long movt_and_call_insn[2]; + +static unsigned char *ftrace_nop_replace(void) +{ + return (char *)&NOP[0]; +} + +static unsigned char *ftrace_call_replace(unsigned long pc, unsigned long addr) +{ + unsigned long hi16, low16; + + hi16 = (addr & 0xffff0000) >> 13; + low16 = (addr & 0x0000ffff) << 3; + + /* + * The compiler makes the call to mcount_wrapper() + * (Meta's wrapper around mcount()) through the register + * D0.4. So whenever we're patching one of those compiler-generated + * calls we also need to go through D0.4. Otherwise use D1RtP. + */ + if (pc == (unsigned long)&ftrace_call) { + writel(D1RTP_MOVT_TEMPLATE | hi16, &movt_and_call_insn[0]); + writel(D1RTP_CALL_TEMPLATE | low16, &movt_and_call_insn[1]); + } else { + writel(D04_MOVT_TEMPLATE | hi16, &movt_and_call_insn[0]); + writel(D04_CALL_TEMPLATE | low16, &movt_and_call_insn[1]); + } + + return (unsigned char *)&movt_and_call_insn[0]; +} + +static int ftrace_modify_code(unsigned long pc, unsigned char *old_code, + unsigned char *new_code) +{ + unsigned char replaced[MCOUNT_INSN_SIZE]; + + /* + * Note: Due to modules and __init, code can + * disappear and change, we need to protect against faulting + * as well as code changing. + * + * No real locking needed, this code is run through + * kstop_machine. + */ + + /* read the text we want to modify */ + if (probe_kernel_read(replaced, (void *)pc, MCOUNT_INSN_SIZE)) + return -EFAULT; + + /* Make sure it is what we expect it to be */ + if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) + return -EINVAL; + + /* replace the text with the new text */ + if (probe_kernel_write((void *)pc, new_code, MCOUNT_INSN_SIZE)) + return -EPERM; + + flush_icache_range(pc, pc + MCOUNT_INSN_SIZE); + + return 0; +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + int ret; + unsigned long pc; + unsigned char old[MCOUNT_INSN_SIZE], *new; + + pc = (unsigned long)&ftrace_call; + memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); + new = ftrace_call_replace(pc, (unsigned long)func); + ret = ftrace_modify_code(pc, old, new); + + return ret; +} + +int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char *new, *old; + unsigned long ip = rec->ip; + + old = ftrace_call_replace(ip, addr); + new = ftrace_nop_replace(); + + return ftrace_modify_code(ip, old, new); +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char *new, *old; + unsigned long ip = rec->ip; + + old = ftrace_nop_replace(); + new = ftrace_call_replace(ip, addr); + + return ftrace_modify_code(ip, old, new); +} + +/* run from kstop_machine */ +int __init ftrace_dyn_arch_init(void *data) +{ + /* The return code is returned via data */ + writel(0, data); + + return 0; +} diff --git a/arch/metag/kernel/ftrace_stub.S b/arch/metag/kernel/ftrace_stub.S new file mode 100644 index 000000000000..e70bff745bdd --- /dev/null +++ b/arch/metag/kernel/ftrace_stub.S @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2008 Imagination Technologies Ltd. + * Licensed under the GPL + * + */ + +#include <asm/ftrace.h> + + .text +#ifdef CONFIG_DYNAMIC_FTRACE + .global _mcount_wrapper + .type _mcount_wrapper,function +_mcount_wrapper: + MOV PC,D0.4 + + .global _ftrace_caller + .type _ftrace_caller,function +_ftrace_caller: + MOVT D0Re0,#HI(_function_trace_stop) + ADD D0Re0,D0Re0,#LO(_function_trace_stop) + GETD D0Re0,[D0Re0] + CMP D0Re0,#0 + BEQ $Lcall_stub + MOV PC,D0.4 +$Lcall_stub: + MSETL [A0StP], D0Ar6, D0Ar4, D0Ar2, D0.4 + MOV D1Ar1, D0.4 + MOV D0Ar2, D1RtP + SUB D1Ar1,D1Ar1,#MCOUNT_INSN_SIZE + + .global _ftrace_call +_ftrace_call: + MOVT D1RtP,#HI(_ftrace_stub) + CALL D1RtP,#LO(_ftrace_stub) + GETL D0.4, D1RtP, [A0StP++#(-8)] + GETL D0Ar2, D1Ar1, [A0StP++#(-8)] + GETL D0Ar4, D1Ar3, [A0StP++#(-8)] + GETL D0Ar6, D1Ar5, [A0StP++#(-8)] + MOV PC, D0.4 +#else + + .global _mcount_wrapper + .type _mcount_wrapper,function +_mcount_wrapper: + MOVT D0Re0,#HI(_function_trace_stop) + ADD D0Re0,D0Re0,#LO(_function_trace_stop) + GETD D0Re0,[D0Re0] + CMP D0Re0,#0 + BEQ $Lcall_mcount + MOV PC,D0.4 +$Lcall_mcount: + MSETL [A0StP], D0Ar6, D0Ar4, D0Ar2, D0.4 + MOV D1Ar1, D0.4 + MOV D0Ar2, D1RtP + MOVT D0Re0,#HI(_ftrace_trace_function) + ADD D0Re0,D0Re0,#LO(_ftrace_trace_function) + GET D1Ar3,[D0Re0] + MOVT D1Re0,#HI(_ftrace_stub) + ADD D1Re0,D1Re0,#LO(_ftrace_stub) + CMP D1Ar3,D1Re0 + BEQ $Ltrace_exit + MOV D1RtP,D1Ar3 + SUB D1Ar1,D1Ar1,#MCOUNT_INSN_SIZE + SWAP PC,D1RtP +$Ltrace_exit: + GETL D0.4, D1RtP, [A0StP++#(-8)] + GETL D0Ar2, D1Ar1, [A0StP++#(-8)] + GETL D0Ar4, D1Ar3, [A0StP++#(-8)] + GETL D0Ar6, D1Ar5, [A0StP++#(-8)] + MOV PC, D0.4 + +#endif /* CONFIG_DYNAMIC_FTRACE */ + + .global _ftrace_stub +_ftrace_stub: + MOV PC,D1RtP diff --git a/arch/metag/kernel/head.S b/arch/metag/kernel/head.S new file mode 100644 index 000000000000..969dffabc03a --- /dev/null +++ b/arch/metag/kernel/head.S @@ -0,0 +1,57 @@ + ! Copyright 2005,2006,2007,2009 Imagination Technologies + +#include <linux/init.h> +#include <generated/asm-offsets.h> +#undef __exit + + __HEAD + ! Setup the stack and get going into _metag_start_kernel + .global __start + .type __start,function +__start: + ! D1Ar1 contains pTBI (ISTAT) + ! D0Ar2 contains pTBI + ! D1Ar3 contains __pTBISegs + ! D0Ar4 contains kernel arglist pointer + + MOVT D0Re0,#HI(___pTBIs) + ADD D0Re0,D0Re0,#LO(___pTBIs) + SETL [D0Re0],D0Ar2,D1Ar1 + MOVT D0Re0,#HI(___pTBISegs) + ADD D0Re0,D0Re0,#LO(___pTBISegs) + SETD [D0Re0],D1Ar3 + MOV A0FrP,#0 + MOV D0Re0,#0 + MOV D1Re0,#0 + MOV D1Ar3,#0 + MOV D1Ar1,D0Ar4 !Store kernel boot params + MOV D1Ar5,#0 + MOV D0Ar6,#0 +#ifdef CONFIG_METAG_DSP + MOV D0.8,#0 +#endif + MOVT A0StP,#HI(_init_thread_union) + ADD A0StP,A0StP,#LO(_init_thread_union) + ADD A0StP,A0StP,#THREAD_INFO_SIZE + MOVT D1RtP,#HI(_metag_start_kernel) + CALL D1RtP,#LO(_metag_start_kernel) + .size __start,.-__start + + !! Needed by TBX + .global __exit + .type __exit,function +__exit: + XOR TXENABLE,D0Re0,D0Re0 + .size __exit,.-__exit + +#ifdef CONFIG_SMP + .global _secondary_startup + .type _secondary_startup,function +_secondary_startup: + MOVT A0StP,#HI(_secondary_data_stack) + ADD A0StP,A0StP,#LO(_secondary_data_stack) + GETD A0StP,[A0StP] + ADD A0StP,A0StP,#THREAD_INFO_SIZE + B _secondary_start_kernel + .size _secondary_startup,.-_secondary_startup +#endif diff --git a/arch/metag/kernel/irq.c b/arch/metag/kernel/irq.c new file mode 100644 index 000000000000..87707efeb0a3 --- /dev/null +++ b/arch/metag/kernel/irq.c @@ -0,0 +1,323 @@ +/* + * Linux/Meta general interrupt handling code + * + */ + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/irqchip/metag-ext.h> +#include <linux/irqchip/metag.h> +#include <linux/irqdomain.h> +#include <linux/ratelimit.h> + +#include <asm/core_reg.h> +#include <asm/mach/arch.h> +#include <asm/uaccess.h> + +#ifdef CONFIG_4KSTACKS +union irq_ctx { + struct thread_info tinfo; + u32 stack[THREAD_SIZE/sizeof(u32)]; +}; + +static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; +static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; +#endif + +struct irq_domain *root_domain; + +static unsigned int startup_meta_irq(struct irq_data *data) +{ + tbi_startup_interrupt(data->hwirq); + return 0; +} + +static void shutdown_meta_irq(struct irq_data *data) +{ + tbi_shutdown_interrupt(data->hwirq); +} + +void do_IRQ(int irq, struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); +#ifdef CONFIG_4KSTACKS + struct irq_desc *desc; + union irq_ctx *curctx, *irqctx; + u32 *isp; +#endif + + irq_enter(); + + irq = irq_linear_revmap(root_domain, irq); + +#ifdef CONFIG_DEBUG_STACKOVERFLOW + /* Debugging check for stack overflow: is there less than 1KB free? */ + { + unsigned long sp; + + sp = __core_reg_get(A0StP); + sp &= THREAD_SIZE - 1; + + if (unlikely(sp > (THREAD_SIZE - 1024))) + pr_err("Stack overflow in do_IRQ: %ld\n", sp); + } +#endif + + +#ifdef CONFIG_4KSTACKS + curctx = (union irq_ctx *) current_thread_info(); + irqctx = hardirq_ctx[smp_processor_id()]; + + /* + * this is where we switch to the IRQ stack. However, if we are + * already using the IRQ stack (because we interrupted a hardirq + * handler) we can't do that and just have to keep using the + * current stack (which is the irq stack already after all) + */ + if (curctx != irqctx) { + /* build the stack frame on the IRQ stack */ + isp = (u32 *) ((char *)irqctx + sizeof(struct thread_info)); + irqctx->tinfo.task = curctx->tinfo.task; + + /* + * Copy the softirq bits in preempt_count so that the + * softirq checks work in the hardirq context. + */ + irqctx->tinfo.preempt_count = + (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | + (curctx->tinfo.preempt_count & SOFTIRQ_MASK); + + desc = irq_to_desc(irq); + + asm volatile ( + "MOV D0.5,%0\n" + "MOV D1Ar1,%1\n" + "MOV D1RtP,%2\n" + "MOV D0Ar2,%3\n" + "SWAP A0StP,D0.5\n" + "SWAP PC,D1RtP\n" + "MOV A0StP,D0.5\n" + : + : "r" (isp), "r" (irq), "r" (desc->handle_irq), + "r" (desc) + : "memory", "cc", "D1Ar1", "D0Ar2", "D1Ar3", "D0Ar4", + "D1Ar5", "D0Ar6", "D0Re0", "D1Re0", "D0.4", "D1RtP", + "D0.5" + ); + } else +#endif + generic_handle_irq(irq); + + irq_exit(); + + set_irq_regs(old_regs); +} + +#ifdef CONFIG_4KSTACKS + +static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; + +static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; + +/* + * allocate per-cpu stacks for hardirq and for softirq processing + */ +void irq_ctx_init(int cpu) +{ + union irq_ctx *irqctx; + + if (hardirq_ctx[cpu]) + return; + + irqctx = (union irq_ctx *) &hardirq_stack[cpu * THREAD_SIZE]; + irqctx->tinfo.task = NULL; + irqctx->tinfo.exec_domain = NULL; + irqctx->tinfo.cpu = cpu; + irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; + irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); + + hardirq_ctx[cpu] = irqctx; + + irqctx = (union irq_ctx *) &softirq_stack[cpu * THREAD_SIZE]; + irqctx->tinfo.task = NULL; + irqctx->tinfo.exec_domain = NULL; + irqctx->tinfo.cpu = cpu; + irqctx->tinfo.preempt_count = 0; + irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); + + softirq_ctx[cpu] = irqctx; + + pr_info("CPU %u irqstacks, hard=%p soft=%p\n", + cpu, hardirq_ctx[cpu], softirq_ctx[cpu]); +} + +void irq_ctx_exit(int cpu) +{ + hardirq_ctx[smp_processor_id()] = NULL; +} + +extern asmlinkage void __do_softirq(void); + +asmlinkage void do_softirq(void) +{ + unsigned long flags; + struct thread_info *curctx; + union irq_ctx *irqctx; + u32 *isp; + + if (in_interrupt()) + return; + + local_irq_save(flags); + + if (local_softirq_pending()) { + curctx = current_thread_info(); + irqctx = softirq_ctx[smp_processor_id()]; + irqctx->tinfo.task = curctx->task; + + /* build the stack frame on the softirq stack */ + isp = (u32 *) ((char *)irqctx + sizeof(struct thread_info)); + + asm volatile ( + "MOV D0.5,%0\n" + "SWAP A0StP,D0.5\n" + "CALLR D1RtP,___do_softirq\n" + "MOV A0StP,D0.5\n" + : + : "r" (isp) + : "memory", "cc", "D1Ar1", "D0Ar2", "D1Ar3", "D0Ar4", + "D1Ar5", "D0Ar6", "D0Re0", "D1Re0", "D0.4", "D1RtP", + "D0.5" + ); + /* + * Shouldn't happen, we returned above if in_interrupt(): + */ + WARN_ON_ONCE(softirq_count()); + } + + local_irq_restore(flags); +} +#endif + +static struct irq_chip meta_irq_type = { + .name = "META-IRQ", + .irq_startup = startup_meta_irq, + .irq_shutdown = shutdown_meta_irq, +}; + +/** + * tbisig_map() - Map a TBI signal number to a virtual IRQ number. + * @hw: Number of the TBI signal. Must be in range. + * + * Returns: The virtual IRQ number of the TBI signal number IRQ specified by + * @hw. + */ +int tbisig_map(unsigned int hw) +{ + return irq_create_mapping(root_domain, hw); +} + +/** + * metag_tbisig_map() - map a tbi signal to a Linux virtual IRQ number + * @d: root irq domain + * @irq: virtual irq number + * @hw: hardware irq number (TBI signal number) + * + * This sets up a virtual irq for a specified TBI signal number. + */ +static int metag_tbisig_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ +#ifdef CONFIG_SMP + irq_set_chip_and_handler(irq, &meta_irq_type, handle_percpu_irq); +#else + irq_set_chip_and_handler(irq, &meta_irq_type, handle_simple_irq); +#endif + return 0; +} + +static const struct irq_domain_ops metag_tbisig_domain_ops = { + .map = metag_tbisig_map, +}; + +/* + * void init_IRQ(void) + * + * Parameters: None + * + * Returns: Nothing + * + * This function should be called during kernel startup to initialize + * the IRQ handling routines. + */ +void __init init_IRQ(void) +{ + root_domain = irq_domain_add_linear(NULL, 32, + &metag_tbisig_domain_ops, NULL); + if (unlikely(!root_domain)) + panic("init_IRQ: cannot add root IRQ domain"); + + irq_ctx_init(smp_processor_id()); + + init_internal_IRQ(); + init_external_IRQ(); + + if (machine_desc->init_irq) + machine_desc->init_irq(); +} + +int __init arch_probe_nr_irqs(void) +{ + if (machine_desc->nr_irqs) + nr_irqs = machine_desc->nr_irqs; + return 0; +} + +#ifdef CONFIG_HOTPLUG_CPU +static void route_irq(struct irq_data *data, unsigned int irq, unsigned int cpu) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irq_chip *chip = irq_data_get_irq_chip(data); + + raw_spin_lock_irq(&desc->lock); + if (chip->irq_set_affinity) + chip->irq_set_affinity(data, cpumask_of(cpu), false); + raw_spin_unlock_irq(&desc->lock); +} + +/* + * The CPU has been marked offline. Migrate IRQs off this CPU. If + * the affinity settings do not allow other CPUs, force them onto any + * available CPU. + */ +void migrate_irqs(void) +{ + unsigned int i, cpu = smp_processor_id(); + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_data *data = irq_desc_get_irq_data(desc); + unsigned int newcpu; + + if (irqd_is_per_cpu(data)) + continue; + + if (!cpumask_test_cpu(cpu, data->affinity)) + continue; + + newcpu = cpumask_any_and(data->affinity, cpu_online_mask); + + if (newcpu >= nr_cpu_ids) { + pr_info_ratelimited("IRQ%u no longer affine to CPU%u\n", + i, cpu); + + cpumask_setall(data->affinity); + newcpu = cpumask_any_and(data->affinity, + cpu_online_mask); + } + + route_irq(data, i, newcpu); + } +} +#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/metag/kernel/kick.c b/arch/metag/kernel/kick.c new file mode 100644 index 000000000000..50fcbec98cd2 --- /dev/null +++ b/arch/metag/kernel/kick.c @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2009 Imagination Technologies + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + * + * The Meta KICK interrupt mechanism is generally a useful feature, so + * we provide an interface for registering multiple interrupt + * handlers. All the registered interrupt handlers are "chained". When + * a KICK interrupt is received the first function in the list is + * called. If that interrupt handler cannot handle the KICK the next + * one is called, then the next until someone handles it (or we run + * out of functions). As soon as one function handles the interrupt no + * other handlers are called. + * + * The only downside of chaining interrupt handlers is that each + * handler must be able to detect whether the KICK was intended for it + * or not. For example, when the IPI handler runs and it sees that + * there are no IPI messages it must not signal that the KICK was + * handled, thereby giving the other handlers a chance to run. + * + * The reason that we provide our own interface for calling KICK + * handlers instead of using the generic kernel infrastructure is that + * the KICK handlers require access to a CPU's pTBI structure. So we + * pass it as an argument. + */ +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/types.h> + +#include <asm/traps.h> + +/* + * All accesses/manipulations of kick_handlers_list should be + * performed while holding kick_handlers_lock. + */ +static DEFINE_SPINLOCK(kick_handlers_lock); +static LIST_HEAD(kick_handlers_list); + +void kick_register_func(struct kick_irq_handler *kh) +{ + unsigned long flags; + + spin_lock_irqsave(&kick_handlers_lock, flags); + + list_add_tail(&kh->list, &kick_handlers_list); + + spin_unlock_irqrestore(&kick_handlers_lock, flags); +} +EXPORT_SYMBOL(kick_register_func); + +void kick_unregister_func(struct kick_irq_handler *kh) +{ + unsigned long flags; + + spin_lock_irqsave(&kick_handlers_lock, flags); + + list_del(&kh->list); + + spin_unlock_irqrestore(&kick_handlers_lock, flags); +} +EXPORT_SYMBOL(kick_unregister_func); + +TBIRES +kick_handler(TBIRES State, int SigNum, int Triggers, int Inst, PTBI pTBI) +{ + struct kick_irq_handler *kh; + struct list_head *lh; + int handled = 0; + TBIRES ret; + + head_end(State, ~INTS_OFF_MASK); + + /* If we interrupted user code handle any critical sections. */ + if (State.Sig.SaveMask & TBICTX_PRIV_BIT) + restart_critical_section(State); + + trace_hardirqs_off(); + + /* + * There is no need to disable interrupts here because we + * can't nest KICK interrupts in a KICK interrupt handler. + */ + spin_lock(&kick_handlers_lock); + + list_for_each(lh, &kick_handlers_list) { + kh = list_entry(lh, struct kick_irq_handler, list); + + ret = kh->func(State, SigNum, Triggers, Inst, pTBI, &handled); + if (handled) + break; + } + + spin_unlock(&kick_handlers_lock); + + WARN_ON(!handled); + + return tail_end(ret); +} diff --git a/arch/metag/kernel/machines.c b/arch/metag/kernel/machines.c new file mode 100644 index 000000000000..1edf6ba193b1 --- /dev/null +++ b/arch/metag/kernel/machines.c @@ -0,0 +1,20 @@ +/* + * arch/metag/kernel/machines.c + * + * Copyright (C) 2012 Imagination Technologies Ltd. + * + * Generic Meta Boards. + */ + +#include <linux/init.h> +#include <asm/irq.h> +#include <asm/mach/arch.h> + +static const char *meta_boards_compat[] __initdata = { + "img,meta", + NULL, +}; + +MACHINE_START(META, "Generic Meta") + .dt_compat = meta_boards_compat, +MACHINE_END diff --git a/arch/metag/kernel/metag_ksyms.c b/arch/metag/kernel/metag_ksyms.c new file mode 100644 index 000000000000..ec872ef14eb1 --- /dev/null +++ b/arch/metag/kernel/metag_ksyms.c @@ -0,0 +1,49 @@ +#include <linux/export.h> + +#include <asm/div64.h> +#include <asm/ftrace.h> +#include <asm/page.h> +#include <asm/string.h> +#include <asm/tbx.h> + +EXPORT_SYMBOL(clear_page); +EXPORT_SYMBOL(copy_page); + +#ifdef CONFIG_FLATMEM +/* needed for the pfn_valid macro */ +EXPORT_SYMBOL(max_pfn); +EXPORT_SYMBOL(min_low_pfn); +#endif + +/* TBI symbols */ +EXPORT_SYMBOL(__TBI); +EXPORT_SYMBOL(__TBIFindSeg); +EXPORT_SYMBOL(__TBIPoll); +EXPORT_SYMBOL(__TBITimeStamp); + +#define DECLARE_EXPORT(name) extern void name(void); EXPORT_SYMBOL(name) + +/* libgcc functions */ +DECLARE_EXPORT(__ashldi3); +DECLARE_EXPORT(__ashrdi3); +DECLARE_EXPORT(__lshrdi3); +DECLARE_EXPORT(__udivsi3); +DECLARE_EXPORT(__divsi3); +DECLARE_EXPORT(__umodsi3); +DECLARE_EXPORT(__modsi3); +DECLARE_EXPORT(__muldi3); +DECLARE_EXPORT(__cmpdi2); +DECLARE_EXPORT(__ucmpdi2); + +/* Maths functions */ +EXPORT_SYMBOL(div_u64); +EXPORT_SYMBOL(div_s64); + +/* String functions */ +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memmove); + +#ifdef CONFIG_FUNCTION_TRACER +EXPORT_SYMBOL(mcount_wrapper); +#endif diff --git a/arch/metag/kernel/module.c b/arch/metag/kernel/module.c new file mode 100644 index 000000000000..986331cd0a52 --- /dev/null +++ b/arch/metag/kernel/module.c @@ -0,0 +1,284 @@ +/* Kernel module help for Meta. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. +*/ +#include <linux/moduleloader.h> +#include <linux/elf.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/sort.h> + +#include <asm/unaligned.h> + +/* Count how many different relocations (different symbol, different + addend) */ +static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num) +{ + unsigned int i, r_info, r_addend, _count_relocs; + + _count_relocs = 0; + r_info = 0; + r_addend = 0; + for (i = 0; i < num; i++) + /* Only count relbranch relocs, others don't need stubs */ + if (ELF32_R_TYPE(rela[i].r_info) == R_METAG_RELBRANCH && + (r_info != ELF32_R_SYM(rela[i].r_info) || + r_addend != rela[i].r_addend)) { + _count_relocs++; + r_info = ELF32_R_SYM(rela[i].r_info); + r_addend = rela[i].r_addend; + } + + return _count_relocs; +} + +static int relacmp(const void *_x, const void *_y) +{ + const Elf32_Rela *x, *y; + + y = (Elf32_Rela *)_x; + x = (Elf32_Rela *)_y; + + /* Compare the entire r_info (as opposed to ELF32_R_SYM(r_info) only) to + * make the comparison cheaper/faster. It won't affect the sorting or + * the counting algorithms' performance + */ + if (x->r_info < y->r_info) + return -1; + else if (x->r_info > y->r_info) + return 1; + else if (x->r_addend < y->r_addend) + return -1; + else if (x->r_addend > y->r_addend) + return 1; + else + return 0; +} + +static void relaswap(void *_x, void *_y, int size) +{ + uint32_t *x, *y, tmp; + int i; + + y = (uint32_t *)_x; + x = (uint32_t *)_y; + + for (i = 0; i < sizeof(Elf32_Rela) / sizeof(uint32_t); i++) { + tmp = x[i]; + x[i] = y[i]; + y[i] = tmp; + } +} + +/* Get the potential trampolines size required of the init and + non-init sections */ +static unsigned long get_plt_size(const Elf32_Ehdr *hdr, + const Elf32_Shdr *sechdrs, + const char *secstrings, + int is_init) +{ + unsigned long ret = 0; + unsigned i; + + /* Everything marked ALLOC (this includes the exported + symbols) */ + for (i = 1; i < hdr->e_shnum; i++) { + /* If it's called *.init*, and we're not init, we're + not interested */ + if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != NULL) + != is_init) + continue; + + /* We don't want to look at debug sections. */ + if (strstr(secstrings + sechdrs[i].sh_name, ".debug") != NULL) + continue; + + if (sechdrs[i].sh_type == SHT_RELA) { + pr_debug("Found relocations in section %u\n", i); + pr_debug("Ptr: %p. Number: %u\n", + (void *)hdr + sechdrs[i].sh_offset, + sechdrs[i].sh_size / sizeof(Elf32_Rela)); + + /* Sort the relocation information based on a symbol and + * addend key. This is a stable O(n*log n) complexity + * alogrithm but it will reduce the complexity of + * count_relocs() to linear complexity O(n) + */ + sort((void *)hdr + sechdrs[i].sh_offset, + sechdrs[i].sh_size / sizeof(Elf32_Rela), + sizeof(Elf32_Rela), relacmp, relaswap); + + ret += count_relocs((void *)hdr + + sechdrs[i].sh_offset, + sechdrs[i].sh_size + / sizeof(Elf32_Rela)) + * sizeof(struct metag_plt_entry); + } + } + + return ret; +} + +int module_frob_arch_sections(Elf32_Ehdr *hdr, + Elf32_Shdr *sechdrs, + char *secstrings, + struct module *me) +{ + unsigned int i; + + /* Find .plt and .init.plt sections */ + for (i = 0; i < hdr->e_shnum; i++) { + if (strcmp(secstrings + sechdrs[i].sh_name, ".init.plt") == 0) + me->arch.init_plt_section = i; + else if (strcmp(secstrings + sechdrs[i].sh_name, ".plt") == 0) + me->arch.core_plt_section = i; + } + if (!me->arch.core_plt_section || !me->arch.init_plt_section) { + pr_err("Module doesn't contain .plt or .init.plt sections.\n"); + return -ENOEXEC; + } + + /* Override their sizes */ + sechdrs[me->arch.core_plt_section].sh_size + = get_plt_size(hdr, sechdrs, secstrings, 0); + sechdrs[me->arch.core_plt_section].sh_type = SHT_NOBITS; + sechdrs[me->arch.init_plt_section].sh_size + = get_plt_size(hdr, sechdrs, secstrings, 1); + sechdrs[me->arch.init_plt_section].sh_type = SHT_NOBITS; + return 0; +} + +/* Set up a trampoline in the PLT to bounce us to the distant function */ +static uint32_t do_plt_call(void *location, Elf32_Addr val, + Elf32_Shdr *sechdrs, struct module *mod) +{ + struct metag_plt_entry *entry; + /* Instructions used to do the indirect jump. */ + uint32_t tramp[2]; + + /* We have to trash a register, so we assume that any control + transfer more than 21-bits away must be a function call + (so we can use a call-clobbered register). */ + + /* MOVT D0Re0,#HI(v) */ + tramp[0] = 0x02000005 | (((val & 0xffff0000) >> 16) << 3); + /* JUMP D0Re0,#LO(v) */ + tramp[1] = 0xac000001 | ((val & 0x0000ffff) << 3); + + /* Init, or core PLT? */ + if (location >= mod->module_core + && location < mod->module_core + mod->core_size) + entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; + else + entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; + + /* Find this entry, or if that fails, the next avail. entry */ + while (entry->tramp[0]) + if (entry->tramp[0] == tramp[0] && entry->tramp[1] == tramp[1]) + return (uint32_t)entry; + else + entry++; + + entry->tramp[0] = tramp[0]; + entry->tramp[1] = tramp[1]; + + return (uint32_t)entry; +} + +int apply_relocate_add(Elf32_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + unsigned int i; + Elf32_Rela *rel = (void *)sechdrs[relsec].sh_addr; + Elf32_Sym *sym; + Elf32_Addr relocation; + uint32_t *location; + int32_t value; + + pr_debug("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + /* This is where to make the change */ + location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + + rel[i].r_offset; + /* This is the symbol it is referring to. Note that all + undefined symbols have been resolved. */ + sym = (Elf32_Sym *)sechdrs[symindex].sh_addr + + ELF32_R_SYM(rel[i].r_info); + relocation = sym->st_value + rel[i].r_addend; + + switch (ELF32_R_TYPE(rel[i].r_info)) { + case R_METAG_NONE: + break; + case R_METAG_HIADDR16: + relocation >>= 16; + case R_METAG_LOADDR16: + *location = (*location & 0xfff80007) | + ((relocation & 0xffff) << 3); + break; + case R_METAG_ADDR32: + /* + * Packed data structures may cause a misaligned + * R_METAG_ADDR32 to be emitted. + */ + put_unaligned(relocation, location); + break; + case R_METAG_GETSETOFF: + *location += ((relocation & 0xfff) << 7); + break; + case R_METAG_RELBRANCH: + if (*location & (0x7ffff << 5)) { + pr_err("bad relbranch relocation\n"); + break; + } + + /* This jump is too big for the offset slot. Build + * a PLT to jump through to get to where we want to go. + * NB: 21bit check - not scaled to 19bit yet + */ + if (((int32_t)(relocation - + (uint32_t)location) > 0xfffff) || + ((int32_t)(relocation - + (uint32_t)location) < -0xfffff)) { + relocation = do_plt_call(location, relocation, + sechdrs, me); + } + + value = relocation - (uint32_t)location; + + /* branch instruction aligned */ + value /= 4; + + if ((value > 0x7ffff) || (value < -0x7ffff)) { + /* + * this should have been caught by the code + * above! + */ + pr_err("overflow of relbranch reloc\n"); + } + + *location = (*location & (~(0x7ffff << 5))) | + ((value & 0x7ffff) << 5); + break; + + default: + pr_err("module %s: Unknown relocation: %u\n", + me->name, ELF32_R_TYPE(rel[i].r_info)); + return -ENOEXEC; + } + } + return 0; +} diff --git a/arch/metag/kernel/perf/Makefile b/arch/metag/kernel/perf/Makefile new file mode 100644 index 000000000000..b158cb27208d --- /dev/null +++ b/arch/metag/kernel/perf/Makefile @@ -0,0 +1,3 @@ +# Makefile for performance event core + +obj-y += perf_event.o diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c new file mode 100644 index 000000000000..a876d5ff3897 --- /dev/null +++ b/arch/metag/kernel/perf/perf_event.c @@ -0,0 +1,861 @@ +/* + * Meta performance counter support. + * Copyright (C) 2012 Imagination Technologies Ltd + * + * This code is based on the sh pmu code: + * Copyright (C) 2009 Paul Mundt + * + * and on the arm pmu code: + * Copyright (C) 2009 picoChip Designs, Ltd., James Iles + * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com> + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include <linux/atomic.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/irqchip/metag.h> +#include <linux/perf_event.h> +#include <linux/slab.h> + +#include <asm/core_reg.h> +#include <asm/hwthread.h> +#include <asm/io.h> +#include <asm/irq.h> + +#include "perf_event.h" + +static int _hw_perf_event_init(struct perf_event *); +static void _hw_perf_event_destroy(struct perf_event *); + +/* Determines which core type we are */ +static struct metag_pmu *metag_pmu __read_mostly; + +/* Processor specific data */ +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +/* PMU admin */ +const char *perf_pmu_name(void) +{ + if (metag_pmu) + return metag_pmu->pmu.name; + + return NULL; +} +EXPORT_SYMBOL_GPL(perf_pmu_name); + +int perf_num_counters(void) +{ + if (metag_pmu) + return metag_pmu->max_events; + + return 0; +} +EXPORT_SYMBOL_GPL(perf_num_counters); + +static inline int metag_pmu_initialised(void) +{ + return !!metag_pmu; +} + +static void release_pmu_hardware(void) +{ + int irq; + unsigned int version = (metag_pmu->version & + (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >> + METAC_ID_REV_S; + + /* Early cores don't have overflow interrupts */ + if (version < 0x0104) + return; + + irq = internal_irq_map(17); + if (irq >= 0) + free_irq(irq, (void *)1); + + irq = internal_irq_map(16); + if (irq >= 0) + free_irq(irq, (void *)0); +} + +static int reserve_pmu_hardware(void) +{ + int err = 0, irq[2]; + unsigned int version = (metag_pmu->version & + (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >> + METAC_ID_REV_S; + + /* Early cores don't have overflow interrupts */ + if (version < 0x0104) + goto out; + + /* + * Bit 16 on HWSTATMETA is the interrupt for performance counter 0; + * similarly, 17 is the interrupt for performance counter 1. + * We can't (yet) interrupt on the cycle counter, because it's a + * register, however it holds a 32-bit value as opposed to 24-bit. + */ + irq[0] = internal_irq_map(16); + if (irq[0] < 0) { + pr_err("unable to map internal IRQ %d\n", 16); + goto out; + } + err = request_irq(irq[0], metag_pmu->handle_irq, IRQF_NOBALANCING, + "metagpmu0", (void *)0); + if (err) { + pr_err("unable to request IRQ%d for metag PMU counters\n", + irq[0]); + goto out; + } + + irq[1] = internal_irq_map(17); + if (irq[1] < 0) { + pr_err("unable to map internal IRQ %d\n", 17); + goto out_irq1; + } + err = request_irq(irq[1], metag_pmu->handle_irq, IRQF_NOBALANCING, + "metagpmu1", (void *)1); + if (err) { + pr_err("unable to request IRQ%d for metag PMU counters\n", + irq[1]); + goto out_irq1; + } + + return 0; + +out_irq1: + free_irq(irq[0], (void *)0); +out: + return err; +} + +/* PMU operations */ +static void metag_pmu_enable(struct pmu *pmu) +{ +} + +static void metag_pmu_disable(struct pmu *pmu) +{ +} + +static int metag_pmu_event_init(struct perf_event *event) +{ + int err = 0; + atomic_t *active_events = &metag_pmu->active_events; + + if (!metag_pmu_initialised()) { + err = -ENODEV; + goto out; + } + + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + event->destroy = _hw_perf_event_destroy; + + if (!atomic_inc_not_zero(active_events)) { + mutex_lock(&metag_pmu->reserve_mutex); + if (atomic_read(active_events) == 0) + err = reserve_pmu_hardware(); + + if (!err) + atomic_inc(active_events); + + mutex_unlock(&metag_pmu->reserve_mutex); + } + + /* Hardware and caches counters */ + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + err = _hw_perf_event_init(event); + break; + + default: + return -ENOENT; + } + + if (err) + event->destroy(event); + +out: + return err; +} + +void metag_pmu_event_update(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + u64 prev_raw_count, new_raw_count; + s64 delta; + + /* + * If this counter is chained, it may be that the previous counter + * value has been changed beneath us. + * + * To get around this, we read and exchange the new raw count, then + * add the delta (new - prev) to the generic counter atomically. + * + * Without interrupts, this is the simplest approach. + */ +again: + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = metag_pmu->read(idx); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + /* + * Calculate the delta and add it to the counter. + */ + delta = new_raw_count - prev_raw_count; + + local64_add(delta, &event->count); +} + +int metag_pmu_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + s64 left = local64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int ret = 0; + + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (left > (s64)metag_pmu->max_period) + left = metag_pmu->max_period; + + if (metag_pmu->write) + metag_pmu->write(idx, (u64)(-left) & MAX_PERIOD); + + perf_event_update_userpage(event); + + return ret; +} + +static void metag_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (WARN_ON_ONCE(idx == -1)) + return; + + /* + * We always have to reprogram the period, so ignore PERF_EF_RELOAD. + */ + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + + /* + * Reset the period. + * Some counters can't be stopped (i.e. are core global), so when the + * counter was 'stopped' we merely disabled the IRQ. If we don't reset + * the period, then we'll either: a) get an overflow too soon; + * or b) too late if the overflow happened since disabling. + * Obviously, this has little bearing on cores without the overflow + * interrupt, as the performance counter resets to zero on write + * anyway. + */ + if (metag_pmu->max_period) + metag_pmu_event_set_period(event, hwc, hwc->idx); + cpuc->events[idx] = event; + metag_pmu->enable(hwc, idx); +} + +static void metag_pmu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + /* + * We should always update the counter on stop; see comment above + * why. + */ + if (!(hwc->state & PERF_HES_STOPPED)) { + metag_pmu_event_update(event, hwc, hwc->idx); + metag_pmu->disable(hwc, hwc->idx); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } +} + +static int metag_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = 0, ret = 0; + + perf_pmu_disable(event->pmu); + + /* check whether we're counting instructions */ + if (hwc->config == 0x100) { + if (__test_and_set_bit(METAG_INST_COUNTER, + cpuc->used_mask)) { + ret = -EAGAIN; + goto out; + } + idx = METAG_INST_COUNTER; + } else { + /* Check whether we have a spare counter */ + idx = find_first_zero_bit(cpuc->used_mask, + atomic_read(&metag_pmu->active_events)); + if (idx >= METAG_INST_COUNTER) { + ret = -EAGAIN; + goto out; + } + + __set_bit(idx, cpuc->used_mask); + } + hwc->idx = idx; + + /* Make sure the counter is disabled */ + metag_pmu->disable(hwc, idx); + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + metag_pmu_start(event, PERF_EF_RELOAD); + + perf_event_update_userpage(event); +out: + perf_pmu_enable(event->pmu); + return ret; +} + +static void metag_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + WARN_ON(idx < 0); + metag_pmu_stop(event, PERF_EF_UPDATE); + cpuc->events[idx] = NULL; + __clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + +static void metag_pmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* Don't read disabled counters! */ + if (hwc->idx < 0) + return; + + metag_pmu_event_update(event, hwc, hwc->idx); +} + +static struct pmu pmu = { + .pmu_enable = metag_pmu_enable, + .pmu_disable = metag_pmu_disable, + + .event_init = metag_pmu_event_init, + + .add = metag_pmu_add, + .del = metag_pmu_del, + .start = metag_pmu_start, + .stop = metag_pmu_stop, + .read = metag_pmu_read, +}; + +/* Core counter specific functions */ +static const int metag_general_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 0x03, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x100, + [PERF_COUNT_HW_CACHE_REFERENCES] = -1, + [PERF_COUNT_HW_CACHE_MISSES] = -1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = -1, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = -1, + [PERF_COUNT_HW_REF_CPU_CYCLES] = -1, +}; + +static const int metag_pmu_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x08, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x09, + [C(RESULT_MISS)] = 0x0a, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0xd0, + [C(RESULT_MISS)] = 0xd2, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0xd4, + [C(RESULT_MISS)] = 0xd5, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0xd1, + [C(RESULT_MISS)] = 0xd3, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + + +static void _hw_perf_event_destroy(struct perf_event *event) +{ + atomic_t *active_events = &metag_pmu->active_events; + struct mutex *pmu_mutex = &metag_pmu->reserve_mutex; + + if (atomic_dec_and_mutex_lock(active_events, pmu_mutex)) { + release_pmu_hardware(); + mutex_unlock(pmu_mutex); + } +} + +static int _hw_perf_cache_event(int config, int *evp) +{ + unsigned long type, op, result; + int ev; + + if (!metag_pmu->cache_events) + return -EINVAL; + + /* Unpack config */ + type = config & 0xff; + op = (config >> 8) & 0xff; + result = (config >> 16) & 0xff; + + if (type >= PERF_COUNT_HW_CACHE_MAX || + op >= PERF_COUNT_HW_CACHE_OP_MAX || + result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ev = (*metag_pmu->cache_events)[type][op][result]; + if (ev == 0) + return -EOPNOTSUPP; + if (ev == -1) + return -EINVAL; + *evp = ev; + return 0; +} + +static int _hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + int mapping = 0, err; + + switch (attr->type) { + case PERF_TYPE_HARDWARE: + if (attr->config >= PERF_COUNT_HW_MAX) + return -EINVAL; + + mapping = metag_pmu->event_map(attr->config); + break; + + case PERF_TYPE_HW_CACHE: + err = _hw_perf_cache_event(attr->config, &mapping); + if (err) + return err; + break; + } + + /* Return early if the event is unsupported */ + if (mapping == -1) + return -EINVAL; + + /* + * Early cores have "limited" counters - they have no overflow + * interrupts - and so are unable to do sampling without extra work + * and timer assistance. + */ + if (metag_pmu->max_period == 0) { + if (hwc->sample_period) + return -EINVAL; + } + + /* + * Don't assign an index until the event is placed into the hardware. + * -1 signifies that we're still deciding where to put it. On SMP + * systems each core has its own set of counters, so we can't do any + * constraint checking yet. + */ + hwc->idx = -1; + + /* Store the event encoding */ + hwc->config |= (unsigned long)mapping; + + /* + * For non-sampling runs, limit the sample_period to half of the + * counter width. This way, the new counter value should be less + * likely to overtake the previous one (unless there are IRQ latency + * issues...) + */ + if (metag_pmu->max_period) { + if (!hwc->sample_period) { + hwc->sample_period = metag_pmu->max_period >> 1; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + } + + return 0; +} + +static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx) +{ + struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events); + unsigned int config = event->config; + unsigned int tmp = config & 0xf0; + unsigned long flags; + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* + * Check if we're enabling the instruction counter (index of + * MAX_HWEVENTS - 1) + */ + if (METAG_INST_COUNTER == idx) { + WARN_ONCE((config != 0x100), + "invalid configuration (%d) for counter (%d)\n", + config, idx); + + /* Reset the cycle count */ + __core_reg_set(TXTACTCYC, 0); + goto unlock; + } + + /* Check for a core internal or performance channel event. */ + if (tmp) { + void *perf_addr = (void *)PERF_COUNT(idx); + + /* + * Anything other than a cycle count will write the low- + * nibble to the correct counter register. + */ + switch (tmp) { + case 0xd0: + perf_addr = (void *)PERF_ICORE(idx); + break; + + case 0xf0: + perf_addr = (void *)PERF_CHAN(idx); + break; + } + + metag_out32((tmp & 0x0f), perf_addr); + + /* + * Now we use the high nibble as the performance event to + * to count. + */ + config = tmp >> 4; + } + + /* + * Enabled counters start from 0. Early cores clear the count on + * write but newer cores don't, so we make sure that the count is + * set to 0. + */ + tmp = ((config & 0xf) << 28) | + ((1 << 24) << cpu_2_hwthread_id[get_cpu()]); + metag_out32(tmp, PERF_COUNT(idx)); +unlock: + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void metag_pmu_disable_counter(struct hw_perf_event *event, int idx) +{ + struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events); + unsigned int tmp = 0; + unsigned long flags; + + /* + * The cycle counter can't be disabled per se, as it's a hardware + * thread register which is always counting. We merely return if this + * is the counter we're attempting to disable. + */ + if (METAG_INST_COUNTER == idx) + return; + + /* + * The counter value _should_ have been read prior to disabling, + * as if we're running on an early core then the value gets reset to + * 0, and any read after that would be useless. On the newer cores, + * however, it's better to read-modify-update this for purposes of + * the overflow interrupt. + * Here we remove the thread id AND the event nibble (there are at + * least two events that count events that are core global and ignore + * the thread id mask). This only works because we don't mix thread + * performance counts, and event 0x00 requires a thread id mask! + */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + tmp = metag_in32(PERF_COUNT(idx)); + tmp &= 0x00ffffff; + metag_out32(tmp, PERF_COUNT(idx)); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static u64 metag_pmu_read_counter(int idx) +{ + u32 tmp = 0; + + /* The act of reading the cycle counter also clears it */ + if (METAG_INST_COUNTER == idx) { + __core_reg_swap(TXTACTCYC, tmp); + goto out; + } + + tmp = metag_in32(PERF_COUNT(idx)) & 0x00ffffff; +out: + return tmp; +} + +static void metag_pmu_write_counter(int idx, u32 val) +{ + struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events); + u32 tmp = 0; + unsigned long flags; + + /* + * This _shouldn't_ happen, but if it does, then we can just + * ignore the write, as the register is read-only and clear-on-write. + */ + if (METAG_INST_COUNTER == idx) + return; + + /* + * We'll keep the thread mask and event id, and just update the + * counter itself. Also , we should bound the value to 24-bits. + */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + val &= 0x00ffffff; + tmp = metag_in32(PERF_COUNT(idx)) & 0xff000000; + val |= tmp; + metag_out32(val, PERF_COUNT(idx)); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static int metag_pmu_event_map(int idx) +{ + return metag_general_events[idx]; +} + +static irqreturn_t metag_pmu_counter_overflow(int irq, void *dev) +{ + int idx = (int)dev; + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct perf_event *event = cpuhw->events[idx]; + struct hw_perf_event *hwc = &event->hw; + struct pt_regs *regs = get_irq_regs(); + struct perf_sample_data sampledata; + unsigned long flags; + u32 counter = 0; + + /* + * We need to stop the core temporarily from generating another + * interrupt while we disable this counter. However, we don't want + * to flag the counter as free + */ + __global_lock2(flags); + counter = metag_in32(PERF_COUNT(idx)); + metag_out32((counter & 0x00ffffff), PERF_COUNT(idx)); + __global_unlock2(flags); + + /* Update the counts and reset the sample period */ + metag_pmu_event_update(event, hwc, idx); + perf_sample_data_init(&sampledata, 0, hwc->last_period); + metag_pmu_event_set_period(event, hwc, idx); + + /* + * Enable the counter again once core overflow processing has + * completed. + */ + if (!perf_event_overflow(event, &sampledata, regs)) + metag_out32(counter, PERF_COUNT(idx)); + + return IRQ_HANDLED; +} + +static struct metag_pmu _metag_pmu = { + .handle_irq = metag_pmu_counter_overflow, + .enable = metag_pmu_enable_counter, + .disable = metag_pmu_disable_counter, + .read = metag_pmu_read_counter, + .write = metag_pmu_write_counter, + .event_map = metag_pmu_event_map, + .cache_events = &metag_pmu_cache_events, + .max_period = MAX_PERIOD, + .max_events = MAX_HWEVENTS, +}; + +/* PMU CPU hotplug notifier */ +static int __cpuinit metag_pmu_cpu_notify(struct notifier_block *b, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned int)hcpu; + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + + if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) + return NOTIFY_DONE; + + memset(cpuc, 0, sizeof(struct cpu_hw_events)); + raw_spin_lock_init(&cpuc->pmu_lock); + + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata metag_pmu_notifier = { + .notifier_call = metag_pmu_cpu_notify, +}; + +/* PMU Initialisation */ +static int __init init_hw_perf_events(void) +{ + int ret = 0, cpu; + u32 version = *(u32 *)METAC_ID; + int major = (version & METAC_ID_MAJOR_BITS) >> METAC_ID_MAJOR_S; + int min_rev = (version & (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) + >> METAC_ID_REV_S; + + /* Not a Meta 2 core, then not supported */ + if (0x02 > major) { + pr_info("no hardware counter support available\n"); + goto out; + } else if (0x02 == major) { + metag_pmu = &_metag_pmu; + + if (min_rev < 0x0104) { + /* + * A core without overflow interrupts, and clear-on- + * write counters. + */ + metag_pmu->handle_irq = NULL; + metag_pmu->write = NULL; + metag_pmu->max_period = 0; + } + + metag_pmu->name = "Meta 2"; + metag_pmu->version = version; + metag_pmu->pmu = pmu; + } + + pr_info("enabled with %s PMU driver, %d counters available\n", + metag_pmu->name, metag_pmu->max_events); + + /* Initialise the active events and reservation mutex */ + atomic_set(&metag_pmu->active_events, 0); + mutex_init(&metag_pmu->reserve_mutex); + + /* Clear the counters */ + metag_out32(0, PERF_COUNT(0)); + metag_out32(0, PERF_COUNT(1)); + + for_each_possible_cpu(cpu) { + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + + memset(cpuc, 0, sizeof(struct cpu_hw_events)); + raw_spin_lock_init(&cpuc->pmu_lock); + } + + register_cpu_notifier(&metag_pmu_notifier); + ret = perf_pmu_register(&pmu, (char *)metag_pmu->name, PERF_TYPE_RAW); +out: + return ret; +} +early_initcall(init_hw_perf_events); diff --git a/arch/metag/kernel/perf/perf_event.h b/arch/metag/kernel/perf/perf_event.h new file mode 100644 index 000000000000..fd10a1345b67 --- /dev/null +++ b/arch/metag/kernel/perf/perf_event.h @@ -0,0 +1,106 @@ +/* + * Meta performance counter support. + * Copyright (C) 2012 Imagination Technologies Ltd + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#ifndef METAG_PERF_EVENT_H_ +#define METAG_PERF_EVENT_H_ + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/perf_event.h> + +/* For performance counter definitions */ +#include <asm/metag_mem.h> + +/* + * The Meta core has two performance counters, with 24-bit resolution. Newer + * cores generate an overflow interrupt on transition from 0xffffff to 0. + * + * Each counter consists of the counter id, hardware thread id, and the count + * itself; each counter can be assigned to multiple hardware threads at any + * one time, with the returned count being an aggregate of events. A small + * number of events are thread global, i.e. they count the aggregate of all + * threads' events, regardless of the thread selected. + * + * Newer cores can store an arbitrary 24-bit number in the counter, whereas + * older cores will clear the counter bits on write. + * + * We also have a pseudo-counter in the form of the thread active cycles + * counter (which, incidentally, is also bound to + */ + +#define MAX_HWEVENTS 3 +#define MAX_PERIOD ((1UL << 24) - 1) +#define METAG_INST_COUNTER (MAX_HWEVENTS - 1) + +/** + * struct cpu_hw_events - a processor core's performance events + * @events: an array of perf_events active for a given index. + * @used_mask: a bitmap of in-use counters. + * @pmu_lock: a perf counter lock + * + * This is a per-cpu/core structure that maintains a record of its + * performance counters' state. + */ +struct cpu_hw_events { + struct perf_event *events[MAX_HWEVENTS]; + unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; + raw_spinlock_t pmu_lock; +}; + +/** + * struct metag_pmu - the Meta PMU structure + * @pmu: core pmu structure + * @name: pmu name + * @version: core version + * @handle_irq: overflow interrupt handler + * @enable: enable a counter + * @disable: disable a counter + * @read: read the value of a counter + * @write: write a value to a counter + * @event_map: kernel event to counter event id map + * @cache_events: kernel cache counter to core cache counter map + * @max_period: maximum value of the counter before overflow + * @max_events: maximum number of counters available at any one time + * @active_events: number of active counters + * @reserve_mutex: counter reservation mutex + * + * This describes the main functionality and data used by the performance + * event core. + */ +struct metag_pmu { + struct pmu pmu; + const char *name; + u32 version; + irqreturn_t (*handle_irq)(int irq_num, void *dev); + void (*enable)(struct hw_perf_event *evt, int idx); + void (*disable)(struct hw_perf_event *evt, int idx); + u64 (*read)(int idx); + void (*write)(int idx, u32 val); + int (*event_map)(int idx); + const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + u32 max_period; + int max_events; + atomic_t active_events; + struct mutex reserve_mutex; +}; + +/* Convenience macros for accessing the perf counters */ +/* Define some convenience accessors */ +#define PERF_COUNT(x) (PERF_COUNT0 + (sizeof(u64) * (x))) +#define PERF_ICORE(x) (PERF_ICORE0 + (sizeof(u64) * (x))) +#define PERF_CHAN(x) (PERF_CHAN0 + (sizeof(u64) * (x))) + +/* Cache index macros */ +#define C(x) PERF_COUNT_HW_CACHE_##x +#define CACHE_OP_UNSUPPORTED 0xfffe +#define CACHE_OP_NONSENSE 0xffff + +#endif diff --git a/arch/metag/kernel/perf_callchain.c b/arch/metag/kernel/perf_callchain.c new file mode 100644 index 000000000000..315633461a94 --- /dev/null +++ b/arch/metag/kernel/perf_callchain.c @@ -0,0 +1,96 @@ +/* + * Perf callchain handling code. + * + * Based on the ARM perf implementation. + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_event.h> +#include <linux/uaccess.h> +#include <asm/ptrace.h> +#include <asm/stacktrace.h> + +static bool is_valid_call(unsigned long calladdr) +{ + unsigned int callinsn; + + /* Check the possible return address is aligned. */ + if (!(calladdr & 0x3)) { + if (!get_user(callinsn, (unsigned int *)calladdr)) { + /* Check for CALLR or SWAP PC,D1RtP. */ + if ((callinsn & 0xff000000) == 0xab000000 || + callinsn == 0xa3200aa0) + return true; + } + } + return false; +} + +static struct metag_frame __user * +user_backtrace(struct metag_frame __user *user_frame, + struct perf_callchain_entry *entry) +{ + struct metag_frame frame; + unsigned long calladdr; + + /* We cannot rely on having frame pointers in user code. */ + while (1) { + /* Also check accessibility of one struct frame beyond */ + if (!access_ok(VERIFY_READ, user_frame, sizeof(frame))) + return 0; + if (__copy_from_user_inatomic(&frame, user_frame, + sizeof(frame))) + return 0; + + --user_frame; + + calladdr = frame.lr - 4; + if (is_valid_call(calladdr)) { + perf_callchain_store(entry, calladdr); + return user_frame; + } + } + + return 0; +} + +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) +{ + unsigned long sp = regs->ctx.AX[0].U0; + struct metag_frame __user *frame; + + frame = (struct metag_frame __user *)sp; + + --frame; + + while ((entry->nr < PERF_MAX_STACK_DEPTH) && frame) + frame = user_backtrace(frame, entry); +} + +/* + * Gets called by walk_stackframe() for every stackframe. This will be called + * whist unwinding the stackframe and is like a subroutine return so we use + * the PC. + */ +static int +callchain_trace(struct stackframe *fr, + void *data) +{ + struct perf_callchain_entry *entry = data; + perf_callchain_store(entry, fr->pc); + return 0; +} + +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) +{ + struct stackframe fr; + + fr.fp = regs->ctx.AX[1].U0; + fr.sp = regs->ctx.AX[0].U0; + fr.lr = regs->ctx.DX[4].U1; + fr.pc = regs->ctx.CurrPC; + walk_stackframe(&fr, callchain_trace, entry); +} diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c new file mode 100644 index 000000000000..c6efe62e5b76 --- /dev/null +++ b/arch/metag/kernel/process.c @@ -0,0 +1,461 @@ +/* + * Copyright (C) 2005,2006,2007,2008,2009,2010,2011 Imagination Technologies + * + * This file contains the architecture-dependent parts of process handling. + * + */ + +#include <linux/errno.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/reboot.h> +#include <linux/elfcore.h> +#include <linux/fs.h> +#include <linux/tick.h> +#include <linux/slab.h> +#include <linux/mman.h> +#include <linux/pm.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> +#include <asm/core_reg.h> +#include <asm/user_gateway.h> +#include <asm/tcm.h> +#include <asm/traps.h> +#include <asm/switch_to.h> + +/* + * Wait for the next interrupt and enable local interrupts + */ +static inline void arch_idle(void) +{ + int tmp; + + /* + * Quickly jump straight into the interrupt entry point without actually + * triggering an interrupt. When TXSTATI gets read the processor will + * block until an interrupt is triggered. + */ + asm volatile (/* Switch into ISTAT mode */ + "RTH\n\t" + /* Enable local interrupts */ + "MOV TXMASKI, %1\n\t" + /* + * We can't directly "SWAP PC, PCX", so we swap via a + * temporary. Essentially we do: + * PCX_new = 1f (the place to continue execution) + * PC = PCX_old + */ + "ADD %0, CPC0, #(1f-.)\n\t" + "SWAP PCX, %0\n\t" + "MOV PC, %0\n" + /* Continue execution here with interrupts enabled */ + "1:" + : "=a" (tmp) + : "r" (get_trigger_mask())); +} + +void cpu_idle(void) +{ + set_thread_flag(TIF_POLLING_NRFLAG); + + while (1) { + tick_nohz_idle_enter(); + rcu_idle_enter(); + + while (!need_resched()) { + /* + * We need to disable interrupts here to ensure we don't + * miss a wakeup call. + */ + local_irq_disable(); + if (!need_resched()) { +#ifdef CONFIG_HOTPLUG_CPU + if (cpu_is_offline(smp_processor_id())) + cpu_die(); +#endif + arch_idle(); + } else { + local_irq_enable(); + } + } + + rcu_idle_exit(); + tick_nohz_idle_exit(); + schedule_preempt_disabled(); + } +} + +void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); + +void (*soc_restart)(char *cmd); +void (*soc_halt)(void); + +void machine_restart(char *cmd) +{ + if (soc_restart) + soc_restart(cmd); + hard_processor_halt(HALT_OK); +} + +void machine_halt(void) +{ + if (soc_halt) + soc_halt(); + smp_send_stop(); + hard_processor_halt(HALT_OK); +} + +void machine_power_off(void) +{ + if (pm_power_off) + pm_power_off(); + smp_send_stop(); + hard_processor_halt(HALT_OK); +} + +#define FLAG_Z 0x8 +#define FLAG_N 0x4 +#define FLAG_O 0x2 +#define FLAG_C 0x1 + +void show_regs(struct pt_regs *regs) +{ + int i; + const char *AX0_names[] = {"A0StP", "A0FrP"}; + const char *AX1_names[] = {"A1GbP", "A1LbP"}; + + const char *DX0_names[] = { + "D0Re0", + "D0Ar6", + "D0Ar4", + "D0Ar2", + "D0FrT", + "D0.5 ", + "D0.6 ", + "D0.7 " + }; + + const char *DX1_names[] = { + "D1Re0", + "D1Ar5", + "D1Ar3", + "D1Ar1", + "D1RtP", + "D1.5 ", + "D1.6 ", + "D1.7 " + }; + + pr_info(" pt_regs @ %p\n", regs); + pr_info(" SaveMask = 0x%04hx\n", regs->ctx.SaveMask); + pr_info(" Flags = 0x%04hx (%c%c%c%c)\n", regs->ctx.Flags, + regs->ctx.Flags & FLAG_Z ? 'Z' : 'z', + regs->ctx.Flags & FLAG_N ? 'N' : 'n', + regs->ctx.Flags & FLAG_O ? 'O' : 'o', + regs->ctx.Flags & FLAG_C ? 'C' : 'c'); + pr_info(" TXRPT = 0x%08x\n", regs->ctx.CurrRPT); + pr_info(" PC = 0x%08x\n", regs->ctx.CurrPC); + + /* AX regs */ + for (i = 0; i < 2; i++) { + pr_info(" %s = 0x%08x ", + AX0_names[i], + regs->ctx.AX[i].U0); + printk(" %s = 0x%08x\n", + AX1_names[i], + regs->ctx.AX[i].U1); + } + + if (regs->ctx.SaveMask & TBICTX_XEXT_BIT) + pr_warn(" Extended state present - AX2.[01] will be WRONG\n"); + + /* Special place with AXx.2 */ + pr_info(" A0.2 = 0x%08x ", + regs->ctx.Ext.AX2.U0); + printk(" A1.2 = 0x%08x\n", + regs->ctx.Ext.AX2.U1); + + /* 'extended' AX regs (nominally, just AXx.3) */ + for (i = 0; i < (TBICTX_AX_REGS - 3); i++) { + pr_info(" A0.%d = 0x%08x ", i + 3, regs->ctx.AX3[i].U0); + printk(" A1.%d = 0x%08x\n", i + 3, regs->ctx.AX3[i].U1); + } + + for (i = 0; i < 8; i++) { + pr_info(" %s = 0x%08x ", DX0_names[i], regs->ctx.DX[i].U0); + printk(" %s = 0x%08x\n", DX1_names[i], regs->ctx.DX[i].U1); + } + + show_trace(NULL, (unsigned long *)regs->ctx.AX[0].U0, regs); +} + +int copy_thread(unsigned long clone_flags, unsigned long usp, + unsigned long arg, struct task_struct *tsk) +{ + struct pt_regs *childregs = task_pt_regs(tsk); + void *kernel_context = ((void *) childregs + + sizeof(struct pt_regs)); + unsigned long global_base; + + BUG_ON(((unsigned long)childregs) & 0x7); + BUG_ON(((unsigned long)kernel_context) & 0x7); + + memset(&tsk->thread.kernel_context, 0, + sizeof(tsk->thread.kernel_context)); + + tsk->thread.kernel_context = __TBISwitchInit(kernel_context, + ret_from_fork, + 0, 0); + + if (unlikely(tsk->flags & PF_KTHREAD)) { + /* + * Make sure we don't leak any kernel data to child's regs + * if kernel thread becomes a userspace thread in the future + */ + memset(childregs, 0 , sizeof(struct pt_regs)); + + global_base = __core_reg_get(A1GbP); + childregs->ctx.AX[0].U1 = (unsigned long) global_base; + childregs->ctx.AX[0].U0 = (unsigned long) kernel_context; + /* Set D1Ar1=arg and D1RtP=usp (fn) */ + childregs->ctx.DX[4].U1 = usp; + childregs->ctx.DX[3].U1 = arg; + tsk->thread.int_depth = 2; + return 0; + } + /* + * Get a pointer to where the new child's register block should have + * been pushed. + * The Meta's stack grows upwards, and the context is the the first + * thing to be pushed by TBX (phew) + */ + *childregs = *current_pt_regs(); + /* Set the correct stack for the clone mode */ + if (usp) + childregs->ctx.AX[0].U0 = ALIGN(usp, 8); + tsk->thread.int_depth = 1; + + /* set return value for child process */ + childregs->ctx.DX[0].U0 = 0; + + /* The TLS pointer is passed as an argument to sys_clone. */ + if (clone_flags & CLONE_SETTLS) + tsk->thread.tls_ptr = + (__force void __user *)childregs->ctx.DX[1].U1; + +#ifdef CONFIG_METAG_FPU + if (tsk->thread.fpu_context) { + struct meta_fpu_context *ctx; + + ctx = kmemdup(tsk->thread.fpu_context, + sizeof(struct meta_fpu_context), GFP_ATOMIC); + tsk->thread.fpu_context = ctx; + } +#endif + +#ifdef CONFIG_METAG_DSP + if (tsk->thread.dsp_context) { + struct meta_ext_context *ctx; + int i; + + ctx = kmemdup(tsk->thread.dsp_context, + sizeof(struct meta_ext_context), GFP_ATOMIC); + for (i = 0; i < 2; i++) + ctx->ram[i] = kmemdup(ctx->ram[i], ctx->ram_sz[i], + GFP_ATOMIC); + tsk->thread.dsp_context = ctx; + } +#endif + + return 0; +} + +#ifdef CONFIG_METAG_FPU +static void alloc_fpu_context(struct thread_struct *thread) +{ + thread->fpu_context = kzalloc(sizeof(struct meta_fpu_context), + GFP_ATOMIC); +} + +static void clear_fpu(struct thread_struct *thread) +{ + thread->user_flags &= ~TBICTX_FPAC_BIT; + kfree(thread->fpu_context); + thread->fpu_context = NULL; +} +#else +static void clear_fpu(struct thread_struct *thread) +{ +} +#endif + +#ifdef CONFIG_METAG_DSP +static void clear_dsp(struct thread_struct *thread) +{ + if (thread->dsp_context) { + kfree(thread->dsp_context->ram[0]); + kfree(thread->dsp_context->ram[1]); + + kfree(thread->dsp_context); + + thread->dsp_context = NULL; + } + + __core_reg_set(D0.8, 0); +} +#else +static void clear_dsp(struct thread_struct *thread) +{ +} +#endif + +struct task_struct *__sched __switch_to(struct task_struct *prev, + struct task_struct *next) +{ + TBIRES to, from; + + to.Switch.pCtx = next->thread.kernel_context; + to.Switch.pPara = prev; + +#ifdef CONFIG_METAG_FPU + if (prev->thread.user_flags & TBICTX_FPAC_BIT) { + struct pt_regs *regs = task_pt_regs(prev); + TBIRES state; + + state.Sig.SaveMask = prev->thread.user_flags; + state.Sig.pCtx = ®s->ctx; + + if (!prev->thread.fpu_context) + alloc_fpu_context(&prev->thread); + if (prev->thread.fpu_context) + __TBICtxFPUSave(state, prev->thread.fpu_context); + } + /* + * Force a restore of the FPU context next time this process is + * scheduled. + */ + if (prev->thread.fpu_context) + prev->thread.fpu_context->needs_restore = true; +#endif + + + from = __TBISwitch(to, &prev->thread.kernel_context); + + /* Restore TLS pointer for this process. */ + set_gateway_tls(current->thread.tls_ptr); + + return (struct task_struct *) from.Switch.pPara; +} + +void flush_thread(void) +{ + clear_fpu(¤t->thread); + clear_dsp(¤t->thread); +} + +/* + * Free current thread data structures etc. + */ +void exit_thread(void) +{ + clear_fpu(¤t->thread); + clear_dsp(¤t->thread); +} + +/* TODO: figure out how to unwind the kernel stack here to figure out + * where we went to sleep. */ +unsigned long get_wchan(struct task_struct *p) +{ + return 0; +} + +int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) +{ + /* Returning 0 indicates that the FPU state was not stored (as it was + * not in use) */ + return 0; +} + +#ifdef CONFIG_METAG_USER_TCM + +#define ELF_MIN_ALIGN PAGE_SIZE + +#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1)) +#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1)) +#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1)) + +#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) + +unsigned long __metag_elf_map(struct file *filep, unsigned long addr, + struct elf_phdr *eppnt, int prot, int type, + unsigned long total_size) +{ + unsigned long map_addr, size; + unsigned long page_off = ELF_PAGEOFFSET(eppnt->p_vaddr); + unsigned long raw_size = eppnt->p_filesz + page_off; + unsigned long off = eppnt->p_offset - page_off; + unsigned int tcm_tag; + addr = ELF_PAGESTART(addr); + size = ELF_PAGEALIGN(raw_size); + + /* mmap() will return -EINVAL if given a zero size, but a + * segment with zero filesize is perfectly valid */ + if (!size) + return addr; + + tcm_tag = tcm_lookup_tag(addr); + + if (tcm_tag != TCM_INVALID_TAG) + type &= ~MAP_FIXED; + + /* + * total_size is the size of the ELF (interpreter) image. + * The _first_ mmap needs to know the full size, otherwise + * randomization might put this image into an overlapping + * position with the ELF binary image. (since size < total_size) + * So we first map the 'big' image - and unmap the remainder at + * the end. (which unmap is needed for ELF images with holes.) + */ + if (total_size) { + total_size = ELF_PAGEALIGN(total_size); + map_addr = vm_mmap(filep, addr, total_size, prot, type, off); + if (!BAD_ADDR(map_addr)) + vm_munmap(map_addr+size, total_size-size); + } else + map_addr = vm_mmap(filep, addr, size, prot, type, off); + + if (!BAD_ADDR(map_addr) && tcm_tag != TCM_INVALID_TAG) { + struct tcm_allocation *tcm; + unsigned long tcm_addr; + + tcm = kmalloc(sizeof(*tcm), GFP_KERNEL); + if (!tcm) + return -ENOMEM; + + tcm_addr = tcm_alloc(tcm_tag, raw_size); + if (tcm_addr != addr) { + kfree(tcm); + return -ENOMEM; + } + + tcm->tag = tcm_tag; + tcm->addr = tcm_addr; + tcm->size = raw_size; + + list_add(&tcm->list, ¤t->mm->context.tcm); + + eppnt->p_vaddr = map_addr; + if (copy_from_user((void *) addr, (void __user *) map_addr, + raw_size)) + return -EFAULT; + } + + return map_addr; +} +#endif diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c new file mode 100644 index 000000000000..47a8828615a5 --- /dev/null +++ b/arch/metag/kernel/ptrace.c @@ -0,0 +1,380 @@ +/* + * Copyright (C) 2005-2012 Imagination Technologies Ltd. + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file COPYING in the main directory of + * this archive for more details. + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/regset.h> +#include <linux/tracehook.h> +#include <linux/elf.h> +#include <linux/uaccess.h> +#include <trace/syscall.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + +/* + * user_regset definitions. + */ + +int metag_gp_regs_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const void *ptr; + unsigned long data; + int ret; + + /* D{0-1}.{0-7} */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + regs->ctx.DX, 0, 4*16); + if (ret) + goto out; + /* A{0-1}.{0-1} */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + regs->ctx.AX, 4*16, 4*20); + if (ret) + goto out; + /* A{0-1}.2 */ + if (regs->ctx.SaveMask & TBICTX_XEXT_BIT) + ptr = regs->ctx.Ext.Ctx.pExt; + else + ptr = ®s->ctx.Ext.AX2; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + ptr, 4*20, 4*22); + if (ret) + goto out; + /* A{0-1}.3 */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + ®s->ctx.AX3, 4*22, 4*24); + if (ret) + goto out; + /* PC */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + ®s->ctx.CurrPC, 4*24, 4*25); + if (ret) + goto out; + /* TXSTATUS */ + data = (unsigned long)regs->ctx.Flags; + if (regs->ctx.SaveMask & TBICTX_CBUF_BIT) + data |= USER_GP_REGS_STATUS_CATCH_BIT; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &data, 4*25, 4*26); + if (ret) + goto out; + /* TXRPT, TXBPOBITS, TXMODE */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + ®s->ctx.CurrRPT, 4*26, 4*29); + if (ret) + goto out; + /* Padding */ + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + 4*29, 4*30); +out: + return ret; +} + +int metag_gp_regs_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + void *ptr; + unsigned long data; + int ret; + + /* D{0-1}.{0-7} */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + regs->ctx.DX, 0, 4*16); + if (ret) + goto out; + /* A{0-1}.{0-1} */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + regs->ctx.AX, 4*16, 4*20); + if (ret) + goto out; + /* A{0-1}.2 */ + if (regs->ctx.SaveMask & TBICTX_XEXT_BIT) + ptr = regs->ctx.Ext.Ctx.pExt; + else + ptr = ®s->ctx.Ext.AX2; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ptr, 4*20, 4*22); + if (ret) + goto out; + /* A{0-1}.3 */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->ctx.AX3, 4*22, 4*24); + if (ret) + goto out; + /* PC */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->ctx.CurrPC, 4*24, 4*25); + if (ret) + goto out; + /* TXSTATUS */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &data, 4*25, 4*26); + if (ret) + goto out; + regs->ctx.Flags = data & 0xffff; + if (data & USER_GP_REGS_STATUS_CATCH_BIT) + regs->ctx.SaveMask |= TBICTX_XCBF_BIT | TBICTX_CBUF_BIT; + else + regs->ctx.SaveMask &= ~TBICTX_CBUF_BIT; + /* TXRPT, TXBPOBITS, TXMODE */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->ctx.CurrRPT, 4*26, 4*29); +out: + return ret; +} + +static int metag_gp_regs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const struct pt_regs *regs = task_pt_regs(target); + return metag_gp_regs_copyout(regs, pos, count, kbuf, ubuf); +} + +static int metag_gp_regs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + return metag_gp_regs_copyin(regs, pos, count, kbuf, ubuf); +} + +int metag_cb_regs_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + /* TXCATCH{0-3} */ + if (regs->ctx.SaveMask & TBICTX_XCBF_BIT) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + regs->extcb0, 0, 4*4); + else + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + 0, 4*4); + return ret; +} + +int metag_cb_regs_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + /* TXCATCH{0-3} */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + regs->extcb0, 0, 4*4); + return ret; +} + +static int metag_cb_regs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const struct pt_regs *regs = task_pt_regs(target); + return metag_cb_regs_copyout(regs, pos, count, kbuf, ubuf); +} + +static int metag_cb_regs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + return metag_cb_regs_copyin(regs, pos, count, kbuf, ubuf); +} + +int metag_rp_state_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned long mask; + u64 *ptr; + int ret, i; + + /* Empty read pipeline */ + if (!(regs->ctx.SaveMask & TBICTX_CBRP_BIT)) { + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + 0, 4*13); + goto out; + } + + mask = (regs->ctx.CurrDIVTIME & TXDIVTIME_RPMASK_BITS) >> + TXDIVTIME_RPMASK_S; + + /* Read pipeline entries */ + ptr = (void *)®s->extcb0[1]; + for (i = 0; i < 6; ++i, ++ptr) { + if (mask & (1 << i)) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + ptr, 8*i, 8*(i + 1)); + else + ret = user_regset_copyout_zero(&pos, &count, &kbuf, + &ubuf, 8*i, 8*(i + 1)); + if (ret) + goto out; + } + /* Mask of entries */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &mask, 4*12, 4*13); +out: + return ret; +} + +int metag_rp_state_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct user_rp_state rp; + unsigned long long *ptr; + int ret, i; + + /* Read the entire pipeline before making any changes */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &rp, 0, 4*13); + if (ret) + goto out; + + /* Write pipeline entries */ + ptr = (void *)®s->extcb0[1]; + for (i = 0; i < 6; ++i, ++ptr) + if (rp.mask & (1 << i)) + *ptr = rp.entries[i]; + + /* Update RPMask in TXDIVTIME */ + regs->ctx.CurrDIVTIME &= ~TXDIVTIME_RPMASK_BITS; + regs->ctx.CurrDIVTIME |= (rp.mask << TXDIVTIME_RPMASK_S) + & TXDIVTIME_RPMASK_BITS; + + /* Set/clear flags to indicate catch/read pipeline state */ + if (rp.mask) + regs->ctx.SaveMask |= TBICTX_XCBF_BIT | TBICTX_CBRP_BIT; + else + regs->ctx.SaveMask &= ~TBICTX_CBRP_BIT; +out: + return ret; +} + +static int metag_rp_state_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const struct pt_regs *regs = task_pt_regs(target); + return metag_rp_state_copyout(regs, pos, count, kbuf, ubuf); +} + +static int metag_rp_state_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + return metag_rp_state_copyin(regs, pos, count, kbuf, ubuf); +} + +enum metag_regset { + REGSET_GENERAL, + REGSET_CBUF, + REGSET_READPIPE, +}; + +static const struct user_regset metag_regsets[] = { + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(long), + .align = sizeof(long long), + .get = metag_gp_regs_get, + .set = metag_gp_regs_set, + }, + [REGSET_CBUF] = { + .core_note_type = NT_METAG_CBUF, + .n = sizeof(struct user_cb_regs) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long long), + .get = metag_cb_regs_get, + .set = metag_cb_regs_set, + }, + [REGSET_READPIPE] = { + .core_note_type = NT_METAG_RPIPE, + .n = sizeof(struct user_rp_state) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long long), + .get = metag_rp_state_get, + .set = metag_rp_state_set, + }, +}; + +static const struct user_regset_view user_metag_view = { + .name = "metag", + .e_machine = EM_METAG, + .regsets = metag_regsets, + .n = ARRAY_SIZE(metag_regsets) +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + return &user_metag_view; +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure single step bits etc are not set. + */ +void ptrace_disable(struct task_struct *child) +{ + /* nothing to do.. */ +} + +long arch_ptrace(struct task_struct *child, long request, unsigned long addr, + unsigned long data) +{ + int ret; + + switch (request) { + default: + ret = ptrace_request(child, request, addr, data); + break; + } + + return ret; +} + +int syscall_trace_enter(struct pt_regs *regs) +{ + int ret = 0; + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + ret = tracehook_report_syscall_entry(regs); + + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->ctx.DX[0].U1); + + return ret ? -1 : regs->ctx.DX[0].U1; +} + +void syscall_trace_leave(struct pt_regs *regs) +{ + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs->ctx.DX[0].U1); + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, 0); +} diff --git a/arch/metag/kernel/setup.c b/arch/metag/kernel/setup.c new file mode 100644 index 000000000000..879246170aec --- /dev/null +++ b/arch/metag/kernel/setup.c @@ -0,0 +1,631 @@ +/* + * Copyright (C) 2005-2012 Imagination Technologies Ltd. + * + * This file contains the architecture-dependant parts of system setup. + * + */ + +#include <linux/export.h> +#include <linux/bootmem.h> +#include <linux/console.h> +#include <linux/cpu.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/genhd.h> +#include <linux/init.h> +#include <linux/initrd.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/memblock.h> +#include <linux/mm.h> +#include <linux/of_fdt.h> +#include <linux/pfn.h> +#include <linux/root_dev.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/start_kernel.h> +#include <linux/string.h> + +#include <asm/cachepart.h> +#include <asm/clock.h> +#include <asm/core_reg.h> +#include <asm/cpu.h> +#include <asm/da.h> +#include <asm/highmem.h> +#include <asm/hwthread.h> +#include <asm/l2cache.h> +#include <asm/mach/arch.h> +#include <asm/metag_mem.h> +#include <asm/metag_regs.h> +#include <asm/mmu.h> +#include <asm/mmzone.h> +#include <asm/processor.h> +#include <asm/prom.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/traps.h> + +/* Priv protect as many registers as possible. */ +#define DEFAULT_PRIV (TXPRIVEXT_COPRO_BITS | \ + TXPRIVEXT_TXTRIGGER_BIT | \ + TXPRIVEXT_TXGBLCREG_BIT | \ + TXPRIVEXT_ILOCK_BIT | \ + TXPRIVEXT_TXITACCYC_BIT | \ + TXPRIVEXT_TXDIVTIME_BIT | \ + TXPRIVEXT_TXAMAREGX_BIT | \ + TXPRIVEXT_TXTIMERI_BIT | \ + TXPRIVEXT_TXSTATUS_BIT | \ + TXPRIVEXT_TXDISABLE_BIT) + +/* Meta2 specific bits. */ +#ifdef CONFIG_METAG_META12 +#define META2_PRIV 0 +#else +#define META2_PRIV (TXPRIVEXT_TXTIMER_BIT | \ + TXPRIVEXT_TRACE_BIT) +#endif + +/* Unaligned access checking bits. */ +#ifdef CONFIG_METAG_UNALIGNED +#define UNALIGNED_PRIV TXPRIVEXT_ALIGNREW_BIT +#else +#define UNALIGNED_PRIV 0 +#endif + +#define PRIV_BITS (DEFAULT_PRIV | \ + META2_PRIV | \ + UNALIGNED_PRIV) + +/* + * Protect access to: + * 0x06000000-0x07ffffff Direct mapped region + * 0x05000000-0x05ffffff MMU table region (Meta1) + * 0x04400000-0x047fffff Cache flush region + * 0x84000000-0x87ffffff Core cache memory region (Meta2) + * + * Allow access to: + * 0x80000000-0x81ffffff Core code memory region (Meta2) + */ +#ifdef CONFIG_METAG_META12 +#define PRIVSYSR_BITS TXPRIVSYSR_ALL_BITS +#else +#define PRIVSYSR_BITS (TXPRIVSYSR_ALL_BITS & ~TXPRIVSYSR_CORECODE_BIT) +#endif + +/* Protect all 0x02xxxxxx and 0x048xxxxx. */ +#define PIOREG_BITS 0xffffffff + +/* + * Protect all 0x04000xx0 (system events) + * except write combiner flush and write fence (system events 4 and 5). + */ +#define PSYREG_BITS 0xfffffffb + + +extern char _heap_start[]; + +#ifdef CONFIG_METAG_BUILTIN_DTB +extern u32 __dtb_start[]; +#endif + +#ifdef CONFIG_DA_CONSOLE +/* Our early channel based console driver */ +extern struct console dash_console; +#endif + +struct machine_desc *machine_desc __initdata; + +/* + * Map a Linux CPU number to a hardware thread ID + * In SMP this will be setup with the correct mapping at startup; in UP this + * will map to the HW thread on which we are running. + */ +u8 cpu_2_hwthread_id[NR_CPUS] __read_mostly = { + [0 ... NR_CPUS-1] = BAD_HWTHREAD_ID +}; + +/* + * Map a hardware thread ID to a Linux CPU number + * In SMP this will be fleshed out with the correct CPU ID for a particular + * hardware thread. In UP this will be initialised with the boot CPU ID. + */ +u8 hwthread_id_2_cpu[4] __read_mostly = { + [0 ... 3] = BAD_CPU_ID +}; + +/* The relative offset of the MMU mapped memory (from ldlk or bootloader) + * to the real physical memory. This is needed as we have to use the + * physical addresses in the MMU tables (pte entries), and not the virtual + * addresses. + * This variable is used in the __pa() and __va() macros, and should + * probably only be used via them. + */ +unsigned int meta_memoffset; +EXPORT_SYMBOL(meta_memoffset); + +static char __initdata *original_cmd_line; + +DEFINE_PER_CPU(PTBI, pTBI); + +/* + * Mapping are specified as "CPU_ID:HWTHREAD_ID", e.g. + * + * "hwthread_map=0:1,1:2,2:3,3:0" + * + * Linux CPU ID HWTHREAD_ID + * --------------------------- + * 0 1 + * 1 2 + * 2 3 + * 3 0 + */ +static int __init parse_hwthread_map(char *p) +{ + int cpu; + + while (*p) { + cpu = (*p++) - '0'; + if (cpu < 0 || cpu > 9) + goto err_cpu; + + p++; /* skip semi-colon */ + cpu_2_hwthread_id[cpu] = (*p++) - '0'; + if (cpu_2_hwthread_id[cpu] >= 4) + goto err_thread; + hwthread_id_2_cpu[cpu_2_hwthread_id[cpu]] = cpu; + + if (*p == ',') + p++; /* skip comma */ + } + + return 0; +err_cpu: + pr_err("%s: hwthread_map cpu argument out of range\n", __func__); + return -EINVAL; +err_thread: + pr_err("%s: hwthread_map thread argument out of range\n", __func__); + return -EINVAL; +} +early_param("hwthread_map", parse_hwthread_map); + +void __init dump_machine_table(void) +{ + struct machine_desc *p; + const char **compat; + + pr_info("Available machine support:\n\tNAME\t\tCOMPATIBLE LIST\n"); + for_each_machine_desc(p) { + pr_info("\t%s\t[", p->name); + for (compat = p->dt_compat; compat && *compat; ++compat) + printk(" '%s'", *compat); + printk(" ]\n"); + } + + pr_info("\nPlease check your kernel config and/or bootloader.\n"); + + hard_processor_halt(HALT_PANIC); +} + +#ifdef CONFIG_METAG_HALT_ON_PANIC +static int metag_panic_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + hard_processor_halt(HALT_PANIC); + return NOTIFY_DONE; +} + +static struct notifier_block metag_panic_block = { + metag_panic_event, + NULL, + 0 +}; +#endif + +void __init setup_arch(char **cmdline_p) +{ + unsigned long start_pfn; + unsigned long text_start = (unsigned long)(&_stext); + unsigned long cpu = smp_processor_id(); + unsigned long heap_start, heap_end; + unsigned long start_pte; + PTBI _pTBI; + PTBISEG p_heap; + int heap_id, i; + + metag_cache_probe(); + + metag_da_probe(); +#ifdef CONFIG_DA_CONSOLE + if (metag_da_enabled()) { + /* An early channel based console driver */ + register_console(&dash_console); + add_preferred_console("ttyDA", 1, NULL); + } +#endif + + /* try interpreting the argument as a device tree */ + machine_desc = setup_machine_fdt(original_cmd_line); + /* if it doesn't look like a device tree it must be a command line */ + if (!machine_desc) { +#ifdef CONFIG_METAG_BUILTIN_DTB + /* try the embedded device tree */ + machine_desc = setup_machine_fdt(__dtb_start); + if (!machine_desc) + panic("Invalid embedded device tree."); +#else + /* use the default machine description */ + machine_desc = default_machine_desc(); +#endif +#ifndef CONFIG_CMDLINE_FORCE + /* append the bootloader cmdline to any builtin fdt cmdline */ + if (boot_command_line[0] && original_cmd_line[0]) + strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); + strlcat(boot_command_line, original_cmd_line, + COMMAND_LINE_SIZE); +#endif + } + setup_meta_clocks(machine_desc->clocks); + + *cmdline_p = boot_command_line; + parse_early_param(); + + /* + * Make sure we don't alias in dcache or icache + */ + check_for_cache_aliasing(cpu); + + +#ifdef CONFIG_METAG_HALT_ON_PANIC + atomic_notifier_chain_register(&panic_notifier_list, + &metag_panic_block); +#endif + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + + if (!(__core_reg_get(TXSTATUS) & TXSTATUS_PSTAT_BIT)) + panic("Privilege must be enabled for this thread."); + + _pTBI = __TBI(TBID_ISTAT_BIT); + + per_cpu(pTBI, cpu) = _pTBI; + + if (!per_cpu(pTBI, cpu)) + panic("No TBI found!"); + + /* + * Initialize all interrupt vectors to our copy of __TBIUnExpXXX, + * rather than the version from the bootloader. This makes call + * stacks easier to understand and may allow us to unmap the + * bootloader at some point. + * + * We need to keep the LWK handler that TBI installed in order to + * be able to do inter-thread comms. + */ + for (i = 0; i <= TBID_SIGNUM_MAX; i++) + if (i != TBID_SIGNUM_LWK) + _pTBI->fnSigs[i] = __TBIUnExpXXX; + + /* A Meta requirement is that the kernel is loaded (virtually) + * at the PAGE_OFFSET. + */ + if (PAGE_OFFSET != text_start) + panic("Kernel not loaded at PAGE_OFFSET (%#x) but at %#lx.", + PAGE_OFFSET, text_start); + + start_pte = mmu_read_second_level_page(text_start); + + /* + * Kernel pages should have the PRIV bit set by the bootloader. + */ + if (!(start_pte & _PAGE_KERNEL)) + panic("kernel pte does not have PRIV set"); + + /* + * See __pa and __va in include/asm/page.h. + * This value is negative when running in local space but the + * calculations work anyway. + */ + meta_memoffset = text_start - (start_pte & PAGE_MASK); + + /* Now lets look at the heap space */ + heap_id = (__TBIThreadId() & TBID_THREAD_BITS) + + TBID_SEG(0, TBID_SEGSCOPE_LOCAL, TBID_SEGTYPE_HEAP); + + p_heap = __TBIFindSeg(NULL, heap_id); + + if (!p_heap) + panic("Could not find heap from TBI!"); + + /* The heap begins at the first full page after the kernel data. */ + heap_start = (unsigned long) &_heap_start; + + /* The heap ends at the end of the heap segment specified with + * ldlk. + */ + if (is_global_space(text_start)) { + pr_debug("WARNING: running in global space!\n"); + heap_end = (unsigned long)p_heap->pGAddr + p_heap->Bytes; + } else { + heap_end = (unsigned long)p_heap->pLAddr + p_heap->Bytes; + } + + ROOT_DEV = Root_RAM0; + + /* init_mm is the mm struct used for the first task. It is then + * cloned for all other tasks spawned from that task. + * + * Note - we are using the virtual addresses here. + */ + init_mm.start_code = (unsigned long)(&_stext); + init_mm.end_code = (unsigned long)(&_etext); + init_mm.end_data = (unsigned long)(&_edata); + init_mm.brk = (unsigned long)heap_start; + + min_low_pfn = PFN_UP(__pa(text_start)); + max_low_pfn = PFN_DOWN(__pa(heap_end)); + + pfn_base = min_low_pfn; + + /* Round max_pfn up to a 4Mb boundary. The free_bootmem_node() + * call later makes sure to keep the rounded up pages marked reserved. + */ + max_pfn = max_low_pfn + ((1 << MAX_ORDER) - 1); + max_pfn &= ~((1 << MAX_ORDER) - 1); + + start_pfn = PFN_UP(__pa(heap_start)); + + if (min_low_pfn & ((1 << MAX_ORDER) - 1)) { + /* Theoretically, we could expand the space that the + * bootmem allocator covers - much as we do for the + * 'high' address, and then tell the bootmem system + * that the lowest chunk is 'not available'. Right + * now it is just much easier to constrain the + * user to always MAX_ORDER align their kernel space. + */ + + panic("Kernel must be %d byte aligned, currently at %#lx.", + 1 << (MAX_ORDER + PAGE_SHIFT), + min_low_pfn << PAGE_SHIFT); + } + +#ifdef CONFIG_HIGHMEM + highstart_pfn = highend_pfn = max_pfn; + high_memory = (void *) __va(PFN_PHYS(highstart_pfn)); +#else + high_memory = (void *)__va(PFN_PHYS(max_pfn)); +#endif + + paging_init(heap_end); + + setup_priv(); + + /* Setup the boot cpu's mapping. The rest will be setup below. */ + cpu_2_hwthread_id[smp_processor_id()] = hard_processor_id(); + hwthread_id_2_cpu[hard_processor_id()] = smp_processor_id(); + + /* Copy device tree blob into non-init memory before unflattening */ + copy_fdt(); + unflatten_device_tree(); + +#ifdef CONFIG_SMP + smp_init_cpus(); +#endif + + if (machine_desc->init_early) + machine_desc->init_early(); +} + +static int __init customize_machine(void) +{ + /* customizes platform devices, or adds new ones */ + if (machine_desc->init_machine) + machine_desc->init_machine(); + return 0; +} +arch_initcall(customize_machine); + +static int __init init_machine_late(void) +{ + if (machine_desc->init_late) + machine_desc->init_late(); + return 0; +} +late_initcall(init_machine_late); + +#ifdef CONFIG_PROC_FS +/* + * Get CPU information for use by the procfs. + */ +static const char *get_cpu_capabilities(unsigned int txenable) +{ +#ifdef CONFIG_METAG_META21 + /* See CORE_ID in META HTP.GP TRM - Architecture Overview 2.1.238 */ + int coreid = metag_in32(METAC_CORE_ID); + unsigned int dsp_type = (coreid >> 3) & 7; + unsigned int fpu_type = (coreid >> 7) & 3; + + switch (dsp_type | fpu_type << 3) { + case (0x00): return "EDSP"; + case (0x01): return "DSP"; + case (0x08): return "EDSP+LFPU"; + case (0x09): return "DSP+LFPU"; + case (0x10): return "EDSP+FPU"; + case (0x11): return "DSP+FPU"; + } + return "UNKNOWN"; + +#else + if (!(txenable & TXENABLE_CLASS_BITS)) + return "DSP"; + else + return ""; +#endif +} + +static int show_cpuinfo(struct seq_file *m, void *v) +{ + const char *cpu; + unsigned int txenable, thread_id, major, minor; + unsigned long clockfreq = get_coreclock(); +#ifdef CONFIG_SMP + int i; + unsigned long lpj; +#endif + + cpu = "META"; + + txenable = __core_reg_get(TXENABLE); + major = (txenable & TXENABLE_MAJOR_REV_BITS) >> TXENABLE_MAJOR_REV_S; + minor = (txenable & TXENABLE_MINOR_REV_BITS) >> TXENABLE_MINOR_REV_S; + thread_id = (txenable >> 8) & 0x3; + +#ifdef CONFIG_SMP + for_each_online_cpu(i) { + lpj = per_cpu(cpu_data, i).loops_per_jiffy; + txenable = core_reg_read(TXUCT_ID, TXENABLE_REGNUM, + cpu_2_hwthread_id[i]); + + seq_printf(m, "CPU:\t\t%s %d.%d (thread %d)\n" + "Clocking:\t%lu.%1luMHz\n" + "BogoMips:\t%lu.%02lu\n" + "Calibration:\t%lu loops\n" + "Capabilities:\t%s\n\n", + cpu, major, minor, i, + clockfreq / 1000000, (clockfreq / 100000) % 10, + lpj / (500000 / HZ), (lpj / (5000 / HZ)) % 100, + lpj, + get_cpu_capabilities(txenable)); + } +#else + seq_printf(m, "CPU:\t\t%s %d.%d (thread %d)\n" + "Clocking:\t%lu.%1luMHz\n" + "BogoMips:\t%lu.%02lu\n" + "Calibration:\t%lu loops\n" + "Capabilities:\t%s\n", + cpu, major, minor, thread_id, + clockfreq / 1000000, (clockfreq / 100000) % 10, + loops_per_jiffy / (500000 / HZ), + (loops_per_jiffy / (5000 / HZ)) % 100, + loops_per_jiffy, + get_cpu_capabilities(txenable)); +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_METAG_L2C + if (meta_l2c_is_present()) { + seq_printf(m, "L2 cache:\t%s\n" + "L2 cache size:\t%d KB\n", + meta_l2c_is_enabled() ? "enabled" : "disabled", + meta_l2c_size() >> 10); + } +#endif + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return (void *)(*pos == 0); +} +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + return NULL; +} +static void c_stop(struct seq_file *m, void *v) +{ +} +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; +#endif /* CONFIG_PROC_FS */ + +void __init metag_start_kernel(char *args) +{ + /* Zero the timer register so timestamps are from the point at + * which the kernel started running. + */ + __core_reg_set(TXTIMER, 0); + + /* Clear the bss. */ + memset(__bss_start, 0, + (unsigned long)__bss_stop - (unsigned long)__bss_start); + + /* Remember where these are for use in setup_arch */ + original_cmd_line = args; + + current_thread_info()->cpu = hard_processor_id(); + + start_kernel(); +} + +/** + * setup_priv() - Set up privilege protection registers. + * + * Set up privilege protection registers such as TXPRIVEXT to prevent userland + * from touching our precious registers and sensitive memory areas. + */ +void setup_priv(void) +{ + unsigned int offset = hard_processor_id() << TXPRIVREG_STRIDE_S; + + __core_reg_set(TXPRIVEXT, PRIV_BITS); + + metag_out32(PRIVSYSR_BITS, T0PRIVSYSR + offset); + metag_out32(PIOREG_BITS, T0PIOREG + offset); + metag_out32(PSYREG_BITS, T0PSYREG + offset); +} + +PTBI pTBI_get(unsigned int cpu) +{ + return per_cpu(pTBI, cpu); +} +EXPORT_SYMBOL(pTBI_get); + +#if defined(CONFIG_METAG_DSP) && defined(CONFIG_METAG_FPU) +char capabilites[] = "dsp fpu"; +#elif defined(CONFIG_METAG_DSP) +char capabilites[] = "dsp"; +#elif defined(CONFIG_METAG_FPU) +char capabilites[] = "fpu"; +#else +char capabilites[] = ""; +#endif + +static struct ctl_table caps_kern_table[] = { + { + .procname = "capabilities", + .data = capabilites, + .maxlen = sizeof(capabilites), + .mode = 0444, + .proc_handler = proc_dostring, + }, + {} +}; + +static struct ctl_table caps_root_table[] = { + { + .procname = "kernel", + .mode = 0555, + .child = caps_kern_table, + }, + {} +}; + +static int __init capabilities_register_sysctl(void) +{ + struct ctl_table_header *caps_table_header; + + caps_table_header = register_sysctl_table(caps_root_table); + if (!caps_table_header) { + pr_err("Unable to register CAPABILITIES sysctl\n"); + return -ENOMEM; + } + + return 0; +} + +core_initcall(capabilities_register_sysctl); diff --git a/arch/metag/kernel/signal.c b/arch/metag/kernel/signal.c new file mode 100644 index 000000000000..3be61cf0b147 --- /dev/null +++ b/arch/metag/kernel/signal.c @@ -0,0 +1,344 @@ +/* + * Copyright (C) 1991,1992 Linus Torvalds + * Copyright (C) 2005-2012 Imagination Technologies Ltd. + * + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson + * + */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/ptrace.h> +#include <linux/unistd.h> +#include <linux/stddef.h> +#include <linux/personality.h> +#include <linux/uaccess.h> +#include <linux/tracehook.h> + +#include <asm/ucontext.h> +#include <asm/cacheflush.h> +#include <asm/switch.h> +#include <asm/syscall.h> +#include <asm/syscalls.h> + +#define REG_FLAGS ctx.SaveMask +#define REG_RETVAL ctx.DX[0].U0 +#define REG_SYSCALL ctx.DX[0].U1 +#define REG_SP ctx.AX[0].U0 +#define REG_ARG1 ctx.DX[3].U1 +#define REG_ARG2 ctx.DX[3].U0 +#define REG_ARG3 ctx.DX[2].U1 +#define REG_PC ctx.CurrPC +#define REG_RTP ctx.DX[4].U1 + +struct rt_sigframe { + struct siginfo info; + struct ucontext uc; + unsigned long retcode[2]; +}; + +static int restore_sigcontext(struct pt_regs *regs, + struct sigcontext __user *sc) +{ + int err; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + err = metag_gp_regs_copyin(regs, 0, sizeof(struct user_gp_regs), NULL, + &sc->regs); + if (!err) + err = metag_cb_regs_copyin(regs, 0, + sizeof(struct user_cb_regs), NULL, + &sc->cb); + if (!err) + err = metag_rp_state_copyin(regs, 0, + sizeof(struct user_rp_state), NULL, + &sc->rp); + + /* This is a user-mode context. */ + regs->REG_FLAGS |= TBICTX_PRIV_BIT; + + return err; +} + +long sys_rt_sigreturn(void) +{ + /* NOTE - Meta stack goes UPWARDS - so we wind the stack back */ + struct pt_regs *regs = current_pt_regs(); + struct rt_sigframe __user *frame; + sigset_t set; + + frame = (__force struct rt_sigframe __user *)(regs->REG_SP - + sizeof(*frame)); + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + set_current_blocked(&set); + + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) + goto badframe; + + if (restore_altstack(&frame->uc.uc_stack)) + goto badframe; + + return regs->REG_RETVAL; + +badframe: + force_sig(SIGSEGV, current); + + return 0; +} + +static int setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, + unsigned long mask) +{ + int err; + + err = metag_gp_regs_copyout(regs, 0, sizeof(struct user_gp_regs), NULL, + &sc->regs); + + if (!err) + err = metag_cb_regs_copyout(regs, 0, + sizeof(struct user_cb_regs), NULL, + &sc->cb); + if (!err) + err = metag_rp_state_copyout(regs, 0, + sizeof(struct user_rp_state), NULL, + &sc->rp); + + /* OK, clear that cbuf flag in the old context, or our stored + * catch buffer will be restored when we go to call the signal + * handler. Also clear out the CBRP RA/RD pipe bit incase + * that is pending as well! + * Note that as we have already stored this context, these + * flags will get restored on sigreturn to their original + * state. + */ + regs->REG_FLAGS &= ~(TBICTX_XCBF_BIT | TBICTX_CBUF_BIT | + TBICTX_CBRP_BIT); + + /* Clear out the LSM_STEP bits in case we are in the middle of + * and MSET/MGET. + */ + regs->ctx.Flags &= ~TXSTATUS_LSM_STEP_BITS; + + err |= __put_user(mask, &sc->oldmask); + + return err; +} + +/* + * Determine which stack to use.. + */ +static void __user *get_sigframe(struct k_sigaction *ka, unsigned long sp, + size_t frame_size) +{ + /* Meta stacks grows upwards */ + if ((ka->sa.sa_flags & SA_ONSTACK) && (sas_ss_flags(sp) == 0)) + sp = current->sas_ss_sp; + + sp = (sp + 7) & ~7; /* 8byte align stack */ + + return (void __user *)sp; +} + +static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + struct rt_sigframe __user *frame; + int err = -EFAULT; + unsigned long code; + + frame = get_sigframe(ka, regs->REG_SP, sizeof(*frame)); + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto out; + + err = copy_siginfo_to_user(&frame->info, info); + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, (unsigned long __user *)&frame->uc.uc_link); + err |= __save_altstack(&frame->uc.uc_stack, regs->REG_SP); + err |= setup_sigcontext(&frame->uc.uc_mcontext, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + if (err) + goto out; + + /* Set up to return from userspace. */ + + /* MOV D1Re0 (D1.0), #__NR_rt_sigreturn */ + code = 0x03000004 | (__NR_rt_sigreturn << 3); + err |= __put_user(code, (unsigned long __user *)(&frame->retcode[0])); + + /* SWITCH #__METAG_SW_SYS */ + code = __METAG_SW_ENCODING(SYS); + err |= __put_user(code, (unsigned long __user *)(&frame->retcode[1])); + + if (err) + goto out; + + /* Set up registers for signal handler */ + regs->REG_RTP = (unsigned long) frame->retcode; + regs->REG_SP = (unsigned long) frame + sizeof(*frame); + regs->REG_ARG1 = sig; + regs->REG_ARG2 = (unsigned long) &frame->info; + regs->REG_ARG3 = (unsigned long) &frame->uc; + regs->REG_PC = (unsigned long) ka->sa.sa_handler; + + pr_debug("SIG deliver (%s:%d): sp=%p pc=%08x pr=%08x\n", + current->comm, current->pid, frame, regs->REG_PC, + regs->REG_RTP); + + /* Now pass size of 'new code' into sigtramp so we can do a more + * effective cache flush - directed rather than 'full flush'. + */ + flush_cache_sigtramp(regs->REG_RTP, sizeof(frame->retcode)); +out: + if (err) { + force_sigsegv(sig, current); + return -EFAULT; + } + return 0; +} + +static void handle_signal(unsigned long sig, siginfo_t *info, + struct k_sigaction *ka, struct pt_regs *regs) +{ + sigset_t *oldset = sigmask_to_save(); + + /* Set up the stack frame */ + if (setup_rt_frame(sig, ka, info, oldset, regs)) + return; + + signal_delivered(sig, info, ka, regs, test_thread_flag(TIF_SINGLESTEP)); +} + + /* + * Notes for Meta. + * We have moved from the old 2.4.9 SH way of using syscall_nr (in the stored + * context) to passing in the syscall flag on the stack. + * This is because having syscall_nr in our context does not fit with TBX, and + * corrupted the stack. + */ +static int do_signal(struct pt_regs *regs, int syscall) +{ + unsigned int retval = 0, continue_addr = 0, restart_addr = 0; + struct k_sigaction ka; + siginfo_t info; + int signr; + int restart = 0; + + /* + * By the end of rt_sigreturn the context describes the point that the + * signal was taken (which may happen to be just before a syscall if + * it's already been restarted). This should *never* be mistaken for a + * system call in need of restarting. + */ + if (syscall == __NR_rt_sigreturn) + syscall = -1; + + /* Did we come from a system call? */ + if (syscall >= 0) { + continue_addr = regs->REG_PC; + restart_addr = continue_addr - 4; + retval = regs->REG_RETVAL; + + /* + * Prepare for system call restart. We do this here so that a + * debugger will see the already changed PC. + */ + switch (retval) { + case -ERESTART_RESTARTBLOCK: + restart = -2; + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + ++restart; + regs->REG_PC = restart_addr; + break; + } + } + + /* + * Get the signal to deliver. When running under ptrace, at this point + * the debugger may change all our registers ... + */ + signr = get_signal_to_deliver(&info, &ka, regs, NULL); + /* + * Depending on the signal settings we may need to revert the decision + * to restart the system call. But skip this if a debugger has chosen to + * restart at a different PC. + */ + if (regs->REG_PC != restart_addr) + restart = 0; + if (signr > 0) { + if (unlikely(restart)) { + if (retval == -ERESTARTNOHAND + || retval == -ERESTART_RESTARTBLOCK + || (retval == -ERESTARTSYS + && !(ka.sa.sa_flags & SA_RESTART))) { + regs->REG_RETVAL = -EINTR; + regs->REG_PC = continue_addr; + } + } + + /* Whee! Actually deliver the signal. */ + handle_signal(signr, &info, &ka, regs); + return 0; + } + + /* Handlerless -ERESTART_RESTARTBLOCK re-enters via restart_syscall */ + if (unlikely(restart < 0)) + regs->REG_SYSCALL = __NR_restart_syscall; + + /* + * If there's no signal to deliver, we just put the saved sigmask back. + */ + restore_saved_sigmask(); + + return restart; +} + +int do_work_pending(struct pt_regs *regs, unsigned int thread_flags, + int syscall) +{ + do { + if (likely(thread_flags & _TIF_NEED_RESCHED)) { + schedule(); + } else { + if (unlikely(!user_mode(regs))) + return 0; + local_irq_enable(); + if (thread_flags & _TIF_SIGPENDING) { + int restart = do_signal(regs, syscall); + if (unlikely(restart)) { + /* + * Restart without handlers. + * Deal with it without leaving + * the kernel space. + */ + return restart; + } + syscall = -1; + } else { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } + } + local_irq_disable(); + thread_flags = current_thread_info()->flags; + } while (thread_flags & _TIF_WORK_MASK); + return 0; +} diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c new file mode 100644 index 000000000000..4b6d1f14df32 --- /dev/null +++ b/arch/metag/kernel/smp.c @@ -0,0 +1,575 @@ +/* + * Copyright (C) 2009,2010,2011 Imagination Technologies Ltd. + * + * Copyright (C) 2002 ARM Limited, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/atomic.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/cache.h> +#include <linux/profile.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/err.h> +#include <linux/cpu.h> +#include <linux/smp.h> +#include <linux/seq_file.h> +#include <linux/irq.h> +#include <linux/bootmem.h> + +#include <asm/cacheflush.h> +#include <asm/cachepart.h> +#include <asm/core_reg.h> +#include <asm/cpu.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/processor.h> +#include <asm/setup.h> +#include <asm/tlbflush.h> +#include <asm/hwthread.h> +#include <asm/traps.h> + +DECLARE_PER_CPU(PTBI, pTBI); + +void *secondary_data_stack; + +/* + * structures for inter-processor calls + * - A collection of single bit ipi messages. + */ +struct ipi_data { + spinlock_t lock; + unsigned long ipi_count; + unsigned long bits; +}; + +static DEFINE_PER_CPU(struct ipi_data, ipi_data) = { + .lock = __SPIN_LOCK_UNLOCKED(ipi_data.lock), +}; + +static DEFINE_SPINLOCK(boot_lock); + +/* + * "thread" is assumed to be a valid Meta hardware thread ID. + */ +int __cpuinit boot_secondary(unsigned int thread, struct task_struct *idle) +{ + u32 val; + + /* + * set synchronisation state between this boot processor + * and the secondary one + */ + spin_lock(&boot_lock); + + core_reg_write(TXUPC_ID, 0, thread, (unsigned int)secondary_startup); + core_reg_write(TXUPC_ID, 1, thread, 0); + + /* + * Give the thread privilege (PSTAT) and clear potentially problematic + * bits in the process (namely ISTAT, CBMarker, CBMarkerI, LSM_STEP). + */ + core_reg_write(TXUCT_ID, TXSTATUS_REGNUM, thread, TXSTATUS_PSTAT_BIT); + + /* Clear the minim enable bit. */ + val = core_reg_read(TXUCT_ID, TXPRIVEXT_REGNUM, thread); + core_reg_write(TXUCT_ID, TXPRIVEXT_REGNUM, thread, val & ~0x80); + + /* + * set the ThreadEnable bit (0x1) in the TXENABLE register + * for the specified thread - off it goes! + */ + val = core_reg_read(TXUCT_ID, TXENABLE_REGNUM, thread); + core_reg_write(TXUCT_ID, TXENABLE_REGNUM, thread, val | 0x1); + + /* + * now the secondary core is starting up let it run its + * calibrations, then wait for it to finish + */ + spin_unlock(&boot_lock); + + return 0; +} + +int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) +{ + unsigned int thread = cpu_2_hwthread_id[cpu]; + int ret; + + load_pgd(swapper_pg_dir, thread); + + flush_tlb_all(); + + /* + * Tell the secondary CPU where to find its idle thread's stack. + */ + secondary_data_stack = task_stack_page(idle); + + wmb(); + + /* + * Now bring the CPU into our world. + */ + ret = boot_secondary(thread, idle); + if (ret == 0) { + unsigned long timeout; + + /* + * CPU was successfully started, wait for it + * to come online or time out. + */ + timeout = jiffies + HZ; + while (time_before(jiffies, timeout)) { + if (cpu_online(cpu)) + break; + + udelay(10); + barrier(); + } + + if (!cpu_online(cpu)) + ret = -EIO; + } + + secondary_data_stack = NULL; + + if (ret) { + pr_crit("CPU%u: processor failed to boot\n", cpu); + + /* + * FIXME: We need to clean up the new idle thread. --rmk + */ + } + + return ret; +} + +#ifdef CONFIG_HOTPLUG_CPU +static DECLARE_COMPLETION(cpu_killed); + +/* + * __cpu_disable runs on the processor to be shutdown. + */ +int __cpuexit __cpu_disable(void) +{ + unsigned int cpu = smp_processor_id(); + struct task_struct *p; + + /* + * Take this CPU offline. Once we clear this, we can't return, + * and we must not schedule until we're ready to give up the cpu. + */ + set_cpu_online(cpu, false); + + /* + * OK - migrate IRQs away from this CPU + */ + migrate_irqs(); + + /* + * Flush user cache and TLB mappings, and then remove this CPU + * from the vm mask set of all processes. + */ + flush_cache_all(); + local_flush_tlb_all(); + + read_lock(&tasklist_lock); + for_each_process(p) { + if (p->mm) + cpumask_clear_cpu(cpu, mm_cpumask(p->mm)); + } + read_unlock(&tasklist_lock); + + return 0; +} + +/* + * called on the thread which is asking for a CPU to be shutdown - + * waits until shutdown has completed, or it is timed out. + */ +void __cpuexit __cpu_die(unsigned int cpu) +{ + if (!wait_for_completion_timeout(&cpu_killed, msecs_to_jiffies(1))) + pr_err("CPU%u: unable to kill\n", cpu); +} + +/* + * Called from the idle thread for the CPU which has been shutdown. + * + * Note that we do not return from this function. If this cpu is + * brought online again it will need to run secondary_startup(). + */ +void __cpuexit cpu_die(void) +{ + local_irq_disable(); + idle_task_exit(); + + complete(&cpu_killed); + + asm ("XOR TXENABLE, D0Re0,D0Re0\n"); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +/* + * Called by both boot and secondaries to move global data into + * per-processor storage. + */ +void __cpuinit smp_store_cpu_info(unsigned int cpuid) +{ + struct cpuinfo_metag *cpu_info = &per_cpu(cpu_data, cpuid); + + cpu_info->loops_per_jiffy = loops_per_jiffy; +} + +/* + * This is the secondary CPU boot entry. We're using this CPUs + * idle thread stack and the global page tables. + */ +asmlinkage void secondary_start_kernel(void) +{ + struct mm_struct *mm = &init_mm; + unsigned int cpu = smp_processor_id(); + + /* + * All kernel threads share the same mm context; grab a + * reference and switch to it. + */ + atomic_inc(&mm->mm_users); + atomic_inc(&mm->mm_count); + current->active_mm = mm; + cpumask_set_cpu(cpu, mm_cpumask(mm)); + enter_lazy_tlb(mm, current); + local_flush_tlb_all(); + + /* + * TODO: Some day it might be useful for each Linux CPU to + * have its own TBI structure. That would allow each Linux CPU + * to run different interrupt handlers for the same IRQ + * number. + * + * For now, simply copying the pointer to the boot CPU's TBI + * structure is sufficient because we always want to run the + * same interrupt handler whatever CPU takes the interrupt. + */ + per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT); + + if (!per_cpu(pTBI, cpu)) + panic("No TBI found!"); + + per_cpu_trap_init(cpu); + + preempt_disable(); + + setup_priv(); + + /* + * Enable local interrupts. + */ + tbi_startup_interrupt(TBID_SIGNUM_TRT); + notify_cpu_starting(cpu); + local_irq_enable(); + + pr_info("CPU%u (thread %u): Booted secondary processor\n", + cpu, cpu_2_hwthread_id[cpu]); + + calibrate_delay(); + smp_store_cpu_info(cpu); + + /* + * OK, now it's safe to let the boot CPU continue + */ + set_cpu_online(cpu, true); + + /* + * Check for cache aliasing. + * Preemption is disabled + */ + check_for_cache_aliasing(cpu); + + /* + * OK, it's off to the idle thread for us + */ + cpu_idle(); +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ + int cpu; + unsigned long bogosum = 0; + + for_each_online_cpu(cpu) + bogosum += per_cpu(cpu_data, cpu).loops_per_jiffy; + + pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + num_online_cpus(), + bogosum / (500000/HZ), + (bogosum / (5000/HZ)) % 100); +} + +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + unsigned int cpu = smp_processor_id(); + + init_new_context(current, &init_mm); + current_thread_info()->cpu = cpu; + + smp_store_cpu_info(cpu); + init_cpu_present(cpu_possible_mask); +} + +void __init smp_prepare_boot_cpu(void) +{ + unsigned int cpu = smp_processor_id(); + + per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT); + + if (!per_cpu(pTBI, cpu)) + panic("No TBI found!"); +} + +static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg); + +static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg) +{ + unsigned long flags; + unsigned int cpu; + cpumask_t map; + + cpumask_clear(&map); + local_irq_save(flags); + + for_each_cpu(cpu, mask) { + struct ipi_data *ipi = &per_cpu(ipi_data, cpu); + + spin_lock(&ipi->lock); + + /* + * KICK interrupts are queued in hardware so we'll get + * multiple interrupts if we call smp_cross_call() + * multiple times for one msg. The problem is that we + * only have one bit for each message - we can't queue + * them in software. + * + * The first time through ipi_handler() we'll clear + * the msg bit, having done all the work. But when we + * return we'll get _another_ interrupt (and another, + * and another until we've handled all the queued + * KICKs). Running ipi_handler() when there's no work + * to do is bad because that's how kick handler + * chaining detects who the KICK was intended for. + * See arch/metag/kernel/kick.c for more details. + * + * So only add 'cpu' to 'map' if we haven't already + * queued a KICK interrupt for 'msg'. + */ + if (!(ipi->bits & (1 << msg))) { + ipi->bits |= 1 << msg; + cpumask_set_cpu(cpu, &map); + } + + spin_unlock(&ipi->lock); + } + + /* + * Call the platform specific cross-CPU call function. + */ + smp_cross_call(map, msg); + + local_irq_restore(flags); +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + send_ipi_message(mask, IPI_CALL_FUNC); +} + +void arch_send_call_function_single_ipi(int cpu) +{ + send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); +} + +void show_ipi_list(struct seq_file *p) +{ + unsigned int cpu; + + seq_puts(p, "IPI:"); + + for_each_present_cpu(cpu) + seq_printf(p, " %10lu", per_cpu(ipi_data, cpu).ipi_count); + + seq_putc(p, '\n'); +} + +static DEFINE_SPINLOCK(stop_lock); + +/* + * Main handler for inter-processor interrupts + * + * For Meta, the ipimask now only identifies a single + * category of IPI (Bit 1 IPIs have been replaced by a + * different mechanism): + * + * Bit 0 - Inter-processor function call + */ +static int do_IPI(struct pt_regs *regs) +{ + unsigned int cpu = smp_processor_id(); + struct ipi_data *ipi = &per_cpu(ipi_data, cpu); + struct pt_regs *old_regs = set_irq_regs(regs); + unsigned long msgs, nextmsg; + int handled = 0; + + ipi->ipi_count++; + + spin_lock(&ipi->lock); + msgs = ipi->bits; + nextmsg = msgs & -msgs; + ipi->bits &= ~nextmsg; + spin_unlock(&ipi->lock); + + if (nextmsg) { + handled = 1; + + nextmsg = ffz(~nextmsg); + switch (nextmsg) { + case IPI_RESCHEDULE: + scheduler_ipi(); + break; + + case IPI_CALL_FUNC: + generic_smp_call_function_interrupt(); + break; + + case IPI_CALL_FUNC_SINGLE: + generic_smp_call_function_single_interrupt(); + break; + + default: + pr_crit("CPU%u: Unknown IPI message 0x%lx\n", + cpu, nextmsg); + break; + } + } + + set_irq_regs(old_regs); + + return handled; +} + +void smp_send_reschedule(int cpu) +{ + send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); +} + +static void stop_this_cpu(void *data) +{ + unsigned int cpu = smp_processor_id(); + + if (system_state == SYSTEM_BOOTING || + system_state == SYSTEM_RUNNING) { + spin_lock(&stop_lock); + pr_crit("CPU%u: stopping\n", cpu); + dump_stack(); + spin_unlock(&stop_lock); + } + + set_cpu_online(cpu, false); + + local_irq_disable(); + + hard_processor_halt(HALT_OK); +} + +void smp_send_stop(void) +{ + smp_call_function(stop_this_cpu, NULL, 0); +} + +/* + * not supported here + */ +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + +/* + * We use KICKs for inter-processor interrupts. + * + * For every CPU in "callmap" the IPI data must already have been + * stored in that CPU's "ipi_data" member prior to calling this + * function. + */ +static void kick_raise_softirq(cpumask_t callmap, unsigned int irq) +{ + int cpu; + + for_each_cpu(cpu, &callmap) { + unsigned int thread; + + thread = cpu_2_hwthread_id[cpu]; + + BUG_ON(thread == BAD_HWTHREAD_ID); + + metag_out32(1, T0KICKI + (thread * TnXKICK_STRIDE)); + } +} + +static TBIRES ipi_handler(TBIRES State, int SigNum, int Triggers, + int Inst, PTBI pTBI, int *handled) +{ + *handled = do_IPI((struct pt_regs *)State.Sig.pCtx); + + return State; +} + +static struct kick_irq_handler ipi_irq = { + .func = ipi_handler, +}; + +static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg) +{ + kick_raise_softirq(callmap, 1); +} + +static inline unsigned int get_core_count(void) +{ + int i; + unsigned int ret = 0; + + for (i = 0; i < CONFIG_NR_CPUS; i++) { + if (core_reg_read(TXUCT_ID, TXENABLE_REGNUM, i)) + ret++; + } + + return ret; +} + +/* + * Initialise the CPU possible map early - this describes the CPUs + * which may be present or become present in the system. + */ +void __init smp_init_cpus(void) +{ + unsigned int i, ncores = get_core_count(); + + /* If no hwthread_map early param was set use default mapping */ + for (i = 0; i < NR_CPUS; i++) + if (cpu_2_hwthread_id[i] == BAD_HWTHREAD_ID) { + cpu_2_hwthread_id[i] = i; + hwthread_id_2_cpu[i] = i; + } + + for (i = 0; i < ncores; i++) + set_cpu_possible(i, true); + + kick_register_func(&ipi_irq); +} diff --git a/arch/metag/kernel/stacktrace.c b/arch/metag/kernel/stacktrace.c new file mode 100644 index 000000000000..5510361d5bea --- /dev/null +++ b/arch/metag/kernel/stacktrace.c @@ -0,0 +1,187 @@ +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/stacktrace.h> + +#include <asm/stacktrace.h> + +#if defined(CONFIG_FRAME_POINTER) + +#ifdef CONFIG_KALLSYMS +#include <linux/kallsyms.h> +#include <linux/module.h> + +static unsigned long tbi_boing_addr; +static unsigned long tbi_boing_size; + +static void tbi_boing_init(void) +{ + /* We need to know where TBIBoingVec is and it's size */ + unsigned long size; + unsigned long offset; + char modname[MODULE_NAME_LEN]; + char name[KSYM_NAME_LEN]; + tbi_boing_addr = kallsyms_lookup_name("___TBIBoingVec"); + if (!tbi_boing_addr) + tbi_boing_addr = 1; + else if (!lookup_symbol_attrs(tbi_boing_addr, &size, + &offset, modname, name)) + tbi_boing_size = size; +} +#endif + +#define ALIGN_DOWN(addr, size) ((addr)&(~((size)-1))) + +/* + * Unwind the current stack frame and store the new register values in the + * structure passed as argument. Unwinding is equivalent to a function return, + * hence the new PC value rather than LR should be used for backtrace. + */ +int notrace unwind_frame(struct stackframe *frame) +{ + struct metag_frame *fp = (struct metag_frame *)frame->fp; + unsigned long lr; + unsigned long fpnew; + + if (frame->fp & 0x7) + return -EINVAL; + + fpnew = fp->fp; + lr = fp->lr - 4; + +#ifdef CONFIG_KALLSYMS + /* If we've reached TBIBoingVec then we're at an interrupt + * entry point or a syscall entry point. The frame pointer + * points to a pt_regs which can be used to continue tracing on + * the other side of the boing. + */ + if (!tbi_boing_addr) + tbi_boing_init(); + if (tbi_boing_size && lr >= tbi_boing_addr && + lr < tbi_boing_addr + tbi_boing_size) { + struct pt_regs *regs = (struct pt_regs *)fpnew; + if (user_mode(regs)) + return -EINVAL; + fpnew = regs->ctx.AX[1].U0; + lr = regs->ctx.DX[4].U1; + } +#endif + + /* stack grows up, so frame pointers must decrease */ + if (fpnew < (ALIGN_DOWN((unsigned long)fp, THREAD_SIZE) + + sizeof(struct thread_info)) || fpnew >= (unsigned long)fp) + return -EINVAL; + + /* restore the registers from the stack frame */ + frame->fp = fpnew; + frame->pc = lr; + + return 0; +} +#else +int notrace unwind_frame(struct stackframe *frame) +{ + struct metag_frame *sp = (struct metag_frame *)frame->sp; + + if (frame->sp & 0x7) + return -EINVAL; + + while (!kstack_end(sp)) { + unsigned long addr = sp->lr - 4; + sp--; + + if (__kernel_text_address(addr)) { + frame->sp = (unsigned long)sp; + frame->pc = addr; + return 0; + } + } + return -EINVAL; +} +#endif + +void notrace walk_stackframe(struct stackframe *frame, + int (*fn)(struct stackframe *, void *), void *data) +{ + while (1) { + int ret; + + if (fn(frame, data)) + break; + ret = unwind_frame(frame); + if (ret < 0) + break; + } +} +EXPORT_SYMBOL(walk_stackframe); + +#ifdef CONFIG_STACKTRACE +struct stack_trace_data { + struct stack_trace *trace; + unsigned int no_sched_functions; + unsigned int skip; +}; + +static int save_trace(struct stackframe *frame, void *d) +{ + struct stack_trace_data *data = d; + struct stack_trace *trace = data->trace; + unsigned long addr = frame->pc; + + if (data->no_sched_functions && in_sched_functions(addr)) + return 0; + if (data->skip) { + data->skip--; + return 0; + } + + trace->entries[trace->nr_entries++] = addr; + + return trace->nr_entries >= trace->max_entries; +} + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + struct stack_trace_data data; + struct stackframe frame; + + data.trace = trace; + data.skip = trace->skip; + + if (tsk != current) { +#ifdef CONFIG_SMP + /* + * What guarantees do we have here that 'tsk' is not + * running on another CPU? For now, ignore it as we + * can't guarantee we won't explode. + */ + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; + return; +#else + data.no_sched_functions = 1; + frame.fp = thread_saved_fp(tsk); + frame.sp = thread_saved_sp(tsk); + frame.lr = 0; /* recovered from the stack */ + frame.pc = thread_saved_pc(tsk); +#endif + } else { + register unsigned long current_sp asm ("A0StP"); + + data.no_sched_functions = 0; + frame.fp = (unsigned long)__builtin_frame_address(0); + frame.sp = current_sp; + frame.lr = (unsigned long)__builtin_return_address(0); + frame.pc = (unsigned long)save_stack_trace_tsk; + } + + walk_stackframe(&frame, save_trace, &data); + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} + +void save_stack_trace(struct stack_trace *trace) +{ + save_stack_trace_tsk(current, trace); +} +EXPORT_SYMBOL_GPL(save_stack_trace); +#endif diff --git a/arch/metag/kernel/sys_metag.c b/arch/metag/kernel/sys_metag.c new file mode 100644 index 000000000000..efe833a452f7 --- /dev/null +++ b/arch/metag/kernel/sys_metag.c @@ -0,0 +1,180 @@ +/* + * This file contains various random system calls that + * have a non-standard calling sequence on the Linux/Meta + * platform. + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/syscalls.h> +#include <linux/mman.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/uaccess.h> +#include <linux/unistd.h> +#include <asm/cacheflush.h> +#include <asm/core_reg.h> +#include <asm/global_lock.h> +#include <asm/switch.h> +#include <asm/syscall.h> +#include <asm/syscalls.h> +#include <asm/user_gateway.h> + +#define merge_64(hi, lo) ((((unsigned long long)(hi)) << 32) + \ + ((lo) & 0xffffffffUL)) + +int metag_mmap_check(unsigned long addr, unsigned long len, + unsigned long flags) +{ + /* We can't have people trying to write to the bottom of the + * memory map, there are mysterious unspecified things there that + * we don't want people trampling on. + */ + if ((flags & MAP_FIXED) && (addr < TASK_UNMAPPED_BASE)) + return -EINVAL; + + return 0; +} + +asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +{ + /* The shift for mmap2 is constant, regardless of PAGE_SIZE setting. */ + if (pgoff & ((1 << (PAGE_SHIFT - 12)) - 1)) + return -EINVAL; + + pgoff >>= PAGE_SHIFT - 12; + + return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); +} + +asmlinkage int sys_metag_setglobalbit(char __user *addr, int mask) +{ + char tmp; + int ret = 0; + unsigned int flags; + + if (!((__force unsigned int)addr >= LINCORE_BASE)) + return -EFAULT; + + __global_lock2(flags); + + metag_data_cache_flush((__force void *)addr, sizeof(mask)); + + ret = __get_user(tmp, addr); + if (ret) + goto out; + tmp |= mask; + ret = __put_user(tmp, addr); + + metag_data_cache_flush((__force void *)addr, sizeof(mask)); + +out: + __global_unlock2(flags); + + return ret; +} + +#define TXDEFR_FPU_MASK ((0x1f << 16) | 0x1f) + +asmlinkage void sys_metag_set_fpu_flags(unsigned int flags) +{ + unsigned int temp; + + flags &= TXDEFR_FPU_MASK; + + temp = __core_reg_get(TXDEFR); + temp &= ~TXDEFR_FPU_MASK; + temp |= flags; + __core_reg_set(TXDEFR, temp); +} + +asmlinkage int sys_metag_set_tls(void __user *ptr) +{ + current->thread.tls_ptr = ptr; + set_gateway_tls(ptr); + + return 0; +} + +asmlinkage void *sys_metag_get_tls(void) +{ + return (__force void *)current->thread.tls_ptr; +} + +asmlinkage long sys_truncate64_metag(const char __user *path, unsigned long lo, + unsigned long hi) +{ + return sys_truncate64(path, merge_64(hi, lo)); +} + +asmlinkage long sys_ftruncate64_metag(unsigned int fd, unsigned long lo, + unsigned long hi) +{ + return sys_ftruncate64(fd, merge_64(hi, lo)); +} + +asmlinkage long sys_fadvise64_64_metag(int fd, unsigned long offs_lo, + unsigned long offs_hi, + unsigned long len_lo, + unsigned long len_hi, int advice) +{ + return sys_fadvise64_64(fd, merge_64(offs_hi, offs_lo), + merge_64(len_hi, len_lo), advice); +} + +asmlinkage long sys_readahead_metag(int fd, unsigned long lo, unsigned long hi, + size_t count) +{ + return sys_readahead(fd, merge_64(hi, lo), count); +} + +asmlinkage ssize_t sys_pread64_metag(unsigned long fd, char __user *buf, + size_t count, unsigned long lo, + unsigned long hi) +{ + return sys_pread64(fd, buf, count, merge_64(hi, lo)); +} + +asmlinkage ssize_t sys_pwrite64_metag(unsigned long fd, char __user *buf, + size_t count, unsigned long lo, + unsigned long hi) +{ + return sys_pwrite64(fd, buf, count, merge_64(hi, lo)); +} + +asmlinkage long sys_sync_file_range_metag(int fd, unsigned long offs_lo, + unsigned long offs_hi, + unsigned long len_lo, + unsigned long len_hi, + unsigned int flags) +{ + return sys_sync_file_range(fd, merge_64(offs_hi, offs_lo), + merge_64(len_hi, len_lo), flags); +} + +/* Provide the actual syscall number to call mapping. */ +#undef __SYSCALL +#define __SYSCALL(nr, call) [nr] = (call), + +/* + * We need wrappers for anything with unaligned 64bit arguments + */ +#define sys_truncate64 sys_truncate64_metag +#define sys_ftruncate64 sys_ftruncate64_metag +#define sys_fadvise64_64 sys_fadvise64_64_metag +#define sys_readahead sys_readahead_metag +#define sys_pread64 sys_pread64_metag +#define sys_pwrite64 sys_pwrite64_metag +#define sys_sync_file_range sys_sync_file_range_metag + +/* + * Note that we can't include <linux/unistd.h> here since the header + * guard will defeat us; <asm/unistd.h> checks for __SYSCALL as well. + */ +const void *sys_call_table[__NR_syscalls] = { + [0 ... __NR_syscalls-1] = sys_ni_syscall, +#include <asm/unistd.h> +}; diff --git a/arch/metag/kernel/tbiunexp.S b/arch/metag/kernel/tbiunexp.S new file mode 100644 index 000000000000..907bbe0b2e68 --- /dev/null +++ b/arch/metag/kernel/tbiunexp.S @@ -0,0 +1,22 @@ +/* Pass a breakpoint through to Codescape */ + +#include <asm/tbx.h> + + .text + .global ___TBIUnExpXXX + .type ___TBIUnExpXXX,function +___TBIUnExpXXX: + TSTT D0Ar2,#TBICTX_CRIT_BIT ! Result of nestable int call? + BZ $LTBINormCase ! UnExpXXX at background level + MOV D0Re0,TXMASKI ! Read TXMASKI + XOR TXMASKI,D1Re0,D1Re0 ! Turn off BGNDHALT handling! + OR D0Ar2,D0Ar2,D0Re0 ! Preserve bits cleared +$LTBINormCase: + MSETL [A0StP],D0Ar6,D0Ar4,D0Ar2 ! Save args on stack + SETL [A0StP++],D0Ar2,D1Ar1 ! Init area for returned values + SWITCH #0xC20208 ! Total stack frame size 8 Dwords + ! write back size 2 Dwords + GETL D0Re0,D1Re0,[--A0StP] ! Get result + SUB A0StP,A0StP,#(8*3) ! Recover stack frame + MOV PC,D1RtP + .size ___TBIUnExpXXX,.-___TBIUnExpXXX diff --git a/arch/metag/kernel/tcm.c b/arch/metag/kernel/tcm.c new file mode 100644 index 000000000000..5d102b31ce84 --- /dev/null +++ b/arch/metag/kernel/tcm.c @@ -0,0 +1,151 @@ +/* + * Copyright (C) 2010 Imagination Technologies Ltd. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/stddef.h> +#include <linux/genalloc.h> +#include <linux/string.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <asm/page.h> +#include <asm/tcm.h> + +struct tcm_pool { + struct list_head list; + unsigned int tag; + unsigned long start; + unsigned long end; + struct gen_pool *pool; +}; + +static LIST_HEAD(pool_list); + +static struct tcm_pool *find_pool(unsigned int tag) +{ + struct list_head *lh; + struct tcm_pool *pool; + + list_for_each(lh, &pool_list) { + pool = list_entry(lh, struct tcm_pool, list); + if (pool->tag == tag) + return pool; + } + + return NULL; +} + +/** + * tcm_alloc - allocate memory from a TCM pool + * @tag: tag of the pool to allocate memory from + * @len: number of bytes to be allocated + * + * Allocate the requested number of bytes from the pool matching + * the specified tag. Returns the address of the allocated memory + * or zero on failure. + */ +unsigned long tcm_alloc(unsigned int tag, size_t len) +{ + unsigned long vaddr; + struct tcm_pool *pool; + + pool = find_pool(tag); + if (!pool) + return 0; + + vaddr = gen_pool_alloc(pool->pool, len); + if (!vaddr) + return 0; + + return vaddr; +} + +/** + * tcm_free - free a block of memory to a TCM pool + * @tag: tag of the pool to free memory to + * @addr: address of the memory to be freed + * @len: number of bytes to be freed + * + * Free the requested number of bytes at a specific address to the + * pool matching the specified tag. + */ +void tcm_free(unsigned int tag, unsigned long addr, size_t len) +{ + struct tcm_pool *pool; + + pool = find_pool(tag); + if (!pool) + return; + gen_pool_free(pool->pool, addr, len); +} + +/** + * tcm_lookup_tag - find the tag matching an address + * @p: memory address to lookup the tag for + * + * Find the tag of the tcm memory region that contains the + * specified address. Returns %TCM_INVALID_TAG if no such + * memory region could be found. + */ +unsigned int tcm_lookup_tag(unsigned long p) +{ + struct list_head *lh; + struct tcm_pool *pool; + unsigned long addr = (unsigned long) p; + + list_for_each(lh, &pool_list) { + pool = list_entry(lh, struct tcm_pool, list); + if (addr >= pool->start && addr < pool->end) + return pool->tag; + } + + return TCM_INVALID_TAG; +} + +/** + * tcm_add_region - add a memory region to TCM pool list + * @reg: descriptor of region to be added + * + * Add a region of memory to the TCM pool list. Returns 0 on success. + */ +int __init tcm_add_region(struct tcm_region *reg) +{ + struct tcm_pool *pool; + + pool = kmalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) { + pr_err("Failed to alloc memory for TCM pool!\n"); + return -ENOMEM; + } + + pool->tag = reg->tag; + pool->start = reg->res.start; + pool->end = reg->res.end; + + /* + * 2^3 = 8 bytes granularity to allow for 64bit access alignment. + * -1 = NUMA node specifier. + */ + pool->pool = gen_pool_create(3, -1); + + if (!pool->pool) { + pr_err("Failed to create TCM pool!\n"); + kfree(pool); + return -ENOMEM; + } + + if (gen_pool_add(pool->pool, reg->res.start, + reg->res.end - reg->res.start + 1, -1)) { + pr_err("Failed to add memory to TCM pool!\n"); + return -ENOMEM; + } + pr_info("Added %s TCM pool (%08x bytes @ %08x)\n", + reg->res.name, reg->res.end - reg->res.start + 1, + reg->res.start); + + list_add_tail(&pool->list, &pool_list); + + return 0; +} diff --git a/arch/metag/kernel/time.c b/arch/metag/kernel/time.c new file mode 100644 index 000000000000..17dc10733b2f --- /dev/null +++ b/arch/metag/kernel/time.c @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2005-2013 Imagination Technologies Ltd. + * + * This file contains the Meta-specific time handling details. + * + */ + +#include <linux/init.h> + +#include <clocksource/metag_generic.h> + +void __init time_init(void) +{ + metag_generic_timer_init(); +} diff --git a/arch/metag/kernel/topology.c b/arch/metag/kernel/topology.c new file mode 100644 index 000000000000..bec3dec4922e --- /dev/null +++ b/arch/metag/kernel/topology.c @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2007 Paul Mundt + * Copyright (C) 2010 Imagination Technolohies Ltd. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/node.h> +#include <linux/nodemask.h> +#include <linux/topology.h> + +#include <asm/cpu.h> + +DEFINE_PER_CPU(struct cpuinfo_metag, cpu_data); + +cpumask_t cpu_core_map[NR_CPUS]; + +static cpumask_t cpu_coregroup_map(unsigned int cpu) +{ + return *cpu_possible_mask; +} + +const struct cpumask *cpu_coregroup_mask(unsigned int cpu) +{ + return &cpu_core_map[cpu]; +} + +int arch_update_cpu_topology(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + cpu_core_map[cpu] = cpu_coregroup_map(cpu); + + return 0; +} + +static int __init topology_init(void) +{ + int i, ret; + +#ifdef CONFIG_NEED_MULTIPLE_NODES + for_each_online_node(i) + register_one_node(i); +#endif + + for_each_present_cpu(i) { + struct cpuinfo_metag *cpuinfo = &per_cpu(cpu_data, i); +#ifdef CONFIG_HOTPLUG_CPU + cpuinfo->cpu.hotpluggable = 1; +#endif + ret = register_cpu(&cpuinfo->cpu, i); + if (unlikely(ret)) + pr_warn("%s: register_cpu %d failed (%d)\n", + __func__, i, ret); + } + +#if defined(CONFIG_NUMA) && !defined(CONFIG_SMP) + /* + * In the UP case, make sure the CPU association is still + * registered under each node. Without this, sysfs fails + * to make the connection between nodes other than node0 + * and cpu0. + */ + for_each_online_node(i) + if (i != numa_node_id()) + register_cpu_under_node(raw_smp_processor_id(), i); +#endif + + return 0; +} +subsys_initcall(topology_init); diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c new file mode 100644 index 000000000000..8961f247b500 --- /dev/null +++ b/arch/metag/kernel/traps.c @@ -0,0 +1,995 @@ +/* + * Meta exception handling. + * + * Copyright (C) 2005,2006,2007,2008,2009,2012 Imagination Technologies Ltd. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/preempt.h> +#include <linux/ptrace.h> +#include <linux/module.h> +#include <linux/kallsyms.h> +#include <linux/kdebug.h> +#include <linux/kexec.h> +#include <linux/unistd.h> +#include <linux/smp.h> +#include <linux/slab.h> +#include <linux/syscalls.h> + +#include <asm/bug.h> +#include <asm/core_reg.h> +#include <asm/irqflags.h> +#include <asm/siginfo.h> +#include <asm/traps.h> +#include <asm/hwthread.h> +#include <asm/switch.h> +#include <asm/user_gateway.h> +#include <asm/syscall.h> +#include <asm/syscalls.h> + +/* Passing syscall arguments as long long is quicker. */ +typedef unsigned int (*LPSYSCALL) (unsigned long long, + unsigned long long, + unsigned long long); + +/* + * Users of LNKSET should compare the bus error bits obtained from DEFR + * against TXDEFR_LNKSET_SUCCESS only as the failure code will vary between + * different cores revisions. + */ +#define TXDEFR_LNKSET_SUCCESS 0x02000000 +#define TXDEFR_LNKSET_FAILURE 0x04000000 + +/* + * Our global TBI handle. Initialised from setup.c/setup_arch. + */ +DECLARE_PER_CPU(PTBI, pTBI); + +#ifdef CONFIG_SMP +static DEFINE_PER_CPU(unsigned int, trigger_mask); +#else +unsigned int global_trigger_mask; +EXPORT_SYMBOL(global_trigger_mask); +#endif + +unsigned long per_cpu__stack_save[NR_CPUS]; + +static const char * const trap_names[] = { + [TBIXXF_SIGNUM_IIF] = "Illegal instruction fault", + [TBIXXF_SIGNUM_PGF] = "Privilege violation", + [TBIXXF_SIGNUM_DHF] = "Unaligned data access fault", + [TBIXXF_SIGNUM_IGF] = "Code fetch general read failure", + [TBIXXF_SIGNUM_DGF] = "Data access general read/write fault", + [TBIXXF_SIGNUM_IPF] = "Code fetch page fault", + [TBIXXF_SIGNUM_DPF] = "Data access page fault", + [TBIXXF_SIGNUM_IHF] = "Instruction breakpoint", + [TBIXXF_SIGNUM_DWF] = "Read-only data access fault", +}; + +const char *trap_name(int trapno) +{ + if (trapno >= 0 && trapno < ARRAY_SIZE(trap_names) + && trap_names[trapno]) + return trap_names[trapno]; + return "Unknown fault"; +} + +static DEFINE_SPINLOCK(die_lock); + +void die(const char *str, struct pt_regs *regs, long err, + unsigned long addr) +{ + static int die_counter; + + oops_enter(); + + spin_lock_irq(&die_lock); + console_verbose(); + bust_spinlocks(1); + pr_err("%s: err %04lx (%s) addr %08lx [#%d]\n", str, err & 0xffff, + trap_name(err & 0xffff), addr, ++die_counter); + + print_modules(); + show_regs(regs); + + pr_err("Process: %s (pid: %d, stack limit = %p)\n", current->comm, + task_pid_nr(current), task_stack_page(current) + THREAD_SIZE); + + bust_spinlocks(0); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + if (kexec_should_crash(current)) + crash_kexec(regs); + + if (in_interrupt()) + panic("Fatal exception in interrupt"); + + if (panic_on_oops) + panic("Fatal exception"); + + spin_unlock_irq(&die_lock); + oops_exit(); + do_exit(SIGSEGV); +} + +#ifdef CONFIG_METAG_DSP +/* + * The ECH encoding specifies the size of a DSPRAM as, + * + * "slots" / 4 + * + * A "slot" is the size of two DSPRAM bank entries; an entry from + * DSPRAM bank A and an entry from DSPRAM bank B. One DSPRAM bank + * entry is 4 bytes. + */ +#define SLOT_SZ 8 +static inline unsigned int decode_dspram_size(unsigned int size) +{ + unsigned int _sz = size & 0x7f; + + return _sz * SLOT_SZ * 4; +} + +static void dspram_save(struct meta_ext_context *dsp_ctx, + unsigned int ramA_sz, unsigned int ramB_sz) +{ + unsigned int ram_sz[2]; + int i; + + ram_sz[0] = ramA_sz; + ram_sz[1] = ramB_sz; + + for (i = 0; i < 2; i++) { + if (ram_sz[i] != 0) { + unsigned int sz; + + if (i == 0) + sz = decode_dspram_size(ram_sz[i] >> 8); + else + sz = decode_dspram_size(ram_sz[i]); + + if (dsp_ctx->ram[i] == NULL) { + dsp_ctx->ram[i] = kmalloc(sz, GFP_KERNEL); + + if (dsp_ctx->ram[i] == NULL) + panic("couldn't save DSP context"); + } else { + if (ram_sz[i] > dsp_ctx->ram_sz[i]) { + kfree(dsp_ctx->ram[i]); + + dsp_ctx->ram[i] = kmalloc(sz, + GFP_KERNEL); + + if (dsp_ctx->ram[i] == NULL) + panic("couldn't save DSP context"); + } + } + + if (i == 0) + __TBIDspramSaveA(ram_sz[i], dsp_ctx->ram[i]); + else + __TBIDspramSaveB(ram_sz[i], dsp_ctx->ram[i]); + + dsp_ctx->ram_sz[i] = ram_sz[i]; + } + } +} +#endif /* CONFIG_METAG_DSP */ + +/* + * Allow interrupts to be nested and save any "extended" register + * context state, e.g. DSP regs and RAMs. + */ +static void nest_interrupts(TBIRES State, unsigned long mask) +{ +#ifdef CONFIG_METAG_DSP + struct meta_ext_context *dsp_ctx; + unsigned int D0_8; + + /* + * D0.8 may contain an ECH encoding. The upper 16 bits + * tell us what DSP resources the current process is + * using. OR the bits into the SaveMask so that + * __TBINestInts() knows what resources to save as + * part of this context. + * + * Don't save the context if we're nesting interrupts in the + * kernel because the kernel doesn't use DSP hardware. + */ + D0_8 = __core_reg_get(D0.8); + + if (D0_8 && (State.Sig.SaveMask & TBICTX_PRIV_BIT)) { + State.Sig.SaveMask |= (D0_8 >> 16); + + dsp_ctx = current->thread.dsp_context; + if (dsp_ctx == NULL) { + dsp_ctx = kzalloc(sizeof(*dsp_ctx), GFP_KERNEL); + if (dsp_ctx == NULL) + panic("couldn't save DSP context: ENOMEM"); + + current->thread.dsp_context = dsp_ctx; + } + + current->thread.user_flags |= (D0_8 & 0xffff0000); + __TBINestInts(State, &dsp_ctx->regs, mask); + dspram_save(dsp_ctx, D0_8 & 0x7f00, D0_8 & 0x007f); + } else + __TBINestInts(State, NULL, mask); +#else + __TBINestInts(State, NULL, mask); +#endif +} + +void head_end(TBIRES State, unsigned long mask) +{ + unsigned int savemask = (unsigned short)State.Sig.SaveMask; + unsigned int ctx_savemask = (unsigned short)State.Sig.pCtx->SaveMask; + + if (savemask & TBICTX_PRIV_BIT) { + ctx_savemask |= TBICTX_PRIV_BIT; + current->thread.user_flags = savemask; + } + + /* Always undo the sleep bit */ + ctx_savemask &= ~TBICTX_WAIT_BIT; + + /* Always save the catch buffer and RD pipe if they are dirty */ + savemask |= TBICTX_XCBF_BIT; + + /* Only save the catch and RD if we have not already done so. + * Note - the RD bits are in the pCtx only, and not in the + * State.SaveMask. + */ + if ((savemask & TBICTX_CBUF_BIT) || + (ctx_savemask & TBICTX_CBRP_BIT)) { + /* Have we already saved the buffers though? + * - See TestTrack 5071 */ + if (ctx_savemask & TBICTX_XCBF_BIT) { + /* Strip off the bits so the call to __TBINestInts + * won't save the buffers again. */ + savemask &= ~TBICTX_CBUF_BIT; + ctx_savemask &= ~TBICTX_CBRP_BIT; + } + } + +#ifdef CONFIG_METAG_META21 + { + unsigned int depth, txdefr; + + /* + * Save TXDEFR state. + * + * The process may have been interrupted after a LNKSET, but + * before it could read the DEFR state, so we mustn't lose that + * state or it could end up retrying an atomic operation that + * succeeded. + * + * All interrupts are disabled at this point so we + * don't need to perform any locking. We must do this + * dance before we use LNKGET or LNKSET. + */ + BUG_ON(current->thread.int_depth > HARDIRQ_BITS); + + depth = current->thread.int_depth++; + + txdefr = __core_reg_get(TXDEFR); + + txdefr &= TXDEFR_BUS_STATE_BITS; + if (txdefr & TXDEFR_LNKSET_SUCCESS) + current->thread.txdefr_failure &= ~(1 << depth); + else + current->thread.txdefr_failure |= (1 << depth); + } +#endif + + State.Sig.SaveMask = savemask; + State.Sig.pCtx->SaveMask = ctx_savemask; + + nest_interrupts(State, mask); + +#ifdef CONFIG_METAG_POISON_CATCH_BUFFERS + /* Poison the catch registers. This shows up any mistakes we have + * made in their handling MUCH quicker. + */ + __core_reg_set(TXCATCH0, 0x87650021); + __core_reg_set(TXCATCH1, 0x87654322); + __core_reg_set(TXCATCH2, 0x87654323); + __core_reg_set(TXCATCH3, 0x87654324); +#endif /* CONFIG_METAG_POISON_CATCH_BUFFERS */ +} + +TBIRES tail_end_sys(TBIRES State, int syscall, int *restart) +{ + struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx; + unsigned long flags; + + local_irq_disable(); + + if (user_mode(regs)) { + flags = current_thread_info()->flags; + if (flags & _TIF_WORK_MASK && + do_work_pending(regs, flags, syscall)) { + *restart = 1; + return State; + } + +#ifdef CONFIG_METAG_FPU + if (current->thread.fpu_context && + current->thread.fpu_context->needs_restore) { + __TBICtxFPURestore(State, current->thread.fpu_context); + /* + * Clearing this bit ensures the FP unit is not made + * active again unless it is used. + */ + State.Sig.SaveMask &= ~TBICTX_FPAC_BIT; + current->thread.fpu_context->needs_restore = false; + } + State.Sig.TrigMask |= TBI_TRIG_BIT(TBID_SIGNUM_DFR); +#endif + } + + /* TBI will turn interrupts back on at some point. */ + if (!irqs_disabled_flags((unsigned long)State.Sig.TrigMask)) + trace_hardirqs_on(); + +#ifdef CONFIG_METAG_DSP + /* + * If we previously saved an extended context then restore it + * now. Otherwise, clear D0.8 because this process is not + * using DSP hardware. + */ + if (State.Sig.pCtx->SaveMask & TBICTX_XEXT_BIT) { + unsigned int D0_8; + struct meta_ext_context *dsp_ctx = current->thread.dsp_context; + + /* Make sure we're going to return to userland. */ + BUG_ON(current->thread.int_depth != 1); + + if (dsp_ctx->ram_sz[0] > 0) + __TBIDspramRestoreA(dsp_ctx->ram_sz[0], + dsp_ctx->ram[0]); + if (dsp_ctx->ram_sz[1] > 0) + __TBIDspramRestoreB(dsp_ctx->ram_sz[1], + dsp_ctx->ram[1]); + + State.Sig.SaveMask |= State.Sig.pCtx->SaveMask; + __TBICtxRestore(State, current->thread.dsp_context); + D0_8 = __core_reg_get(D0.8); + D0_8 |= current->thread.user_flags & 0xffff0000; + D0_8 |= (dsp_ctx->ram_sz[1] | dsp_ctx->ram_sz[0]) & 0xffff; + __core_reg_set(D0.8, D0_8); + } else + __core_reg_set(D0.8, 0); +#endif /* CONFIG_METAG_DSP */ + +#ifdef CONFIG_METAG_META21 + { + unsigned int depth, txdefr; + + /* + * If there hasn't been a LNKSET since the last LNKGET then the + * link flag will be set, causing the next LNKSET to succeed if + * the addresses match. The two LNK operations may not be a pair + * (e.g. see atomic_read()), so the LNKSET should fail. + * We use a conditional-never LNKSET to clear the link flag + * without side effects. + */ + asm volatile("LNKSETDNV [D0Re0],D0Re0"); + + depth = --current->thread.int_depth; + + BUG_ON(user_mode(regs) && depth); + + txdefr = __core_reg_get(TXDEFR); + + txdefr &= ~TXDEFR_BUS_STATE_BITS; + + /* Do we need to restore a failure code into TXDEFR? */ + if (current->thread.txdefr_failure & (1 << depth)) + txdefr |= (TXDEFR_LNKSET_FAILURE | TXDEFR_BUS_TRIG_BIT); + else + txdefr |= (TXDEFR_LNKSET_SUCCESS | TXDEFR_BUS_TRIG_BIT); + + __core_reg_set(TXDEFR, txdefr); + } +#endif + return State; +} + +#ifdef CONFIG_SMP +/* + * If we took an interrupt in the middle of __kuser_get_tls then we need + * to rewind the PC to the start of the function in case the process + * gets migrated to another thread (SMP only) and it reads the wrong tls + * data. + */ +static inline void _restart_critical_section(TBIRES State) +{ + unsigned long get_tls_start; + unsigned long get_tls_end; + + get_tls_start = (unsigned long)__kuser_get_tls - + (unsigned long)&__user_gateway_start; + + get_tls_start += USER_GATEWAY_PAGE; + + get_tls_end = (unsigned long)__kuser_get_tls_end - + (unsigned long)&__user_gateway_start; + + get_tls_end += USER_GATEWAY_PAGE; + + if ((State.Sig.pCtx->CurrPC >= get_tls_start) && + (State.Sig.pCtx->CurrPC < get_tls_end)) + State.Sig.pCtx->CurrPC = get_tls_start; +} +#else +/* + * If we took an interrupt in the middle of + * __kuser_cmpxchg then we need to rewind the PC to the + * start of the function. + */ +static inline void _restart_critical_section(TBIRES State) +{ + unsigned long cmpxchg_start; + unsigned long cmpxchg_end; + + cmpxchg_start = (unsigned long)__kuser_cmpxchg - + (unsigned long)&__user_gateway_start; + + cmpxchg_start += USER_GATEWAY_PAGE; + + cmpxchg_end = (unsigned long)__kuser_cmpxchg_end - + (unsigned long)&__user_gateway_start; + + cmpxchg_end += USER_GATEWAY_PAGE; + + if ((State.Sig.pCtx->CurrPC >= cmpxchg_start) && + (State.Sig.pCtx->CurrPC < cmpxchg_end)) + State.Sig.pCtx->CurrPC = cmpxchg_start; +} +#endif + +/* Used by kick_handler() */ +void restart_critical_section(TBIRES State) +{ + _restart_critical_section(State); +} + +TBIRES trigger_handler(TBIRES State, int SigNum, int Triggers, int Inst, + PTBI pTBI) +{ + head_end(State, ~INTS_OFF_MASK); + + /* If we interrupted user code handle any critical sections. */ + if (State.Sig.SaveMask & TBICTX_PRIV_BIT) + _restart_critical_section(State); + + trace_hardirqs_off(); + + do_IRQ(SigNum, (struct pt_regs *)State.Sig.pCtx); + + return tail_end(State); +} + +static unsigned int load_fault(PTBICTXEXTCB0 pbuf) +{ + return pbuf->CBFlags & TXCATCH0_READ_BIT; +} + +static unsigned long fault_address(PTBICTXEXTCB0 pbuf) +{ + return pbuf->CBAddr; +} + +static void unhandled_fault(struct pt_regs *regs, unsigned long addr, + int signo, int code, int trapno) +{ + if (user_mode(regs)) { + siginfo_t info; + + if (show_unhandled_signals && unhandled_signal(current, signo) + && printk_ratelimit()) { + + pr_info("pid %d unhandled fault: pc 0x%08x, addr 0x%08lx, trap %d (%s)\n", + current->pid, regs->ctx.CurrPC, addr, + trapno, trap_name(trapno)); + print_vma_addr(" in ", regs->ctx.CurrPC); + print_vma_addr(" rtp in ", regs->ctx.DX[4].U1); + printk("\n"); + show_regs(regs); + } + + info.si_signo = signo; + info.si_errno = 0; + info.si_code = code; + info.si_addr = (__force void __user *)addr; + info.si_trapno = trapno; + force_sig_info(signo, &info, current); + } else { + die("Oops", regs, trapno, addr); + } +} + +static int handle_data_fault(PTBICTXEXTCB0 pcbuf, struct pt_regs *regs, + unsigned int data_address, int trapno) +{ + int ret; + + ret = do_page_fault(regs, data_address, !load_fault(pcbuf), trapno); + + return ret; +} + +static unsigned long get_inst_fault_address(struct pt_regs *regs) +{ + return regs->ctx.CurrPC; +} + +TBIRES fault_handler(TBIRES State, int SigNum, int Triggers, + int Inst, PTBI pTBI) +{ + struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx; + PTBICTXEXTCB0 pcbuf = (PTBICTXEXTCB0)®s->extcb0; + unsigned long data_address; + + head_end(State, ~INTS_OFF_MASK); + + /* Hardware breakpoint or data watch */ + if ((SigNum == TBIXXF_SIGNUM_IHF) || + ((SigNum == TBIXXF_SIGNUM_DHF) && + (pcbuf[0].CBFlags & (TXCATCH0_WATCH1_BIT | + TXCATCH0_WATCH0_BIT)))) { + State = __TBIUnExpXXX(State, SigNum, Triggers, Inst, + pTBI); + return tail_end(State); + } + + local_irq_enable(); + + data_address = fault_address(pcbuf); + + switch (SigNum) { + case TBIXXF_SIGNUM_IGF: + /* 1st-level entry invalid (instruction fetch) */ + case TBIXXF_SIGNUM_IPF: { + /* 2nd-level entry invalid (instruction fetch) */ + unsigned long addr = get_inst_fault_address(regs); + do_page_fault(regs, addr, 0, SigNum); + break; + } + + case TBIXXF_SIGNUM_DGF: + /* 1st-level entry invalid (data access) */ + case TBIXXF_SIGNUM_DPF: + /* 2nd-level entry invalid (data access) */ + case TBIXXF_SIGNUM_DWF: + /* Write to read only page */ + handle_data_fault(pcbuf, regs, data_address, SigNum); + break; + + case TBIXXF_SIGNUM_IIF: + /* Illegal instruction */ + unhandled_fault(regs, regs->ctx.CurrPC, SIGILL, ILL_ILLOPC, + SigNum); + break; + + case TBIXXF_SIGNUM_DHF: + /* Unaligned access */ + unhandled_fault(regs, data_address, SIGBUS, BUS_ADRALN, + SigNum); + break; + case TBIXXF_SIGNUM_PGF: + /* Privilege violation */ + unhandled_fault(regs, data_address, SIGSEGV, SEGV_ACCERR, + SigNum); + break; + default: + BUG(); + break; + } + + return tail_end(State); +} + +static bool switch_is_syscall(unsigned int inst) +{ + return inst == __METAG_SW_ENCODING(SYS); +} + +static bool switch_is_legacy_syscall(unsigned int inst) +{ + return inst == __METAG_SW_ENCODING(SYS_LEGACY); +} + +static inline void step_over_switch(struct pt_regs *regs, unsigned int inst) +{ + regs->ctx.CurrPC += 4; +} + +static inline int test_syscall_work(void) +{ + return current_thread_info()->flags & _TIF_WORK_SYSCALL_MASK; +} + +TBIRES switch1_handler(TBIRES State, int SigNum, int Triggers, + int Inst, PTBI pTBI) +{ + struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx; + unsigned int sysnumber; + unsigned long long a1_a2, a3_a4, a5_a6; + LPSYSCALL syscall_entry; + int restart; + + head_end(State, ~INTS_OFF_MASK); + + /* + * If this is not a syscall SWITCH it could be a breakpoint. + */ + if (!switch_is_syscall(Inst)) { + /* + * Alert the user if they're trying to use legacy system + * calls. This suggests they need to update their C + * library and build against up to date kernel headers. + */ + if (switch_is_legacy_syscall(Inst)) + pr_warn_once("WARNING: A legacy syscall was made. Your userland needs updating.\n"); + /* + * We don't know how to handle the SWITCH and cannot + * safely ignore it, so treat all unknown switches + * (including breakpoints) as traps. + */ + force_sig(SIGTRAP, current); + return tail_end(State); + } + + local_irq_enable(); + +restart_syscall: + restart = 0; + sysnumber = regs->ctx.DX[0].U1; + + if (test_syscall_work()) + sysnumber = syscall_trace_enter(regs); + + /* Skip over the SWITCH instruction - or you just get 'stuck' on it! */ + step_over_switch(regs, Inst); + + if (sysnumber >= __NR_syscalls) { + pr_debug("unknown syscall number: %d\n", sysnumber); + syscall_entry = (LPSYSCALL) sys_ni_syscall; + } else { + syscall_entry = (LPSYSCALL) sys_call_table[sysnumber]; + } + + /* Use 64bit loads for speed. */ + a5_a6 = *(unsigned long long *)®s->ctx.DX[1]; + a3_a4 = *(unsigned long long *)®s->ctx.DX[2]; + a1_a2 = *(unsigned long long *)®s->ctx.DX[3]; + + /* here is the actual call to the syscall handler functions */ + regs->ctx.DX[0].U0 = syscall_entry(a1_a2, a3_a4, a5_a6); + + if (test_syscall_work()) + syscall_trace_leave(regs); + + State = tail_end_sys(State, sysnumber, &restart); + /* Handlerless restarts shouldn't go via userland */ + if (restart) + goto restart_syscall; + return State; +} + +TBIRES switchx_handler(TBIRES State, int SigNum, int Triggers, + int Inst, PTBI pTBI) +{ + struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx; + + /* + * This can be caused by any user process simply executing an unusual + * SWITCH instruction. If there's no DA, __TBIUnExpXXX will cause the + * thread to stop, so signal a SIGTRAP instead. + */ + head_end(State, ~INTS_OFF_MASK); + if (user_mode(regs)) + force_sig(SIGTRAP, current); + else + State = __TBIUnExpXXX(State, SigNum, Triggers, Inst, pTBI); + return tail_end(State); +} + +#ifdef CONFIG_METAG_META21 +TBIRES fpe_handler(TBIRES State, int SigNum, int Triggers, int Inst, PTBI pTBI) +{ + struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx; + unsigned int error_state = Triggers; + siginfo_t info; + + head_end(State, ~INTS_OFF_MASK); + + local_irq_enable(); + + info.si_signo = SIGFPE; + + if (error_state & TXSTAT_FPE_INVALID_BIT) + info.si_code = FPE_FLTINV; + else if (error_state & TXSTAT_FPE_DIVBYZERO_BIT) + info.si_code = FPE_FLTDIV; + else if (error_state & TXSTAT_FPE_OVERFLOW_BIT) + info.si_code = FPE_FLTOVF; + else if (error_state & TXSTAT_FPE_UNDERFLOW_BIT) + info.si_code = FPE_FLTUND; + else if (error_state & TXSTAT_FPE_INEXACT_BIT) + info.si_code = FPE_FLTRES; + else + info.si_code = 0; + info.si_errno = 0; + info.si_addr = (__force void __user *)regs->ctx.CurrPC; + force_sig_info(SIGFPE, &info, current); + + return tail_end(State); +} +#endif + +#ifdef CONFIG_METAG_SUSPEND_MEM +struct traps_context { + PTBIAPIFN fnSigs[TBID_SIGNUM_MAX + 1]; +}; + +static struct traps_context *metag_traps_context; + +int traps_save_context(void) +{ + unsigned long cpu = smp_processor_id(); + PTBI _pTBI = per_cpu(pTBI, cpu); + struct traps_context *context; + + context = kzalloc(sizeof(*context), GFP_ATOMIC); + if (!context) + return -ENOMEM; + + memcpy(context->fnSigs, (void *)_pTBI->fnSigs, sizeof(context->fnSigs)); + + metag_traps_context = context; + return 0; +} + +int traps_restore_context(void) +{ + unsigned long cpu = smp_processor_id(); + PTBI _pTBI = per_cpu(pTBI, cpu); + struct traps_context *context = metag_traps_context; + + metag_traps_context = NULL; + + memcpy((void *)_pTBI->fnSigs, context->fnSigs, sizeof(context->fnSigs)); + + kfree(context); + return 0; +} +#endif + +#ifdef CONFIG_SMP +static inline unsigned int _get_trigger_mask(void) +{ + unsigned long cpu = smp_processor_id(); + return per_cpu(trigger_mask, cpu); +} + +unsigned int get_trigger_mask(void) +{ + return _get_trigger_mask(); +} +EXPORT_SYMBOL(get_trigger_mask); + +static void set_trigger_mask(unsigned int mask) +{ + unsigned long cpu = smp_processor_id(); + per_cpu(trigger_mask, cpu) = mask; +} + +void arch_local_irq_enable(void) +{ + preempt_disable(); + arch_local_irq_restore(_get_trigger_mask()); + preempt_enable_no_resched(); +} +EXPORT_SYMBOL(arch_local_irq_enable); +#else +static void set_trigger_mask(unsigned int mask) +{ + global_trigger_mask = mask; +} +#endif + +void __cpuinit per_cpu_trap_init(unsigned long cpu) +{ + TBIRES int_context; + unsigned int thread = cpu_2_hwthread_id[cpu]; + + set_trigger_mask(TBI_INTS_INIT(thread) | /* interrupts */ + TBI_TRIG_BIT(TBID_SIGNUM_LWK) | /* low level kick */ + TBI_TRIG_BIT(TBID_SIGNUM_SW1) | + TBI_TRIG_BIT(TBID_SIGNUM_SWS)); + + /* non-priv - use current stack */ + int_context.Sig.pCtx = NULL; + /* Start with interrupts off */ + int_context.Sig.TrigMask = INTS_OFF_MASK; + int_context.Sig.SaveMask = 0; + + /* And call __TBIASyncTrigger() */ + __TBIASyncTrigger(int_context); +} + +void __init trap_init(void) +{ + unsigned long cpu = smp_processor_id(); + PTBI _pTBI = per_cpu(pTBI, cpu); + + _pTBI->fnSigs[TBID_SIGNUM_XXF] = fault_handler; + _pTBI->fnSigs[TBID_SIGNUM_SW0] = switchx_handler; + _pTBI->fnSigs[TBID_SIGNUM_SW1] = switch1_handler; + _pTBI->fnSigs[TBID_SIGNUM_SW2] = switchx_handler; + _pTBI->fnSigs[TBID_SIGNUM_SW3] = switchx_handler; + _pTBI->fnSigs[TBID_SIGNUM_SWK] = kick_handler; + +#ifdef CONFIG_METAG_META21 + _pTBI->fnSigs[TBID_SIGNUM_DFR] = __TBIHandleDFR; + _pTBI->fnSigs[TBID_SIGNUM_FPE] = fpe_handler; +#endif + + per_cpu_trap_init(cpu); +} + +void tbi_startup_interrupt(int irq) +{ + unsigned long cpu = smp_processor_id(); + PTBI _pTBI = per_cpu(pTBI, cpu); + + BUG_ON(irq > TBID_SIGNUM_MAX); + + /* For TR1 and TR2, the thread id is encoded in the irq number */ + if (irq >= TBID_SIGNUM_T10 && irq < TBID_SIGNUM_TR3) + cpu = hwthread_id_2_cpu[(irq - TBID_SIGNUM_T10) % 4]; + + set_trigger_mask(get_trigger_mask() | TBI_TRIG_BIT(irq)); + + _pTBI->fnSigs[irq] = trigger_handler; +} + +void tbi_shutdown_interrupt(int irq) +{ + unsigned long cpu = smp_processor_id(); + PTBI _pTBI = per_cpu(pTBI, cpu); + + BUG_ON(irq > TBID_SIGNUM_MAX); + + set_trigger_mask(get_trigger_mask() & ~TBI_TRIG_BIT(irq)); + + _pTBI->fnSigs[irq] = __TBIUnExpXXX; +} + +int ret_from_fork(TBIRES arg) +{ + struct task_struct *prev = arg.Switch.pPara; + struct task_struct *tsk = current; + struct pt_regs *regs = task_pt_regs(tsk); + int (*fn)(void *); + TBIRES Next; + + schedule_tail(prev); + + if (tsk->flags & PF_KTHREAD) { + fn = (void *)regs->ctx.DX[4].U1; + BUG_ON(!fn); + + fn((void *)regs->ctx.DX[3].U1); + } + + if (test_syscall_work()) + syscall_trace_leave(regs); + + preempt_disable(); + + Next.Sig.TrigMask = get_trigger_mask(); + Next.Sig.SaveMask = 0; + Next.Sig.pCtx = ®s->ctx; + + set_gateway_tls(current->thread.tls_ptr); + + preempt_enable_no_resched(); + + /* And interrupts should come back on when we resume the real usermode + * code. Call __TBIASyncResume() + */ + __TBIASyncResume(tail_end(Next)); + /* ASyncResume should NEVER return */ + BUG(); + return 0; +} + +void show_trace(struct task_struct *tsk, unsigned long *sp, + struct pt_regs *regs) +{ + unsigned long addr; +#ifdef CONFIG_FRAME_POINTER + unsigned long fp, fpnew; + unsigned long stack; +#endif + + if (regs && user_mode(regs)) + return; + + printk("\nCall trace: "); +#ifdef CONFIG_KALLSYMS + printk("\n"); +#endif + + if (!tsk) + tsk = current; + +#ifdef CONFIG_FRAME_POINTER + if (regs) { + print_ip_sym(regs->ctx.CurrPC); + fp = regs->ctx.AX[1].U0; + } else { + fp = __core_reg_get(A0FrP); + } + + /* detect when the frame pointer has been used for other purposes and + * doesn't point to the stack (it may point completely elsewhere which + * kstack_end may not detect). + */ + stack = (unsigned long)task_stack_page(tsk); + while (fp >= stack && fp + 8 <= stack + THREAD_SIZE) { + addr = __raw_readl((unsigned long *)(fp + 4)) - 4; + if (kernel_text_address(addr)) + print_ip_sym(addr); + else + break; + /* stack grows up, so frame pointers must decrease */ + fpnew = __raw_readl((unsigned long *)(fp + 0)); + if (fpnew >= fp) + break; + fp = fpnew; + } +#else + while (!kstack_end(sp)) { + addr = (*sp--) - 4; + if (kernel_text_address(addr)) + print_ip_sym(addr); + } +#endif + + printk("\n"); + + debug_show_held_locks(tsk); +} + +void show_stack(struct task_struct *tsk, unsigned long *sp) +{ + if (!tsk) + tsk = current; + if (tsk == current) + sp = (unsigned long *)current_stack_pointer; + else + sp = (unsigned long *)tsk->thread.kernel_context->AX[0].U0; + + show_trace(tsk, sp, NULL); +} + +void dump_stack(void) +{ + show_stack(NULL, NULL); +} +EXPORT_SYMBOL(dump_stack); diff --git a/arch/metag/kernel/user_gateway.S b/arch/metag/kernel/user_gateway.S new file mode 100644 index 000000000000..7167f3e8db6b --- /dev/null +++ b/arch/metag/kernel/user_gateway.S @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2010 Imagination Technologies Ltd. + * + * This file contains code that can be accessed from userspace and can + * access certain kernel data structures without the overhead of a system + * call. + */ + +#include <asm/metag_regs.h> +#include <asm/user_gateway.h> + +/* + * User helpers. + * + * These are segment of kernel provided user code reachable from user space + * at a fixed address in kernel memory. This is used to provide user space + * with some operations which require kernel help because of unimplemented + * native feature and/or instructions in some Meta CPUs. The idea is for + * this code to be executed directly in user mode for best efficiency but + * which is too intimate with the kernel counter part to be left to user + * libraries. The kernel reserves the right to change this code as needed + * without warning. Only the entry points and their results are guaranteed + * to be stable. + * + * Each segment is 64-byte aligned. This mechanism should be used only for + * for things that are really small and justified, and not be abused freely. + */ + .text + .global ___user_gateway_start +___user_gateway_start: + + /* get_tls + * Offset: 0 + * Description: Get the TLS pointer for this process. + */ + .global ___kuser_get_tls + .type ___kuser_get_tls,function +___kuser_get_tls: + MOVT D1Ar1,#HI(USER_GATEWAY_PAGE + USER_GATEWAY_TLS) + ADD D1Ar1,D1Ar1,#LO(USER_GATEWAY_PAGE + USER_GATEWAY_TLS) + MOV D1Ar3,TXENABLE + AND D1Ar3,D1Ar3,#(TXENABLE_THREAD_BITS) + LSR D1Ar3,D1Ar3,#(TXENABLE_THREAD_S - 2) + GETD D0Re0,[D1Ar1+D1Ar3] +___kuser_get_tls_end: /* Beyond this point the read will complete */ + MOV PC,D1RtP + .size ___kuser_get_tls,.-___kuser_get_tls + .global ___kuser_get_tls_end + + /* cmpxchg + * Offset: 64 + * Description: Replace the value at 'ptr' with 'newval' if the current + * value is 'oldval'. Return zero if we succeeded, + * non-zero otherwise. + * + * Reference prototype: + * + * int __kuser_cmpxchg(int oldval, int newval, unsigned long *ptr) + * + */ + .balign 64 + .global ___kuser_cmpxchg + .type ___kuser_cmpxchg,function +___kuser_cmpxchg: +#ifdef CONFIG_SMP + /* + * We must use LNKGET/LNKSET with an SMP kernel because the other method + * does not provide atomicity across multiple CPUs. + */ +0: LNKGETD D0Re0,[D1Ar3] + CMP D0Re0,D1Ar1 + LNKSETDZ [D1Ar3],D0Ar2 + BNZ 1f + DEFR D0Re0,TXSTAT + ANDT D0Re0,D0Re0,#HI(0x3f000000) + CMPT D0Re0,#HI(0x02000000) + BNE 0b +#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE + DCACHE [D1Ar3], D0Re0 +#endif +1: MOV D0Re0,#1 + XORZ D0Re0,D0Re0,D0Re0 + MOV PC,D1RtP +#else + GETD D0Re0,[D1Ar3] + CMP D0Re0,D1Ar1 + SETDZ [D1Ar3],D0Ar2 +___kuser_cmpxchg_end: /* Beyond this point the write will complete */ + MOV D0Re0,#1 + XORZ D0Re0,D0Re0,D0Re0 + MOV PC,D1RtP +#endif /* CONFIG_SMP */ + .size ___kuser_cmpxchg,.-___kuser_cmpxchg + .global ___kuser_cmpxchg_end + + .global ___user_gateway_end +___user_gateway_end: diff --git a/arch/metag/kernel/vmlinux.lds.S b/arch/metag/kernel/vmlinux.lds.S new file mode 100644 index 000000000000..e12055e88bfe --- /dev/null +++ b/arch/metag/kernel/vmlinux.lds.S @@ -0,0 +1,71 @@ +/* ld script to make Meta Linux kernel */ + +#include <asm/thread_info.h> +#include <asm/page.h> +#include <asm/cache.h> + +#include <asm-generic/vmlinux.lds.h> + +OUTPUT_FORMAT("elf32-metag", "elf32-metag", "elf32-metag") +OUTPUT_ARCH(metag) +ENTRY(__start) + +_jiffies = _jiffies_64; +SECTIONS +{ + . = CONFIG_PAGE_OFFSET; + _text = .; + __text = .; + __stext = .; + HEAD_TEXT_SECTION + .text : { + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + IRQENTRY_TEXT + *(.text.*) + *(.gnu.warning) + } + + __etext = .; /* End of text section */ + + __sdata = .; + RO_DATA_SECTION(PAGE_SIZE) + RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + __edata = .; /* End of data section */ + + EXCEPTION_TABLE(16) + NOTES + + . = ALIGN(PAGE_SIZE); /* Init code and data */ + ___init_begin = .; + INIT_TEXT_SECTION(PAGE_SIZE) + INIT_DATA_SECTION(16) + + .init.arch.info : { + ___arch_info_begin = .; + *(.arch.info.init) + ___arch_info_end = .; + } + + PERCPU_SECTION(L1_CACHE_BYTES) + + ___init_end = .; + + BSS_SECTION(0, PAGE_SIZE, 0) + + __end = .; + + . = ALIGN(PAGE_SIZE); + __heap_start = .; + + DWARF_DEBUG + + /* When something in the kernel is NOT compiled as a module, the + * module cleanup code and data are put into these segments. Both + * can then be thrown away, as cleanup code is never called unless + * it's a module. + */ + DISCARDS +} diff --git a/arch/metag/lib/Makefile b/arch/metag/lib/Makefile new file mode 100644 index 000000000000..a41d24e270e6 --- /dev/null +++ b/arch/metag/lib/Makefile @@ -0,0 +1,22 @@ +# +# Makefile for Meta-specific library files. +# + +lib-y += usercopy.o +lib-y += copy_page.o +lib-y += clear_page.o +lib-y += memcpy.o +lib-y += memmove.o +lib-y += memset.o +lib-y += delay.o +lib-y += div64.o +lib-y += muldi3.o +lib-y += ashrdi3.o +lib-y += ashldi3.o +lib-y += lshrdi3.o +lib-y += divsi3.o +lib-y += modsi3.o +lib-y += cmpdi2.o +lib-y += ucmpdi2.o +lib-y += ip_fast_csum.o +lib-y += checksum.o diff --git a/arch/metag/lib/ashldi3.S b/arch/metag/lib/ashldi3.S new file mode 100644 index 000000000000..78d6974cffef --- /dev/null +++ b/arch/metag/lib/ashldi3.S @@ -0,0 +1,33 @@ +! Copyright (C) 2012 by Imagination Technologies Ltd. +! +! 64-bit arithmetic shift left routine. +! + + .text + .global ___ashldi3 + .type ___ashldi3,function + +___ashldi3: + MOV D0Re0,D0Ar2 + MOV D1Re0,D1Ar1 + CMP D1Ar3,#0 ! COUNT == 0 + MOVEQ PC,D1RtP ! Yes, return + + SUBS D0Ar4,D1Ar3,#32 ! N = COUNT - 32 + BGE $L10 + +!! Shift < 32 + NEG D0Ar4,D0Ar4 ! N = - N + LSL D1Re0,D1Re0,D1Ar3 ! HI = HI << COUNT + LSR D0Ar6,D0Re0,D0Ar4 ! TMP= LO >> -(COUNT - 32) + OR D1Re0,D1Re0,D0Ar6 ! HI = HI | TMP + SWAP D0Ar4,D1Ar3 + LSL D0Re0,D0Re0,D0Ar4 ! LO = LO << COUNT + MOV PC,D1RtP + +$L10: +!! Shift >= 32 + LSL D1Re0,D0Re0,D0Ar4 ! HI = LO << N + MOV D0Re0,#0 ! LO = 0 + MOV PC,D1RtP + .size ___ashldi3,.-___ashldi3 diff --git a/arch/metag/lib/ashrdi3.S b/arch/metag/lib/ashrdi3.S new file mode 100644 index 000000000000..7cb7ed3bb1ad --- /dev/null +++ b/arch/metag/lib/ashrdi3.S @@ -0,0 +1,33 @@ +! Copyright (C) 2012 by Imagination Technologies Ltd. +! +! 64-bit arithmetic shift right routine. +! + + .text + .global ___ashrdi3 + .type ___ashrdi3,function + +___ashrdi3: + MOV D0Re0,D0Ar2 + MOV D1Re0,D1Ar1 + CMP D1Ar3,#0 ! COUNT == 0 + MOVEQ PC,D1RtP ! Yes, return + + MOV D0Ar4,D1Ar3 + SUBS D1Ar3,D1Ar3,#32 ! N = COUNT - 32 + BGE $L20 + +!! Shift < 32 + NEG D1Ar3,D1Ar3 ! N = - N + LSR D0Re0,D0Re0,D0Ar4 ! LO = LO >> COUNT + LSL D0Ar6,D1Re0,D1Ar3 ! TMP= HI << -(COUNT - 32) + OR D0Re0,D0Re0,D0Ar6 ! LO = LO | TMP + SWAP D1Ar3,D0Ar4 + ASR D1Re0,D1Re0,D1Ar3 ! HI = HI >> COUNT + MOV PC,D1RtP +$L20: +!! Shift >= 32 + ASR D0Re0,D1Re0,D1Ar3 ! LO = HI >> N + ASR D1Re0,D1Re0,#31 ! HI = HI >> 31 + MOV PC,D1RtP + .size ___ashrdi3,.-___ashrdi3 diff --git a/arch/metag/lib/checksum.c b/arch/metag/lib/checksum.c new file mode 100644 index 000000000000..44d2e1913560 --- /dev/null +++ b/arch/metag/lib/checksum.c @@ -0,0 +1,168 @@ +/* + * + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IP/TCP/UDP checksumming routines + * + * Authors: Jorge Cwik, <jorge@laser.satlink.net> + * Arnt Gulbrandsen, <agulbra@nvg.unit.no> + * Tom May, <ftom@netcom.com> + * Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de> + * Lots of code moved from tcp.c and ip.c; see those files + * for more names. + * + * 03/02/96 Jes Sorensen, Andreas Schwab, Roman Hodek: + * Fixed some nasty bugs, causing some horrible crashes. + * A: At some points, the sum (%0) was used as + * length-counter instead of the length counter + * (%1). Thanks to Roman Hodek for pointing this out. + * B: GCC seems to mess up if one uses too many + * data-registers to hold input values and one tries to + * specify d0 and d1 as scratch registers. Letting gcc + * choose these registers itself solves the problem. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access + kills, so most of the assembly has to go. */ + +#include <linux/module.h> +#include <net/checksum.h> + +#include <asm/byteorder.h> + +static inline unsigned short from32to16(unsigned int x) +{ + /* add up 16-bit and 16-bit for 16+c bit */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. */ + x = (x & 0xffff) + (x >> 16); + return x; +} + +static unsigned int do_csum(const unsigned char *buff, int len) +{ + int odd; + unsigned int result = 0; + + if (len <= 0) + goto out; + odd = 1 & (unsigned long) buff; + if (odd) { +#ifdef __LITTLE_ENDIAN + result += (*buff << 8); +#else + result = *buff; +#endif + len--; + buff++; + } + if (len >= 2) { + if (2 & (unsigned long) buff) { + result += *(unsigned short *) buff; + len -= 2; + buff += 2; + } + if (len >= 4) { + const unsigned char *end = buff + ((unsigned)len & ~3); + unsigned int carry = 0; + do { + unsigned int w = *(unsigned int *) buff; + buff += 4; + result += carry; + result += w; + carry = (w > result); + } while (buff < end); + result += carry; + result = (result & 0xffff) + (result >> 16); + } + if (len & 2) { + result += *(unsigned short *) buff; + buff += 2; + } + } + if (len & 1) +#ifdef __LITTLE_ENDIAN + result += *buff; +#else + result += (*buff << 8); +#endif + result = from32to16(result); + if (odd) + result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); +out: + return result; +} +EXPORT_SYMBOL(ip_fast_csum); + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +__wsum csum_partial(const void *buff, int len, __wsum wsum) +{ + unsigned int sum = (__force unsigned int)wsum; + unsigned int result = do_csum(buff, len); + + /* add in old sum, and carry.. */ + result += sum; + if (sum > result) + result += 1; + return (__force __wsum)result; +} +EXPORT_SYMBOL(csum_partial); + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +__sum16 ip_compute_csum(const void *buff, int len) +{ + return (__force __sum16)~do_csum(buff, len); +} +EXPORT_SYMBOL(ip_compute_csum); + +/* + * copy from fs while checksumming, otherwise like csum_partial + */ +__wsum +csum_partial_copy_from_user(const void __user *src, void *dst, int len, + __wsum sum, int *csum_err) +{ + int missing; + + missing = __copy_from_user(dst, src, len); + if (missing) { + memset(dst + len - missing, 0, missing); + *csum_err = -EFAULT; + } else + *csum_err = 0; + + return csum_partial(dst, len, sum); +} +EXPORT_SYMBOL(csum_partial_copy_from_user); + +/* + * copy from ds while checksumming, otherwise like csum_partial + */ +__wsum +csum_partial_copy(const void *src, void *dst, int len, __wsum sum) +{ + memcpy(dst, src, len); + return csum_partial(dst, len, sum); +} +EXPORT_SYMBOL(csum_partial_copy); diff --git a/arch/metag/lib/clear_page.S b/arch/metag/lib/clear_page.S new file mode 100644 index 000000000000..43144eebec55 --- /dev/null +++ b/arch/metag/lib/clear_page.S @@ -0,0 +1,17 @@ + ! Copyright 2007,2008,2009 Imagination Technologies Ltd. + +#include <asm/page.h> + + .text + .global _clear_page + .type _clear_page,function + !! D1Ar1 - page +_clear_page: + MOV TXRPT,#((PAGE_SIZE / 8) - 1) + MOV D0Re0,#0 + MOV D1Re0,#0 +$Lclear_page_loop: + SETL [D1Ar1++],D0Re0,D1Re0 + BR $Lclear_page_loop + MOV PC,D1RtP + .size _clear_page,.-_clear_page diff --git a/arch/metag/lib/cmpdi2.S b/arch/metag/lib/cmpdi2.S new file mode 100644 index 000000000000..9c5c663c5aea --- /dev/null +++ b/arch/metag/lib/cmpdi2.S @@ -0,0 +1,32 @@ +! Copyright (C) 2012 by Imagination Technologies Ltd. +! +! 64-bit signed compare routine. +! + + .text + .global ___cmpdi2 + .type ___cmpdi2,function + +! low high +! s64 a (D0Ar2, D1Ar1) +! s64 b (D0Ar4, D1Ar3) +___cmpdi2: + ! start at 1 (equal) and conditionally increment or decrement + MOV D0Re0,#1 + + ! high words differ? + CMP D1Ar1,D1Ar3 + BNE $Lhigh_differ + + ! unsigned compare low words + CMP D0Ar2,D0Ar4 + SUBLO D0Re0,D0Re0,#1 + ADDHI D0Re0,D0Re0,#1 + MOV PC,D1RtP + +$Lhigh_differ: + ! signed compare high words + SUBLT D0Re0,D0Re0,#1 + ADDGT D0Re0,D0Re0,#1 + MOV PC,D1RtP + .size ___cmpdi2,.-___cmpdi2 diff --git a/arch/metag/lib/copy_page.S b/arch/metag/lib/copy_page.S new file mode 100644 index 000000000000..91f7d461239c --- /dev/null +++ b/arch/metag/lib/copy_page.S @@ -0,0 +1,20 @@ + ! Copyright 2007,2008 Imagination Technologies Ltd. + +#include <asm/page.h> + + .text + .global _copy_page + .type _copy_page,function + !! D1Ar1 - to + !! D0Ar2 - from +_copy_page: + MOV D0FrT,#PAGE_SIZE +$Lcopy_page_loop: + GETL D0Re0,D1Re0,[D0Ar2++] + GETL D0Ar6,D1Ar5,[D0Ar2++] + SETL [D1Ar1++],D0Re0,D1Re0 + SETL [D1Ar1++],D0Ar6,D1Ar5 + SUBS D0FrT,D0FrT,#16 + BNZ $Lcopy_page_loop + MOV PC,D1RtP + .size _copy_page,.-_copy_page diff --git a/arch/metag/lib/delay.c b/arch/metag/lib/delay.c new file mode 100644 index 000000000000..0b308f48b37a --- /dev/null +++ b/arch/metag/lib/delay.c @@ -0,0 +1,56 @@ +/* + * Precise Delay Loops for Meta + * + * Copyright (C) 1993 Linus Torvalds + * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> + * Copyright (C) 2007,2009 Imagination Technologies Ltd. + * + */ + +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/delay.h> + +#include <asm/core_reg.h> +#include <asm/processor.h> + +/* + * TXTACTCYC is only 24 bits, so on chips with fast clocks it will wrap + * many times per-second. If it does wrap __delay will return prematurely, + * but this is only likely with large delay values. + * + * We also can't implement read_current_timer() with TXTACTCYC due to + * this wrapping behaviour. + */ +#define rdtimer(t) t = __core_reg_get(TXTACTCYC) + +void __delay(unsigned long loops) +{ + unsigned long bclock, now; + + rdtimer(bclock); + do { + asm("NOP"); + rdtimer(now); + } while ((now-bclock) < loops); +} +EXPORT_SYMBOL(__delay); + +inline void __const_udelay(unsigned long xloops) +{ + u64 loops = (u64)xloops * (u64)loops_per_jiffy * HZ; + __delay(loops >> 32); +} +EXPORT_SYMBOL(__const_udelay); + +void __udelay(unsigned long usecs) +{ + __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ +} +EXPORT_SYMBOL(__udelay); + +void __ndelay(unsigned long nsecs) +{ + __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ +} +EXPORT_SYMBOL(__ndelay); diff --git a/arch/metag/lib/div64.S b/arch/metag/lib/div64.S new file mode 100644 index 000000000000..1cfc93498f70 --- /dev/null +++ b/arch/metag/lib/div64.S @@ -0,0 +1,108 @@ +! Copyright (C) 2012 Imagination Technologies Ltd. +! +! Signed/unsigned 64-bit division routines. +! + + .text + .global _div_u64 + .type _div_u64,function + +_div_u64: +$L1: + ORS A0.3,D1Ar3,D0Ar4 + BNE $L3 +$L2: + MOV D0Re0,D0Ar2 + MOV D1Re0,D1Ar1 + MOV PC,D1RtP +$L3: + CMP D1Ar3,D1Ar1 + CMPEQ D0Ar4,D0Ar2 + MOV D0Re0,#1 + MOV D1Re0,#0 + BHS $L6 +$L4: + ADDS D0Ar6,D0Ar4,D0Ar4 + ADD D1Ar5,D1Ar3,D1Ar3 + ADDCS D1Ar5,D1Ar5,#1 + CMP D1Ar5,D1Ar3 + CMPEQ D0Ar6,D0Ar4 + BLO $L6 +$L5: + MOV D0Ar4,D0Ar6 + MOV D1Ar3,D1Ar5 + ADDS D0Re0,D0Re0,D0Re0 + ADD D1Re0,D1Re0,D1Re0 + ADDCS D1Re0,D1Re0,#1 + CMP D1Ar3,D1Ar1 + CMPEQ D0Ar4,D0Ar2 + BLO $L4 +$L6: + ORS A0.3,D1Re0,D0Re0 + MOV D0Ar6,#0 + MOV D1Ar5,D0Ar6 + BEQ $L10 +$L7: + CMP D1Ar1,D1Ar3 + CMPEQ D0Ar2,D0Ar4 + BLO $L9 +$L8: + ADDS D0Ar6,D0Ar6,D0Re0 + ADD D1Ar5,D1Ar5,D1Re0 + ADDCS D1Ar5,D1Ar5,#1 + + SUBS D0Ar2,D0Ar2,D0Ar4 + SUB D1Ar1,D1Ar1,D1Ar3 + SUBCS D1Ar1,D1Ar1,#1 +$L9: + LSL A0.3,D1Re0,#31 + LSR D0Re0,D0Re0,#1 + LSR D1Re0,D1Re0,#1 + OR D0Re0,D0Re0,A0.3 + LSL A0.3,D1Ar3,#31 + LSR D0Ar4,D0Ar4,#1 + LSR D1Ar3,D1Ar3,#1 + OR D0Ar4,D0Ar4,A0.3 + ORS A0.3,D1Re0,D0Re0 + BNE $L7 +$L10: + MOV D0Re0,D0Ar6 + MOV D1Re0,D1Ar5 + MOV PC,D1RtP + .size _div_u64,.-_div_u64 + + .text + .global _div_s64 + .type _div_s64,function +_div_s64: + MSETL [A0StP],D0FrT,D0.5 + XOR D0.5,D0Ar2,D0Ar4 + XOR D1.5,D1Ar1,D1Ar3 + TSTT D1Ar1,#HI(0x80000000) + BZ $L25 + + NEGS D0Ar2,D0Ar2 + NEG D1Ar1,D1Ar1 + SUBCS D1Ar1,D1Ar1,#1 +$L25: + TSTT D1Ar3,#HI(0x80000000) + BZ $L27 + + NEGS D0Ar4,D0Ar4 + NEG D1Ar3,D1Ar3 + SUBCS D1Ar3,D1Ar3,#1 +$L27: + CALLR D1RtP,_div_u64 + TSTT D1.5,#HI(0x80000000) + BZ $L29 + + NEGS D0Re0,D0Re0 + NEG D1Re0,D1Re0 + SUBCS D1Re0,D1Re0,#1 +$L29: + + GETL D0FrT,D1RtP,[A0StP+#(-16)] + GETL D0.5,D1.5,[A0StP+#(-8)] + SUB A0StP,A0StP,#16 + MOV PC,D1RtP + .size _div_s64,.-_div_s64 diff --git a/arch/metag/lib/divsi3.S b/arch/metag/lib/divsi3.S new file mode 100644 index 000000000000..7c8a8ae9a0a1 --- /dev/null +++ b/arch/metag/lib/divsi3.S @@ -0,0 +1,100 @@ +! Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007 +! Imagination Technologies Ltd +! +! Integer divide routines. +! + + .text + .global ___udivsi3 + .type ___udivsi3,function + .align 2 +___udivsi3: +!! +!! Since core is signed divide case, just set control variable +!! + MOV D1Re0,D0Ar2 ! Au already in A1Ar1, Bu -> D1Re0 + MOV D0Re0,#0 ! Result is 0 + MOV D0Ar4,#0 ! Return positive result + B $LIDMCUStart + .size ___udivsi3,.-___udivsi3 + +!! +!! 32-bit division signed i/p - passed signed 32-bit numbers +!! + .global ___divsi3 + .type ___divsi3,function + .align 2 +___divsi3: +!! +!! A already in D1Ar1, B already in D0Ar2 -> make B abs(B) +!! + MOV D1Re0,D0Ar2 ! A already in A1Ar1, B -> D1Re0 + MOV D0Re0,#0 ! Result is 0 + XOR D0Ar4,D1Ar1,D1Re0 ! D0Ar4 -ive if result is -ive + ABS D1Ar1,D1Ar1 ! abs(A) -> Au + ABS D1Re0,D1Re0 ! abs(B) -> Bu +$LIDMCUStart: + CMP D1Ar1,D1Re0 ! Is ( Au > Bu )? + LSR D1Ar3,D1Ar1,#2 ! Calculate (Au & (~3)) >> 2 + CMPHI D1Re0,D1Ar3 ! OR ( (Au & (~3)) <= (Bu << 2) )? + LSLSHI D1Ar3,D1Re0,#1 ! Buq = Bu << 1 + BLS $LIDMCUSetup ! Yes: Do normal divide +!! +!! Quick divide setup can assume that CurBit only needs to start at 2 +!! +$LIDMCQuick: + CMP D1Ar1,D1Ar3 ! ( A >= Buq )? + ADDCC D0Re0,D0Re0,#2 ! If yes result += 2 + SUBCC D1Ar1,D1Ar1,D1Ar3 ! and A -= Buq + CMP D1Ar1,D1Re0 ! ( A >= Bu )? + ADDCC D0Re0,D0Re0,#1 ! If yes result += 1 + SUBCC D1Ar1,D1Ar1,D1Re0 ! and A -= Bu + ORS D0Ar4,D0Ar4,D0Ar4 ! Return neg result? + NEG D0Ar2,D0Re0 ! Calulate neg result + MOVMI D0Re0,D0Ar2 ! Yes: Take neg result +$LIDMCRet: + MOV PC,D1RtP +!! +!! Setup for general unsigned divide code +!! +!! D0Re0 is used to form the result, already set to Zero +!! D1Re0 is the input Bu value, this gets trashed +!! D0Ar6 is curbit which is set to 1 at the start and shifted up +!! D0Ar4 is negative if we should return a negative result +!! D1Ar1 is the input Au value, eventually this holds the remainder +!! +$LIDMCUSetup: + CMP D1Ar1,D1Re0 ! Is ( Au < Bu )? + MOV D0Ar6,#1 ! Set curbit to 1 + BCS $LIDMCRet ! Yes: Return 0 remainder Au +!! +!! Calculate alignment using FFB instruction +!! + FFB D1Ar5,D1Ar1 ! Find first bit of Au + ANDN D1Ar5,D1Ar5,#31 ! Handle exceptional case. + ORN D1Ar5,D1Ar5,#31 ! if N bit set, set to 31 + FFB D1Ar3,D1Re0 ! Find first bit of Bu + ANDN D1Ar3,D1Ar3,#31 ! Handle exceptional case. + ORN D1Ar3,D1Ar3,#31 ! if N bit set, set to 31 + SUBS D1Ar3,D1Ar5,D1Ar3 ! calculate diff, ffbA - ffbB + MOV D0Ar2,D1Ar3 ! copy into bank 0 + LSLGT D1Re0,D1Re0,D1Ar3 ! ( > 0) ? left shift B + LSLGT D0Ar6,D0Ar6,D0Ar2 ! ( > 0) ? left shift curbit +!! +!! Now we start the divide proper, logic is +!! +!! if ( A >= B ) add curbit to result and subtract B from A +!! shift curbit and B down by 1 in either case +!! +$LIDMCLoop: + CMP D1Ar1, D1Re0 ! ( A >= B )? + ADDCC D0Re0, D0Re0, D0Ar6 ! If yes result += curbit + SUBCC D1Ar1, D1Ar1, D1Re0 ! and A -= B + LSRS D0Ar6, D0Ar6, #1 ! Shift down curbit, is it zero? + LSR D1Re0, D1Re0, #1 ! Shift down B + BNZ $LIDMCLoop ! Was single bit in curbit lost? + ORS D0Ar4,D0Ar4,D0Ar4 ! Return neg result? + NEG D0Ar2,D0Re0 ! Calulate neg result + MOVMI D0Re0,D0Ar2 ! Yes: Take neg result + MOV PC,D1RtP + .size ___divsi3,.-___divsi3 diff --git a/arch/metag/lib/ip_fast_csum.S b/arch/metag/lib/ip_fast_csum.S new file mode 100644 index 000000000000..533b1e73deac --- /dev/null +++ b/arch/metag/lib/ip_fast_csum.S @@ -0,0 +1,32 @@ + + .text +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + * + * extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); + * + */ + .global _ip_fast_csum + .type _ip_fast_csum,function +_ip_fast_csum: + !! TXRPT needs loops - 1 + SUBS TXRPT,D0Ar2,#1 + MOV D0Re0,#0 + BLO $Lfast_csum_exit +$Lfast_csum_loop: + GETD D1Ar3,[D1Ar1++] + ADDS D0Re0,D0Re0,D1Ar3 + ADDCS D0Re0,D0Re0,#1 + BR $Lfast_csum_loop + LSR D0Ar4,D0Re0,#16 + AND D0Re0,D0Re0,#0xffff + AND D0Ar4,D0Ar4,#0xffff + ADD D0Re0,D0Re0,D0Ar4 + LSR D0Ar4,D0Re0,#16 + ADD D0Re0,D0Re0,D0Ar4 + XOR D0Re0,D0Re0,#-1 + AND D0Re0,D0Re0,#0xffff +$Lfast_csum_exit: + MOV PC,D1RtP + .size _ip_fast_csum,.-_ip_fast_csum diff --git a/arch/metag/lib/lshrdi3.S b/arch/metag/lib/lshrdi3.S new file mode 100644 index 000000000000..47f720283077 --- /dev/null +++ b/arch/metag/lib/lshrdi3.S @@ -0,0 +1,33 @@ +! Copyright (C) 2012 by Imagination Technologies Ltd. +! +! 64-bit logical shift right routine. +! + + .text + .global ___lshrdi3 + .type ___lshrdi3,function + +___lshrdi3: + MOV D0Re0,D0Ar2 + MOV D1Re0,D1Ar1 + CMP D1Ar3,#0 ! COUNT == 0 + MOVEQ PC,D1RtP ! Yes, return + + MOV D0Ar4,D1Ar3 + SUBS D1Ar3,D1Ar3,#32 ! N = COUNT - 32 + BGE $L30 + +!! Shift < 32 + NEG D1Ar3,D1Ar3 ! N = - N + LSR D0Re0,D0Re0,D0Ar4 ! LO = LO >> COUNT + LSL D0Ar6,D1Re0,D1Ar3 ! TMP= HI << -(COUNT - 32) + OR D0Re0,D0Re0,D0Ar6 ! LO = LO | TMP + SWAP D1Ar3,D0Ar4 + LSR D1Re0,D1Re0,D1Ar3 ! HI = HI >> COUNT + MOV PC,D1RtP +$L30: +!! Shift >= 32 + LSR D0Re0,D1Re0,D1Ar3 ! LO = HI >> N + MOV D1Re0,#0 ! HI = 0 + MOV PC,D1RtP + .size ___lshrdi3,.-___lshrdi3 diff --git a/arch/metag/lib/memcpy.S b/arch/metag/lib/memcpy.S new file mode 100644 index 000000000000..46b7a2b9479e --- /dev/null +++ b/arch/metag/lib/memcpy.S @@ -0,0 +1,185 @@ +! Copyright (C) 2008-2012 Imagination Technologies Ltd. + + .text + .global _memcpy + .type _memcpy,function +! D1Ar1 dst +! D0Ar2 src +! D1Ar3 cnt +! D0Re0 dst +_memcpy: + CMP D1Ar3, #16 + MOV A1.2, D0Ar2 ! source pointer + MOV A0.2, D1Ar1 ! destination pointer + MOV A0.3, D1Ar1 ! for return value +! If there are less than 16 bytes to copy use the byte copy loop + BGE $Llong_copy + +$Lbyte_copy: +! Simply copy a byte at a time + SUBS TXRPT, D1Ar3, #1 + BLT $Lend +$Lloop_byte: + GETB D1Re0, [A1.2++] + SETB [A0.2++], D1Re0 + BR $Lloop_byte + +$Lend: +! Finally set return value and return + MOV D0Re0, A0.3 + MOV PC, D1RtP + +$Llong_copy: + ANDS D1Ar5, D1Ar1, #7 ! test destination alignment + BZ $Laligned_dst + +! The destination address is not 8 byte aligned. We will copy bytes from +! the source to the destination until the remaining data has an 8 byte +! destination address alignment (i.e we should never copy more than 7 +! bytes here). +$Lalign_dst: + GETB D0Re0, [A1.2++] + ADD D1Ar5, D1Ar5, #1 ! dest is aligned when D1Ar5 reaches #8 + SUB D1Ar3, D1Ar3, #1 ! decrement count of remaining bytes + SETB [A0.2++], D0Re0 + CMP D1Ar5, #8 + BNE $Lalign_dst + +! We have at least (16 - 7) = 9 bytes to copy - calculate the number of 8 byte +! blocks, then jump to the unaligned copy loop or fall through to the aligned +! copy loop as appropriate. +$Laligned_dst: + MOV D0Ar4, A1.2 + LSR D1Ar5, D1Ar3, #3 ! D1Ar5 = number of 8 byte blocks + ANDS D0Ar4, D0Ar4, #7 ! test source alignment + BNZ $Lunaligned_copy ! if unaligned, use unaligned copy loop + +! Both source and destination are 8 byte aligned - the easy case. +$Laligned_copy: + LSRS D1Ar5, D1Ar3, #5 ! D1Ar5 = number of 32 byte blocks + BZ $Lbyte_copy + SUB TXRPT, D1Ar5, #1 + +$Laligned_32: + GETL D0Re0, D1Re0, [A1.2++] + GETL D0Ar6, D1Ar5, [A1.2++] + SETL [A0.2++], D0Re0, D1Re0 + SETL [A0.2++], D0Ar6, D1Ar5 + GETL D0Re0, D1Re0, [A1.2++] + GETL D0Ar6, D1Ar5, [A1.2++] + SETL [A0.2++], D0Re0, D1Re0 + SETL [A0.2++], D0Ar6, D1Ar5 + BR $Laligned_32 + +! If there are any remaining bytes use the byte copy loop, otherwise we are done + ANDS D1Ar3, D1Ar3, #0x1f + BNZ $Lbyte_copy + B $Lend + +! The destination is 8 byte aligned but the source is not, and there are 8 +! or more bytes to be copied. +$Lunaligned_copy: +! Adjust the source pointer (A1.2) to the 8 byte boundary before its +! current value + MOV D0Ar4, A1.2 + MOV D0Ar6, A1.2 + ANDMB D0Ar4, D0Ar4, #0xfff8 + MOV A1.2, D0Ar4 +! Save the number of bytes of mis-alignment in D0Ar4 for use later + SUBS D0Ar6, D0Ar6, D0Ar4 + MOV D0Ar4, D0Ar6 +! if there is no mis-alignment after all, use the aligned copy loop + BZ $Laligned_copy + +! prefetch 8 bytes + GETL D0Re0, D1Re0, [A1.2] + + SUB TXRPT, D1Ar5, #1 + +! There are 3 mis-alignment cases to be considered. Less than 4 bytes, exactly +! 4 bytes, and more than 4 bytes. + CMP D0Ar6, #4 + BLT $Lunaligned_1_2_3 ! use 1-3 byte mis-alignment loop + BZ $Lunaligned_4 ! use 4 byte mis-alignment loop + +! The mis-alignment is more than 4 bytes +$Lunaligned_5_6_7: + SUB D0Ar6, D0Ar6, #4 +! Calculate the bit offsets required for the shift operations necesssary +! to align the data. +! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset) + MULW D0Ar6, D0Ar6, #8 + MOV D1Ar5, #32 + SUB D1Ar5, D1Ar5, D0Ar6 +! Move data 4 bytes before we enter the main loop + MOV D0Re0, D1Re0 + +$Lloop_5_6_7: + GETL D0Ar2, D1Ar1, [++A1.2] +! form 64-bit data in D0Re0, D1Re0 + LSR D0Re0, D0Re0, D0Ar6 + MOV D1Re0, D0Ar2 + LSL D1Re0, D1Re0, D1Ar5 + ADD D0Re0, D0Re0, D1Re0 + + LSR D0Ar2, D0Ar2, D0Ar6 + LSL D1Re0, D1Ar1, D1Ar5 + ADD D1Re0, D1Re0, D0Ar2 + + SETL [A0.2++], D0Re0, D1Re0 + MOV D0Re0, D1Ar1 + BR $Lloop_5_6_7 + + B $Lunaligned_end + +$Lunaligned_1_2_3: +! Calculate the bit offsets required for the shift operations necesssary +! to align the data. +! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset) + MULW D0Ar6, D0Ar6, #8 + MOV D1Ar5, #32 + SUB D1Ar5, D1Ar5, D0Ar6 + +$Lloop_1_2_3: +! form 64-bit data in D0Re0,D1Re0 + LSR D0Re0, D0Re0, D0Ar6 + LSL D1Ar1, D1Re0, D1Ar5 + ADD D0Re0, D0Re0, D1Ar1 + MOV D0Ar2, D1Re0 + LSR D0FrT, D0Ar2, D0Ar6 + GETL D0Ar2, D1Ar1, [++A1.2] + + MOV D1Re0, D0Ar2 + LSL D1Re0, D1Re0, D1Ar5 + ADD D1Re0, D1Re0, D0FrT + + SETL [A0.2++], D0Re0, D1Re0 + MOV D0Re0, D0Ar2 + MOV D1Re0, D1Ar1 + BR $Lloop_1_2_3 + + B $Lunaligned_end + +! The 4 byte mis-alignment case - this does not require any shifting, just a +! shuffling of registers. +$Lunaligned_4: + MOV D0Re0, D1Re0 +$Lloop_4: + GETL D0Ar2, D1Ar1, [++A1.2] + MOV D1Re0, D0Ar2 + SETL [A0.2++], D0Re0, D1Re0 + MOV D0Re0, D1Ar1 + BR $Lloop_4 + +$Lunaligned_end: +! If there are no remaining bytes to copy, we are done. + ANDS D1Ar3, D1Ar3, #7 + BZ $Lend +! Re-adjust the source pointer (A1.2) back to the actual (unaligned) byte +! address of the remaining bytes, and fall through to the byte copy loop. + MOV D0Ar6, A1.2 + ADD D1Ar5, D0Ar4, D0Ar6 + MOV A1.2, D1Ar5 + B $Lbyte_copy + + .size _memcpy,.-_memcpy diff --git a/arch/metag/lib/memmove.S b/arch/metag/lib/memmove.S new file mode 100644 index 000000000000..228ea04d7b39 --- /dev/null +++ b/arch/metag/lib/memmove.S @@ -0,0 +1,345 @@ +! Copyright (C) 2008-2012 Imagination Technologies Ltd. + + .text + .global _memmove + .type _memmove,function +! D1Ar1 dst +! D0Ar2 src +! D1Ar3 cnt +! D0Re0 dst +_memmove: + CMP D1Ar3, #0 + MOV D0Re0, D1Ar1 + BZ $LEND2 + MSETL [A0StP], D0.5, D0.6, D0.7 + MOV D1Ar5, D0Ar2 + CMP D1Ar1, D1Ar5 + BLT $Lforwards_copy + SUB D0Ar4, D1Ar1, D1Ar3 + ADD D0Ar4, D0Ar4, #1 + CMP D0Ar2, D0Ar4 + BLT $Lforwards_copy + ! should copy backwards + MOV D1Re0, D0Ar2 + ! adjust pointer to the end of mem + ADD D0Ar2, D1Re0, D1Ar3 + ADD D1Ar1, D1Ar1, D1Ar3 + + MOV A1.2, D0Ar2 + MOV A0.2, D1Ar1 + CMP D1Ar3, #8 + BLT $Lbbyte_loop + + MOV D0Ar4, D0Ar2 + MOV D1Ar5, D1Ar1 + + ! test 8 byte alignment + ANDS D1Ar5, D1Ar5, #7 + BNE $Lbdest_unaligned + + ANDS D0Ar4, D0Ar4, #7 + BNE $Lbsrc_unaligned + + LSR D1Ar5, D1Ar3, #3 + +$Lbaligned_loop: + GETL D0Re0, D1Re0, [--A1.2] + SETL [--A0.2], D0Re0, D1Re0 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lbaligned_loop + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lbbyte_loop_exit +$Lbbyte_loop: + GETB D1Re0, [--A1.2] + SETB [--A0.2], D1Re0 + SUBS D1Ar3, D1Ar3, #1 + BNE $Lbbyte_loop +$Lbbyte_loop_exit: + MOV D0Re0, A0.2 +$LEND: + SUB A0.2, A0StP, #24 + MGETL D0.5, D0.6, D0.7, [A0.2] + SUB A0StP, A0StP, #24 +$LEND2: + MOV PC, D1RtP + +$Lbdest_unaligned: + GETB D0Re0, [--A1.2] + SETB [--A0.2], D0Re0 + SUBS D1Ar5, D1Ar5, #1 + SUB D1Ar3, D1Ar3, #1 + BNE $Lbdest_unaligned + CMP D1Ar3, #8 + BLT $Lbbyte_loop +$Lbsrc_unaligned: + LSR D1Ar5, D1Ar3, #3 + ! adjust A1.2 + MOV D0Ar4, A1.2 + ! save original address + MOV D0Ar6, A1.2 + + ADD D0Ar4, D0Ar4, #7 + ANDMB D0Ar4, D0Ar4, #0xfff8 + ! new address is the 8-byte aligned one above the original + MOV A1.2, D0Ar4 + + ! A0.2 dst 64-bit is aligned + ! measure the gap size + SUB D0Ar6, D0Ar4, D0Ar6 + MOVS D0Ar4, D0Ar6 + ! keep this information for the later adjustment + ! both aligned + BZ $Lbaligned_loop + + ! prefetch + GETL D0Re0, D1Re0, [--A1.2] + + CMP D0Ar6, #4 + BLT $Lbunaligned_1_2_3 + ! 32-bit aligned + BZ $Lbaligned_4 + + SUB D0Ar6, D0Ar6, #4 + ! D1.6 stores the gap size in bits + MULW D1.6, D0Ar6, #8 + MOV D0.6, #32 + ! D0.6 stores the complement of the gap size + SUB D0.6, D0.6, D1.6 + +$Lbunaligned_5_6_7: + GETL D0.7, D1.7, [--A1.2] + ! form 64-bit data in D0Re0, D1Re0 + MOV D1Re0, D0Re0 + ! D1Re0 << gap-size + LSL D1Re0, D1Re0, D1.6 + MOV D0Re0, D1.7 + ! D0Re0 >> complement + LSR D0Re0, D0Re0, D0.6 + MOV D1.5, D0Re0 + ! combine the both + ADD D1Re0, D1Re0, D1.5 + + MOV D1.5, D1.7 + LSL D1.5, D1.5, D1.6 + MOV D0Re0, D0.7 + LSR D0Re0, D0Re0, D0.6 + MOV D0.5, D1.5 + ADD D0Re0, D0Re0, D0.5 + + SETL [--A0.2], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lbunaligned_5_6_7 + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lbbyte_loop_exit + ! Adjust A1.2 + ! A1.2 <- A1.2 +8 - gapsize + ADD A1.2, A1.2, #8 + SUB A1.2, A1.2, D0Ar4 + B $Lbbyte_loop + +$Lbunaligned_1_2_3: + MULW D1.6, D0Ar6, #8 + MOV D0.6, #32 + SUB D0.6, D0.6, D1.6 + +$Lbunaligned_1_2_3_loop: + GETL D0.7, D1.7, [--A1.2] + ! form 64-bit data in D0Re0, D1Re0 + LSL D1Re0, D1Re0, D1.6 + ! save D0Re0 for later use + MOV D0.5, D0Re0 + LSR D0Re0, D0Re0, D0.6 + MOV D1.5, D0Re0 + ADD D1Re0, D1Re0, D1.5 + + ! orignal data in D0Re0 + MOV D1.5, D0.5 + LSL D1.5, D1.5, D1.6 + MOV D0Re0, D1.7 + LSR D0Re0, D0Re0, D0.6 + MOV D0.5, D1.5 + ADD D0Re0, D0Re0, D0.5 + + SETL [--A0.2], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lbunaligned_1_2_3_loop + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lbbyte_loop_exit + ! Adjust A1.2 + ADD A1.2, A1.2, #8 + SUB A1.2, A1.2, D0Ar4 + B $Lbbyte_loop + +$Lbaligned_4: + GETL D0.7, D1.7, [--A1.2] + MOV D1Re0, D0Re0 + MOV D0Re0, D1.7 + SETL [--A0.2], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lbaligned_4 + ANDS D1Ar3, D1Ar3, #7 + BZ $Lbbyte_loop_exit + ! Adjust A1.2 + ADD A1.2, A1.2, #8 + SUB A1.2, A1.2, D0Ar4 + B $Lbbyte_loop + +$Lforwards_copy: + MOV A1.2, D0Ar2 + MOV A0.2, D1Ar1 + CMP D1Ar3, #8 + BLT $Lfbyte_loop + + MOV D0Ar4, D0Ar2 + MOV D1Ar5, D1Ar1 + + ANDS D1Ar5, D1Ar5, #7 + BNE $Lfdest_unaligned + + ANDS D0Ar4, D0Ar4, #7 + BNE $Lfsrc_unaligned + + LSR D1Ar5, D1Ar3, #3 + +$Lfaligned_loop: + GETL D0Re0, D1Re0, [A1.2++] + SUBS D1Ar5, D1Ar5, #1 + SETL [A0.2++], D0Re0, D1Re0 + BNE $Lfaligned_loop + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lfbyte_loop_exit +$Lfbyte_loop: + GETB D1Re0, [A1.2++] + SETB [A0.2++], D1Re0 + SUBS D1Ar3, D1Ar3, #1 + BNE $Lfbyte_loop +$Lfbyte_loop_exit: + MOV D0Re0, D1Ar1 + B $LEND + +$Lfdest_unaligned: + GETB D0Re0, [A1.2++] + ADD D1Ar5, D1Ar5, #1 + SUB D1Ar3, D1Ar3, #1 + SETB [A0.2++], D0Re0 + CMP D1Ar5, #8 + BNE $Lfdest_unaligned + CMP D1Ar3, #8 + BLT $Lfbyte_loop +$Lfsrc_unaligned: + ! adjust A1.2 + LSR D1Ar5, D1Ar3, #3 + + MOV D0Ar4, A1.2 + MOV D0Ar6, A1.2 + ANDMB D0Ar4, D0Ar4, #0xfff8 + MOV A1.2, D0Ar4 + + ! A0.2 dst 64-bit is aligned + SUB D0Ar6, D0Ar6, D0Ar4 + ! keep the information for the later adjustment + MOVS D0Ar4, D0Ar6 + + ! both aligned + BZ $Lfaligned_loop + + ! prefetch + GETL D0Re0, D1Re0, [A1.2] + + CMP D0Ar6, #4 + BLT $Lfunaligned_1_2_3 + BZ $Lfaligned_4 + + SUB D0Ar6, D0Ar6, #4 + MULW D0.6, D0Ar6, #8 + MOV D1.6, #32 + SUB D1.6, D1.6, D0.6 + +$Lfunaligned_5_6_7: + GETL D0.7, D1.7, [++A1.2] + ! form 64-bit data in D0Re0, D1Re0 + MOV D0Re0, D1Re0 + LSR D0Re0, D0Re0, D0.6 + MOV D1Re0, D0.7 + LSL D1Re0, D1Re0, D1.6 + MOV D0.5, D1Re0 + ADD D0Re0, D0Re0, D0.5 + + MOV D0.5, D0.7 + LSR D0.5, D0.5, D0.6 + MOV D1Re0, D1.7 + LSL D1Re0, D1Re0, D1.6 + MOV D1.5, D0.5 + ADD D1Re0, D1Re0, D1.5 + + SETL [A0.2++], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lfunaligned_5_6_7 + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lfbyte_loop_exit + ! Adjust A1.2 + ADD A1.2, A1.2, D0Ar4 + B $Lfbyte_loop + +$Lfunaligned_1_2_3: + MULW D0.6, D0Ar6, #8 + MOV D1.6, #32 + SUB D1.6, D1.6, D0.6 + +$Lfunaligned_1_2_3_loop: + GETL D0.7, D1.7, [++A1.2] + ! form 64-bit data in D0Re0, D1Re0 + LSR D0Re0, D0Re0, D0.6 + MOV D1.5, D1Re0 + LSL D1Re0, D1Re0, D1.6 + MOV D0.5, D1Re0 + ADD D0Re0, D0Re0, D0.5 + + MOV D0.5, D1.5 + LSR D0.5, D0.5, D0.6 + MOV D1Re0, D0.7 + LSL D1Re0, D1Re0, D1.6 + MOV D1.5, D0.5 + ADD D1Re0, D1Re0, D1.5 + + SETL [A0.2++], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lfunaligned_1_2_3_loop + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lfbyte_loop_exit + ! Adjust A1.2 + ADD A1.2, A1.2, D0Ar4 + B $Lfbyte_loop + +$Lfaligned_4: + GETL D0.7, D1.7, [++A1.2] + MOV D0Re0, D1Re0 + MOV D1Re0, D0.7 + SETL [A0.2++], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lfaligned_4 + ANDS D1Ar3, D1Ar3, #7 + BZ $Lfbyte_loop_exit + ! Adjust A1.2 + ADD A1.2, A1.2, D0Ar4 + B $Lfbyte_loop + + .size _memmove,.-_memmove diff --git a/arch/metag/lib/memset.S b/arch/metag/lib/memset.S new file mode 100644 index 000000000000..721085bad1d2 --- /dev/null +++ b/arch/metag/lib/memset.S @@ -0,0 +1,86 @@ +! Copyright (C) 2008-2012 Imagination Technologies Ltd. + + .text + .global _memset + .type _memset,function +! D1Ar1 dst +! D0Ar2 c +! D1Ar3 cnt +! D0Re0 dst +_memset: + AND D0Ar2,D0Ar2,#0xFF ! Ensure a byte input value + MULW D0Ar2,D0Ar2,#0x0101 ! Duplicate byte value into 0-15 + ANDS D0Ar4,D1Ar1,#7 ! Extract bottom LSBs of dst + LSL D0Re0,D0Ar2,#16 ! Duplicate byte value into 16-31 + ADD A0.2,D0Ar2,D0Re0 ! Duplicate byte value into 4 (A0.2) + MOV D0Re0,D1Ar1 ! Return dst + BZ $LLongStub ! if start address is aligned + ! start address is not aligned on an 8 byte boundary, so we + ! need the number of bytes up to the next 8 byte address + ! boundary, or the length of the string if less than 8, in D1Ar5 + MOV D0Ar2,#8 ! Need 8 - N in D1Ar5 ... + SUB D1Ar5,D0Ar2,D0Ar4 ! ... subtract N + CMP D1Ar3,D1Ar5 + MOVMI D1Ar5,D1Ar3 + B $LByteStub ! dst is mis-aligned, do $LByteStub + +! +! Preamble to LongLoop which generates 4*8 bytes per interation (5 cycles) +! +$LLongStub: + LSRS D0Ar2,D1Ar3,#5 + AND D1Ar3,D1Ar3,#0x1F + MOV A1.2,A0.2 + BEQ $LLongishStub + SUB TXRPT,D0Ar2,#1 + CMP D1Ar3,#0 +$LLongLoop: + SETL [D1Ar1++],A0.2,A1.2 + SETL [D1Ar1++],A0.2,A1.2 + SETL [D1Ar1++],A0.2,A1.2 + SETL [D1Ar1++],A0.2,A1.2 + BR $LLongLoop + BZ $Lexit +! +! Preamble to LongishLoop which generates 1*8 bytes per interation (2 cycles) +! +$LLongishStub: + LSRS D0Ar2,D1Ar3,#3 + AND D1Ar3,D1Ar3,#0x7 + MOV D1Ar5,D1Ar3 + BEQ $LByteStub + SUB TXRPT,D0Ar2,#1 + CMP D1Ar3,#0 +$LLongishLoop: + SETL [D1Ar1++],A0.2,A1.2 + BR $LLongishLoop + BZ $Lexit +! +! This does a byte structured burst of up to 7 bytes +! +! D1Ar1 should point to the location required +! D1Ar3 should be the remaining total byte count +! D1Ar5 should be burst size (<= D1Ar3) +! +$LByteStub: + SUBS D1Ar3,D1Ar3,D1Ar5 ! Reduce count + ADD D1Ar1,D1Ar1,D1Ar5 ! Advance pointer to end of area + MULW D1Ar5,D1Ar5,#4 ! Scale to (1*4), (2*4), (3*4) + SUB D1Ar5,D1Ar5,#(8*4) ! Rebase to -(7*4), -(6*4), -(5*4), ... + MOV A1.2,D1Ar5 + SUB PC,CPC1,A1.2 ! Jump into table below + SETB [D1Ar1+#(-7)],A0.2 + SETB [D1Ar1+#(-6)],A0.2 + SETB [D1Ar1+#(-5)],A0.2 + SETB [D1Ar1+#(-4)],A0.2 + SETB [D1Ar1+#(-3)],A0.2 + SETB [D1Ar1+#(-2)],A0.2 + SETB [D1Ar1+#(-1)],A0.2 +! +! Return if all data has been output, otherwise do $LLongStub +! + BNZ $LLongStub +$Lexit: + MOV PC,D1RtP + .size _memset,.-_memset + diff --git a/arch/metag/lib/modsi3.S b/arch/metag/lib/modsi3.S new file mode 100644 index 000000000000..210cfa856593 --- /dev/null +++ b/arch/metag/lib/modsi3.S @@ -0,0 +1,38 @@ +! Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007 +! Imagination Technologies Ltd +! +! Integer modulus routines. +! +!! +!! 32-bit modulus unsigned i/p - passed unsigned 32-bit numbers +!! + .text + .global ___umodsi3 + .type ___umodsi3,function + .align 2 +___umodsi3: + MOV D0FrT,D1RtP ! Save original return address + CALLR D1RtP,___udivsi3 + MOV D1RtP,D0FrT ! Recover return address + MOV D0Re0,D1Ar1 ! Return remainder + MOV PC,D1RtP + .size ___umodsi3,.-___umodsi3 + +!! +!! 32-bit modulus signed i/p - passed signed 32-bit numbers +!! + .global ___modsi3 + .type ___modsi3,function + .align 2 +___modsi3: + MOV D0FrT,D1RtP ! Save original return address + MOV A0.2,D1Ar1 ! Save A in A0.2 + CALLR D1RtP,___divsi3 + MOV D1RtP,D0FrT ! Recover return address + MOV D1Re0,A0.2 ! Recover A + MOV D0Re0,D1Ar1 ! Return remainder + ORS D1Re0,D1Re0,D1Re0 ! Was A negative? + NEG D1Ar1,D1Ar1 ! Negate remainder + MOVMI D0Re0,D1Ar1 ! Return neg remainder + MOV PC, D1RtP + .size ___modsi3,.-___modsi3 diff --git a/arch/metag/lib/muldi3.S b/arch/metag/lib/muldi3.S new file mode 100644 index 000000000000..ee66ca8644d0 --- /dev/null +++ b/arch/metag/lib/muldi3.S @@ -0,0 +1,44 @@ +! Copyright (C) 2012 by Imagination Technologies Ltd. +! +! 64-bit multiply routine. +! + +! +! 64-bit signed/unsigned multiply +! +! A = D1Ar1:D0Ar2 = a 2^48 + b 2^32 + c 2^16 + d 2^0 +! +! B = D1Ar3:D0Ar4 = w 2^48 + x 2^32 + y 2^16 + z 2^0 +! + .text + .global ___muldi3 + .type ___muldi3,function + +___muldi3: + MULD D1Re0,D1Ar1,D0Ar4 ! (a 2^48 + b 2^32)(y 2^16 + z 2^0) + MULD D0Re0,D0Ar2,D1Ar3 ! (w 2^48 + x 2^32)(c 2^16 + d 2^0) + ADD D1Re0,D1Re0,D0Re0 + + MULW D0Re0,D0Ar2,D0Ar4 ! (d 2^0) * (z 2^0) + + RTDW D0Ar2,D0Ar2 + MULW D0Ar6,D0Ar2,D0Ar4 ! (c 2^16)(z 2^0) + LSR D1Ar5,D0Ar6,#16 + LSL D0Ar6,D0Ar6,#16 + ADDS D0Re0,D0Re0,D0Ar6 + ADDCS D1Re0,D1Re0,#1 + RTDW D0Ar4,D0Ar4 + ADD D1Re0,D1Re0,D1Ar5 + + MULW D0Ar6,D0Ar2,D0Ar4 ! (c 2^16)(y 2^16) + ADD D1Re0,D1Re0,D0Ar6 + + RTDW D0Ar2,D0Ar2 + MULW D0Ar6,D0Ar2,D0Ar4 ! (d 2^0)(y 2^16) + LSR D1Ar5,D0Ar6,#16 + LSL D0Ar6,D0Ar6,#16 + ADDS D0Re0,D0Re0,D0Ar6 + ADD D1Re0,D1Re0,D1Ar5 + ADDCS D1Re0,D1Re0,#1 + MOV PC, D1RtP + .size ___muldi3,.-___muldi3 diff --git a/arch/metag/lib/ucmpdi2.S b/arch/metag/lib/ucmpdi2.S new file mode 100644 index 000000000000..6f3347f7daeb --- /dev/null +++ b/arch/metag/lib/ucmpdi2.S @@ -0,0 +1,27 @@ +! Copyright (C) 2012 by Imagination Technologies Ltd. +! +! 64-bit unsigned compare routine. +! + + .text + .global ___ucmpdi2 + .type ___ucmpdi2,function + +! low high +! u64 a (D0Ar2, D1Ar1) +! u64 b (D0Ar4, D1Ar3) +___ucmpdi2: + ! start at 1 (equal) and conditionally increment or decrement + MOV D0Re0,#1 + + ! high words + CMP D1Ar1,D1Ar3 + ! or if equal, low words + CMPEQ D0Ar2,D0Ar4 + + ! unsigned compare + SUBLO D0Re0,D0Re0,#1 + ADDHI D0Re0,D0Re0,#1 + + MOV PC,D1RtP + .size ___ucmpdi2,.-___ucmpdi2 diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c new file mode 100644 index 000000000000..b3ebfe9c8e88 --- /dev/null +++ b/arch/metag/lib/usercopy.c @@ -0,0 +1,1354 @@ +/* + * User address space access functions. + * The non-inlined parts of asm-metag/uaccess.h are here. + * + * Copyright (C) 2006, Imagination Technologies. + * Copyright (C) 2000, Axis Communications AB. + * + * Written by Hans-Peter Nilsson. + * Pieces used from memcpy, originally by Kenny Ranerup long time ago. + * Modified for Meta by Will Newton. + */ + +#include <linux/export.h> +#include <linux/uaccess.h> +#include <asm/cache.h> /* def of L1_CACHE_BYTES */ + +#define USE_RAPF +#define RAPF_MIN_BUF_SIZE (3*L1_CACHE_BYTES) + + +/* The "double write" in this code is because the Meta will not fault + * immediately unless the memory pipe is forced to by e.g. a data stall or + * another memory op. The second write should be discarded by the write + * combiner so should have virtually no cost. + */ + +#define __asm_copy_user_cont(to, from, ret, COPY, FIXUP, TENTRY) \ + asm volatile ( \ + COPY \ + "1:\n" \ + " .section .fixup,\"ax\"\n" \ + " MOV D1Ar1,#0\n" \ + FIXUP \ + " MOVT D1Ar1,#HI(1b)\n" \ + " JUMP D1Ar1,#LO(1b)\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + TENTRY \ + " .previous\n" \ + : "=r" (to), "=r" (from), "=r" (ret) \ + : "0" (to), "1" (from), "2" (ret) \ + : "D1Ar1", "memory") + + +#define __asm_copy_to_user_1(to, from, ret) \ + __asm_copy_user_cont(to, from, ret, \ + " GETB D1Ar1,[%1++]\n" \ + " SETB [%0],D1Ar1\n" \ + "2: SETB [%0++],D1Ar1\n", \ + "3: ADD %2,%2,#1\n", \ + " .long 2b,3b\n") + +#define __asm_copy_to_user_2x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ + __asm_copy_user_cont(to, from, ret, \ + " GETW D1Ar1,[%1++]\n" \ + " SETW [%0],D1Ar1\n" \ + "2: SETW [%0++],D1Ar1\n" COPY, \ + "3: ADD %2,%2,#2\n" FIXUP, \ + " .long 2b,3b\n" TENTRY) + +#define __asm_copy_to_user_2(to, from, ret) \ + __asm_copy_to_user_2x_cont(to, from, ret, "", "", "") + +#define __asm_copy_to_user_3(to, from, ret) \ + __asm_copy_to_user_2x_cont(to, from, ret, \ + " GETB D1Ar1,[%1++]\n" \ + " SETB [%0],D1Ar1\n" \ + "4: SETB [%0++],D1Ar1\n", \ + "5: ADD %2,%2,#1\n", \ + " .long 4b,5b\n") + +#define __asm_copy_to_user_4x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ + __asm_copy_user_cont(to, from, ret, \ + " GETD D1Ar1,[%1++]\n" \ + " SETD [%0],D1Ar1\n" \ + "2: SETD [%0++],D1Ar1\n" COPY, \ + "3: ADD %2,%2,#4\n" FIXUP, \ + " .long 2b,3b\n" TENTRY) + +#define __asm_copy_to_user_4(to, from, ret) \ + __asm_copy_to_user_4x_cont(to, from, ret, "", "", "") + +#define __asm_copy_to_user_5(to, from, ret) \ + __asm_copy_to_user_4x_cont(to, from, ret, \ + " GETB D1Ar1,[%1++]\n" \ + " SETB [%0],D1Ar1\n" \ + "4: SETB [%0++],D1Ar1\n", \ + "5: ADD %2,%2,#1\n", \ + " .long 4b,5b\n") + +#define __asm_copy_to_user_6x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ + __asm_copy_to_user_4x_cont(to, from, ret, \ + " GETW D1Ar1,[%1++]\n" \ + " SETW [%0],D1Ar1\n" \ + "4: SETW [%0++],D1Ar1\n" COPY, \ + "5: ADD %2,%2,#2\n" FIXUP, \ + " .long 4b,5b\n" TENTRY) + +#define __asm_copy_to_user_6(to, from, ret) \ + __asm_copy_to_user_6x_cont(to, from, ret, "", "", "") + +#define __asm_copy_to_user_7(to, from, ret) \ + __asm_copy_to_user_6x_cont(to, from, ret, \ + " GETB D1Ar1,[%1++]\n" \ + " SETB [%0],D1Ar1\n" \ + "6: SETB [%0++],D1Ar1\n", \ + "7: ADD %2,%2,#1\n", \ + " .long 6b,7b\n") + +#define __asm_copy_to_user_8x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ + __asm_copy_to_user_4x_cont(to, from, ret, \ + " GETD D1Ar1,[%1++]\n" \ + " SETD [%0],D1Ar1\n" \ + "4: SETD [%0++],D1Ar1\n" COPY, \ + "5: ADD %2,%2,#4\n" FIXUP, \ + " .long 4b,5b\n" TENTRY) + +#define __asm_copy_to_user_8(to, from, ret) \ + __asm_copy_to_user_8x_cont(to, from, ret, "", "", "") + +#define __asm_copy_to_user_9(to, from, ret) \ + __asm_copy_to_user_8x_cont(to, from, ret, \ + " GETB D1Ar1,[%1++]\n" \ + " SETB [%0],D1Ar1\n" \ + "6: SETB [%0++],D1Ar1\n", \ + "7: ADD %2,%2,#1\n", \ + " .long 6b,7b\n") + +#define __asm_copy_to_user_10x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ + __asm_copy_to_user_8x_cont(to, from, ret, \ + " GETW D1Ar1,[%1++]\n" \ + " SETW [%0],D1Ar1\n" \ + "6: SETW [%0++],D1Ar1\n" COPY, \ + "7: ADD %2,%2,#2\n" FIXUP, \ + " .long 6b,7b\n" TENTRY) + +#define __asm_copy_to_user_10(to, from, ret) \ + __asm_copy_to_user_10x_cont(to, from, ret, "", "", "") + +#define __asm_copy_to_user_11(to, from, ret) \ + __asm_copy_to_user_10x_cont(to, from, ret, \ + " GETB D1Ar1,[%1++]\n" \ + " SETB [%0],D1Ar1\n" \ + "8: SETB [%0++],D1Ar1\n", \ + "9: ADD %2,%2,#1\n", \ + " .long 8b,9b\n") + +#define __asm_copy_to_user_12x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ + __asm_copy_to_user_8x_cont(to, from, ret, \ + " GETD D1Ar1,[%1++]\n" \ + " SETD [%0],D1Ar1\n" \ + "6: SETD [%0++],D1Ar1\n" COPY, \ + "7: ADD %2,%2,#4\n" FIXUP, \ + " .long 6b,7b\n" TENTRY) +#define __asm_copy_to_user_12(to, from, ret) \ + __asm_copy_to_user_12x_cont(to, from, ret, "", "", "") + +#define __asm_copy_to_user_13(to, from, ret) \ + __asm_copy_to_user_12x_cont(to, from, ret, \ + " GETB D1Ar1,[%1++]\n" \ + " SETB [%0],D1Ar1\n" \ + "8: SETB [%0++],D1Ar1\n", \ + "9: ADD %2,%2,#1\n", \ + " .long 8b,9b\n") + +#define __asm_copy_to_user_14x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ + __asm_copy_to_user_12x_cont(to, from, ret, \ + " GETW D1Ar1,[%1++]\n" \ + " SETW [%0],D1Ar1\n" \ + "8: SETW [%0++],D1Ar1\n" COPY, \ + "9: ADD %2,%2,#2\n" FIXUP, \ + " .long 8b,9b\n" TENTRY) + +#define __asm_copy_to_user_14(to, from, ret) \ + __asm_copy_to_user_14x_cont(to, from, ret, "", "", "") + +#define __asm_copy_to_user_15(to, from, ret) \ + __asm_copy_to_user_14x_cont(to, from, ret, \ + " GETB D1Ar1,[%1++]\n" \ + " SETB [%0],D1Ar1\n" \ + "10: SETB [%0++],D1Ar1\n", \ + "11: ADD %2,%2,#1\n", \ + " .long 10b,11b\n") + +#define __asm_copy_to_user_16x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ + __asm_copy_to_user_12x_cont(to, from, ret, \ + " GETD D1Ar1,[%1++]\n" \ + " SETD [%0],D1Ar1\n" \ + "8: SETD [%0++],D1Ar1\n" COPY, \ + "9: ADD %2,%2,#4\n" FIXUP, \ + " .long 8b,9b\n" TENTRY) + +#define __asm_copy_to_user_16(to, from, ret) \ + __asm_copy_to_user_16x_cont(to, from, ret, "", "", "") + +#define __asm_copy_to_user_8x64(to, from, ret) \ + asm volatile ( \ + " GETL D0Ar2,D1Ar1,[%1++]\n" \ + " SETL [%0],D0Ar2,D1Ar1\n" \ + "2: SETL [%0++],D0Ar2,D1Ar1\n" \ + "1:\n" \ + " .section .fixup,\"ax\"\n" \ + "3: ADD %2,%2,#8\n" \ + " MOVT D0Ar2,#HI(1b)\n" \ + " JUMP D0Ar2,#LO(1b)\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .long 2b,3b\n" \ + " .previous\n" \ + : "=r" (to), "=r" (from), "=r" (ret) \ + : "0" (to), "1" (from), "2" (ret) \ + : "D1Ar1", "D0Ar2", "memory") + +/* + * optimized copying loop using RAPF when 64 bit aligned + * + * n will be automatically decremented inside the loop + * ret will be left intact. if error occurs we will rewind + * so that the original non optimized code will fill up + * this value correctly. + * + * on fault: + * > n will hold total number of uncopied bytes + * + * > {'to','from'} will be rewind back so that + * the non-optimized code will do the proper fix up + * + * DCACHE drops the cacheline which helps in reducing cache + * pollution. + * + * We introduce an extra SETL at the end of the loop to + * ensure we don't fall off the loop before we catch all + * erros. + * + * NOTICE: + * LSM_STEP in TXSTATUS must be cleared in fix up code. + * since we're using M{S,G}ETL, a fault might happen at + * any address in the middle of M{S,G}ETL causing + * the value of LSM_STEP to be incorrect which can + * cause subsequent use of M{S,G}ET{L,D} to go wrong. + * ie: if LSM_STEP was 1 when a fault occurs, the + * next call to M{S,G}ET{L,D} will skip the first + * copy/getting as it think that the first 1 has already + * been done. + * + */ +#define __asm_copy_user_64bit_rapf_loop( \ + to, from, ret, n, id, FIXUP) \ + asm volatile ( \ + ".balign 8\n" \ + "MOV RAPF, %1\n" \ + "MSETL [A0StP++], D0Ar6, D0FrT, D0.5, D0.6, D0.7\n" \ + "MOV D0Ar6, #0\n" \ + "LSR D1Ar5, %3, #6\n" \ + "SUB TXRPT, D1Ar5, #2\n" \ + "MOV RAPF, %1\n" \ + "$Lloop"id":\n" \ + "ADD RAPF, %1, #64\n" \ + "21:\n" \ + "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "22:\n" \ + "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "SUB %3, %3, #32\n" \ + "23:\n" \ + "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "24:\n" \ + "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "SUB %3, %3, #32\n" \ + "DCACHE [%1+#-64], D0Ar6\n" \ + "BR $Lloop"id"\n" \ + \ + "MOV RAPF, %1\n" \ + "25:\n" \ + "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "26:\n" \ + "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "SUB %3, %3, #32\n" \ + "27:\n" \ + "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "28:\n" \ + "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "SUB %0, %0, #8\n" \ + "29:\n" \ + "SETL [%0++], D0.7, D1.7\n" \ + "SUB %3, %3, #32\n" \ + "1:" \ + "DCACHE [%1+#-64], D0Ar6\n" \ + "GETL D0Ar6, D1Ar5, [A0StP+#-40]\n" \ + "GETL D0FrT, D1RtP, [A0StP+#-32]\n" \ + "GETL D0.5, D1.5, [A0StP+#-24]\n" \ + "GETL D0.6, D1.6, [A0StP+#-16]\n" \ + "GETL D0.7, D1.7, [A0StP+#-8]\n" \ + "SUB A0StP, A0StP, #40\n" \ + " .section .fixup,\"ax\"\n" \ + "4:\n" \ + " ADD %0, %0, #8\n" \ + "3:\n" \ + " MOV D0Ar2, TXSTATUS\n" \ + " MOV D1Ar1, TXSTATUS\n" \ + " AND D1Ar1, D1Ar1, #0xFFFFF8FF\n" \ + " MOV TXSTATUS, D1Ar1\n" \ + FIXUP \ + " MOVT D0Ar2,#HI(1b)\n" \ + " JUMP D0Ar2,#LO(1b)\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .long 21b,3b\n" \ + " .long 22b,3b\n" \ + " .long 23b,3b\n" \ + " .long 24b,3b\n" \ + " .long 25b,3b\n" \ + " .long 26b,3b\n" \ + " .long 27b,3b\n" \ + " .long 28b,3b\n" \ + " .long 29b,4b\n" \ + " .previous\n" \ + : "=r" (to), "=r" (from), "=r" (ret), "=d" (n) \ + : "0" (to), "1" (from), "2" (ret), "3" (n) \ + : "D1Ar1", "D0Ar2", "memory") + +/* rewind 'to' and 'from' pointers when a fault occurs + * + * Rationale: + * A fault always occurs on writing to user buffer. A fault + * is at a single address, so we need to rewind by only 4 + * bytes. + * Since we do a complete read from kernel buffer before + * writing, we need to rewind it also. The amount to be + * rewind equals the number of faulty writes in MSETD + * which is: [4 - (LSM_STEP-1)]*8 + * LSM_STEP is bits 10:8 in TXSTATUS which is already read + * and stored in D0Ar2 + * + * NOTE: If a fault occurs at the last operation in M{G,S}ETL + * LSM_STEP will be 0. ie: we do 4 writes in our case, if + * a fault happens at the 4th write, LSM_STEP will be 0 + * instead of 4. The code copes with that. + * + * n is updated by the number of successful writes, which is: + * n = n - (LSM_STEP-1)*8 + */ +#define __asm_copy_to_user_64bit_rapf_loop(to, from, ret, n, id)\ + __asm_copy_user_64bit_rapf_loop(to, from, ret, n, id, \ + "LSR D0Ar2, D0Ar2, #8\n" \ + "AND D0Ar2, D0Ar2, #0x7\n" \ + "ADDZ D0Ar2, D0Ar2, #4\n" \ + "SUB D0Ar2, D0Ar2, #1\n" \ + "MOV D1Ar1, #4\n" \ + "SUB D0Ar2, D1Ar1, D0Ar2\n" \ + "LSL D0Ar2, D0Ar2, #3\n" \ + "LSL D1Ar1, D1Ar1, #3\n" \ + "SUB D1Ar1, D1Ar1, D0Ar2\n" \ + "SUB %0, %0, #8\n" \ + "SUB %1, %1,D0Ar2\n" \ + "SUB %3, %3, D1Ar1\n") + +/* + * optimized copying loop using RAPF when 32 bit aligned + * + * n will be automatically decremented inside the loop + * ret will be left intact. if error occurs we will rewind + * so that the original non optimized code will fill up + * this value correctly. + * + * on fault: + * > n will hold total number of uncopied bytes + * + * > {'to','from'} will be rewind back so that + * the non-optimized code will do the proper fix up + * + * DCACHE drops the cacheline which helps in reducing cache + * pollution. + * + * We introduce an extra SETD at the end of the loop to + * ensure we don't fall off the loop before we catch all + * erros. + * + * NOTICE: + * LSM_STEP in TXSTATUS must be cleared in fix up code. + * since we're using M{S,G}ETL, a fault might happen at + * any address in the middle of M{S,G}ETL causing + * the value of LSM_STEP to be incorrect which can + * cause subsequent use of M{S,G}ET{L,D} to go wrong. + * ie: if LSM_STEP was 1 when a fault occurs, the + * next call to M{S,G}ET{L,D} will skip the first + * copy/getting as it think that the first 1 has already + * been done. + * + */ +#define __asm_copy_user_32bit_rapf_loop( \ + to, from, ret, n, id, FIXUP) \ + asm volatile ( \ + ".balign 8\n" \ + "MOV RAPF, %1\n" \ + "MSETL [A0StP++], D0Ar6, D0FrT, D0.5, D0.6, D0.7\n" \ + "MOV D0Ar6, #0\n" \ + "LSR D1Ar5, %3, #6\n" \ + "SUB TXRPT, D1Ar5, #2\n" \ + "MOV RAPF, %1\n" \ + "$Lloop"id":\n" \ + "ADD RAPF, %1, #64\n" \ + "21:\n" \ + "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "22:\n" \ + "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "SUB %3, %3, #16\n" \ + "23:\n" \ + "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "24:\n" \ + "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "SUB %3, %3, #16\n" \ + "25:\n" \ + "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "26:\n" \ + "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "SUB %3, %3, #16\n" \ + "27:\n" \ + "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "28:\n" \ + "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "SUB %3, %3, #16\n" \ + "DCACHE [%1+#-64], D0Ar6\n" \ + "BR $Lloop"id"\n" \ + \ + "MOV RAPF, %1\n" \ + "29:\n" \ + "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "30:\n" \ + "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "SUB %3, %3, #16\n" \ + "31:\n" \ + "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "32:\n" \ + "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "SUB %3, %3, #16\n" \ + "33:\n" \ + "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "34:\n" \ + "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "SUB %3, %3, #16\n" \ + "35:\n" \ + "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "36:\n" \ + "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "SUB %0, %0, #4\n" \ + "37:\n" \ + "SETD [%0++], D0.7\n" \ + "SUB %3, %3, #16\n" \ + "1:" \ + "DCACHE [%1+#-64], D0Ar6\n" \ + "GETL D0Ar6, D1Ar5, [A0StP+#-40]\n" \ + "GETL D0FrT, D1RtP, [A0StP+#-32]\n" \ + "GETL D0.5, D1.5, [A0StP+#-24]\n" \ + "GETL D0.6, D1.6, [A0StP+#-16]\n" \ + "GETL D0.7, D1.7, [A0StP+#-8]\n" \ + "SUB A0StP, A0StP, #40\n" \ + " .section .fixup,\"ax\"\n" \ + "4:\n" \ + " ADD %0, %0, #4\n" \ + "3:\n" \ + " MOV D0Ar2, TXSTATUS\n" \ + " MOV D1Ar1, TXSTATUS\n" \ + " AND D1Ar1, D1Ar1, #0xFFFFF8FF\n" \ + " MOV TXSTATUS, D1Ar1\n" \ + FIXUP \ + " MOVT D0Ar2,#HI(1b)\n" \ + " JUMP D0Ar2,#LO(1b)\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .long 21b,3b\n" \ + " .long 22b,3b\n" \ + " .long 23b,3b\n" \ + " .long 24b,3b\n" \ + " .long 25b,3b\n" \ + " .long 26b,3b\n" \ + " .long 27b,3b\n" \ + " .long 28b,3b\n" \ + " .long 29b,3b\n" \ + " .long 30b,3b\n" \ + " .long 31b,3b\n" \ + " .long 32b,3b\n" \ + " .long 33b,3b\n" \ + " .long 34b,3b\n" \ + " .long 35b,3b\n" \ + " .long 36b,3b\n" \ + " .long 37b,4b\n" \ + " .previous\n" \ + : "=r" (to), "=r" (from), "=r" (ret), "=d" (n) \ + : "0" (to), "1" (from), "2" (ret), "3" (n) \ + : "D1Ar1", "D0Ar2", "memory") + +/* rewind 'to' and 'from' pointers when a fault occurs + * + * Rationale: + * A fault always occurs on writing to user buffer. A fault + * is at a single address, so we need to rewind by only 4 + * bytes. + * Since we do a complete read from kernel buffer before + * writing, we need to rewind it also. The amount to be + * rewind equals the number of faulty writes in MSETD + * which is: [4 - (LSM_STEP-1)]*4 + * LSM_STEP is bits 10:8 in TXSTATUS which is already read + * and stored in D0Ar2 + * + * NOTE: If a fault occurs at the last operation in M{G,S}ETL + * LSM_STEP will be 0. ie: we do 4 writes in our case, if + * a fault happens at the 4th write, LSM_STEP will be 0 + * instead of 4. The code copes with that. + * + * n is updated by the number of successful writes, which is: + * n = n - (LSM_STEP-1)*4 + */ +#define __asm_copy_to_user_32bit_rapf_loop(to, from, ret, n, id)\ + __asm_copy_user_32bit_rapf_loop(to, from, ret, n, id, \ + "LSR D0Ar2, D0Ar2, #8\n" \ + "AND D0Ar2, D0Ar2, #0x7\n" \ + "ADDZ D0Ar2, D0Ar2, #4\n" \ + "SUB D0Ar2, D0Ar2, #1\n" \ + "MOV D1Ar1, #4\n" \ + "SUB D0Ar2, D1Ar1, D0Ar2\n" \ + "LSL D0Ar2, D0Ar2, #2\n" \ + "LSL D1Ar1, D1Ar1, #2\n" \ + "SUB D1Ar1, D1Ar1, D0Ar2\n" \ + "SUB %0, %0, #4\n" \ + "SUB %1, %1, D0Ar2\n" \ + "SUB %3, %3, D1Ar1\n") + +unsigned long __copy_user(void __user *pdst, const void *psrc, + unsigned long n) +{ + register char __user *dst asm ("A0.2") = pdst; + register const char *src asm ("A1.2") = psrc; + unsigned long retn = 0; + + if (n == 0) + return 0; + + if ((unsigned long) src & 1) { + __asm_copy_to_user_1(dst, src, retn); + n--; + } + if ((unsigned long) dst & 1) { + /* Worst case - byte copy */ + while (n > 0) { + __asm_copy_to_user_1(dst, src, retn); + n--; + } + } + if (((unsigned long) src & 2) && n >= 2) { + __asm_copy_to_user_2(dst, src, retn); + n -= 2; + } + if ((unsigned long) dst & 2) { + /* Second worst case - word copy */ + while (n >= 2) { + __asm_copy_to_user_2(dst, src, retn); + n -= 2; + } + } + +#ifdef USE_RAPF + /* 64 bit copy loop */ + if (!(((unsigned long) src | (__force unsigned long) dst) & 7)) { + if (n >= RAPF_MIN_BUF_SIZE) { + /* copy user using 64 bit rapf copy */ + __asm_copy_to_user_64bit_rapf_loop(dst, src, retn, + n, "64cu"); + } + while (n >= 8) { + __asm_copy_to_user_8x64(dst, src, retn); + n -= 8; + } + } + if (n >= RAPF_MIN_BUF_SIZE) { + /* copy user using 32 bit rapf copy */ + __asm_copy_to_user_32bit_rapf_loop(dst, src, retn, n, "32cu"); + } +#else + /* 64 bit copy loop */ + if (!(((unsigned long) src | (__force unsigned long) dst) & 7)) { + while (n >= 8) { + __asm_copy_to_user_8x64(dst, src, retn); + n -= 8; + } + } +#endif + + while (n >= 16) { + __asm_copy_to_user_16(dst, src, retn); + n -= 16; + } + + while (n >= 4) { + __asm_copy_to_user_4(dst, src, retn); + n -= 4; + } + + switch (n) { + case 0: + break; + case 1: + __asm_copy_to_user_1(dst, src, retn); + break; + case 2: + __asm_copy_to_user_2(dst, src, retn); + break; + case 3: + __asm_copy_to_user_3(dst, src, retn); + break; + } + + return retn; +} +EXPORT_SYMBOL(__copy_user); + +#define __asm_copy_from_user_1(to, from, ret) \ + __asm_copy_user_cont(to, from, ret, \ + " GETB D1Ar1,[%1++]\n" \ + "2: SETB [%0++],D1Ar1\n", \ + "3: ADD %2,%2,#1\n" \ + " SETB [%0++],D1Ar1\n", \ + " .long 2b,3b\n") + +#define __asm_copy_from_user_2x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ + __asm_copy_user_cont(to, from, ret, \ + " GETW D1Ar1,[%1++]\n" \ + "2: SETW [%0++],D1Ar1\n" COPY, \ + "3: ADD %2,%2,#2\n" \ + " SETW [%0++],D1Ar1\n" FIXUP, \ + " .long 2b,3b\n" TENTRY) + +#define __asm_copy_from_user_2(to, from, ret) \ + __asm_copy_from_user_2x_cont(to, from, ret, "", "", "") + +#define __asm_copy_from_user_3(to, from, ret) \ + __asm_copy_from_user_2x_cont(to, from, ret, \ + " GETB D1Ar1,[%1++]\n" \ + "4: SETB [%0++],D1Ar1\n", \ + "5: ADD %2,%2,#1\n" \ + " SETB [%0++],D1Ar1\n", \ + " .long 4b,5b\n") + +#define __asm_copy_from_user_4x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ + __asm_copy_user_cont(to, from, ret, \ + " GETD D1Ar1,[%1++]\n" \ + "2: SETD [%0++],D1Ar1\n" COPY, \ + "3: ADD %2,%2,#4\n" \ + " SETD [%0++],D1Ar1\n" FIXUP, \ + " .long 2b,3b\n" TENTRY) + +#define __asm_copy_from_user_4(to, from, ret) \ + __asm_copy_from_user_4x_cont(to, from, ret, "", "", "") + +#define __asm_copy_from_user_5(to, from, ret) \ + __asm_copy_from_user_4x_cont(to, from, ret, \ + " GETB D1Ar1,[%1++]\n" \ + "4: SETB [%0++],D1Ar1\n", \ + "5: ADD %2,%2,#1\n" \ + " SETB [%0++],D1Ar1\n", \ + " .long 4b,5b\n") + +#define __asm_copy_from_user_6x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ + __asm_copy_from_user_4x_cont(to, from, ret, \ + " GETW D1Ar1,[%1++]\n" \ + "4: SETW [%0++],D1Ar1\n" COPY, \ + "5: ADD %2,%2,#2\n" \ + " SETW [%0++],D1Ar1\n" FIXUP, \ + " .long 4b,5b\n" TENTRY) + +#define __asm_copy_from_user_6(to, from, ret) \ + __asm_copy_from_user_6x_cont(to, from, ret, "", "", "") + +#define __asm_copy_from_user_7(to, from, ret) \ + __asm_copy_from_user_6x_cont(to, from, ret, \ + " GETB D1Ar1,[%1++]\n" \ + "6: SETB [%0++],D1Ar1\n", \ + "7: ADD %2,%2,#1\n" \ + " SETB [%0++],D1Ar1\n", \ + " .long 6b,7b\n") + +#define __asm_copy_from_user_8x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ + __asm_copy_from_user_4x_cont(to, from, ret, \ + " GETD D1Ar1,[%1++]\n" \ + "4: SETD [%0++],D1Ar1\n" COPY, \ + "5: ADD %2,%2,#4\n" \ + " SETD [%0++],D1Ar1\n" FIXUP, \ + " .long 4b,5b\n" TENTRY) + +#define __asm_copy_from_user_8(to, from, ret) \ + __asm_copy_from_user_8x_cont(to, from, ret, "", "", "") + +#define __asm_copy_from_user_9(to, from, ret) \ + __asm_copy_from_user_8x_cont(to, from, ret, \ + " GETB D1Ar1,[%1++]\n" \ + "6: SETB [%0++],D1Ar1\n", \ + "7: ADD %2,%2,#1\n" \ + " SETB [%0++],D1Ar1\n", \ + " .long 6b,7b\n") + +#define __asm_copy_from_user_10x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ + __asm_copy_from_user_8x_cont(to, from, ret, \ + " GETW D1Ar1,[%1++]\n" \ + "6: SETW [%0++],D1Ar1\n" COPY, \ + "7: ADD %2,%2,#2\n" \ + " SETW [%0++],D1Ar1\n" FIXUP, \ + " .long 6b,7b\n" TENTRY) + +#define __asm_copy_from_user_10(to, from, ret) \ + __asm_copy_from_user_10x_cont(to, from, ret, "", "", "") + +#define __asm_copy_from_user_11(to, from, ret) \ + __asm_copy_from_user_10x_cont(to, from, ret, \ + " GETB D1Ar1,[%1++]\n" \ + "8: SETB [%0++],D1Ar1\n", \ + "9: ADD %2,%2,#1\n" \ + " SETB [%0++],D1Ar1\n", \ + " .long 8b,9b\n") + +#define __asm_copy_from_user_12x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ + __asm_copy_from_user_8x_cont(to, from, ret, \ + " GETD D1Ar1,[%1++]\n" \ + "6: SETD [%0++],D1Ar1\n" COPY, \ + "7: ADD %2,%2,#4\n" \ + " SETD [%0++],D1Ar1\n" FIXUP, \ + " .long 6b,7b\n" TENTRY) + +#define __asm_copy_from_user_12(to, from, ret) \ + __asm_copy_from_user_12x_cont(to, from, ret, "", "", "") + +#define __asm_copy_from_user_13(to, from, ret) \ + __asm_copy_from_user_12x_cont(to, from, ret, \ + " GETB D1Ar1,[%1++]\n" \ + "8: SETB [%0++],D1Ar1\n", \ + "9: ADD %2,%2,#1\n" \ + " SETB [%0++],D1Ar1\n", \ + " .long 8b,9b\n") + +#define __asm_copy_from_user_14x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ + __asm_copy_from_user_12x_cont(to, from, ret, \ + " GETW D1Ar1,[%1++]\n" \ + "8: SETW [%0++],D1Ar1\n" COPY, \ + "9: ADD %2,%2,#2\n" \ + " SETW [%0++],D1Ar1\n" FIXUP, \ + " .long 8b,9b\n" TENTRY) + +#define __asm_copy_from_user_14(to, from, ret) \ + __asm_copy_from_user_14x_cont(to, from, ret, "", "", "") + +#define __asm_copy_from_user_15(to, from, ret) \ + __asm_copy_from_user_14x_cont(to, from, ret, \ + " GETB D1Ar1,[%1++]\n" \ + "10: SETB [%0++],D1Ar1\n", \ + "11: ADD %2,%2,#1\n" \ + " SETB [%0++],D1Ar1\n", \ + " .long 10b,11b\n") + +#define __asm_copy_from_user_16x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ + __asm_copy_from_user_12x_cont(to, from, ret, \ + " GETD D1Ar1,[%1++]\n" \ + "8: SETD [%0++],D1Ar1\n" COPY, \ + "9: ADD %2,%2,#4\n" \ + " SETD [%0++],D1Ar1\n" FIXUP, \ + " .long 8b,9b\n" TENTRY) + +#define __asm_copy_from_user_16(to, from, ret) \ + __asm_copy_from_user_16x_cont(to, from, ret, "", "", "") + +#define __asm_copy_from_user_8x64(to, from, ret) \ + asm volatile ( \ + " GETL D0Ar2,D1Ar1,[%1++]\n" \ + "2: SETL [%0++],D0Ar2,D1Ar1\n" \ + "1:\n" \ + " .section .fixup,\"ax\"\n" \ + " MOV D1Ar1,#0\n" \ + " MOV D0Ar2,#0\n" \ + "3: ADD %2,%2,#8\n" \ + " SETL [%0++],D0Ar2,D1Ar1\n" \ + " MOVT D0Ar2,#HI(1b)\n" \ + " JUMP D0Ar2,#LO(1b)\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .long 2b,3b\n" \ + " .previous\n" \ + : "=a" (to), "=r" (from), "=r" (ret) \ + : "0" (to), "1" (from), "2" (ret) \ + : "D1Ar1", "D0Ar2", "memory") + +/* rewind 'from' pointer when a fault occurs + * + * Rationale: + * A fault occurs while reading from user buffer, which is the + * source. Since the fault is at a single address, we only + * need to rewind by 8 bytes. + * Since we don't write to kernel buffer until we read first, + * the kernel buffer is at the right state and needn't be + * corrected. + */ +#define __asm_copy_from_user_64bit_rapf_loop(to, from, ret, n, id) \ + __asm_copy_user_64bit_rapf_loop(to, from, ret, n, id, \ + "SUB %1, %1, #8\n") + +/* rewind 'from' pointer when a fault occurs + * + * Rationale: + * A fault occurs while reading from user buffer, which is the + * source. Since the fault is at a single address, we only + * need to rewind by 4 bytes. + * Since we don't write to kernel buffer until we read first, + * the kernel buffer is at the right state and needn't be + * corrected. + */ +#define __asm_copy_from_user_32bit_rapf_loop(to, from, ret, n, id) \ + __asm_copy_user_32bit_rapf_loop(to, from, ret, n, id, \ + "SUB %1, %1, #4\n") + + +/* Copy from user to kernel, zeroing the bytes that were inaccessible in + userland. The return-value is the number of bytes that were + inaccessible. */ +unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, + unsigned long n) +{ + register char *dst asm ("A0.2") = pdst; + register const char __user *src asm ("A1.2") = psrc; + unsigned long retn = 0; + + if (n == 0) + return 0; + + if ((unsigned long) src & 1) { + __asm_copy_from_user_1(dst, src, retn); + n--; + } + if ((unsigned long) dst & 1) { + /* Worst case - byte copy */ + while (n > 0) { + __asm_copy_from_user_1(dst, src, retn); + n--; + if (retn) + goto copy_exception_bytes; + } + } + if (((unsigned long) src & 2) && n >= 2) { + __asm_copy_from_user_2(dst, src, retn); + n -= 2; + } + if ((unsigned long) dst & 2) { + /* Second worst case - word copy */ + while (n >= 2) { + __asm_copy_from_user_2(dst, src, retn); + n -= 2; + if (retn) + goto copy_exception_bytes; + } + } + + /* We only need one check after the unalignment-adjustments, + because if both adjustments were done, either both or + neither reference had an exception. */ + if (retn != 0) + goto copy_exception_bytes; + +#ifdef USE_RAPF + /* 64 bit copy loop */ + if (!(((unsigned long) src | (unsigned long) dst) & 7)) { + if (n >= RAPF_MIN_BUF_SIZE) { + /* Copy using fast 64bit rapf */ + __asm_copy_from_user_64bit_rapf_loop(dst, src, retn, + n, "64cuz"); + } + while (n >= 8) { + __asm_copy_from_user_8x64(dst, src, retn); + n -= 8; + if (retn) + goto copy_exception_bytes; + } + } + + if (n >= RAPF_MIN_BUF_SIZE) { + /* Copy using fast 32bit rapf */ + __asm_copy_from_user_32bit_rapf_loop(dst, src, retn, + n, "32cuz"); + } +#else + /* 64 bit copy loop */ + if (!(((unsigned long) src | (unsigned long) dst) & 7)) { + while (n >= 8) { + __asm_copy_from_user_8x64(dst, src, retn); + n -= 8; + if (retn) + goto copy_exception_bytes; + } + } +#endif + + while (n >= 4) { + __asm_copy_from_user_4(dst, src, retn); + n -= 4; + + if (retn) + goto copy_exception_bytes; + } + + /* If we get here, there were no memory read faults. */ + switch (n) { + /* These copies are at least "naturally aligned" (so we don't + have to check each byte), due to the src alignment code. + The *_3 case *will* get the correct count for retn. */ + case 0: + /* This case deliberately left in (if you have doubts check the + generated assembly code). */ + break; + case 1: + __asm_copy_from_user_1(dst, src, retn); + break; + case 2: + __asm_copy_from_user_2(dst, src, retn); + break; + case 3: + __asm_copy_from_user_3(dst, src, retn); + break; + } + + /* If we get here, retn correctly reflects the number of failing + bytes. */ + return retn; + + copy_exception_bytes: + /* We already have "retn" bytes cleared, and need to clear the + remaining "n" bytes. A non-optimized simple byte-for-byte in-line + memset is preferred here, since this isn't speed-critical code and + we'd rather have this a leaf-function than calling memset. */ + { + char *endp; + for (endp = dst + n; dst < endp; dst++) + *dst = 0; + } + + return retn + n; +} +EXPORT_SYMBOL(__copy_user_zeroing); + +#define __asm_clear_8x64(to, ret) \ + asm volatile ( \ + " MOV D0Ar2,#0\n" \ + " MOV D1Ar1,#0\n" \ + " SETL [%0],D0Ar2,D1Ar1\n" \ + "2: SETL [%0++],D0Ar2,D1Ar1\n" \ + "1:\n" \ + " .section .fixup,\"ax\"\n" \ + "3: ADD %1,%1,#8\n" \ + " MOVT D0Ar2,#HI(1b)\n" \ + " JUMP D0Ar2,#LO(1b)\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .long 2b,3b\n" \ + " .previous\n" \ + : "=r" (to), "=r" (ret) \ + : "0" (to), "1" (ret) \ + : "D1Ar1", "D0Ar2", "memory") + +/* Zero userspace. */ + +#define __asm_clear(to, ret, CLEAR, FIXUP, TENTRY) \ + asm volatile ( \ + " MOV D1Ar1,#0\n" \ + CLEAR \ + "1:\n" \ + " .section .fixup,\"ax\"\n" \ + FIXUP \ + " MOVT D1Ar1,#HI(1b)\n" \ + " JUMP D1Ar1,#LO(1b)\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + TENTRY \ + " .previous" \ + : "=r" (to), "=r" (ret) \ + : "0" (to), "1" (ret) \ + : "D1Ar1", "memory") + +#define __asm_clear_1(to, ret) \ + __asm_clear(to, ret, \ + " SETB [%0],D1Ar1\n" \ + "2: SETB [%0++],D1Ar1\n", \ + "3: ADD %1,%1,#1\n", \ + " .long 2b,3b\n") + +#define __asm_clear_2(to, ret) \ + __asm_clear(to, ret, \ + " SETW [%0],D1Ar1\n" \ + "2: SETW [%0++],D1Ar1\n", \ + "3: ADD %1,%1,#2\n", \ + " .long 2b,3b\n") + +#define __asm_clear_3(to, ret) \ + __asm_clear(to, ret, \ + "2: SETW [%0++],D1Ar1\n" \ + " SETB [%0],D1Ar1\n" \ + "3: SETB [%0++],D1Ar1\n", \ + "4: ADD %1,%1,#2\n" \ + "5: ADD %1,%1,#1\n", \ + " .long 2b,4b\n" \ + " .long 3b,5b\n") + +#define __asm_clear_4x_cont(to, ret, CLEAR, FIXUP, TENTRY) \ + __asm_clear(to, ret, \ + " SETD [%0],D1Ar1\n" \ + "2: SETD [%0++],D1Ar1\n" CLEAR, \ + "3: ADD %1,%1,#4\n" FIXUP, \ + " .long 2b,3b\n" TENTRY) + +#define __asm_clear_4(to, ret) \ + __asm_clear_4x_cont(to, ret, "", "", "") + +#define __asm_clear_8x_cont(to, ret, CLEAR, FIXUP, TENTRY) \ + __asm_clear_4x_cont(to, ret, \ + " SETD [%0],D1Ar1\n" \ + "4: SETD [%0++],D1Ar1\n" CLEAR, \ + "5: ADD %1,%1,#4\n" FIXUP, \ + " .long 4b,5b\n" TENTRY) + +#define __asm_clear_8(to, ret) \ + __asm_clear_8x_cont(to, ret, "", "", "") + +#define __asm_clear_12x_cont(to, ret, CLEAR, FIXUP, TENTRY) \ + __asm_clear_8x_cont(to, ret, \ + " SETD [%0],D1Ar1\n" \ + "6: SETD [%0++],D1Ar1\n" CLEAR, \ + "7: ADD %1,%1,#4\n" FIXUP, \ + " .long 6b,7b\n" TENTRY) + +#define __asm_clear_12(to, ret) \ + __asm_clear_12x_cont(to, ret, "", "", "") + +#define __asm_clear_16x_cont(to, ret, CLEAR, FIXUP, TENTRY) \ + __asm_clear_12x_cont(to, ret, \ + " SETD [%0],D1Ar1\n" \ + "8: SETD [%0++],D1Ar1\n" CLEAR, \ + "9: ADD %1,%1,#4\n" FIXUP, \ + " .long 8b,9b\n" TENTRY) + +#define __asm_clear_16(to, ret) \ + __asm_clear_16x_cont(to, ret, "", "", "") + +unsigned long __do_clear_user(void __user *pto, unsigned long pn) +{ + register char __user *dst asm ("D0Re0") = pto; + register unsigned long n asm ("D1Re0") = pn; + register unsigned long retn asm ("D0Ar6") = 0; + + if ((unsigned long) dst & 1) { + __asm_clear_1(dst, retn); + n--; + } + + if ((unsigned long) dst & 2) { + __asm_clear_2(dst, retn); + n -= 2; + } + + /* 64 bit copy loop */ + if (!((__force unsigned long) dst & 7)) { + while (n >= 8) { + __asm_clear_8x64(dst, retn); + n -= 8; + } + } + + while (n >= 16) { + __asm_clear_16(dst, retn); + n -= 16; + } + + while (n >= 4) { + __asm_clear_4(dst, retn); + n -= 4; + } + + switch (n) { + case 0: + break; + case 1: + __asm_clear_1(dst, retn); + break; + case 2: + __asm_clear_2(dst, retn); + break; + case 3: + __asm_clear_3(dst, retn); + break; + } + + return retn; +} +EXPORT_SYMBOL(__do_clear_user); + +unsigned char __get_user_asm_b(const void __user *addr, long *err) +{ + register unsigned char x asm ("D0Re0") = 0; + asm volatile ( + " GETB %0,[%2]\n" + "1:\n" + " GETB %0,[%2]\n" + "2:\n" + " .section .fixup,\"ax\"\n" + "3: MOV D0FrT,%3\n" + " SETD [%1],D0FrT\n" + " MOVT D0FrT,#HI(2b)\n" + " JUMP D0FrT,#LO(2b)\n" + " .previous\n" + " .section __ex_table,\"a\"\n" + " .long 1b,3b\n" + " .previous\n" + : "=r" (x) + : "r" (err), "r" (addr), "P" (-EFAULT) + : "D0FrT"); + return x; +} +EXPORT_SYMBOL(__get_user_asm_b); + +unsigned short __get_user_asm_w(const void __user *addr, long *err) +{ + register unsigned short x asm ("D0Re0") = 0; + asm volatile ( + " GETW %0,[%2]\n" + "1:\n" + " GETW %0,[%2]\n" + "2:\n" + " .section .fixup,\"ax\"\n" + "3: MOV D0FrT,%3\n" + " SETD [%1],D0FrT\n" + " MOVT D0FrT,#HI(2b)\n" + " JUMP D0FrT,#LO(2b)\n" + " .previous\n" + " .section __ex_table,\"a\"\n" + " .long 1b,3b\n" + " .previous\n" + : "=r" (x) + : "r" (err), "r" (addr), "P" (-EFAULT) + : "D0FrT"); + return x; +} +EXPORT_SYMBOL(__get_user_asm_w); + +unsigned int __get_user_asm_d(const void __user *addr, long *err) +{ + register unsigned int x asm ("D0Re0") = 0; + asm volatile ( + " GETD %0,[%2]\n" + "1:\n" + " GETD %0,[%2]\n" + "2:\n" + " .section .fixup,\"ax\"\n" + "3: MOV D0FrT,%3\n" + " SETD [%1],D0FrT\n" + " MOVT D0FrT,#HI(2b)\n" + " JUMP D0FrT,#LO(2b)\n" + " .previous\n" + " .section __ex_table,\"a\"\n" + " .long 1b,3b\n" + " .previous\n" + : "=r" (x) + : "r" (err), "r" (addr), "P" (-EFAULT) + : "D0FrT"); + return x; +} +EXPORT_SYMBOL(__get_user_asm_d); + +long __put_user_asm_b(unsigned int x, void __user *addr) +{ + register unsigned int err asm ("D0Re0") = 0; + asm volatile ( + " MOV %0,#0\n" + " SETB [%2],%1\n" + "1:\n" + " SETB [%2],%1\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: MOV %0,%3\n" + " MOVT D0FrT,#HI(2b)\n" + " JUMP D0FrT,#LO(2b)\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .long 1b,3b\n" + ".previous" + : "=r"(err) + : "d" (x), "a" (addr), "P"(-EFAULT) + : "D0FrT"); + return err; +} +EXPORT_SYMBOL(__put_user_asm_b); + +long __put_user_asm_w(unsigned int x, void __user *addr) +{ + register unsigned int err asm ("D0Re0") = 0; + asm volatile ( + " MOV %0,#0\n" + " SETW [%2],%1\n" + "1:\n" + " SETW [%2],%1\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: MOV %0,%3\n" + " MOVT D0FrT,#HI(2b)\n" + " JUMP D0FrT,#LO(2b)\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .long 1b,3b\n" + ".previous" + : "=r"(err) + : "d" (x), "a" (addr), "P"(-EFAULT) + : "D0FrT"); + return err; +} +EXPORT_SYMBOL(__put_user_asm_w); + +long __put_user_asm_d(unsigned int x, void __user *addr) +{ + register unsigned int err asm ("D0Re0") = 0; + asm volatile ( + " MOV %0,#0\n" + " SETD [%2],%1\n" + "1:\n" + " SETD [%2],%1\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: MOV %0,%3\n" + " MOVT D0FrT,#HI(2b)\n" + " JUMP D0FrT,#LO(2b)\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .long 1b,3b\n" + ".previous" + : "=r"(err) + : "d" (x), "a" (addr), "P"(-EFAULT) + : "D0FrT"); + return err; +} +EXPORT_SYMBOL(__put_user_asm_d); + +long __put_user_asm_l(unsigned long long x, void __user *addr) +{ + register unsigned int err asm ("D0Re0") = 0; + asm volatile ( + " MOV %0,#0\n" + " SETL [%2],%1,%t1\n" + "1:\n" + " SETL [%2],%1,%t1\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: MOV %0,%3\n" + " MOVT D0FrT,#HI(2b)\n" + " JUMP D0FrT,#LO(2b)\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .long 1b,3b\n" + ".previous" + : "=r"(err) + : "d" (x), "a" (addr), "P"(-EFAULT) + : "D0FrT"); + return err; +} +EXPORT_SYMBOL(__put_user_asm_l); + +long strnlen_user(const char __user *src, long count) +{ + long res; + + if (!access_ok(VERIFY_READ, src, 0)) + return 0; + + asm volatile (" MOV D0Ar4, %1\n" + " MOV D0Ar6, %2\n" + "0:\n" + " SUBS D0FrT, D0Ar6, #0\n" + " SUB D0Ar6, D0Ar6, #1\n" + " BLE 2f\n" + " GETB D0FrT, [D0Ar4+#1++]\n" + "1:\n" + " TST D0FrT, #255\n" + " BNE 0b\n" + "2:\n" + " SUB %0, %2, D0Ar6\n" + "3:\n" + " .section .fixup,\"ax\"\n" + "4:\n" + " MOV %0, #0\n" + " MOVT D0FrT,#HI(3b)\n" + " JUMP D0FrT,#LO(3b)\n" + " .previous\n" + " .section __ex_table,\"a\"\n" + " .long 1b,4b\n" + " .previous\n" + : "=r" (res) + : "r" (src), "r" (count) + : "D0FrT", "D0Ar4", "D0Ar6", "cc"); + + return res; +} +EXPORT_SYMBOL(strnlen_user); + +long __strncpy_from_user(char *dst, const char __user *src, long count) +{ + long res; + + if (count == 0) + return 0; + + /* + * Currently, in 2.4.0-test9, most ports use a simple byte-copy loop. + * So do we. + * + * This code is deduced from: + * + * char tmp2; + * long tmp1, tmp3; + * tmp1 = count; + * while ((*dst++ = (tmp2 = *src++)) != 0 + * && --tmp1) + * ; + * + * res = count - tmp1; + * + * with tweaks. + */ + + asm volatile (" MOV %0,%3\n" + "1:\n" + " GETB D0FrT,[%2++]\n" + "2:\n" + " CMP D0FrT,#0\n" + " SETB [%1++],D0FrT\n" + " BEQ 3f\n" + " SUBS %0,%0,#1\n" + " BNZ 1b\n" + "3:\n" + " SUB %0,%3,%0\n" + "4:\n" + " .section .fixup,\"ax\"\n" + "5:\n" + " MOV %0,%7\n" + " MOVT D0FrT,#HI(4b)\n" + " JUMP D0FrT,#LO(4b)\n" + " .previous\n" + " .section __ex_table,\"a\"\n" + " .long 2b,5b\n" + " .previous" + : "=r" (res), "=r" (dst), "=r" (src), "=r" (count) + : "3" (count), "1" (dst), "2" (src), "P" (-EFAULT) + : "D0FrT", "memory", "cc"); + + return res; +} +EXPORT_SYMBOL(__strncpy_from_user); diff --git a/arch/metag/mm/Kconfig b/arch/metag/mm/Kconfig new file mode 100644 index 000000000000..cd7f2f2ad416 --- /dev/null +++ b/arch/metag/mm/Kconfig @@ -0,0 +1,153 @@ +menu "Memory management options" + +config PAGE_OFFSET + hex "Kernel page offset address" + default "0x40000000" + help + This option allows you to set the virtual address at which the + kernel will be mapped to. +endmenu + +config KERNEL_4M_PAGES + bool "Map kernel with 4MB pages" + depends on METAG_META21_MMU + default y + help + Map the kernel with large pages to reduce TLB pressure. + +choice + prompt "User page size" + default PAGE_SIZE_4K + +config PAGE_SIZE_4K + bool "4kB" + help + This is the default page size used by all Meta cores. + +config PAGE_SIZE_8K + bool "8kB" + depends on METAG_META21_MMU + help + This enables 8kB pages as supported by Meta 2.x and later MMUs. + +config PAGE_SIZE_16K + bool "16kB" + depends on METAG_META21_MMU + help + This enables 16kB pages as supported by Meta 2.x and later MMUs. + +endchoice + +config NUMA + bool "Non Uniform Memory Access (NUMA) Support" + help + Some Meta systems have MMU-mappable on-chip memories with + lower latencies than main memory. This enables support for + these blocks by binding them to nodes and allowing + memory policies to be used for prioritizing and controlling + allocation behaviour. + +config FORCE_MAX_ZONEORDER + int "Maximum zone order" + range 10 32 + default "10" + help + The kernel memory allocator divides physically contiguous memory + blocks into "zones", where each zone is a power of two number of + pages. This option selects the largest power of two that the kernel + keeps in the memory allocator. If you need to allocate very large + blocks of physically contiguous memory, then you may need to + increase this value. + + This config option is actually maximum order plus one. For example, + a value of 11 means that the largest free memory block is 2^10 pages. + + The page size is not necessarily 4KB. Keep this in mind + when choosing a value for this option. + +config METAG_L2C + bool "Level 2 Cache Support" + depends on METAG_META21 + help + Press y here to enable support for the Meta Level 2 (L2) cache. This + will enable the cache at start up if it hasn't already been enabled + by the bootloader. + + If the bootloader enables the L2 you must press y here to ensure the + kernel takes the appropriate actions to keep the cache coherent. + +config NODES_SHIFT + int + default "1" + depends on NEED_MULTIPLE_NODES + +config ARCH_FLATMEM_ENABLE + def_bool y + depends on !NUMA + +config ARCH_SPARSEMEM_ENABLE + def_bool y + select SPARSEMEM_STATIC + +config ARCH_SPARSEMEM_DEFAULT + def_bool y + +config MAX_ACTIVE_REGIONS + int + default "2" if SPARSEMEM + default "1" + +config ARCH_POPULATES_NODE_MAP + def_bool y + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + +config SYS_SUPPORTS_HUGETLBFS + def_bool y + depends on METAG_META21_MMU + +choice + prompt "HugeTLB page size" + depends on METAG_META21_MMU && HUGETLB_PAGE + default HUGETLB_PAGE_SIZE_1M + +config HUGETLB_PAGE_SIZE_8K + bool "8kB" + depends on PAGE_SIZE_4K + +config HUGETLB_PAGE_SIZE_16K + bool "16kB" + depends on PAGE_SIZE_4K || PAGE_SIZE_8K + +config HUGETLB_PAGE_SIZE_32K + bool "32kB" + +config HUGETLB_PAGE_SIZE_64K + bool "64kB" + +config HUGETLB_PAGE_SIZE_128K + bool "128kB" + +config HUGETLB_PAGE_SIZE_256K + bool "256kB" + +config HUGETLB_PAGE_SIZE_512K + bool "512kB" + +config HUGETLB_PAGE_SIZE_1M + bool "1MB" + +config HUGETLB_PAGE_SIZE_2M + bool "2MB" + +config HUGETLB_PAGE_SIZE_4M + bool "4MB" + +endchoice + +config METAG_COREMEM + bool + default y if SUSPEND + +source "mm/Kconfig" diff --git a/arch/metag/mm/Makefile b/arch/metag/mm/Makefile new file mode 100644 index 000000000000..994331164125 --- /dev/null +++ b/arch/metag/mm/Makefile @@ -0,0 +1,19 @@ +# +# Makefile for the linux Meta-specific parts of the memory manager. +# + +obj-y += cache.o +obj-y += extable.o +obj-y += fault.o +obj-y += init.o +obj-y += ioremap.o +obj-y += maccess.o + +mmu-y := mmu-meta1.o +mmu-$(CONFIG_METAG_META21_MMU) := mmu-meta2.o +obj-y += $(mmu-y) + +obj-$(CONFIG_HIGHMEM) += highmem.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_METAG_L2C) += l2cache.o +obj-$(CONFIG_NUMA) += numa.o diff --git a/arch/metag/mm/cache.c b/arch/metag/mm/cache.c new file mode 100644 index 000000000000..b5d3b2e7c160 --- /dev/null +++ b/arch/metag/mm/cache.c @@ -0,0 +1,521 @@ +/* + * arch/metag/mm/cache.c + * + * Copyright (C) 2001, 2002, 2005, 2007, 2012 Imagination Technologies. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + * + * Cache control code + */ + +#include <linux/export.h> +#include <linux/io.h> +#include <asm/cacheflush.h> +#include <asm/core_reg.h> +#include <asm/global_lock.h> +#include <asm/metag_isa.h> +#include <asm/metag_mem.h> +#include <asm/metag_regs.h> + +#define DEFAULT_CACHE_WAYS_LOG2 2 + +/* + * Size of a set in the caches. Initialised for default 16K stride, adjusted + * according to values passed through TBI global heap segment via LDLK (on ATP) + * or config registers (on HTP/MTP) + */ +static int dcache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2 + - DEFAULT_CACHE_WAYS_LOG2; +static int icache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2 + - DEFAULT_CACHE_WAYS_LOG2; +/* + * The number of sets in the caches. Initialised for HTP/ATP, adjusted + * according to NOMMU setting in config registers + */ +static unsigned char dcache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2; +static unsigned char icache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2; + +#ifndef CONFIG_METAG_META12 +/** + * metag_lnkget_probe() - Probe whether lnkget/lnkset go around the cache + */ +static volatile u32 lnkget_testdata[16] __initdata __aligned(64); + +#define LNKGET_CONSTANT 0xdeadbeef + +void __init metag_lnkget_probe(void) +{ + int temp; + long flags; + + /* + * It's conceivable the user has configured a globally coherent cache + * shared with non-Linux hardware threads, so use LOCK2 to prevent them + * from executing and causing cache eviction during the test. + */ + __global_lock2(flags); + + /* read a value to bring it into the cache */ + (void)lnkget_testdata[0]; + lnkget_testdata[0] = 0; + + /* lnkget/lnkset it to modify it */ + asm volatile( + "1: LNKGETD %0, [%1]\n" + " LNKSETD [%1], %2\n" + " DEFR %0, TXSTAT\n" + " ANDT %0, %0, #HI(0x3f000000)\n" + " CMPT %0, #HI(0x02000000)\n" + " BNZ 1b\n" + : "=&d" (temp) + : "da" (&lnkget_testdata[0]), "bd" (LNKGET_CONSTANT) + : "cc"); + + /* re-read it to see if the cached value changed */ + temp = lnkget_testdata[0]; + + __global_unlock2(flags); + + /* flush the cache line to fix any incoherency */ + __builtin_dcache_flush((void *)&lnkget_testdata[0]); + +#if defined(CONFIG_METAG_LNKGET_AROUND_CACHE) + /* if the cache is right, LNKGET_AROUND_CACHE is unnecessary */ + if (temp == LNKGET_CONSTANT) + pr_info("LNKGET/SET go through cache but CONFIG_METAG_LNKGET_AROUND_CACHE=y\n"); +#elif defined(CONFIG_METAG_ATOMICITY_LNKGET) + /* + * if the cache is wrong, LNKGET_AROUND_CACHE is really necessary + * because the kernel is configured to use LNKGET/SET for atomicity + */ + WARN(temp != LNKGET_CONSTANT, + "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n" + "Expect kernel failure as it's used for atomicity primitives\n"); +#elif defined(CONFIG_SMP) + /* + * if the cache is wrong, LNKGET_AROUND_CACHE should be used or the + * gateway page won't flush and userland could break. + */ + WARN(temp != LNKGET_CONSTANT, + "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n" + "Expect userland failure as it's used for user gateway page\n"); +#else + /* + * if the cache is wrong, LNKGET_AROUND_CACHE is set wrong, but it + * doesn't actually matter as it doesn't have any effect on !SMP && + * !ATOMICITY_LNKGET. + */ + if (temp != LNKGET_CONSTANT) + pr_warn("LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n"); +#endif +} +#endif /* !CONFIG_METAG_META12 */ + +/** + * metag_cache_probe() - Probe L1 cache configuration. + * + * Probe the L1 cache configuration to aid the L1 physical cache flushing + * functions. + */ +void __init metag_cache_probe(void) +{ +#ifndef CONFIG_METAG_META12 + int coreid = metag_in32(METAC_CORE_ID); + int config = metag_in32(METAC_CORE_CONFIG2); + int cfgcache = coreid & METAC_COREID_CFGCACHE_BITS; + + if (cfgcache == METAC_COREID_CFGCACHE_TYPE0 || + cfgcache == METAC_COREID_CFGCACHE_PRIVNOMMU) { + icache_sets_log2 = 1; + dcache_sets_log2 = 1; + } + + /* For normal size caches, the smallest size is 4Kb. + For small caches, the smallest size is 64b */ + icache_set_shift = (config & METAC_CORECFG2_ICSMALL_BIT) + ? 6 : 12; + icache_set_shift += (config & METAC_CORE_C2ICSZ_BITS) + >> METAC_CORE_C2ICSZ_S; + icache_set_shift -= icache_sets_log2; + + dcache_set_shift = (config & METAC_CORECFG2_DCSMALL_BIT) + ? 6 : 12; + dcache_set_shift += (config & METAC_CORECFG2_DCSZ_BITS) + >> METAC_CORECFG2_DCSZ_S; + dcache_set_shift -= dcache_sets_log2; + + metag_lnkget_probe(); +#else + /* Extract cache sizes from global heap segment */ + unsigned long val, u; + int width, shift, addend; + PTBISEG seg; + + seg = __TBIFindSeg(NULL, TBID_SEG(TBID_THREAD_GLOBAL, + TBID_SEGSCOPE_GLOBAL, + TBID_SEGTYPE_HEAP)); + if (seg != NULL) { + val = seg->Data[1]; + + /* Work out width of I-cache size bit-field */ + u = ((unsigned long) METAG_TBI_ICACHE_SIZE_BITS) + >> METAG_TBI_ICACHE_SIZE_S; + width = 0; + while (u & 1) { + width++; + u >>= 1; + } + /* Extract sign-extended size addend value */ + shift = 32 - (METAG_TBI_ICACHE_SIZE_S + width); + addend = (long) ((val & METAG_TBI_ICACHE_SIZE_BITS) + << shift) + >> (shift + METAG_TBI_ICACHE_SIZE_S); + /* Now calculate I-cache set size */ + icache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2 + - DEFAULT_CACHE_WAYS_LOG2) + + addend; + + /* Similarly for D-cache */ + u = ((unsigned long) METAG_TBI_DCACHE_SIZE_BITS) + >> METAG_TBI_DCACHE_SIZE_S; + width = 0; + while (u & 1) { + width++; + u >>= 1; + } + shift = 32 - (METAG_TBI_DCACHE_SIZE_S + width); + addend = (long) ((val & METAG_TBI_DCACHE_SIZE_BITS) + << shift) + >> (shift + METAG_TBI_DCACHE_SIZE_S); + dcache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2 + - DEFAULT_CACHE_WAYS_LOG2) + + addend; + } +#endif +} + +static void metag_phys_data_cache_flush(const void *start) +{ + unsigned long flush0, flush1, flush2, flush3; + int loops, step; + int thread; + int part, offset; + int set_shift; + + /* Use a sequence of writes to flush the cache region requested */ + thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS) + >> TXENABLE_THREAD_S; + + /* Cache is broken into sets which lie in contiguous RAMs */ + set_shift = dcache_set_shift; + + /* Move to the base of the physical cache flush region */ + flush0 = LINSYSCFLUSH_DCACHE_LINE; + step = 64; + + /* Get partition data for this thread */ + part = metag_in32(SYSC_DCPART0 + + (SYSC_xCPARTn_STRIDE * thread)); + + if ((int)start < 0) + /* Access Global vs Local partition */ + part >>= SYSC_xCPARTG_AND_S + - SYSC_xCPARTL_AND_S; + + /* Extract offset and move SetOff */ + offset = (part & SYSC_xCPARTL_OR_BITS) + >> SYSC_xCPARTL_OR_S; + flush0 += (offset << (set_shift - 4)); + + /* Shrink size */ + part = (part & SYSC_xCPARTL_AND_BITS) + >> SYSC_xCPARTL_AND_S; + loops = ((part + 1) << (set_shift - 4)); + + /* Reduce loops by step of cache line size */ + loops /= step; + + flush1 = flush0 + (1 << set_shift); + flush2 = flush0 + (2 << set_shift); + flush3 = flush0 + (3 << set_shift); + + if (dcache_sets_log2 == 1) { + flush2 = flush1; + flush3 = flush1 + step; + flush1 = flush0 + step; + step <<= 1; + loops >>= 1; + } + + /* Clear loops ways in cache */ + while (loops-- != 0) { + /* Clear the ways. */ +#if 0 + /* + * GCC doesn't generate very good code for this so we + * provide inline assembly instead. + */ + metag_out8(0, flush0); + metag_out8(0, flush1); + metag_out8(0, flush2); + metag_out8(0, flush3); + + flush0 += step; + flush1 += step; + flush2 += step; + flush3 += step; +#else + asm volatile ( + "SETB\t[%0+%4++],%5\n" + "SETB\t[%1+%4++],%5\n" + "SETB\t[%2+%4++],%5\n" + "SETB\t[%3+%4++],%5\n" + : "+e" (flush0), + "+e" (flush1), + "+e" (flush2), + "+e" (flush3) + : "e" (step), "a" (0)); +#endif + } +} + +void metag_data_cache_flush_all(const void *start) +{ + if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0) + /* No need to flush the data cache it's not actually enabled */ + return; + + metag_phys_data_cache_flush(start); +} + +void metag_data_cache_flush(const void *start, int bytes) +{ + unsigned long flush0; + int loops, step; + + if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0) + /* No need to flush the data cache it's not actually enabled */ + return; + + if (bytes >= 4096) { + metag_phys_data_cache_flush(start); + return; + } + + /* Use linear cache flush mechanism on META IP */ + flush0 = (int)start; + loops = ((int)start & (DCACHE_LINE_BYTES - 1)) + bytes + + (DCACHE_LINE_BYTES - 1); + loops >>= DCACHE_LINE_S; + +#define PRIM_FLUSH(addr, offset) do { \ + int __addr = ((int) (addr)) + ((offset) * 64); \ + __builtin_dcache_flush((void *)(__addr)); \ + } while (0) + +#define LOOP_INC (4*64) + + do { + /* By default stop */ + step = 0; + + switch (loops) { + /* Drop Thru Cases! */ + default: + PRIM_FLUSH(flush0, 3); + loops -= 4; + step = 1; + case 3: + PRIM_FLUSH(flush0, 2); + case 2: + PRIM_FLUSH(flush0, 1); + case 1: + PRIM_FLUSH(flush0, 0); + flush0 += LOOP_INC; + case 0: + break; + } + } while (step); +} +EXPORT_SYMBOL(metag_data_cache_flush); + +static void metag_phys_code_cache_flush(const void *start, int bytes) +{ + unsigned long flush0, flush1, flush2, flush3, end_set; + int loops, step; + int thread; + int set_shift, set_size; + int part, offset; + + /* Use a sequence of writes to flush the cache region requested */ + thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS) + >> TXENABLE_THREAD_S; + set_shift = icache_set_shift; + + /* Move to the base of the physical cache flush region */ + flush0 = LINSYSCFLUSH_ICACHE_LINE; + step = 64; + + /* Get partition code for this thread */ + part = metag_in32(SYSC_ICPART0 + + (SYSC_xCPARTn_STRIDE * thread)); + + if ((int)start < 0) + /* Access Global vs Local partition */ + part >>= SYSC_xCPARTG_AND_S-SYSC_xCPARTL_AND_S; + + /* Extract offset and move SetOff */ + offset = (part & SYSC_xCPARTL_OR_BITS) + >> SYSC_xCPARTL_OR_S; + flush0 += (offset << (set_shift - 4)); + + /* Shrink size */ + part = (part & SYSC_xCPARTL_AND_BITS) + >> SYSC_xCPARTL_AND_S; + loops = ((part + 1) << (set_shift - 4)); + + /* Where does the Set end? */ + end_set = flush0 + loops; + set_size = loops; + +#ifdef CONFIG_METAG_META12 + if ((bytes < 4096) && (bytes < loops)) { + /* Unreachable on HTP/MTP */ + /* Only target the sets that could be relavent */ + flush0 += (loops - step) & ((int) start); + loops = (((int) start) & (step-1)) + bytes + step - 1; + } +#endif + + /* Reduce loops by step of cache line size */ + loops /= step; + + flush1 = flush0 + (1<<set_shift); + flush2 = flush0 + (2<<set_shift); + flush3 = flush0 + (3<<set_shift); + + if (icache_sets_log2 == 1) { + flush2 = flush1; + flush3 = flush1 + step; + flush1 = flush0 + step; +#if 0 + /* flush0 will stop one line early in this case + * (flush1 will do the final line). + * However we don't correct end_set here at the moment + * because it will never wrap on HTP/MTP + */ + end_set -= step; +#endif + step <<= 1; + loops >>= 1; + } + + /* Clear loops ways in cache */ + while (loops-- != 0) { +#if 0 + /* + * GCC doesn't generate very good code for this so we + * provide inline assembly instead. + */ + /* Clear the ways */ + metag_out8(0, flush0); + metag_out8(0, flush1); + metag_out8(0, flush2); + metag_out8(0, flush3); + + flush0 += step; + flush1 += step; + flush2 += step; + flush3 += step; +#else + asm volatile ( + "SETB\t[%0+%4++],%5\n" + "SETB\t[%1+%4++],%5\n" + "SETB\t[%2+%4++],%5\n" + "SETB\t[%3+%4++],%5\n" + : "+e" (flush0), + "+e" (flush1), + "+e" (flush2), + "+e" (flush3) + : "e" (step), "a" (0)); +#endif + + if (flush0 == end_set) { + /* Wrap within Set 0 */ + flush0 -= set_size; + flush1 -= set_size; + flush2 -= set_size; + flush3 -= set_size; + } + } +} + +void metag_code_cache_flush_all(const void *start) +{ + if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0) + /* No need to flush the code cache it's not actually enabled */ + return; + + metag_phys_code_cache_flush(start, 4096); +} +EXPORT_SYMBOL(metag_code_cache_flush_all); + +void metag_code_cache_flush(const void *start, int bytes) +{ +#ifndef CONFIG_METAG_META12 + void *flush; + int loops, step; +#endif /* !CONFIG_METAG_META12 */ + + if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0) + /* No need to flush the code cache it's not actually enabled */ + return; + +#ifdef CONFIG_METAG_META12 + /* CACHEWD isn't available on Meta1, so always do full cache flush */ + metag_phys_code_cache_flush(start, bytes); + +#else /* CONFIG_METAG_META12 */ + /* If large size do full physical cache flush */ + if (bytes >= 4096) { + metag_phys_code_cache_flush(start, bytes); + return; + } + + /* Use linear cache flush mechanism on META IP */ + flush = (void *)((int)start & ~(ICACHE_LINE_BYTES-1)); + loops = ((int)start & (ICACHE_LINE_BYTES-1)) + bytes + + (ICACHE_LINE_BYTES-1); + loops >>= ICACHE_LINE_S; + +#define PRIM_IFLUSH(addr, offset) \ + __builtin_meta2_cachewd(((addr) + ((offset) * 64)), CACHEW_ICACHE_BIT) + +#define LOOP_INC (4*64) + + do { + /* By default stop */ + step = 0; + + switch (loops) { + /* Drop Thru Cases! */ + default: + PRIM_IFLUSH(flush, 3); + loops -= 4; + step = 1; + case 3: + PRIM_IFLUSH(flush, 2); + case 2: + PRIM_IFLUSH(flush, 1); + case 1: + PRIM_IFLUSH(flush, 0); + flush += LOOP_INC; + case 0: + break; + } + } while (step); +#endif /* !CONFIG_METAG_META12 */ +} +EXPORT_SYMBOL(metag_code_cache_flush); diff --git a/arch/metag/mm/extable.c b/arch/metag/mm/extable.c new file mode 100644 index 000000000000..2a21eaebe84d --- /dev/null +++ b/arch/metag/mm/extable.c @@ -0,0 +1,15 @@ + +#include <linux/module.h> +#include <linux/uaccess.h> + +int fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + unsigned long pc = instruction_pointer(regs); + + fixup = search_exception_tables(pc); + if (fixup) + regs->ctx.CurrPC = fixup->fixup; + + return fixup != NULL; +} diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c new file mode 100644 index 000000000000..2c75bf7357c5 --- /dev/null +++ b/arch/metag/mm/fault.c @@ -0,0 +1,239 @@ +/* + * Meta page fault handling. + * + * Copyright (C) 2005-2012 Imagination Technologies Ltd. + */ + +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/kernel.h> +#include <linux/ptrace.h> +#include <linux/interrupt.h> +#include <linux/uaccess.h> + +#include <asm/tlbflush.h> +#include <asm/mmu.h> +#include <asm/traps.h> + +/* Clear any pending catch buffer state. */ +static void clear_cbuf_entry(struct pt_regs *regs, unsigned long addr, + unsigned int trapno) +{ + PTBICTXEXTCB0 cbuf = regs->extcb0; + + switch (trapno) { + /* Instruction fetch faults leave no catch buffer state. */ + case TBIXXF_SIGNUM_IGF: + case TBIXXF_SIGNUM_IPF: + return; + default: + if (cbuf[0].CBAddr == addr) { + cbuf[0].CBAddr = 0; + cbuf[0].CBFlags &= ~TXCATCH0_FAULT_BITS; + + /* And, as this is the ONLY catch entry, we + * need to clear the cbuf bit from the context! + */ + regs->ctx.SaveMask &= ~(TBICTX_CBUF_BIT | + TBICTX_XCBF_BIT); + + return; + } + pr_err("Failed to clear cbuf entry!\n"); + } +} + +int show_unhandled_signals = 1; + +int do_page_fault(struct pt_regs *regs, unsigned long address, + unsigned int write_access, unsigned int trapno) +{ + struct task_struct *tsk; + struct mm_struct *mm; + struct vm_area_struct *vma, *prev_vma; + siginfo_t info; + int fault; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | + (write_access ? FAULT_FLAG_WRITE : 0); + + tsk = current; + + if ((address >= VMALLOC_START) && (address < VMALLOC_END)) { + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "tsk" here. We might be inside + * an interrupt in the middle of a task switch.. + */ + int offset = pgd_index(address); + pgd_t *pgd, *pgd_k; + pud_t *pud, *pud_k; + pmd_t *pmd, *pmd_k; + pte_t *pte_k; + + pgd = ((pgd_t *)mmu_get_base()) + offset; + pgd_k = swapper_pg_dir + offset; + + /* This will never happen with the folded page table. */ + if (!pgd_present(*pgd)) { + if (!pgd_present(*pgd_k)) + goto bad_area_nosemaphore; + set_pgd(pgd, *pgd_k); + return 0; + } + + pud = pud_offset(pgd, address); + pud_k = pud_offset(pgd_k, address); + if (!pud_present(*pud_k)) + goto bad_area_nosemaphore; + set_pud(pud, *pud_k); + + pmd = pmd_offset(pud, address); + pmd_k = pmd_offset(pud_k, address); + if (!pmd_present(*pmd_k)) + goto bad_area_nosemaphore; + set_pmd(pmd, *pmd_k); + + pte_k = pte_offset_kernel(pmd_k, address); + if (!pte_present(*pte_k)) + goto bad_area_nosemaphore; + + /* May only be needed on Chorus2 */ + flush_tlb_all(); + return 0; + } + + mm = tsk->mm; + + if (in_atomic() || !mm) + goto no_context; + +retry: + down_read(&mm->mmap_sem); + + vma = find_vma_prev(mm, address, &prev_vma); + + if (!vma || address < vma->vm_start) + goto check_expansion; + +good_area: + if (write_access) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + } else { + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) + goto bad_area; + } + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + fault = handle_mm_fault(mm, vma, address, flags); + + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + return 0; + + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); + } + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; + if (fault & VM_FAULT_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; + + /* + * No need to up_read(&mm->mmap_sem) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + + goto retry; + } + } + + up_read(&mm->mmap_sem); + return 0; + +check_expansion: + vma = prev_vma; + if (vma && (expand_stack(vma, address) == 0)) + goto good_area; + +bad_area: + up_read(&mm->mmap_sem); + +bad_area_nosemaphore: + if (user_mode(regs)) { + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = (__force void __user *)address; + info.si_trapno = trapno; + + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && + printk_ratelimit()) { + pr_info("%s%s[%d]: segfault at %lx pc %08x sp %08x write %d trap %#x (%s)", + task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), address, + regs->ctx.CurrPC, regs->ctx.AX[0].U0, + write_access, trapno, trap_name(trapno)); + print_vma_addr(" in ", regs->ctx.CurrPC); + print_vma_addr(" rtp in ", regs->ctx.DX[4].U1); + printk("\n"); + show_regs(regs); + } + force_sig_info(SIGSEGV, &info, tsk); + return 1; + } + goto no_context; + +do_sigbus: + up_read(&mm->mmap_sem); + + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. + */ + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (__force void __user *)address; + info.si_trapno = trapno; + force_sig_info(SIGBUS, &info, tsk); + + /* Kernel mode? Handle exceptions or die */ + if (!user_mode(regs)) + goto no_context; + + return 1; + + /* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + up_read(&mm->mmap_sem); + if (user_mode(regs)) + do_group_exit(SIGKILL); + +no_context: + /* Are we prepared to handle this kernel fault? */ + if (fixup_exception(regs)) { + clear_cbuf_entry(regs, address, trapno); + return 1; + } + + die("Oops", regs, (write_access << 15) | trapno, address); + do_exit(SIGKILL); +} diff --git a/arch/metag/mm/highmem.c b/arch/metag/mm/highmem.c new file mode 100644 index 000000000000..d71f621a2c0b --- /dev/null +++ b/arch/metag/mm/highmem.c @@ -0,0 +1,133 @@ +#include <linux/export.h> +#include <linux/highmem.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <asm/fixmap.h> +#include <asm/tlbflush.h> + +static pte_t *kmap_pte; + +unsigned long highstart_pfn, highend_pfn; + +void *kmap(struct page *page) +{ + might_sleep(); + if (!PageHighMem(page)) + return page_address(page); + return kmap_high(page); +} +EXPORT_SYMBOL(kmap); + +void kunmap(struct page *page) +{ + BUG_ON(in_interrupt()); + if (!PageHighMem(page)) + return; + kunmap_high(page); +} +EXPORT_SYMBOL(kunmap); + +/* + * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because + * no global lock is needed and because the kmap code must perform a global TLB + * invalidation when the kmap pool wraps. + * + * However when holding an atomic kmap is is not legal to sleep, so atomic + * kmaps are appropriate for short, tight code paths only. + */ + +void *kmap_atomic(struct page *page) +{ + enum fixed_addresses idx; + unsigned long vaddr; + int type; + + /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + pagefault_disable(); + if (!PageHighMem(page)) + return page_address(page); + + type = kmap_atomic_idx_push(); + idx = type + KM_TYPE_NR * smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(!pte_none(*(kmap_pte - idx))); +#endif + set_pte(kmap_pte - idx, mk_pte(page, PAGE_KERNEL)); + + return (void *)vaddr; +} +EXPORT_SYMBOL(kmap_atomic); + +void __kunmap_atomic(void *kvaddr) +{ + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + int idx, type; + + if (kvaddr >= (void *)FIXADDR_START) { + type = kmap_atomic_idx(); + idx = type + KM_TYPE_NR * smp_processor_id(); + + /* + * Force other mappings to Oops if they'll try to access this + * pte without first remap it. Keeping stale mappings around + * is a bad idea also, in case the page changes cacheability + * attributes or becomes a protected page in a hypervisor. + */ + pte_clear(&init_mm, vaddr, kmap_pte-idx); + flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE); + + kmap_atomic_idx_pop(); + } + + pagefault_enable(); +} +EXPORT_SYMBOL(__kunmap_atomic); + +/* + * This is the same as kmap_atomic() but can map memory that doesn't + * have a struct page associated with it. + */ +void *kmap_atomic_pfn(unsigned long pfn) +{ + enum fixed_addresses idx; + unsigned long vaddr; + int type; + + pagefault_disable(); + + type = kmap_atomic_idx_push(); + idx = type + KM_TYPE_NR * smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(!pte_none(*(kmap_pte - idx))); +#endif + set_pte(kmap_pte - idx, pfn_pte(pfn, PAGE_KERNEL)); + flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE); + + return (void *)vaddr; +} + +struct page *kmap_atomic_to_page(void *ptr) +{ + unsigned long vaddr = (unsigned long)ptr; + int idx; + pte_t *pte; + + if (vaddr < FIXADDR_START) + return virt_to_page(ptr); + + idx = virt_to_fix(vaddr); + pte = kmap_pte - (idx - FIX_KMAP_BEGIN); + return pte_page(*pte); +} + +void __init kmap_init(void) +{ + unsigned long kmap_vstart; + + /* cache the first kmap pte */ + kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); + kmap_pte = kmap_get_fixmap_pte(kmap_vstart); +} diff --git a/arch/metag/mm/hugetlbpage.c b/arch/metag/mm/hugetlbpage.c new file mode 100644 index 000000000000..3c52fa6d0f8e --- /dev/null +++ b/arch/metag/mm/hugetlbpage.c @@ -0,0 +1,259 @@ +/* + * arch/metag/mm/hugetlbpage.c + * + * METAG HugeTLB page support. + * + * Cloned from SuperH + * + * Cloned from sparc64 by Paul Mundt. + * + * Copyright (C) 2002, 2003 David S. Miller (davem@redhat.com) + */ + +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/sysctl.h> + +#include <asm/mman.h> +#include <asm/pgalloc.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> + +/* + * If the arch doesn't supply something else, assume that hugepage + * size aligned regions are ok without further preparation. + */ +int prepare_hugepage_range(struct file *file, unsigned long addr, + unsigned long len) +{ + struct mm_struct *mm = current->mm; + struct hstate *h = hstate_file(file); + struct vm_area_struct *vma; + + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (addr & ~huge_page_mask(h)) + return -EINVAL; + if (TASK_SIZE - len < addr) + return -EINVAL; + + vma = find_vma(mm, ALIGN_HUGEPT(addr)); + if (vma && !(vma->vm_flags & MAP_HUGETLB)) + return -EINVAL; + + vma = find_vma(mm, addr); + if (vma) { + if (addr + len > vma->vm_start) + return -EINVAL; + if (!(vma->vm_flags & MAP_HUGETLB) && + (ALIGN_HUGEPT(addr + len) > vma->vm_start)) + return -EINVAL; + } + return 0; +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_offset(mm, addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + pte = pte_alloc_map(mm, NULL, pmd, addr); + pgd->pgd &= ~_PAGE_SZ_MASK; + pgd->pgd |= _PAGE_SZHUGE; + + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + pte = pte_offset_kernel(pmd, addr); + + return pte; +} + +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + +struct page *follow_huge_addr(struct mm_struct *mm, + unsigned long address, int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + return pmd_page_shift(pmd) > PAGE_SHIFT; +} + +int pud_huge(pud_t pud) +{ + return 0; +} + +struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + return NULL; +} + +#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA + +/* + * Look for an unmapped area starting after another hugetlb vma. + * There are guaranteed to be no huge pte's spare if all the huge pages are + * full size (4MB), so in that case compile out this search. + */ +#if HPAGE_SHIFT == HUGEPT_SHIFT +static inline unsigned long +hugetlb_get_unmapped_area_existing(unsigned long len) +{ + return 0; +} +#else +static unsigned long +hugetlb_get_unmapped_area_existing(unsigned long len) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start_addr, addr; + int after_huge; + + if (mm->context.part_huge) { + start_addr = mm->context.part_huge; + after_huge = 1; + } else { + start_addr = TASK_UNMAPPED_BASE; + after_huge = 0; + } +new_search: + addr = start_addr; + + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + if ((!vma && !after_huge) || TASK_SIZE - len < addr) { + /* + * Start a new search - just in case we missed + * some holes. + */ + if (start_addr != TASK_UNMAPPED_BASE) { + start_addr = TASK_UNMAPPED_BASE; + goto new_search; + } + return 0; + } + /* skip ahead if we've aligned right over some vmas */ + if (vma && vma->vm_end <= addr) + continue; + /* space before the next vma? */ + if (after_huge && (!vma || ALIGN_HUGEPT(addr + len) + <= vma->vm_start)) { + unsigned long end = addr + len; + if (end & HUGEPT_MASK) + mm->context.part_huge = end; + else if (addr == mm->context.part_huge) + mm->context.part_huge = 0; + return addr; + } + if (vma && (vma->vm_flags & MAP_HUGETLB)) { + /* space after a huge vma in 2nd level page table? */ + if (vma->vm_end & HUGEPT_MASK) { + after_huge = 1; + /* no need to align to the next PT block */ + addr = vma->vm_end; + continue; + } + } + after_huge = 0; + addr = ALIGN_HUGEPT(vma->vm_end); + } +} +#endif + +/* Do a full search to find an area without any nearby normal pages. */ +static unsigned long +hugetlb_get_unmapped_area_new_pmd(unsigned long len) +{ + struct vm_unmapped_area_info info; + + info.flags = 0; + info.length = len; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + info.align_mask = PAGE_MASK & HUGEPT_MASK; + info.align_offset = 0; + return vm_unmapped_area(&info); +} + +unsigned long +hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (len > TASK_SIZE) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (prepare_hugepage_range(file, addr, len)) + return -EINVAL; + return addr; + } + + if (addr) { + addr = ALIGN(addr, huge_page_size(h)); + if (!prepare_hugepage_range(file, addr, len)) + return addr; + } + + /* + * Look for an existing hugetlb vma with space after it (this is to to + * minimise fragmentation caused by huge pages. + */ + addr = hugetlb_get_unmapped_area_existing(len); + if (addr) + return addr; + + /* + * Find an unmapped naturally aligned set of 4MB blocks that we can use + * for huge pages. + */ + return hugetlb_get_unmapped_area_new_pmd(len); +} + +#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ + +/* necessary for boot time 4MB huge page allocation */ +static __init int setup_hugepagesz(char *opt) +{ + unsigned long ps = memparse(opt, &opt); + if (ps == (1 << HPAGE_SHIFT)) { + hugetlb_add_hstate(HPAGE_SHIFT - PAGE_SHIFT); + } else { + pr_err("hugepagesz: Unsupported page size %lu M\n", + ps >> 20); + return 0; + } + return 1; +} +__setup("hugepagesz=", setup_hugepagesz); diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c new file mode 100644 index 000000000000..504a398d5f8b --- /dev/null +++ b/arch/metag/mm/init.c @@ -0,0 +1,451 @@ +/* + * Copyright (C) 2005,2006,2007,2008,2009,2010 Imagination Technologies + * + */ + +#include <linux/export.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/pagemap.h> +#include <linux/percpu.h> +#include <linux/memblock.h> +#include <linux/initrd.h> +#include <linux/of_fdt.h> + +#include <asm/setup.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/sections.h> +#include <asm/tlb.h> +#include <asm/user_gateway.h> +#include <asm/mmzone.h> +#include <asm/fixmap.h> + +unsigned long pfn_base; +EXPORT_SYMBOL(pfn_base); + +pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_data; + +unsigned long empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); + +extern char __user_gateway_start; +extern char __user_gateway_end; + +void *gateway_page; + +/* + * Insert the gateway page into a set of page tables, creating the + * page tables if necessary. + */ +static void insert_gateway_page(pgd_t *pgd, unsigned long address) +{ + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + BUG_ON(!pgd_present(*pgd)); + + pud = pud_offset(pgd, address); + BUG_ON(!pud_present(*pud)); + + pmd = pmd_offset(pud, address); + if (!pmd_present(*pmd)) { + pte = alloc_bootmem_pages(PAGE_SIZE); + set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte))); + } + + pte = pte_offset_kernel(pmd, address); + set_pte(pte, pfn_pte(__pa(gateway_page) >> PAGE_SHIFT, PAGE_READONLY)); +} + +/* Alloc and map a page in a known location accessible to userspace. */ +static void __init user_gateway_init(void) +{ + unsigned long address = USER_GATEWAY_PAGE; + int offset = pgd_index(address); + pgd_t *pgd; + + gateway_page = alloc_bootmem_pages(PAGE_SIZE); + + pgd = swapper_pg_dir + offset; + insert_gateway_page(pgd, address); + +#ifdef CONFIG_METAG_META12 + /* + * Insert the gateway page into our current page tables even + * though we've already inserted it into our reference page + * table (swapper_pg_dir). This is because with a META1 mmu we + * copy just the user address range and not the gateway page + * entry on context switch, see switch_mmu(). + */ + pgd = (pgd_t *)mmu_get_base() + offset; + insert_gateway_page(pgd, address); +#endif /* CONFIG_METAG_META12 */ + + BUG_ON((&__user_gateway_end - &__user_gateway_start) > PAGE_SIZE); + + gateway_page += (address & ~PAGE_MASK); + + memcpy(gateway_page, &__user_gateway_start, + &__user_gateway_end - &__user_gateway_start); + + /* + * We don't need to flush the TLB here, there should be no mapping + * present at boot for this address and only valid mappings are in + * the TLB (apart from on Meta 1.x, but those cached invalid + * mappings should be impossible to hit here). + * + * We don't flush the code cache here even though we have written + * code through the data cache and they may not be coherent. At + * this point we assume there is no stale data in the code cache + * for this address so there is no need to flush. + */ +} + +static void __init allocate_pgdat(unsigned int nid) +{ + unsigned long start_pfn, end_pfn; +#ifdef CONFIG_NEED_MULTIPLE_NODES + unsigned long phys; +#endif + + get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); + +#ifdef CONFIG_NEED_MULTIPLE_NODES + phys = __memblock_alloc_base(sizeof(struct pglist_data), + SMP_CACHE_BYTES, end_pfn << PAGE_SHIFT); + /* Retry with all of system memory */ + if (!phys) + phys = __memblock_alloc_base(sizeof(struct pglist_data), + SMP_CACHE_BYTES, + memblock_end_of_DRAM()); + if (!phys) + panic("Can't allocate pgdat for node %d\n", nid); + + NODE_DATA(nid) = __va(phys); + memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); + + NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; +#endif + + NODE_DATA(nid)->node_start_pfn = start_pfn; + NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; +} + +static void __init bootmem_init_one_node(unsigned int nid) +{ + unsigned long total_pages, paddr; + unsigned long end_pfn; + struct pglist_data *p; + + p = NODE_DATA(nid); + + /* Nothing to do.. */ + if (!p->node_spanned_pages) + return; + + end_pfn = p->node_start_pfn + p->node_spanned_pages; +#ifdef CONFIG_HIGHMEM + if (end_pfn > max_low_pfn) + end_pfn = max_low_pfn; +#endif + + total_pages = bootmem_bootmap_pages(end_pfn - p->node_start_pfn); + + paddr = memblock_alloc(total_pages << PAGE_SHIFT, PAGE_SIZE); + if (!paddr) + panic("Can't allocate bootmap for nid[%d]\n", nid); + + init_bootmem_node(p, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn); + + free_bootmem_with_active_regions(nid, end_pfn); + + /* + * XXX Handle initial reservations for the system memory node + * only for the moment, we'll refactor this later for handling + * reservations in other nodes. + */ + if (nid == 0) { + struct memblock_region *reg; + + /* Reserve the sections we're already using. */ + for_each_memblock(reserved, reg) { + unsigned long size = reg->size; + +#ifdef CONFIG_HIGHMEM + /* ...but not highmem */ + if (PFN_DOWN(reg->base) >= highstart_pfn) + continue; + + if (PFN_UP(reg->base + size) > highstart_pfn) + size = (highstart_pfn - PFN_DOWN(reg->base)) + << PAGE_SHIFT; +#endif + + reserve_bootmem(reg->base, size, BOOTMEM_DEFAULT); + } + } + + sparse_memory_present_with_active_regions(nid); +} + +static void __init do_init_bootmem(void) +{ + struct memblock_region *reg; + int i; + + /* Add active regions with valid PFNs. */ + for_each_memblock(memory, reg) { + unsigned long start_pfn, end_pfn; + start_pfn = memblock_region_memory_base_pfn(reg); + end_pfn = memblock_region_memory_end_pfn(reg); + memblock_set_node(PFN_PHYS(start_pfn), + PFN_PHYS(end_pfn - start_pfn), 0); + } + + /* All of system RAM sits in node 0 for the non-NUMA case */ + allocate_pgdat(0); + node_set_online(0); + + soc_mem_setup(); + + for_each_online_node(i) + bootmem_init_one_node(i); + + sparse_init(); +} + +extern char _heap_start[]; + +static void __init init_and_reserve_mem(void) +{ + unsigned long start_pfn, heap_start; + u64 base = min_low_pfn << PAGE_SHIFT; + u64 size = (max_low_pfn << PAGE_SHIFT) - base; + + heap_start = (unsigned long) &_heap_start; + + memblock_add(base, size); + + /* + * Partially used pages are not usable - thus + * we are rounding upwards: + */ + start_pfn = PFN_UP(__pa(heap_start)); + + /* + * Reserve the kernel text. + */ + memblock_reserve(base, (PFN_PHYS(start_pfn) + PAGE_SIZE - 1) - base); + +#ifdef CONFIG_HIGHMEM + /* + * Add & reserve highmem, so page structures are initialised. + */ + base = highstart_pfn << PAGE_SHIFT; + size = (highend_pfn << PAGE_SHIFT) - base; + if (size) { + memblock_add(base, size); + memblock_reserve(base, size); + } +#endif +} + +#ifdef CONFIG_HIGHMEM +/* + * Ensure we have allocated page tables in swapper_pg_dir for the + * fixed mappings range from 'start' to 'end'. + */ +static void __init allocate_pgtables(unsigned long start, unsigned long end) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int i, j; + unsigned long vaddr; + + vaddr = start; + i = pgd_index(vaddr); + j = pmd_index(vaddr); + pgd = swapper_pg_dir + i; + + for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { + pmd = (pmd_t *)pgd; + for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { + vaddr += PMD_SIZE; + + if (!pmd_none(*pmd)) + continue; + + pte = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); + pmd_populate_kernel(&init_mm, pmd, pte); + } + j = 0; + } +} + +static void __init fixedrange_init(void) +{ + unsigned long vaddr, end; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + /* + * Fixed mappings: + */ + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; + end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; + allocate_pgtables(vaddr, end); + + /* + * Permanent kmaps: + */ + vaddr = PKMAP_BASE; + allocate_pgtables(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP); + + pgd = swapper_pg_dir + pgd_index(vaddr); + pud = pud_offset(pgd, vaddr); + pmd = pmd_offset(pud, vaddr); + pte = pte_offset_kernel(pmd, vaddr); + pkmap_page_table = pte; +} +#endif /* CONFIG_HIGHMEM */ + +/* + * paging_init() continues the virtual memory environment setup which + * was begun by the code in arch/metag/kernel/setup.c. + */ +void __init paging_init(unsigned long mem_end) +{ + unsigned long max_zone_pfns[MAX_NR_ZONES]; + int nid; + + init_and_reserve_mem(); + + memblock_allow_resize(); + + memblock_dump_all(); + + nodes_clear(node_online_map); + + init_new_context(&init_task, &init_mm); + + memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir)); + + do_init_bootmem(); + mmu_init(mem_end); + +#ifdef CONFIG_HIGHMEM + fixedrange_init(); + kmap_init(); +#endif + + /* Initialize the zero page to a bootmem page, already zeroed. */ + empty_zero_page = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); + + user_gateway_init(); + + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + + for_each_online_node(nid) { + pg_data_t *pgdat = NODE_DATA(nid); + unsigned long low, start_pfn; + + start_pfn = pgdat->bdata->node_min_pfn; + low = pgdat->bdata->node_low_pfn; + + if (max_zone_pfns[ZONE_NORMAL] < low) + max_zone_pfns[ZONE_NORMAL] = low; + +#ifdef CONFIG_HIGHMEM + max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; +#endif + pr_info("Node %u: start_pfn = 0x%lx, low = 0x%lx\n", + nid, start_pfn, low); + } + + free_area_init_nodes(max_zone_pfns); +} + +void __init mem_init(void) +{ + int nid; + +#ifdef CONFIG_HIGHMEM + unsigned long tmp; + for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { + struct page *page = pfn_to_page(tmp); + ClearPageReserved(page); + init_page_count(page); + __free_page(page); + totalhigh_pages++; + } + totalram_pages += totalhigh_pages; + num_physpages += totalhigh_pages; +#endif /* CONFIG_HIGHMEM */ + + for_each_online_node(nid) { + pg_data_t *pgdat = NODE_DATA(nid); + unsigned long node_pages = 0; + + num_physpages += pgdat->node_present_pages; + + if (pgdat->node_spanned_pages) + node_pages = free_all_bootmem_node(pgdat); + + totalram_pages += node_pages; + } + + pr_info("Memory: %luk/%luk available\n", + (unsigned long)nr_free_pages() << (PAGE_SHIFT - 10), + num_physpages << (PAGE_SHIFT - 10)); + + show_mem(0); + + return; +} + +static void free_init_pages(char *what, unsigned long begin, unsigned long end) +{ + unsigned long addr; + + for (addr = begin; addr < end; addr += PAGE_SIZE) { + ClearPageReserved(virt_to_page(addr)); + init_page_count(virt_to_page(addr)); + memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); + free_page(addr); + totalram_pages++; + } + pr_info("Freeing %s: %luk freed\n", what, (end - begin) >> 10); +} + +void free_initmem(void) +{ + free_init_pages("unused kernel memory", + (unsigned long)(&__init_begin), + (unsigned long)(&__init_end)); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + end = end & PAGE_MASK; + free_init_pages("initrd memory", start, end); +} +#endif + +#ifdef CONFIG_OF_FLATTREE +void __init early_init_dt_setup_initrd_arch(unsigned long start, + unsigned long end) +{ + pr_err("%s(%lx, %lx)\n", + __func__, start, end); +} +#endif /* CONFIG_OF_FLATTREE */ diff --git a/arch/metag/mm/ioremap.c b/arch/metag/mm/ioremap.c new file mode 100644 index 000000000000..a136a435fdaa --- /dev/null +++ b/arch/metag/mm/ioremap.c @@ -0,0 +1,89 @@ +/* + * Re-map IO memory to kernel address space so that we can access it. + * Needed for memory-mapped I/O devices mapped outside our normal DRAM + * window (that is, all memory-mapped I/O devices). + * + * Copyright (C) 1995,1996 Linus Torvalds + * + * Meta port based on CRIS-port by Axis Communications AB + */ + +#include <linux/vmalloc.h> +#include <linux/io.h> +#include <linux/export.h> +#include <linux/slab.h> +#include <linux/mm.h> + +#include <asm/pgtable.h> + +/* + * Remap an arbitrary physical address space into the kernel virtual + * address space. Needed when the kernel wants to access high addresses + * directly. + * + * NOTE! We need to allow non-page-aligned mappings too: we will obviously + * have to convert them into an offset in a page-aligned mapping, but the + * caller shouldn't need to know that small detail. + */ +void __iomem *__ioremap(unsigned long phys_addr, size_t size, + unsigned long flags) +{ + unsigned long addr; + struct vm_struct *area; + unsigned long offset, last_addr; + pgprot_t prot; + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; + + /* Custom region addresses are accessible and uncached by default. */ + if (phys_addr >= LINSYSCUSTOM_BASE && + phys_addr < (LINSYSCUSTOM_BASE + LINSYSCUSTOM_LIMIT)) + return (__force void __iomem *) phys_addr; + + /* + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr+1) - phys_addr; + prot = __pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_DIRTY | + _PAGE_ACCESSED | _PAGE_KERNEL | _PAGE_CACHE_WIN0 | + flags); + + /* + * Ok, go for it.. + */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + area->phys_addr = phys_addr; + addr = (unsigned long) area->addr; + if (ioremap_page_range(addr, addr + size, phys_addr, prot)) { + vunmap((void *) addr); + return NULL; + } + return (__force void __iomem *) (offset + (char *)addr); +} +EXPORT_SYMBOL(__ioremap); + +void __iounmap(void __iomem *addr) +{ + struct vm_struct *p; + + if ((__force unsigned long)addr >= LINSYSCUSTOM_BASE && + (__force unsigned long)addr < (LINSYSCUSTOM_BASE + + LINSYSCUSTOM_LIMIT)) + return; + + p = remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr)); + if (unlikely(!p)) { + pr_err("iounmap: bad address %p\n", addr); + return; + } + + kfree(p); +} +EXPORT_SYMBOL(__iounmap); diff --git a/arch/metag/mm/l2cache.c b/arch/metag/mm/l2cache.c new file mode 100644 index 000000000000..c64ee615cf90 --- /dev/null +++ b/arch/metag/mm/l2cache.c @@ -0,0 +1,192 @@ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/delay.h> + +#include <asm/l2cache.h> +#include <asm/metag_isa.h> + +/* If non-0, then initialise the L2 cache */ +static int l2cache_init = 1; +/* If non-0, then initialise the L2 cache prefetch */ +static int l2cache_init_pf = 1; + +int l2c_pfenable; + +static volatile u32 l2c_testdata[16] __initdata __aligned(64); + +static int __init parse_l2cache(char *p) +{ + char *cp = p; + + if (get_option(&cp, &l2cache_init) != 1) { + pr_err("Bad l2cache parameter (%s)\n", p); + return 1; + } + return 0; +} +early_param("l2cache", parse_l2cache); + +static int __init parse_l2cache_pf(char *p) +{ + char *cp = p; + + if (get_option(&cp, &l2cache_init_pf) != 1) { + pr_err("Bad l2cache_pf parameter (%s)\n", p); + return 1; + } + return 0; +} +early_param("l2cache_pf", parse_l2cache_pf); + +static int __init meta_l2c_setup(void) +{ + /* + * If the L2 cache isn't even present, don't do anything, but say so in + * the log. + */ + if (!meta_l2c_is_present()) { + pr_info("L2 Cache: Not present\n"); + return 0; + } + + /* + * Check whether the line size is recognised. + */ + if (!meta_l2c_linesize()) { + pr_warn_once("L2 Cache: unknown line size id (config=0x%08x)\n", + meta_l2c_config()); + } + + /* + * Initialise state. + */ + l2c_pfenable = _meta_l2c_pf_is_enabled(); + + /* + * Enable the L2 cache and print to log whether it was already enabled + * by the bootloader. + */ + if (l2cache_init) { + pr_info("L2 Cache: Enabling... "); + if (meta_l2c_enable()) + pr_cont("already enabled\n"); + else + pr_cont("done\n"); + } else { + pr_info("L2 Cache: Not enabling\n"); + } + + /* + * Enable L2 cache prefetch. + */ + if (l2cache_init_pf) { + pr_info("L2 Cache: Enabling prefetch... "); + if (meta_l2c_pf_enable(1)) + pr_cont("already enabled\n"); + else + pr_cont("done\n"); + } else { + pr_info("L2 Cache: Not enabling prefetch\n"); + } + + return 0; +} +core_initcall(meta_l2c_setup); + +int meta_l2c_disable(void) +{ + unsigned long flags; + int en; + + if (!meta_l2c_is_present()) + return 1; + + /* + * Prevent other threads writing during the writeback, otherwise the + * writes will get "lost" when the L2 is disabled. + */ + __global_lock2(flags); + en = meta_l2c_is_enabled(); + if (likely(en)) { + _meta_l2c_pf_enable(0); + wr_fence(); + _meta_l2c_purge(); + _meta_l2c_enable(0); + } + __global_unlock2(flags); + + return !en; +} + +int meta_l2c_enable(void) +{ + unsigned long flags; + int en; + + if (!meta_l2c_is_present()) + return 0; + + /* + * Init (clearing the L2) can happen while the L2 is disabled, so other + * threads are safe to continue executing, however we must not init the + * cache if it's already enabled (dirty lines would be discarded), so + * this operation should still be atomic with other threads. + */ + __global_lock1(flags); + en = meta_l2c_is_enabled(); + if (likely(!en)) { + _meta_l2c_init(); + _meta_l2c_enable(1); + _meta_l2c_pf_enable(l2c_pfenable); + } + __global_unlock1(flags); + + return en; +} + +int meta_l2c_pf_enable(int pfenable) +{ + unsigned long flags; + int en = l2c_pfenable; + + if (!meta_l2c_is_present()) + return 0; + + /* + * We read modify write the enable register, so this operation must be + * atomic with other threads. + */ + __global_lock1(flags); + en = l2c_pfenable; + l2c_pfenable = pfenable; + if (meta_l2c_is_enabled()) + _meta_l2c_pf_enable(pfenable); + __global_unlock1(flags); + + return en; +} + +int meta_l2c_flush(void) +{ + unsigned long flags; + int en; + + /* + * Prevent other threads writing during the writeback. This also + * involves read modify writes. + */ + __global_lock2(flags); + en = meta_l2c_is_enabled(); + if (likely(en)) { + _meta_l2c_pf_enable(0); + wr_fence(); + _meta_l2c_purge(); + _meta_l2c_enable(0); + _meta_l2c_init(); + _meta_l2c_enable(1); + _meta_l2c_pf_enable(l2c_pfenable); + } + __global_unlock2(flags); + + return !en; +} diff --git a/arch/metag/mm/maccess.c b/arch/metag/mm/maccess.c new file mode 100644 index 000000000000..eba2cfc935b1 --- /dev/null +++ b/arch/metag/mm/maccess.c @@ -0,0 +1,68 @@ +/* + * safe read and write memory routines callable while atomic + * + * Copyright 2012 Imagination Technologies + */ + +#include <linux/uaccess.h> +#include <asm/io.h> + +/* + * The generic probe_kernel_write() uses the user copy code which can split the + * writes if the source is unaligned, and repeats writes to make exceptions + * precise. We override it here to avoid these things happening to memory mapped + * IO memory where they could have undesired effects. + * Due to the use of CACHERD instruction this only works on Meta2 onwards. + */ +#ifdef CONFIG_METAG_META21 +long probe_kernel_write(void *dst, const void *src, size_t size) +{ + unsigned long ldst = (unsigned long)dst; + void __iomem *iodst = (void __iomem *)dst; + unsigned long lsrc = (unsigned long)src; + const u8 *psrc = (u8 *)src; + unsigned int pte, i; + u8 bounce[8] __aligned(8); + + if (!size) + return 0; + + /* Use the write combine bit to decide is the destination is MMIO. */ + pte = __builtin_meta2_cacherd(dst); + + /* Check the mapping is valid and writeable. */ + if ((pte & (MMCU_ENTRY_WR_BIT | MMCU_ENTRY_VAL_BIT)) + != (MMCU_ENTRY_WR_BIT | MMCU_ENTRY_VAL_BIT)) + return -EFAULT; + + /* Fall back to generic version for cases we're not interested in. */ + if (pte & MMCU_ENTRY_WRC_BIT || /* write combined memory */ + (ldst & (size - 1)) || /* destination unaligned */ + size > 8 || /* more than max write size */ + (size & (size - 1))) /* non power of 2 size */ + return __probe_kernel_write(dst, src, size); + + /* If src is unaligned, copy to the aligned bounce buffer first. */ + if (lsrc & (size - 1)) { + for (i = 0; i < size; ++i) + bounce[i] = psrc[i]; + psrc = bounce; + } + + switch (size) { + case 1: + writeb(*psrc, iodst); + break; + case 2: + writew(*(const u16 *)psrc, iodst); + break; + case 4: + writel(*(const u32 *)psrc, iodst); + break; + case 8: + writeq(*(const u64 *)psrc, iodst); + break; + } + return 0; +} +#endif diff --git a/arch/metag/mm/mmu-meta1.c b/arch/metag/mm/mmu-meta1.c new file mode 100644 index 000000000000..91f4255bcb5c --- /dev/null +++ b/arch/metag/mm/mmu-meta1.c @@ -0,0 +1,157 @@ +/* + * Copyright (C) 2005,2006,2007,2008,2009 Imagination Technologies + * + * Meta 1 MMU handling code. + * + */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/io.h> + +#include <asm/mmu.h> + +#define DM3_BASE (LINSYSDIRECT_BASE + (MMCU_DIRECTMAPn_ADDR_SCALE * 3)) + +/* + * This contains the physical address of the top level 2k pgd table. + */ +static unsigned long mmu_base_phys; + +/* + * Given a physical address, return a mapped virtual address that can be used + * to access that location. + * In practice, we use the DirectMap region to make this happen. + */ +static unsigned long map_addr(unsigned long phys) +{ + static unsigned long dm_base = 0xFFFFFFFF; + int offset; + + offset = phys - dm_base; + + /* Are we in the current map range ? */ + if ((offset < 0) || (offset >= MMCU_DIRECTMAPn_ADDR_SCALE)) { + /* Calculate new DM area */ + dm_base = phys & ~(MMCU_DIRECTMAPn_ADDR_SCALE - 1); + + /* Actually map it in! */ + metag_out32(dm_base, MMCU_DIRECTMAP3_ADDR); + + /* And calculate how far into that area our reference is */ + offset = phys - dm_base; + } + + return DM3_BASE + offset; +} + +/* + * Return the physical address of the base of our pgd table. + */ +static inline unsigned long __get_mmu_base(void) +{ + unsigned long base_phys; + unsigned int stride; + + if (is_global_space(PAGE_OFFSET)) + stride = 4; + else + stride = hard_processor_id(); /* [0..3] */ + + base_phys = metag_in32(MMCU_TABLE_PHYS_ADDR); + base_phys += (0x800 * stride); + + return base_phys; +} + +/* Given a virtual address, return the virtual address of the relevant pgd */ +static unsigned long pgd_entry_addr(unsigned long virt) +{ + unsigned long pgd_phys; + unsigned long pgd_virt; + + if (!mmu_base_phys) + mmu_base_phys = __get_mmu_base(); + + /* + * Are we trying to map a global address. If so, then index + * the global pgd table instead of our local one. + */ + if (is_global_space(virt)) { + /* Scale into 2gig map */ + virt &= ~0x80000000; + } + + /* Base of the pgd table plus our 4Meg entry, 4bytes each */ + pgd_phys = mmu_base_phys + ((virt >> PGDIR_SHIFT) * 4); + + pgd_virt = map_addr(pgd_phys); + + return pgd_virt; +} + +/* Given a virtual address, return the virtual address of the relevant pte */ +static unsigned long pgtable_entry_addr(unsigned long virt) +{ + unsigned long pgtable_phys; + unsigned long pgtable_virt, pte_virt; + + /* Find the physical address of the 4MB page table*/ + pgtable_phys = metag_in32(pgd_entry_addr(virt)) & MMCU_ENTRY_ADDR_BITS; + + /* Map it to a virtual address */ + pgtable_virt = map_addr(pgtable_phys); + + /* And index into it for our pte */ + pte_virt = pgtable_virt + ((virt >> PAGE_SHIFT) & 0x3FF) * 4; + + return pte_virt; +} + +unsigned long mmu_read_first_level_page(unsigned long vaddr) +{ + return metag_in32(pgd_entry_addr(vaddr)); +} + +unsigned long mmu_read_second_level_page(unsigned long vaddr) +{ + return metag_in32(pgtable_entry_addr(vaddr)); +} + +unsigned long mmu_get_base(void) +{ + static unsigned long __base; + + /* Find the base of our MMU pgd table */ + if (!__base) + __base = pgd_entry_addr(0); + + return __base; +} + +void __init mmu_init(unsigned long mem_end) +{ + unsigned long entry, addr; + pgd_t *p_swapper_pg_dir; + + /* + * Now copy over any MMU pgd entries already in the mmu page tables + * over to our root init process (swapper_pg_dir) map. This map is + * then inherited by all other processes, which means all processes + * inherit a map of the kernel space. + */ + addr = PAGE_OFFSET; + entry = pgd_index(PAGE_OFFSET); + p_swapper_pg_dir = pgd_offset_k(0) + entry; + + while (addr <= META_MEMORY_LIMIT) { + unsigned long pgd_entry; + /* copy over the current MMU value */ + pgd_entry = mmu_read_first_level_page(addr); + pgd_val(*p_swapper_pg_dir) = pgd_entry; + + p_swapper_pg_dir++; + addr += PGDIR_SIZE; + entry++; + } +} diff --git a/arch/metag/mm/mmu-meta2.c b/arch/metag/mm/mmu-meta2.c new file mode 100644 index 000000000000..81dcbb0bba34 --- /dev/null +++ b/arch/metag/mm/mmu-meta2.c @@ -0,0 +1,207 @@ +/* + * Copyright (C) 2008,2009,2010,2011 Imagination Technologies Ltd. + * + * Meta 2 enhanced mode MMU handling code. + * + */ + +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/io.h> +#include <linux/bootmem.h> +#include <linux/syscore_ops.h> + +#include <asm/mmu.h> +#include <asm/mmu_context.h> + +unsigned long mmu_read_first_level_page(unsigned long vaddr) +{ + unsigned int cpu = hard_processor_id(); + unsigned long offset, linear_base, linear_limit; + unsigned int phys0; + pgd_t *pgd, entry; + + if (is_global_space(vaddr)) + vaddr &= ~0x80000000; + + offset = vaddr >> PGDIR_SHIFT; + + phys0 = metag_in32(mmu_phys0_addr(cpu)); + + /* Top bit of linear base is always zero. */ + linear_base = (phys0 >> PGDIR_SHIFT) & 0x1ff; + + /* Limit in the range 0 (4MB) to 9 (2GB). */ + linear_limit = 1 << ((phys0 >> 8) & 0xf); + linear_limit += linear_base; + + /* + * If offset is below linear base or above the limit then no + * mapping exists. + */ + if (offset < linear_base || offset > linear_limit) + return 0; + + offset -= linear_base; + pgd = (pgd_t *)mmu_get_base(); + entry = pgd[offset]; + + return pgd_val(entry); +} + +unsigned long mmu_read_second_level_page(unsigned long vaddr) +{ + return __builtin_meta2_cacherd((void *)(vaddr & PAGE_MASK)); +} + +unsigned long mmu_get_base(void) +{ + unsigned int cpu = hard_processor_id(); + unsigned long stride; + + stride = cpu * LINSYSMEMTnX_STRIDE; + + /* + * Bits 18:2 of the MMCU_TnLocal_TABLE_PHYS1 register should be + * used as an offset to the start of the top-level pgd table. + */ + stride += (metag_in32(mmu_phys1_addr(cpu)) & 0x7fffc); + + if (is_global_space(PAGE_OFFSET)) + stride += LINSYSMEMTXG_OFFSET; + + return LINSYSMEMT0L_BASE + stride; +} + +#define FIRST_LEVEL_MASK 0xffffffc0 +#define SECOND_LEVEL_MASK 0xfffff000 +#define SECOND_LEVEL_ALIGN 64 + +static void repriv_mmu_tables(void) +{ + unsigned long phys0_addr; + unsigned int g; + + /* + * Check that all the mmu table regions are priv protected, and if not + * fix them and emit a warning. If we left them without priv protection + * then userland processes would have access to a 2M window into + * physical memory near where the page tables are. + */ + phys0_addr = MMCU_T0LOCAL_TABLE_PHYS0; + for (g = 0; g < 2; ++g) { + unsigned int t, phys0; + unsigned long flags; + for (t = 0; t < 4; ++t) { + __global_lock2(flags); + phys0 = metag_in32(phys0_addr); + if ((phys0 & _PAGE_PRESENT) && !(phys0 & _PAGE_PRIV)) { + pr_warn("Fixing priv protection on T%d %s MMU table region\n", + t, + g ? "global" : "local"); + phys0 |= _PAGE_PRIV; + metag_out32(phys0, phys0_addr); + } + __global_unlock2(flags); + + phys0_addr += MMCU_TnX_TABLE_PHYSX_STRIDE; + } + + phys0_addr += MMCU_TXG_TABLE_PHYSX_OFFSET + - 4*MMCU_TnX_TABLE_PHYSX_STRIDE; + } +} + +#ifdef CONFIG_METAG_SUSPEND_MEM +static void mmu_resume(void) +{ + /* + * If a full suspend to RAM has happened then the original bad MMU table + * priv may have been restored, so repriv them again. + */ + repriv_mmu_tables(); +} +#else +#define mmu_resume NULL +#endif /* CONFIG_METAG_SUSPEND_MEM */ + +static struct syscore_ops mmu_syscore_ops = { + .resume = mmu_resume, +}; + +void __init mmu_init(unsigned long mem_end) +{ + unsigned long entry, addr; + pgd_t *p_swapper_pg_dir; +#ifdef CONFIG_KERNEL_4M_PAGES + unsigned long mem_size = mem_end - PAGE_OFFSET; + unsigned int pages = DIV_ROUND_UP(mem_size, 1 << 22); + unsigned int second_level_entry = 0; + unsigned long *second_level_table; +#endif + + /* + * Now copy over any MMU pgd entries already in the mmu page tables + * over to our root init process (swapper_pg_dir) map. This map is + * then inherited by all other processes, which means all processes + * inherit a map of the kernel space. + */ + addr = META_MEMORY_BASE; + entry = pgd_index(META_MEMORY_BASE); + p_swapper_pg_dir = pgd_offset_k(0) + entry; + + while (entry < (PTRS_PER_PGD - pgd_index(META_MEMORY_BASE))) { + unsigned long pgd_entry; + /* copy over the current MMU value */ + pgd_entry = mmu_read_first_level_page(addr); + pgd_val(*p_swapper_pg_dir) = pgd_entry; + + p_swapper_pg_dir++; + addr += PGDIR_SIZE; + entry++; + } + +#ifdef CONFIG_KERNEL_4M_PAGES + /* + * At this point we can also map the kernel with 4MB pages to + * reduce TLB pressure. + */ + second_level_table = alloc_bootmem_pages(SECOND_LEVEL_ALIGN * pages); + + addr = PAGE_OFFSET; + entry = pgd_index(PAGE_OFFSET); + p_swapper_pg_dir = pgd_offset_k(0) + entry; + + while (pages > 0) { + unsigned long phys_addr, second_level_phys; + pte_t *pte = (pte_t *)&second_level_table[second_level_entry]; + + phys_addr = __pa(addr); + + second_level_phys = __pa(pte); + + pgd_val(*p_swapper_pg_dir) = ((second_level_phys & + FIRST_LEVEL_MASK) | + _PAGE_SZ_4M | + _PAGE_PRESENT); + + pte_val(*pte) = ((phys_addr & SECOND_LEVEL_MASK) | + _PAGE_PRESENT | _PAGE_DIRTY | + _PAGE_ACCESSED | _PAGE_WRITE | + _PAGE_CACHEABLE | _PAGE_KERNEL); + + p_swapper_pg_dir++; + addr += PGDIR_SIZE; + /* Second level pages must be 64byte aligned. */ + second_level_entry += (SECOND_LEVEL_ALIGN / + sizeof(unsigned long)); + pages--; + } + load_pgd(swapper_pg_dir, hard_processor_id()); + flush_tlb_all(); +#endif + + repriv_mmu_tables(); + register_syscore_ops(&mmu_syscore_ops); +} diff --git a/arch/metag/mm/numa.c b/arch/metag/mm/numa.c new file mode 100644 index 000000000000..9ae578c9b620 --- /dev/null +++ b/arch/metag/mm/numa.c @@ -0,0 +1,81 @@ +/* + * Multiple memory node support for Meta machines + * + * Copyright (C) 2007 Paul Mundt + * Copyright (C) 2010 Imagination Technologies Ltd. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/export.h> +#include <linux/bootmem.h> +#include <linux/memblock.h> +#include <linux/mm.h> +#include <linux/numa.h> +#include <linux/pfn.h> +#include <asm/sections.h> + +struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; +EXPORT_SYMBOL_GPL(node_data); + +extern char _heap_start[]; + +/* + * On Meta machines the conventional approach is to stash system RAM + * in node 0, and other memory blocks in to node 1 and up, ordered by + * latency. Each node's pgdat is node-local at the beginning of the node, + * immediately followed by the node mem map. + */ +void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end) +{ + unsigned long bootmap_pages, bootmem_paddr; + unsigned long start_pfn, end_pfn; + unsigned long pgdat_paddr; + + /* Don't allow bogus node assignment */ + BUG_ON(nid > MAX_NUMNODES || nid <= 0); + + start_pfn = start >> PAGE_SHIFT; + end_pfn = end >> PAGE_SHIFT; + + memblock_add(start, end - start); + + memblock_set_node(PFN_PHYS(start_pfn), + PFN_PHYS(end_pfn - start_pfn), nid); + + /* Node-local pgdat */ + pgdat_paddr = memblock_alloc_base(sizeof(struct pglist_data), + SMP_CACHE_BYTES, end); + NODE_DATA(nid) = __va(pgdat_paddr); + memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); + + NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; + NODE_DATA(nid)->node_start_pfn = start_pfn; + NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; + + /* Node-local bootmap */ + bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); + bootmem_paddr = memblock_alloc_base(bootmap_pages << PAGE_SHIFT, + PAGE_SIZE, end); + init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT, + start_pfn, end_pfn); + + free_bootmem_with_active_regions(nid, end_pfn); + + /* Reserve the pgdat and bootmap space with the bootmem allocator */ + reserve_bootmem_node(NODE_DATA(nid), pgdat_paddr & PAGE_MASK, + sizeof(struct pglist_data), BOOTMEM_DEFAULT); + reserve_bootmem_node(NODE_DATA(nid), bootmem_paddr, + bootmap_pages << PAGE_SHIFT, BOOTMEM_DEFAULT); + + /* It's up */ + node_set_online(nid); + + /* Kick sparsemem */ + sparse_memory_present_with_active_regions(nid); +} + +void __init __weak soc_mem_setup(void) +{ +} diff --git a/arch/metag/tbx/Makefile b/arch/metag/tbx/Makefile new file mode 100644 index 000000000000..e994239e518c --- /dev/null +++ b/arch/metag/tbx/Makefile @@ -0,0 +1,21 @@ +# +# Makefile for TBX library files.. +# + +asflags-y += -mmetac=2.1 -Wa,-mfpu=metac21 -mdsp +asflags-$(CONFIG_SMP) += -DTBX_PERCPU_SP_SAVE + +ccflags-y += -mmetac=2.1 + +lib-y += tbicore.o +lib-y += tbictx.o +lib-y += tbidefr.o +lib-y += tbilogf.o +lib-y += tbipcx.o +lib-y += tbiroot.o +lib-y += tbisoft.o +lib-y += tbistring.o +lib-y += tbitimer.o + +lib-$(CONFIG_METAG_DSP) += tbidspram.o +lib-$(CONFIG_METAG_FPU) += tbictxfpu.o diff --git a/arch/metag/tbx/tbicore.S b/arch/metag/tbx/tbicore.S new file mode 100644 index 000000000000..a0838ebcb433 --- /dev/null +++ b/arch/metag/tbx/tbicore.S @@ -0,0 +1,136 @@ +/* + * tbicore.S + * + * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + * + * Core functions needed to support use of the thread binary interface for META + * processors + */ + + .file "tbicore.S" +/* Get data structures and defines from the TBI C header */ +#include <asm/metag_mem.h> +#include <asm/metag_regs.h> +#include <asm/tbx.h> + + .data + .balign 8 + .global ___pTBISegs + .type ___pTBISegs,object +___pTBISegs: + .quad 0 /* Segment list pointer with it's */ + .size ___pTBISegs,.-___pTBISegs + /* own id or spin-lock location */ +/* + * Return ___pTBISegs value specific to privilege level - not very complicated + * at the moment + * + * Register Usage: D0Re0 is the result, D1Re0 is used as a scratch + */ + .text + .balign 4 + .global ___TBISegList + .type ___TBISegList,function +___TBISegList: + MOVT A1LbP,#HI(___pTBISegs) + ADD A1LbP,A1LbP,#LO(___pTBISegs) + GETL D0Re0,D1Re0,[A1LbP] + MOV PC,D1RtP + .size ___TBISegList,.-___TBISegList + +/* + * Search the segment list for a match given Id, pStart can be NULL + * + * Register Usage: D1Ar1 is pSeg, D0Ar2 is Id, D0Re0 is the result + * D0Ar4, D1Ar3 are used as a scratch + * NB: The PSTAT bit if Id in D0Ar2 may be toggled + */ + .text + .balign 4 + .global ___TBIFindSeg + .type ___TBIFindSeg,function +___TBIFindSeg: + MOVT A1LbP,#HI(___pTBISegs) + ADD A1LbP,A1LbP,#LO(___pTBISegs) + GETL D1Ar3,D0Ar4,[A1LbP] /* Read segment list head */ + MOV D0Re0,TXSTATUS /* What priv level are we at? */ + CMP D1Ar1,#0 /* Is pStart provided? */ +/* Disable privilege adaption for now */ + ANDT D0Re0,D0Re0,#0 /*HI(TXSTATUS_PSTAT_BIT) ; Is PSTAT set? Zero if not */ + LSL D0Re0,D0Re0,#(TBID_PSTAT_S-TXSTATUS_PSTAT_S) + XOR D0Ar2,D0Ar2,D0Re0 /* Toggle Id PSTAT if privileged */ + MOVNZ D1Ar3,D1Ar1 /* Use pStart if provided */ +$LFindSegLoop: + ADDS D0Re0,D1Ar3,#0 /* End of list? Load result into D0Re0 */ + MOVZ PC,D1RtP /* If result is NULL we leave */ + GETL D1Ar3,D0Ar4,[D1Ar3] /* Read pLink and Id */ + CMP D0Ar4,D0Ar2 /* Does it match? */ + BNZ $LFindSegLoop /* Loop if there is no match */ + TST D0Re0,D0Re0 /* Clear zero flag - we found it! */ + MOV PC,D1RtP /* Return */ + .size ___TBIFindSeg,.-___TBIFindSeg + +/* Useful offsets to encode the lower bits of the lock/unlock addresses */ +#define UON (LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFF8) +#define UOFF (LINSYSEVENT_WR_ATOMIC_UNLOCK & 0xFFF8) + +/* + * Perform a whole spin-lock sequence as used by the TBISignal routine + * + * Register Usage: D1Ar1 is pLock, D0Ar2 is Mask, D0Re0 is the result + * (All other usage due to ___TBIPoll - D0Ar6, D1Re0) + */ + .text + .balign 4 + .global ___TBISpin + .type ___TBISpin,function +___TBISpin: + SETL [A0StP++],D0FrT,D1RtP /* Save our return address */ + ORS D0Re0,D0Re0,#1 /* Clear zero flag */ + MOV D1RtP,PC /* Setup return address to form loop */ +$LSpinLoop: + BNZ ___TBIPoll /* Keep repeating if fail to set */ + GETL D0FrT,D1RtP,[--A0StP] /* Restore return address */ + MOV PC,D1RtP /* Return */ + .size ___TBISpin,.-___TBISpin + +/* + * Perform an attempt to gain access to a spin-lock and set some bits + * + * Register Usage: D1Ar1 is pLock, D0Ar2 is Mask, D0Re0 is the result + * !!On return Zero flag is SET if we are sucessfull!! + * A0.3 is used to hold base address of system event region + * D1Re0 use to hold TXMASKI while interrupts are off + */ + .text + .balign 4 + .global ___TBIPoll + .type ___TBIPoll,function +___TBIPoll: + MOV D1Re0,#0 /* Prepare to disable ints */ + MOVT A0.3,#HI(LINSYSEVENT_WR_ATOMIC_LOCK) + SWAP D1Re0,TXMASKI /* Really stop ints */ + LOCK2 /* Gain all locks */ + SET [A0.3+#UON],D1RtP /* Stop shared memory access too */ + DCACHE [D1Ar1],A0.3 /* Flush Cache line */ + GETD D0Re0,[D1Ar1] /* Get new state from memory or hit */ + DCACHE [D1Ar1],A0.3 /* Flush Cache line */ + GETD D0Re0,[D1Ar1] /* Get current state */ + TST D0Re0,D0Ar2 /* Are we clear to send? */ + ORZ D0Re0,D0Re0,D0Ar2 /* Yes: So set bits and */ + SETDZ [D1Ar1],D0Re0 /* transmit new state */ + SET [A0.3+#UOFF],D1RtP /* Allow shared memory access */ + LOCK0 /* Release all locks */ + MOV TXMASKI,D1Re0 /* Allow ints */ +$LPollEnd: + XORNZ D0Re0,D0Re0,D0Re0 /* No: Generate zero result */ + MOV PC,D1RtP /* Return (NZ indicates failure) */ + .size ___TBIPoll,.-___TBIPoll + +/* + * End of tbicore.S + */ diff --git a/arch/metag/tbx/tbictx.S b/arch/metag/tbx/tbictx.S new file mode 100644 index 000000000000..19af983a13ae --- /dev/null +++ b/arch/metag/tbx/tbictx.S @@ -0,0 +1,366 @@ +/* + * tbictx.S + * + * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + * + * Explicit state save and restore routines forming part of the thread binary + * interface for META processors + */ + + .file "tbictx.S" +#include <asm/metag_regs.h> +#include <asm/tbx.h> + +#ifdef METAC_1_0 +/* Ax.4 is NOT saved in XAX3 */ +#define A0_4 +#else +/* Ax.4 is saved in XAX4 */ +#define A0_4 A0.4, +#endif + + +/* Size of the TBICTX structure */ +#define TBICTX_BYTES ((TBICTX_AX_REGS*8)+TBICTX_AX) + +/* + * TBIRES __TBINestInts( TBIRES State, void *pExt, int NoNestMask ) + */ + .text + .balign 4 + .global ___TBINestInts + .type ___TBINestInts,function +___TBINestInts: + XOR D0Ar4,D0Ar4,#-1 /* D0Ar4 = ~TrigBit */ + AND D0Ar4,D0Ar4,#0xFFFF /* D0Ar4 &= 0xFFFF */ + MOV D0Ar6,TXMASKI /* BGNDHALT currently enabled? */ + TSTT D0Ar2,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XCBF_BIT + AND D0Ar4,D0Ar2,D0Ar4 /* D0Ar4 = Ints to allow */ + XOR D0Ar2,D0Ar2,D0Ar4 /* Less Ints in TrigMask */ + BNZ ___TBINestInts2 /* Jump if ctx save required! */ + TSTT D0Ar2,#TBICTX_CBUF_BIT+TBICTX_CBRP_BIT /* Is catch state dirty? */ + OR D0Ar4,D0Ar4,D0Ar6 /* Or in TXMASKI BGNDHALT if set */ + TSTNZ D0Ar4,D0Ar4 /* Yes: AND triggers enabled */ + MOV D0Re0,D0Ar2 /* Update State argument */ + MOV D1Re0,D1Ar1 /* with less Ints in TrigMask */ + MOVZ TXMASKI,D0Ar4 /* Early return: Enable Ints */ + MOVZ PC,D1RtP /* Early return */ + .size ___TBINestInts,.-___TBINestInts +/* + * Drop thru into sub-function- + */ + .global ___TBINestInts2 + .type ___TBINestInts2,function +___TBINestInts2: + MOV D0FrT,A0FrP /* Full entry sequence so we */ + ADD A0FrP,A0StP,#0 /* can make sub-calls */ + MSETL [A0StP],D0FrT,D0.5,D0.6 /* and preserve our result */ + ORT D0Ar2,D0Ar2,#TBICTX_XCBF_BIT /* Add in XCBF save request */ + MOV D0.5,D0Ar2 /* Save State in DX.5 */ + MOV D1.5,D1Ar1 + OR D0.6,D0Ar4,D0Ar6 /* Save TrigMask in D0.6 */ + MOVT D1RtP,#HI(___TBICtxSave) /* Save catch buffer */ + CALL D1RtP,#LO(___TBICtxSave) + MOV TXMASKI,D0.6 /* Allow Ints */ + MOV D0Re0,D0.5 /* Return State */ + MOV D1Re0,D1.5 + MGETL D0FrT,D0.5,D0.6,[A0FrP] /* Full exit sequence */ + SUB A0StP,A0FrP,#(8*3) + MOV A0FrP,D0FrT + MOV PC,D1RtP + .size ___TBINestInts2,.-___TBINestInts2 + +/* + * void *__TBICtxSave( TBIRES State, void *pExt ) + * + * D0Ar2 contains TBICTX_*_BIT values that control what + * extended data is to be saved beyond the end of D1Ar1. + * These bits must be ored into the SaveMask of this structure. + * + * Virtually all possible scratch registers are used. + * + * The D1Ar1 parameter is only used as the basis for saving + * CBUF state. + */ +/* + * If TBICTX_XEXT_BIT is specified in State. then State.pCtx->Ext is + * utilised to save the base address of the context save area and + * the extended states saved. The XEXT flag then indicates that the + * original state of the A0.2 and A1.2 registers from TBICTX.Ext.AX2 + * are stored as the first part of the extended state structure. + */ + .balign 4 + .global ___TBICtxSave + .type ___TBICtxSave,function +___TBICtxSave: + GETD D0Re0,[D1Ar1+#TBICTX_SaveMask-2] /* Get SaveMask */ + TSTT D0Ar2,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT + /* Just XCBF to save? */ + MOV A0.2,D1Ar3 /* Save pointer into A0.2 */ + MOV A1.2,D1RtP /* Free off D0FrT:D1RtP pair */ + BZ $LCtxSaveCBUF /* Yes: Only XCBF may be saved */ + TSTT D0Ar2,#TBICTX_XEXT_BIT /* Extended base-state model? */ + BZ $LCtxSaveXDX8 + GETL D0Ar6,D1Ar5,[D1Ar1+#TBICTX_Ext_AX2] /* Get A0.2, A1.2 state */ + MOV D0Ar4,D0Ar2 /* Extract Ctx.SaveFlags value */ + ANDMT D0Ar4,D0Ar4,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT + SETD [D1Ar1+#TBICTX_Ext_Ctx_pExt],A0.2 + SETD [D1Ar1+#TBICTX_Ext_Ctx_SaveMask-2],D0Ar4 + SETL [A0.2++],D0Ar6,D1Ar5 /* Save A0.2, A1.2 state */ +$LCtxSaveXDX8: + TSTT D0Ar2,#TBICTX_XDX8_BIT /* Save extended DX regs? */ + BZ $LCtxSaveXAXX +/* + * Save 8 extra DX registers + */ + MSETL [A0.2],D0.8,D0.9,D0.10,D0.11,D0.12,D0.13,D0.14,D0.15 +$LCtxSaveXAXX: + TSTT D0Ar2,#TBICTX_XAXX_BIT /* Save extended AX regs? */ + SWAP D0Re0,A0.2 /* pDst into D0Re0 */ + BZ $LCtxSaveXHL2 +/* + * Save 4 extra AX registers + */ + MSETL [D0Re0], A0_4 A0.5,A0.6,A0.7 /* Save 8*3 bytes */ +$LCtxSaveXHL2: + TSTT D0Ar2,#TBICTX_XHL2_BIT /* Save hardware-loop regs? */ + SWAP D0Re0,A0.2 /* pDst back into A0.2 */ + MOV D0Ar6,TXL1START + MOV D1Ar5,TXL2START + BZ $LCtxSaveXTDP +/* + * Save hardware loop registers + */ + SETL [A0.2++],D0Ar6,D1Ar5 /* Save 8*1 bytes */ + MOV D0Ar6,TXL1END + MOV D1Ar5,TXL2END + MOV D0FrT,TXL1COUNT + MOV D1RtP,TXL2COUNT + MSETL [A0.2],D0Ar6,D0FrT /* Save 8*2 bytes */ +/* + * Clear loop counters to disable any current loops + */ + XOR TXL1COUNT,D0FrT,D0FrT + XOR TXL2COUNT,D1RtP,D1RtP +$LCtxSaveXTDP: + TSTT D0Ar2,#TBICTX_XTDP_BIT /* Save per-thread DSP regs? */ + BZ $LCtxSaveCBUF +/* + * Save per-thread DSP registers; ACC.0, PR.0, PI.1-3 (PI.0 is zero) + */ +#ifndef CTX_NO_DSP +D SETL [A0.2++],AC0.0,AC1.0 /* Save ACx.0 lower 32-bits */ +DH SETL [A0.2++],AC0.0,AC1.0 /* Save ACx.0 upper 32-bits */ +D SETL [A0.2++],D0AR.0,D1AR.0 /* Save DSP RAM registers */ +D SETL [A0.2++],D0AR.1,D1AR.1 +D SETL [A0.2++],D0AW.0,D1AW.0 +D SETL [A0.2++],D0AW.1,D1AW.1 +D SETL [A0.2++],D0BR.0,D1BR.0 +D SETL [A0.2++],D0BR.1,D1BR.1 +D SETL [A0.2++],D0BW.0,D1BW.0 +D SETL [A0.2++],D0BW.1,D1BW.1 +D SETL [A0.2++],D0ARI.0,D1ARI.0 +D SETL [A0.2++],D0ARI.1,D1ARI.1 +D SETL [A0.2++],D0AWI.0,D1AWI.0 +D SETL [A0.2++],D0AWI.1,D1AWI.1 +D SETL [A0.2++],D0BRI.0,D1BRI.0 +D SETL [A0.2++],D0BRI.1,D1BRI.1 +D SETL [A0.2++],D0BWI.0,D1BWI.0 +D SETL [A0.2++],D0BWI.1,D1BWI.1 +D SETD [A0.2++],T0 +D SETD [A0.2++],T1 +D SETD [A0.2++],T2 +D SETD [A0.2++],T3 +D SETD [A0.2++],T4 +D SETD [A0.2++],T5 +D SETD [A0.2++],T6 +D SETD [A0.2++],T7 +D SETD [A0.2++],T8 +D SETD [A0.2++],T9 +D SETD [A0.2++],TA +D SETD [A0.2++],TB +D SETD [A0.2++],TC +D SETD [A0.2++],TD +D SETD [A0.2++],TE +D SETD [A0.2++],TF +#else + ADD A0.2,A0.2,#(8*18+4*16) +#endif + MOV D0Ar6,TXMRSIZE + MOV D1Ar5,TXDRSIZE + SETL [A0.2++],D0Ar6,D1Ar5 /* Save 8*1 bytes */ + +$LCtxSaveCBUF: +#ifdef TBI_1_3 + MOV D0Ar4,D0Re0 /* Copy Ctx Flags */ + ANDT D0Ar4,D0Ar4,#TBICTX_XCBF_BIT /* mask XCBF if already set */ + XOR D0Ar4,D0Ar4,#-1 + AND D0Ar2,D0Ar2,D0Ar4 /* remove XCBF if already set */ +#endif + TSTT D0Ar2,#TBICTX_XCBF_BIT /* Want to save CBUF? */ + ANDT D0Ar2,D0Ar2,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT + OR D0Ar2,D0Ar2,D0Re0 /* Generate new SaveMask */ + SETD [D1Ar1+#TBICTX_SaveMask-2],D0Ar2/* Add in bits saved to TBICTX */ + MOV D0Re0,A0.2 /* Return end of save area */ + MOV D0Ar4,TXDIVTIME /* Get TXDIVTIME */ + MOVZ PC,A1.2 /* No: Early return */ + TSTT D0Ar2,#TBICTX_CBUF_BIT+TBICTX_CBRP_BIT /* Need to save CBUF? */ + MOVZ PC,A1.2 /* No: Early return */ + ORT D0Ar2,D0Ar2,#TBICTX_XCBF_BIT + SETD [D1Ar1+#TBICTX_SaveMask-2],D0Ar2/* Add in XCBF bit to TBICTX */ + ADD A0.2,D1Ar1,#TBICTX_BYTES /* Dump CBUF state after TBICTX */ +/* + * Save CBUF + */ + SETD [A0.2+# 0],TXCATCH0 /* Restore TXCATCHn */ + SETD [A0.2+# 4],TXCATCH1 + TSTT D0Ar2,#TBICTX_CBRP_BIT /* ... RDDIRTY was/is set */ + SETD [A0.2+# 8],TXCATCH2 + SETD [A0.2+#12],TXCATCH3 + BZ $LCtxSaveComplete + SETL [A0.2+#(2*8)],RD /* Save read pipeline */ + SETL [A0.2+#(3*8)],RD /* Save read pipeline */ + SETL [A0.2+#(4*8)],RD /* Save read pipeline */ + SETL [A0.2+#(5*8)],RD /* Save read pipeline */ + SETL [A0.2+#(6*8)],RD /* Save read pipeline */ + SETL [A0.2+#(7*8)],RD /* Save read pipeline */ + AND TXDIVTIME,D0Ar4,#TXDIVTIME_DIV_BITS /* Clear RPDIRTY */ +$LCtxSaveComplete: + MOV PC,A1.2 /* Return */ + .size ___TBICtxSave,.-___TBICtxSave + +/* + * void *__TBICtxRestore( TBIRES State, void *pExt ) + * + * D0Ar2 contains TBICTX_*_BIT values that control what + * extended data is to be recovered from D1Ar3 (pExt). + * + * Virtually all possible scratch registers are used. + */ +/* + * If TBICTX_XEXT_BIT is specified in State. Then the saved state of + * the orginal A0.2 and A1.2 is restored from pExt and the XEXT + * related flags are removed from State.pCtx->SaveMask. + * + */ + .balign 4 + .global ___TBICtxRestore + .type ___TBICtxRestore,function +___TBICtxRestore: + GETD D0Ar6,[D1Ar1+#TBICTX_CurrMODE] /* Get TXMODE Value */ + ANDST D0Ar2,D0Ar2,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT + MOV D1Re0,D0Ar2 /* Keep flags in D1Re0 */ + MOV D0Re0,D1Ar3 /* D1Ar3 is default result */ + MOVZ PC,D1RtP /* Early return, nothing to do */ + ANDT D0Ar6,D0Ar6,#0xE000 /* Top bits of TXMODE required */ + MOV A0.3,D0Ar6 /* Save TXMODE for later */ + TSTT D1Re0,#TBICTX_XEXT_BIT /* Check for XEXT bit */ + BZ $LCtxRestXDX8 + GETD D0Ar4,[D1Ar1+#TBICTX_SaveMask-2]/* Get current SaveMask */ + GETL D0Ar6,D1Ar5,[D0Re0++] /* Restore A0.2, A1.2 state */ + ANDMT D0Ar4,D0Ar4,#(0xFFFF-(TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT)) + SETD [D1Ar1+#TBICTX_SaveMask-2],D0Ar4/* New SaveMask */ +#ifdef METAC_1_0 + SETD [D1Ar1+#TBICTX_Ext_AX2_U0],D0Ar6 + MOV D0Ar6,D1Ar1 + SETD [D0Ar6+#TBICTX_Ext_AX2_U1],D1Ar5 +#else + SETL [D1Ar1+#TBICTX_Ext_AX2],D0Ar6,D1Ar5 +#endif +$LCtxRestXDX8: + TSTT D1Re0,#TBICTX_XDX8_BIT /* Get extended DX regs? */ + MOV A1.2,D1RtP /* Free off D1RtP register */ + BZ $LCtxRestXAXX +/* + * Restore 8 extra DX registers + */ + MGETL D0.8,D0.9,D0.10,D0.11,D0.12,D0.13,D0.14,D0.15,[D0Re0] +$LCtxRestXAXX: + TSTT D1Re0,#TBICTX_XAXX_BIT /* Get extended AX regs? */ + BZ $LCtxRestXHL2 +/* + * Restore 3 extra AX registers + */ + MGETL A0_4 A0.5,A0.6,A0.7,[D0Re0] /* Get 8*3 bytes */ +$LCtxRestXHL2: + TSTT D1Re0,#TBICTX_XHL2_BIT /* Get hardware-loop regs? */ + BZ $LCtxRestXTDP +/* + * Get hardware loop registers + */ + MGETL D0Ar6,D0Ar4,D0Ar2,[D0Re0] /* Get 8*3 bytes */ + MOV TXL1START,D0Ar6 + MOV TXL2START,D1Ar5 + MOV TXL1END,D0Ar4 + MOV TXL2END,D1Ar3 + MOV TXL1COUNT,D0Ar2 + MOV TXL2COUNT,D1Ar1 +$LCtxRestXTDP: + TSTT D1Re0,#TBICTX_XTDP_BIT /* Get per-thread DSP regs? */ + MOVZ PC,A1.2 /* No: Early return */ +/* + * Get per-thread DSP registers; ACC.0, PR.0, PI.1-3 (PI.0 is zero) + */ + MOV A0.2,D0Re0 + GETL D0Ar6,D1Ar5,[D0Re0++#((16*4)+(18*8))] +#ifndef CTX_NO_DSP +D GETL AC0.0,AC1.0,[A0.2++] /* Restore ACx.0 lower 32-bits */ +DH GETL AC0.0,AC1.0,[A0.2++] /* Restore ACx.0 upper 32-bits */ +#else + ADD A0.2,A0.2,#(2*8) +#endif + ADD D0Re0,D0Re0,#(2*4) + MOV TXMODE,A0.3 /* Some TXMODE bits needed */ + MOV TXMRSIZE,D0Ar6 + MOV TXDRSIZE,D1Ar5 +#ifndef CTX_NO_DSP +D GETL D0AR.0,D1AR.0,[A0.2++] /* Restore DSP RAM registers */ +D GETL D0AR.1,D1AR.1,[A0.2++] +D GETL D0AW.0,D1AW.0,[A0.2++] +D GETL D0AW.1,D1AW.1,[A0.2++] +D GETL D0BR.0,D1BR.0,[A0.2++] +D GETL D0BR.1,D1BR.1,[A0.2++] +D GETL D0BW.0,D1BW.0,[A0.2++] +D GETL D0BW.1,D1BW.1,[A0.2++] +#else + ADD A0.2,A0.2,#(8*8) +#endif + MOV TXMODE,#0 /* Restore TXMODE */ +#ifndef CTX_NO_DSP +D GETL D0ARI.0,D1ARI.0,[A0.2++] +D GETL D0ARI.1,D1ARI.1,[A0.2++] +D GETL D0AWI.0,D1AWI.0,[A0.2++] +D GETL D0AWI.1,D1AWI.1,[A0.2++] +D GETL D0BRI.0,D1BRI.0,[A0.2++] +D GETL D0BRI.1,D1BRI.1,[A0.2++] +D GETL D0BWI.0,D1BWI.0,[A0.2++] +D GETL D0BWI.1,D1BWI.1,[A0.2++] +D GETD T0,[A0.2++] +D GETD T1,[A0.2++] +D GETD T2,[A0.2++] +D GETD T3,[A0.2++] +D GETD T4,[A0.2++] +D GETD T5,[A0.2++] +D GETD T6,[A0.2++] +D GETD T7,[A0.2++] +D GETD T8,[A0.2++] +D GETD T9,[A0.2++] +D GETD TA,[A0.2++] +D GETD TB,[A0.2++] +D GETD TC,[A0.2++] +D GETD TD,[A0.2++] +D GETD TE,[A0.2++] +D GETD TF,[A0.2++] +#else + ADD A0.2,A0.2,#(8*8+4*16) +#endif + MOV PC,A1.2 /* Return */ + .size ___TBICtxRestore,.-___TBICtxRestore + +/* + * End of tbictx.S + */ diff --git a/arch/metag/tbx/tbictxfpu.S b/arch/metag/tbx/tbictxfpu.S new file mode 100644 index 000000000000..e773bea3e7bd --- /dev/null +++ b/arch/metag/tbx/tbictxfpu.S @@ -0,0 +1,190 @@ +/* + * tbictxfpu.S + * + * Copyright (C) 2009, 2012 Imagination Technologies. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + * + * Explicit state save and restore routines forming part of the thread binary + * interface for META processors + */ + + .file "tbifpuctx.S" + +#include <asm/metag_regs.h> +#include <asm/tbx.h> + +#ifdef TBI_1_4 +/* + * void *__TBICtxFPUSave( TBIRES State, void *pExt ) + * + * D0Ar2 contains TBICTX_*_BIT values that control what + * extended data is to be saved. + * These bits must be ored into the SaveMask of this structure. + * + * Virtually all possible scratch registers are used. + */ + .text + .balign 4 + .global ___TBICtxFPUSave + .type ___TBICtxFPUSave,function +___TBICtxFPUSave: + + /* D1Ar1:D0Ar2 - State + * D1Ar3 - pExt + * D0Ar4 - Value of METAC_CORE_ID + * D1Ar5 - Scratch + * D0Ar6 - Scratch + */ + + /* If the FPAC bit isnt set then there is nothing to do */ + TSTT D0Ar2,#TBICTX_FPAC_BIT + MOVZ PC, D1RtP + + /* Obtain the Core config */ + MOVT D0Ar4, #HI(METAC_CORE_ID) + ADD D0Ar4, D0Ar4, #LO(METAC_CORE_ID) + GETD D0Ar4, [D0Ar4] + + /* Detect FX.8 - FX.15 and add to core config */ + MOV D0Ar6, TXENABLE + AND D0Ar6, D0Ar6, #(TXENABLE_CLASSALT_FPUR8 << TXENABLE_CLASS_S) + AND D0Ar4, D0Ar4, #LO(0x0000FFFF) + ORT D0Ar4, D0Ar4, #HI(TBICTX_CFGFPU_FX16_BIT) + XOR D0Ar4, D0Ar4, D0Ar6 + + /* Save the relevant bits to the buffer */ + SETD [D1Ar3++], D0Ar4 + + /* Save the relevant bits of TXDEFR (Assumes TXDEFR is coherent) ... */ + MOV D0Ar6, TXDEFR + LSR D0Re0, D0Ar6, #8 + AND D0Re0, D0Re0, #LO(TXDEFR_FPE_FE_BITS>>8) + AND D0Ar6, D0Ar6, #LO(TXDEFR_FPE_ICTRL_BITS) + OR D0Re0, D0Re0, D0Ar6 + + /* ... along with relevant bits of TXMODE to buffer */ + MOV D0Ar6, TXMODE + ANDT D0Ar6, D0Ar6, #HI(TXMODE_FPURMODE_BITS) + ORT D0Ar6, D0Ar6, #HI(TXMODE_FPURMODEWRITE_BIT) + OR D0Ar6, D0Ar6, D0Re0 + SETD [D1Ar3++], D0Ar6 + + GETD D0Ar6,[D1Ar1+#TBICTX_SaveMask-2] /* Get the current SaveMask */ + /* D0Ar6 - pCtx->SaveMask */ + + TSTT D0Ar4, #HI(TBICTX_CFGFPU_FX16_BIT) /* Perform test here for extended FPU registers + * to avoid stalls + */ + /* Save the standard FPU registers */ +F MSETL [D1Ar3++], FX.0, FX.2, FX.4, FX.6 + + /* Save the extended FPU registers if they are present */ + BZ $Lskip_save_fx8_fx16 +F MSETL [D1Ar3++], FX.8, FX.10, FX.12, FX.14 +$Lskip_save_fx8_fx16: + + /* Save the FPU Accumulator if it is present */ + TST D0Ar4, #METAC_COREID_NOFPACC_BIT + BNZ $Lskip_save_fpacc +F SETL [D1Ar3++], ACF.0 +F SETL [D1Ar3++], ACF.1 +F SETL [D1Ar3++], ACF.2 +$Lskip_save_fpacc: + + /* Update pCtx->SaveMask */ + ANDT D0Ar2, D0Ar2, #TBICTX_FPAC_BIT + OR D0Ar6, D0Ar6, D0Ar2 + SETD [D1Ar1+#TBICTX_SaveMask-2],D0Ar6/* Add in XCBF bit to TBICTX */ + + MOV D0Re0, D1Ar3 /* Return end of save area */ + MOV PC, D1RtP + + .size ___TBICtxFPUSave,.-___TBICtxFPUSave + +/* + * void *__TBICtxFPURestore( TBIRES State, void *pExt ) + * + * D0Ar2 contains TBICTX_*_BIT values that control what + * extended data is to be recovered from D1Ar3 (pExt). + * + * Virtually all possible scratch registers are used. + */ +/* + * If TBICTX_XEXT_BIT is specified in State. Then the saved state of + * the orginal A0.2 and A1.2 is restored from pExt and the XEXT + * related flags are removed from State.pCtx->SaveMask. + * + */ + .balign 4 + .global ___TBICtxFPURestore + .type ___TBICtxFPURestore,function +___TBICtxFPURestore: + + /* D1Ar1:D0Ar2 - State + * D1Ar3 - pExt + * D0Ar4 - Value of METAC_CORE_ID + * D1Ar5 - Scratch + * D0Ar6 - Scratch + * D1Re0 - Scratch + */ + + /* If the FPAC bit isnt set then there is nothing to do */ + TSTT D0Ar2,#TBICTX_FPAC_BIT + MOVZ PC, D1RtP + + /* Obtain the relevant bits of the Core config */ + GETD D0Ar4, [D1Ar3++] + + /* Restore FPU related parts of TXDEFR. Assumes TXDEFR is coherent */ + GETD D1Ar5, [D1Ar3++] + MOV D0Ar6, D1Ar5 + LSL D1Re0, D1Ar5, #8 + ANDT D1Re0, D1Re0, #HI(TXDEFR_FPE_FE_BITS|TXDEFR_FPE_ICTRL_BITS) + AND D1Ar5, D1Ar5, #LO(TXDEFR_FPE_FE_BITS|TXDEFR_FPE_ICTRL_BITS) + OR D1Re0, D1Re0, D1Ar5 + + MOV D1Ar5, TXDEFR + ANDMT D1Ar5, D1Ar5, #HI(~(TXDEFR_FPE_FE_BITS|TXDEFR_FPE_ICTRL_BITS)) + ANDMB D1Ar5, D1Ar5, #LO(~(TXDEFR_FPE_FE_BITS|TXDEFR_FPE_ICTRL_BITS)) + OR D1Re0, D1Re0, D1Ar5 + MOV TXDEFR, D1Re0 + + /* Restore relevant bits of TXMODE */ + MOV D1Ar5, TXMODE + ANDMT D1Ar5, D1Ar5, #HI(~TXMODE_FPURMODE_BITS) + ANDT D0Ar6, D0Ar6, #HI(TXMODE_FPURMODE_BITS|TXMODE_FPURMODEWRITE_BIT) + OR D0Ar6, D0Ar6, D1Ar5 + MOV TXMODE, D0Ar6 + + TSTT D0Ar4, #HI(TBICTX_CFGFPU_FX16_BIT) /* Perform test here for extended FPU registers + * to avoid stalls + */ + /* Save the standard FPU registers */ +F MGETL FX.0, FX.2, FX.4, FX.6, [D1Ar3++] + + /* Save the extended FPU registers if they are present */ + BZ $Lskip_restore_fx8_fx16 +F MGETL FX.8, FX.10, FX.12, FX.14, [D1Ar3++] +$Lskip_restore_fx8_fx16: + + /* Save the FPU Accumulator if it is present */ + TST D0Ar4, #METAC_COREID_NOFPACC_BIT + BNZ $Lskip_restore_fpacc +F GETL ACF.0, [D1Ar3++] +F GETL ACF.1, [D1Ar3++] +F GETL ACF.2, [D1Ar3++] +$Lskip_restore_fpacc: + + MOV D0Re0, D1Ar3 /* Return end of save area */ + MOV PC, D1RtP + + .size ___TBICtxFPURestore,.-___TBICtxFPURestore + +#endif /* TBI_1_4 */ + +/* + * End of tbictx.S + */ diff --git a/arch/metag/tbx/tbidefr.S b/arch/metag/tbx/tbidefr.S new file mode 100644 index 000000000000..3eb165ebf540 --- /dev/null +++ b/arch/metag/tbx/tbidefr.S @@ -0,0 +1,175 @@ +/* + * tbidefr.S + * + * Copyright (C) 2009, 2012 Imagination Technologies. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + * + * Routing deferred exceptions + */ + +#include <asm/metag_regs.h> +#include <asm/tbx.h> + + .text + .balign 4 + .global ___TBIHandleDFR + .type ___TBIHandleDFR,function +/* D1Ar1:D0Ar2 -- State + * D0Ar3 -- SigNum + * D0Ar4 -- Triggers + * D1Ar5 -- InstOrSWSId + * D0Ar6 -- pTBI (volatile) + */ +___TBIHandleDFR: +#ifdef META_BUG_MBN100212 + MSETL [A0StP++], D0FrT, D0.5 + + /* D1Ar1,D0Ar2,D1Ar5,D0Ar6 -- Arguments to handler, must be preserved + * D0Ar4 -- The deferred exceptions + * D1Ar3 -- As per D0Ar4 but just the trigger bits + * D0.5 -- The bgnd deferred exceptions + * D1.5 -- TXDEFR with bgnd re-added + */ + + /* - Collect the pending deferred exceptions using TXSTAT, + * (ack's the bgnd exceptions as a side-effect) + * - Manually collect remaining (interrupt) deferred exceptions + * using TXDEFR + * - Replace the triggers (from TXSTATI) with the int deferred + * exceptions DEFR ..., TXSTATI would have returned if it was valid + * from bgnd code + * - Reconstruct TXDEFR by or'ing bgnd deferred exceptions (except + * the DEFER bit) and the int deferred exceptions. This will be + * restored later + */ + DEFR D0.5, TXSTAT + MOV D1.5, TXDEFR + ANDT D0.5, D0.5, #HI(0xFFFF0000) + MOV D1Ar3, D1.5 + ANDT D1Ar3, D1Ar3, #HI(0xFFFF0000) + OR D0Ar4, D1Ar3, #TXSTAT_DEFER_BIT + OR D1.5, D1.5, D0.5 + + /* Mask off anything unrelated to the deferred exception triggers */ + ANDT D1Ar3, D1Ar3, #HI(TXSTAT_BUSERR_BIT | TXSTAT_FPE_BITS) + + /* Can assume that at least one exception happened since this + * handler wouldnt have been called otherwise. + * + * Replace the signal number and at the same time, prepare + * the mask to acknowledge the exception + * + * D1Re0 -- The bits to acknowledge + * D1Ar3 -- The signal number + * D1RtP -- Scratch to deal with non-conditional insns + */ + MOVT D1Re0, #HI(TXSTAT_FPE_BITS & ~TXSTAT_FPE_DENORMAL_BIT) + MOV D1RtP, #TXSTAT_FPE_INVALID_S + FFB D1Ar3, D1Ar3 + CMP D1Ar3, #TXSTAT_FPE_INVALID_S + MOVLE D1Ar3, D1RtP /* Collapse FPE triggers to a single signal */ + MOV D1RtP, #1 + LSLGT D1Re0, D1RtP, D1Ar3 + + /* Get the handler using the signal number + * + * D1Ar3 -- The signal number + * D0Re0 -- Offset into TBI struct containing handler address + * D1Re0 -- Mask of triggers to keep + * D1RtP -- Address of handler + */ + SUB D1Ar3, D1Ar3, #(TXSTAT_FPE_INVALID_S - TBID_SIGNUM_FPE) + LSL D0Re0, D1Ar3, #2 + XOR D1Re0, D1Re0, #-1 /* Prepare mask for acknowledge (avoids stall) */ + ADD D0Re0,D0Re0,#TBI_fnSigs + GETD D1RtP, [D0Ar6+D0Re0] + + /* Acknowledge triggers */ + AND D1.5, D1.5, D1Re0 + + /* Restore remaining exceptions + * Do this here in case the handler enables nested interrupts + * + * D1.5 -- TXDEFR with this exception ack'd + */ + MOV TXDEFR, D1.5 + + /* Call the handler */ + SWAP D1RtP, PC + + GETL D0.5, D1.5, [--A0StP] + GETL D0FrT, D1RtP, [--A0StP] + MOV PC,D1RtP +#else /* META_BUG_MBN100212 */ + + /* D1Ar1,D0Ar2,D1Ar5,D0Ar6 -- Arguments to handler, must be preserved + * D0Ar4 -- The deferred exceptions + * D1Ar3 -- As per D0Ar4 but just the trigger bits + */ + + /* - Collect the pending deferred exceptions using TXSTAT, + * (ack's the interrupt exceptions as a side-effect) + */ + DEFR D0Ar4, TXSTATI + + /* Mask off anything unrelated to the deferred exception triggers */ + MOV D1Ar3, D0Ar4 + ANDT D1Ar3, D1Ar3, #HI(TXSTAT_BUSERR_BIT | TXSTAT_FPE_BITS) + + /* Can assume that at least one exception happened since this + * handler wouldnt have been called otherwise. + * + * Replace the signal number and at the same time, prepare + * the mask to acknowledge the exception + * + * The unusual code for 1<<D1Ar3 may need explanation. + * Normally this would be done using 'MOV rs,#1' and 'LSL rd,rs,D1Ar3' + * but only D1Re0 is available in D1 and no crossunit insns are available + * Even worse, there is no conditional 'MOV r,#uimm8'. + * Since the CMP proves that D1Ar3 >= 20, we can reuse the bottom 12-bits + * of D1Re0 (using 'ORGT r,#1') in the knowledge that the top 20-bits will + * be discarded without affecting the result. + * + * D1Re0 -- The bits to acknowledge + * D1Ar3 -- The signal number + */ + MOVT D1Re0, #HI(TXSTAT_FPE_BITS & ~TXSTAT_FPE_DENORMAL_BIT) + MOV D0Re0, #TXSTAT_FPE_INVALID_S + FFB D1Ar3, D1Ar3 + CMP D1Ar3, #TXSTAT_FPE_INVALID_S + MOVLE D1Ar3, D0Re0 /* Collapse FPE triggers to a single signal */ + ORGT D1Re0, D1Re0, #1 + LSLGT D1Re0, D1Re0, D1Ar3 + + SUB D1Ar3, D1Ar3, #(TXSTAT_FPE_INVALID_S - TBID_SIGNUM_FPE) + + /* Acknowledge triggers and restore remaining exceptions + * Do this here in case the handler enables nested interrupts + * + * (x | y) ^ y == x & ~y. It avoids the restrictive XOR ...,#-1 insn + * and is the same length + */ + MOV D0Re0, TXDEFR + OR D0Re0, D0Re0, D1Re0 + XOR TXDEFR, D0Re0, D1Re0 + + /* Get the handler using the signal number + * + * D1Ar3 -- The signal number + * D0Re0 -- Address of handler + */ + LSL D0Re0, D1Ar3, #2 + ADD D0Re0,D0Re0,#TBI_fnSigs + GETD D0Re0, [D0Ar6+D0Re0] + + /* Tailcall the handler */ + MOV PC,D0Re0 + +#endif /* META_BUG_MBN100212 */ + .size ___TBIHandleDFR,.-___TBIHandleDFR +/* + * End of tbidefr.S + */ diff --git a/arch/metag/tbx/tbidspram.S b/arch/metag/tbx/tbidspram.S new file mode 100644 index 000000000000..2f27c0372212 --- /dev/null +++ b/arch/metag/tbx/tbidspram.S @@ -0,0 +1,161 @@ +/* + * tbidspram.S + * + * Copyright (C) 2009, 2012 Imagination Technologies. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + * + * Explicit state save and restore routines forming part of the thread binary + * interface for META processors + */ + + .file "tbidspram.S" + +/* These aren't generally useful to a user so for now, they arent publically available */ +#define _TBIECH_DSPRAM_DUA_S 8 +#define _TBIECH_DSPRAM_DUA_BITS 0x7f00 +#define _TBIECH_DSPRAM_DUB_S 0 +#define _TBIECH_DSPRAM_DUB_BITS 0x007f + +/* + * void *__TBIDspramSaveA( short DspramSizes, void *pExt ) + */ + .text + .balign 4 + .global ___TBIDspramSaveA + .type ___TBIDspramSaveA,function +___TBIDspramSaveA: + + SETL [A0StP++], D0.5, D1.5 + MOV A0.3, D0Ar2 + + /* D1Ar1 - Dspram Sizes + * A0.4 - Pointer to buffer + */ + + /* Save the specified amount of dspram DUA */ +DL MOV D0AR.0, #0 + LSR D1Ar1, D1Ar1, #_TBIECH_DSPRAM_DUA_S + AND D1Ar1, D1Ar1, #(_TBIECH_DSPRAM_DUA_BITS >> _TBIECH_DSPRAM_DUA_S) + SUB TXRPT, D1Ar1, #1 +$L1: +DL MOV D0Re0, [D0AR.0++] +DL MOV D0Ar6, [D0AR.0++] +DL MOV D0Ar4, [D0AR.0++] +DL MOV D0.5, [D0AR.0++] + MSETL [A0.3++], D0Re0, D0Ar6, D0Ar4, D0.5 + + BR $L1 + + GETL D0.5, D1.5, [--A0StP] + MOV PC, D1RtP + + .size ___TBIDspramSaveA,.-___TBIDspramSaveA + +/* + * void *__TBIDspramSaveB( short DspramSizes, void *pExt ) + */ + .balign 4 + .global ___TBIDspramSaveB + .type ___TBIDspramSaveB,function +___TBIDspramSaveB: + + SETL [A0StP++], D0.5, D1.5 + MOV A0.3, D0Ar2 + + /* D1Ar1 - Dspram Sizes + * A0.3 - Pointer to buffer + */ + + /* Save the specified amount of dspram DUA */ +DL MOV D0BR.0, #0 + LSR D1Ar1, D1Ar1, #_TBIECH_DSPRAM_DUB_S + AND D1Ar1, D1Ar1, #(_TBIECH_DSPRAM_DUB_BITS >> _TBIECH_DSPRAM_DUB_S) + SUB TXRPT, D1Ar1, #1 +$L2: +DL MOV D0Re0, [D0BR.0++] +DL MOV D0Ar6, [D0BR.0++] +DL MOV D0Ar4, [D0BR.0++] +DL MOV D0.5, [D0BR.0++] + MSETL [A0.3++], D0Re0, D0Ar6, D0Ar4, D0.5 + + BR $L2 + + GETL D0.5, D1.5, [--A0StP] + MOV PC, D1RtP + + .size ___TBIDspramSaveB,.-___TBIDspramSaveB + +/* + * void *__TBIDspramRestoreA( short DspramSizes, void *pExt ) + */ + .balign 4 + .global ___TBIDspramRestoreA + .type ___TBIDspramRestoreA,function +___TBIDspramRestoreA: + + SETL [A0StP++], D0.5, D1.5 + MOV A0.3, D0Ar2 + + /* D1Ar1 - Dspram Sizes + * A0.3 - Pointer to buffer + */ + + /* Restore the specified amount of dspram DUA */ +DL MOV D0AW.0, #0 + LSR D1Ar1, D1Ar1, #_TBIECH_DSPRAM_DUA_S + AND D1Ar1, D1Ar1, #(_TBIECH_DSPRAM_DUA_BITS >> _TBIECH_DSPRAM_DUA_S) + SUB TXRPT, D1Ar1, #1 +$L3: + MGETL D0Re0, D0Ar6, D0Ar4, D0.5, [A0.3++] +DL MOV [D0AW.0++], D0Re0 +DL MOV [D0AW.0++], D0Ar6 +DL MOV [D0AW.0++], D0Ar4 +DL MOV [D0AW.0++], D0.5 + + BR $L3 + + GETL D0.5, D1.5, [--A0StP] + MOV PC, D1RtP + + .size ___TBIDspramRestoreA,.-___TBIDspramRestoreA + +/* + * void *__TBIDspramRestoreB( short DspramSizes, void *pExt ) + */ + .balign 4 + .global ___TBIDspramRestoreB + .type ___TBIDspramRestoreB,function +___TBIDspramRestoreB: + + SETL [A0StP++], D0.5, D1.5 + MOV A0.3, D0Ar2 + + /* D1Ar1 - Dspram Sizes + * A0.3 - Pointer to buffer + */ + + /* Restore the specified amount of dspram DUA */ +DL MOV D0BW.0, #0 + LSR D1Ar1, D1Ar1, #_TBIECH_DSPRAM_DUB_S + AND D1Ar1, D1Ar1, #(_TBIECH_DSPRAM_DUB_BITS >> _TBIECH_DSPRAM_DUB_S) + SUB TXRPT, D1Ar1, #1 +$L4: + MGETL D0Re0, D0Ar6, D0Ar4, D0.5, [A0.3++] +DL MOV [D0BW.0++], D0Re0 +DL MOV [D0BW.0++], D0Ar6 +DL MOV [D0BW.0++], D0Ar4 +DL MOV [D0BW.0++], D0.5 + + BR $L4 + + GETL D0.5, D1.5, [--A0StP] + MOV PC, D1RtP + + .size ___TBIDspramRestoreB,.-___TBIDspramRestoreB + +/* + * End of tbidspram.S + */ diff --git a/arch/metag/tbx/tbilogf.S b/arch/metag/tbx/tbilogf.S new file mode 100644 index 000000000000..4a34d80657db --- /dev/null +++ b/arch/metag/tbx/tbilogf.S @@ -0,0 +1,48 @@ +/* + * tbilogf.S + * + * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + * + * Defines __TBILogF trap code for debugging messages and __TBICont for debug + * assert to be implemented on. + */ + + .file "tbilogf.S" + +/* + * Perform console printf using external debugger or host support + */ + .text + .balign 4 + .global ___TBILogF + .type ___TBILogF,function +___TBILogF: + MSETL [A0StP],D0Ar6,D0Ar4,D0Ar2 + SWITCH #0xC10020 + MOV D0Re0,#0 + SUB A0StP,A0StP,#24 + MOV PC,D1RtP + .size ___TBILogF,.-___TBILogF + +/* + * Perform wait for continue under control of the debugger + */ + .text + .balign 4 + .global ___TBICont + .type ___TBICont,function +___TBICont: + MOV D0Ar6,#1 + MSETL [A0StP],D0Ar6,D0Ar4,D0Ar2 + SWITCH #0xC30006 /* Returns if we are to continue */ + SUB A0StP,A0StP,#(8*3) + MOV PC,D1RtP /* Return */ + .size ___TBICont,.-___TBICont + +/* + * End of tbilogf.S + */ diff --git a/arch/metag/tbx/tbipcx.S b/arch/metag/tbx/tbipcx.S new file mode 100644 index 000000000000..de0626fdad25 --- /dev/null +++ b/arch/metag/tbx/tbipcx.S @@ -0,0 +1,451 @@ +/* + * tbipcx.S + * + * Copyright (C) 2001, 2002, 2007, 2009, 2012 Imagination Technologies. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + * + * Asyncronous trigger handling including exceptions + */ + + .file "tbipcx.S" +#include <asm/metag_regs.h> +#include <asm/tbx.h> + +/* BEGIN HACK */ +/* define these for now while doing inital conversion to GAS + will fix properly later */ + +/* Signal identifiers always have the TBID_SIGNAL_BIT set and contain the + following related bit-fields */ +#define TBID_SIGNUM_S 2 + +/* END HACK */ + +#ifdef METAC_1_0 +/* Ax.4 is saved in TBICTX */ +#define A0_4 ,A0.4 +#else +/* Ax.4 is NOT saved in TBICTX */ +#define A0_4 +#endif + +/* Size of the TBICTX structure */ +#define TBICTX_BYTES ((TBICTX_AX_REGS*8)+TBICTX_AX) + +#ifdef METAC_1_1 +#ifndef BOOTROM +#ifndef SPECIAL_BUILD +/* Jump straight into the boot ROM version of this code */ +#define CODE_USES_BOOTROM +#endif +#endif +#endif + +/* Define space needed for CATCH buffer state in traditional units */ +#define CATCH_ENTRIES 5 +#define CATCH_ENTRY_BYTES 16 + +#ifndef CODE_USES_BOOTROM +#define A0GblIStP A0.15 /* PTBICTX for current thread in PRIV system */ +#define A1GblIGbP A1.15 /* Interrupt A1GbP value in PRIV system */ +#endif + +/* + * TBIRES __TBIASyncTrigger( TBIRES State ) + */ + .text + .balign 4 + .global ___TBIASyncTrigger + .type ___TBIASyncTrigger,function +___TBIASyncTrigger: +#ifdef CODE_USES_BOOTROM + MOVT D0Re0,#HI(LINCORE_BASE) + JUMP D0Re0,#0xA0 +#else + MOV D0FrT,A0FrP /* Boing entry sequence */ + ADD A0FrP,A0StP,#0 + SETL [A0StP++],D0FrT,D1RtP + MOV D0Re0,PCX /* Check for repeat call */ + MOVT D0FrT,#HI(___TBIBoingRTI+4) + ADD D0FrT,D0FrT,#LO(___TBIBoingRTI+4) + CMP D0Re0,D0FrT + BEQ ___TBIBoingExit /* Already set up - come out */ + ADD D1Ar1,D1Ar1,#7 /* PRIV system stack here */ + MOV A0.2,A0StP /* else push context here */ + MOVS D0Re0,D0Ar2 /* Return in user mode? */ + ANDMB D1Ar1,D1Ar1,#0xfff8 /* align priv stack to 64-bit */ + MOV D1Re0,D1Ar1 /* and set result to arg */ + MOVMI A0.2,D1Ar1 /* use priv stack if PRIV set */ +/* + * Generate an initial TBICTX to return to our own current call context + */ + MOVT D1Ar5,#HI(___TBIBoingExit) /* Go here to return */ + ADD D1Ar5,D1Ar5,#LO(___TBIBoingExit) + ADD A0.3,A0.2,#TBICTX_DX /* DX Save area */ + ANDT D0Ar2,D0Ar2,#TBICTX_PRIV_BIT /* Extract PRIV bit */ + MOVT D0Ar6,#TBICTX_SOFT_BIT /* Only soft thread state */ + ADD D0Ar6,D0Ar6,D0Ar2 /* Add in PRIV bit if requested */ + SETL [A0.2],D0Ar6,D1Ar5 /* Push header fields */ + ADD D0FrT,A0.2,#TBICTX_AX /* Address AX save area */ + MSETL [A0.3],D0Re0,D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7 + MOV D0Ar6,#0 + MOV D1Ar5,#0 + SETL [A0.3++],D0Ar6,D1Ar5 /* Zero CT register states */ + SETL [A0.3++],D0Ar6,D1Ar5 + MSETL [D0FrT],A0StP,A0FrP,A0.2,A0.3 A0_4 /* Save AX regs */ + MOV A0FrP,A0.2 /* Restore me! */ + B ___TBIResume + .size ___TBIASyncTrigger,.-___TBIASyncTrigger + +/* + * Optimised return to handler for META Core + */ +___TBIBoingRTH: + RTH /* Go to background level */ + MOVT A0.2, #HI($Lpcx_target) + ADD A0.2,A0.2,#LO($Lpcx_target) + MOV PCX,A0.2 /* Setup PCX for interrupts */ + MOV PC,D1Re0 /* Jump to handler */ +/* + * This is where the code below needs to jump to wait for outermost interrupt + * event in a non-privilege mode system (single shared interrupt stack). + */ +___TBIBoingPCX: + MGETL A0StP,A0FrP,A0.2,A0.3 A0_4,[D1Re0] /* Restore AX regs */ + MOV TXSTATUS,D0Re0 /* Restore flags */ + GETL D0Re0,D1Re0,[D1Re0+#TBICTX_DX-TBICTX_BYTES] +___TBIBoingRTI: + RTI /* Wait for interrupt */ +$Lpcx_target: +/* + * Save initial interrupt state on current stack + */ + SETL [A0StP+#TBICTX_DX],D0Re0,D1Re0 /* Save key registers */ + ADD D1Re0,A0StP,#TBICTX_AX /* Address AX save area */ + MOV D0Re0,TXSTATUS /* Read TXSTATUS into D0Re0 */ + MOV TXSTATUS,#0 /* Clear TXSTATUS */ + MSETL [D1Re0],A0StP,A0FrP,A0.2,A0.3 A0_4 /* Save AX critical regs */ +/* + * Register state at this point is- + * + * D0Re0 - Old TXSTATUS with PRIV and CBUF bits set if appropriate + * A0StP - Is call stack frame and base of TBICTX being generated + * A1GbP - Is valid static access link + */ +___TBIBoing: + LOCK0 /* Make sure we have no locks! */ + ADD A1.2,A0StP,#TBICTX_DX+(8*1) /* Address DX.1 save area */ + MOV A0FrP,A0StP /* Setup frame pointer */ + MSETL [A1.2],D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7 + MOV D0Ar4,TXRPT /* Save critical CT regs */ + MOV D1Ar3,TXBPOBITS + MOV D1Ar1,TXDIVTIME /* Calc catch buffer pSrc */ + MOV D0Ar2,TXMODE + MOV TXMODE,#0 /* Clear TXMODE */ +#ifdef TXDIVTIME_RPDIRTY_BIT + TSTT D1Ar1,#HI(TXDIVTIME_RPDIRTY_BIT)/* NZ = RPDIRTY */ + MOVT D0Ar6,#TBICTX_CBRP_BIT + ORNZ D0Re0,D0Re0,D0Ar6 /* Set CBRP if RPDIRTY set */ +#endif + MSETL [A1.2],D0Ar4,D0Ar2 /* Save CT regs state */ + MOV D0Ar2,D0Re0 /* Copy TXSTATUS */ + ANDMT D0Ar2,D0Ar2,#TBICTX_CBUF_BIT+TBICTX_CBRP_BIT +#ifdef TBI_1_4 + MOVT D1Ar1,#TBICTX_FPAC_BIT /* Copy FPActive into FPAC */ + TSTT D0Re0,#HI(TXSTATUS_FPACTIVE_BIT) + ORNZ D0Ar2,D0Ar2,D1Ar1 +#endif + MOV D1Ar1,PCX /* Read CurrPC */ + ORT D0Ar2,D0Ar2,#TBICTX_CRIT_BIT /* SaveMask + CRIT bit */ + SETL [A0FrP+#TBICTX_Flags],D0Ar2,D1Ar1 /* Set pCtx header fields */ +/* + * Completed context save, now we need to make a call to an interrupt handler + * + * D0Re0 - holds PRIV, WAIT, CBUF flags, HALT reason if appropriate + * A0FrP - interrupt stack frame and base of TBICTX being generated + * A0StP - same as A0FrP + */ +___TBIBoingWait: + /* Reserve space for TBICTX and CBUF */ + ADD A0StP,A0StP,#TBICTX_BYTES+(CATCH_ENTRY_BYTES*CATCH_ENTRIES) + MOV D0Ar4,TXSTATI /* Read the Triggers data */ + MOV D1Ar3,TXDIVTIME /* Read IRQEnc bits */ + MOV D0Ar2,D0Re0 /* Copy PRIV and WAIT flags */ + ANDT D0Ar2,D0Ar2,#TBICTX_PRIV_BIT+TBICTX_WAIT_BIT+TBICTX_CBUF_BIT +#ifdef TBI_1_4 + MOVT D1Ar5,#TBICTX_FPAC_BIT /* Copy FPActive into FPAC */ + TSTT D0Re0,#HI(TXSTATUS_FPACTIVE_BIT) + ORNZ D0Ar2,D0Ar2,D1Ar5 +#endif + ANDT D1Ar3,D1Ar3,#HI(TXDIVTIME_IRQENC_BITS) + LSR D1Ar3,D1Ar3,#TXDIVTIME_IRQENC_S + AND TXSTATI,D0Ar4,#TXSTATI_BGNDHALT_BIT/* Ack any HALT seen */ + ANDS D0Ar4,D0Ar4,#0xFFFF-TXSTATI_BGNDHALT_BIT /* Only seen HALT? */ + ORT D0Ar2,D0Ar2,#TBICTX_CRIT_BIT /* Set CRIT */ +#ifndef BOOTROM + MOVT A1LbP,#HI(___pTBIs) + ADD A1LbP,A1LbP,#LO(___pTBIs) + GETL D1Ar5,D0Ar6,[A1LbP] /* D0Ar6 = ___pTBIs[1] */ +#else +/* + * For BOOTROM support ___pTBIs must be allocated at offset 0 vs A1GbP + */ + GETL D1Ar5,D0Ar6,[A1GbP] /* D0Ar6 = ___pTBIs[1] */ +#endif + BZ ___TBIBoingHalt /* Yes: Service HALT */ +/* + * Encode interrupt as signal vector, strip away same/lower TXMASKI bits + */ + MOV D1Ar1,#1 /* Generate mask for this bit */ + MOV D0Re0,TXMASKI /* Get interrupt mask */ + LSL TXSTATI,D1Ar1,D1Ar3 /* Acknowledge trigger */ + AND TXMASKI,D0Re0,#TXSTATI_BGNDHALT_BIT /* Only allow HALTs */ + OR D0Ar2,D0Ar2,D0Re0 /* Set TBIRES.Sig.TrigMask */ + ADD D1Ar3,D1Ar3,#TBID_SIGNUM_TRT /* Offset into interrupt sigs */ + LSL D0Re0,D1Ar3,#TBID_SIGNUM_S /* Generate offset from SigNum */ +/* + * This is a key moment we are about to call the handler, register state is + * as follows- + * + * D0Re0 - Handler vector (SigNum<<TBID_SIGNUM_S) + * D0Ar2 - TXMASKI:TBICTX_CRIT_BIT with optional CBUF and PRIV bits + * D1Ar3 - SigNum + * D0Ar4 - State read from TXSTATI + * D1Ar5 - Inst for SWITCH trigger case only, otherwise undefined + * D0Ar6 - pTBI + */ +___TBIBoingVec: + ADD D0Re0,D0Re0,#TBI_fnSigs /* Offset into signal table */ + GETD D1Re0,[D0Ar6+D0Re0] /* Get address for Handler */ +/* + * Call handler at interrupt level, when it returns simply resume execution + * of state indicated by D1Re0. + */ + MOV D1Ar1,A0FrP /* Pass in pCtx */ + CALLR D1RtP,___TBIBoingRTH /* Use RTH to invoke handler */ + +/* + * Perform critical state restore and execute background thread. + * + * A0FrP - is pointer to TBICTX structure to resume + * D0Re0 - contains additional TXMASKI triggers + */ + .text + .balign 4 +#ifdef BOOTROM + .global ___TBIResume +#endif +___TBIResume: +/* + * New META IP method + */ + RTH /* Go to interrupt level */ + MOV D0Ar4,TXMASKI /* Read TXMASKI */ + OR TXMASKI,D0Ar4,D0Re0 /* -Write-Modify TXMASKI */ + GETL D0Re0,D1Re0,[A0FrP+#TBICTX_Flags]/* Get Flags:SaveMask, CurrPC */ + MOV A0StP,A0FrP /* Position stack pointer */ + MOV D0Ar2,TXPOLLI /* Read pending triggers */ + MOV PCX,D1Re0 /* Set resumption PC */ + TST D0Ar2,#0xFFFF /* Any pending triggers? */ + BNZ ___TBIBoingWait /* Yes: Go for triggers */ + TSTT D0Re0,#TBICTX_WAIT_BIT /* Do we WAIT anyway? */ + BNZ ___TBIBoingWait /* Yes: Go for triggers */ + LSLS D1Ar5,D0Re0,#1 /* Test XCBF (MI) & PRIV (CS)? */ + ADD D1Re0,A0FrP,#TBICTX_CurrRPT /* Address CT save area */ + ADD A0StP,A0FrP,#TBICTX_DX+(8*1) /* Address DX.1 save area */ + MGETL A0.2,A0.3,[D1Re0] /* Get CT reg states */ + MOV D1Ar3,A1.3 /* Copy old TXDIVTIME */ + BPL ___TBIResCrit /* No: Skip logic */ + ADD D0Ar4,A0FrP,#TBICTX_BYTES /* Source is after TBICTX */ + ANDST D1Ar3,D1Ar3,#HI(TXDIVTIME_RPMASK_BITS)/* !Z if RPDIRTY */ + MGETL D0.5,D0.6,[D0Ar4] /* Read Catch state */ + MOV TXCATCH0,D0.5 /* Restore TXCATCHn */ + MOV TXCATCH1,D1.5 + MOV TXCATCH2,D0.6 + MOV TXCATCH3,D1.6 + BZ ___TBIResCrit + MOV D0Ar2,#(1*8) + LSRS D1Ar3,D1Ar3,#TXDIVTIME_RPMASK_S+1 /* 2nd RPMASK bit -> bit 0 */ + ADD RA,D0Ar4,#(0*8) /* Re-read read pipeline */ + ADDNZ RA,D0Ar4,D0Ar2 /* If Bit 0 set issue RA */ + LSRS D1Ar3,D1Ar3,#2 /* Bit 1 -> C, Bit 2 -> Bit 0 */ + ADD D0Ar2,D0Ar2,#8 + ADDCS RA,D0Ar4,D0Ar2 /* If C issue RA */ + ADD D0Ar2,D0Ar2,#8 + ADDNZ RA,D0Ar4,D0Ar2 /* If Bit 0 set issue RA */ + LSRS D1Ar3,D1Ar3,#2 /* Bit 1 -> C, Bit 2 -> Bit 0 */ + ADD D0Ar2,D0Ar2,#8 + ADDCS RA,D0Ar4,D0Ar2 /* If C issue RA */ + ADD D0Ar2,D0Ar2,#8 + ADDNZ RA,D0Ar4,D0Ar2 /* If Bit 0 set issue RA */ + MOV TXDIVTIME,A1.3 /* Set RPDIRTY again */ +___TBIResCrit: + LSLS D1Ar5,D0Re0,#1 /* Test XCBF (MI) & PRIV (CS)? */ +#ifdef TBI_1_4 + ANDT D1Ar5,D1Ar5,#(TBICTX_FPAC_BIT*2) + LSL D0Ar6,D1Ar5,#3 /* Convert FPAC into FPACTIVE */ +#endif + ANDMT D0Re0,D0Re0,#TBICTX_CBUF_BIT /* Keep CBUF bit from SaveMask */ +#ifdef TBI_1_4 + OR D0Re0,D0Re0,D0Ar6 /* Combine FPACTIVE with others */ +#endif + MGETL D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7,[A0StP] /* Restore DX */ + MOV TXRPT,A0.2 /* Restore CT regs */ + MOV TXBPOBITS,A1.2 + MOV TXMODE,A0.3 + BCC ___TBIBoingPCX /* Do non-PRIV wait! */ + MOV A1GblIGbP,A1GbP /* Save A1GbP too */ + MGETL A0StP,A0FrP,A0.2,A0.3 A0_4,[D1Re0] /* Restore AX regs */ +/* + * Wait for the first interrupt/exception trigger in a privilege mode system + * (interrupt stack area for current TASK to be pointed to by A0GblIStP + * or per_cpu__stack_save[hwthread_id]). + */ + MOV TXSTATUS,D0Re0 /* Restore flags */ + MOV D0Re0,TXPRIVEXT /* Set TXPRIVEXT_TXTOGGLEI_BIT */ + SUB D1Re0,D1Re0,#TBICTX_BYTES /* TBICTX is top of int stack */ +#ifdef TBX_PERCPU_SP_SAVE + SWAP D1Ar3,A1GbP + MOV D1Ar3,TXENABLE /* Which thread are we? */ + AND D1Ar3,D1Ar3,#TXENABLE_THREAD_BITS + LSR D1Ar3,D1Ar3,#TXENABLE_THREAD_S-2 + ADDT D1Ar3,D1Ar3,#HI(_per_cpu__stack_save) + ADD D1Ar3,D1Ar3,#LO(_per_cpu__stack_save) + SETD [D1Ar3],D1Re0 + SWAP D1Ar3,A1GbP +#else + MOV A0GblIStP, D1Re0 +#endif + OR D0Re0,D0Re0,#TXPRIVEXT_TXTOGGLEI_BIT + MOV TXPRIVEXT,D0Re0 /* Cannot set TXPRIVEXT if !priv */ + GETL D0Re0,D1Re0,[D1Re0+#TBICTX_DX] + RTI /* Wait for interrupt */ +/* + * Save initial interrupt state on A0GblIStP, switch to A0GblIStP if + * BOOTROM code, save and switch to [A1GbP] otherwise. + */ +___TBIBoingPCXP: +#ifdef TBX_PERCPU_SP_SAVE + SWAP D1Ar3,A1GbP /* Get PRIV stack base */ + MOV D1Ar3,TXENABLE /* Which thread are we? */ + AND D1Ar3,D1Ar3,#TXENABLE_THREAD_BITS + LSR D1Ar3,D1Ar3,#TXENABLE_THREAD_S-2 + ADDT D1Ar3,D1Ar3,#HI(_per_cpu__stack_save) + ADD D1Ar3,D1Ar3,#LO(_per_cpu__stack_save) + GETD D1Ar3,[D1Ar3] +#else + SWAP D1Ar3,A0GblIStP /* Get PRIV stack base */ +#endif + SETL [D1Ar3+#TBICTX_DX],D0Re0,D1Re0 /* Save key registers */ + MOV D0Re0,TXPRIVEXT /* Clear TXPRIVEXT_TXTOGGLEI_BIT */ + ADD D1Re0,D1Ar3,#TBICTX_AX /* Address AX save area */ + ANDMB D0Re0,D0Re0,#0xFFFF-TXPRIVEXT_TXTOGGLEI_BIT + MOV TXPRIVEXT,D0Re0 /* Cannot set TXPRIVEXT if !priv */ + MOV D0Re0,TXSTATUS /* Read TXSTATUS into D0Re0 */ + MOV TXSTATUS,#0 /* Clear TXSTATUS */ + MSETL [D1Re0],A0StP,A0FrP,A0.2,A0.3 A0_4 /* Save AX critical regs */ + MOV A0StP,D1Ar3 /* Switch stacks */ +#ifdef TBX_PERCPU_SP_SAVE + MOV D1Ar3,A1GbP /* Get D1Ar2 back */ +#else + MOV D1Ar3,A0GblIStP /* Get D1Ar2 back */ +#endif + ORT D0Re0,D0Re0,#TBICTX_PRIV_BIT /* Add PRIV to TXSTATUS */ + MOV A1GbP,A1GblIGbP /* Restore A1GbP */ + B ___TBIBoing /* Enter common handler code */ +/* + * At this point we know it's a background HALT case we are handling. + * The restored TXSTATUS always needs to have zero in the reason bits. + */ +___TBIBoingHalt: + MOV D0Ar4,TXMASKI /* Get interrupt mask */ + ANDST D0Re0,D0Re0,#HI(TXSTATUS_MAJOR_HALT_BITS+TXSTATUS_MEM_FAULT_BITS) + AND TXMASKI,D0Ar4,#TXSTATI_BGNDHALT_BIT /* Only allow HALTs */ + AND D0Ar4,D0Ar4,#0xFFFF-TXSTATI_BGNDHALT_BIT /* What ints are off? */ + OR D0Ar2,D0Ar2,D0Ar4 /* Set TBIRES.Sig.TrigMask */ + MOV D0Ar4,#TXSTATI_BGNDHALT_BIT /* This was the trigger state */ + LSR D1Ar3,D0Re0,#TXSTATUS_MAJOR_HALT_S + MOV D0Re0,#TBID_SIGNUM_XXF<<TBID_SIGNUM_S + BNZ ___TBIBoingVec /* Jump to XXF exception handler */ +/* + * Only the SWITCH cases are left, PCX must be valid + */ +#ifdef TBI_1_4 + MOV D1Ar5,TXPRIVEXT + TST D1Ar5,#TXPRIVEXT_MINIMON_BIT + LSR D1Ar3,D1Ar1,#1 /* Shift needed for MINIM paths (fill stall) */ + BZ $Lmeta /* If META only, skip */ + TSTT D1Ar1,#HI(0x00800000) + ANDMT D1Ar3,D1Ar3,#HI(0x007FFFFF >> 1)/* Shifted mask for large MINIM */ + ANDT D1Ar1,D1Ar1,#HI(0xFFE00000) /* Static mask for small MINIM */ + BZ $Llarge_minim /* If large MINIM */ +$Lsmall_minim: + TSTT D1Ar3,#HI(0x00100000 >> 1) + ANDMT D1Ar3,D1Ar3,#HI(0x001FFFFF >> 1)/* Correct shifted mask for large MINIM */ + ADDZ D1Ar1,D1Ar1,D1Ar3 /* If META rgn, add twice to undo LSR #1 */ + B $Lrecombine +$Llarge_minim: + ANDST D1Ar1,D1Ar1,#HI(0xFF800000) /* Correct static mask for small MINIM */ + /* Z=0 (Cannot place code at NULL) */ +$Lrecombine: + ADD D1Ar1,D1Ar1,D1Ar3 /* Combine static and shifted parts */ +$Lmeta: + GETW D1Ar5,[D1Ar1++] /* META: lo-16, MINIM: lo-16 (all-16 if short) */ + GETW D1Ar3,[D1Ar1] /* META: hi-16, MINIM: hi-16 (only if long) */ + MOV D1Re0,D1Ar5 + XOR D1Re0,D1Re0,#0x4000 + LSLSNZ D1Re0,D1Re0,#(32-14) /* MINIM: If long C=0, if short C=1 */ + LSLCC D1Ar3,D1Ar3,#16 /* META/MINIM long: Move hi-16 up */ + LSLCS D1Ar3,D1Ar5,#16 /* MINIM short: Dup all-16 */ + ADD D1Ar5,D1Ar5,D1Ar3 /* ALL: Combine both 16-bit parts */ +#else + GETD D1Ar5,[D1Ar1] /* Read instruction for switch */ +#endif + LSR D1Ar3,D1Ar5,#22 /* Convert into signal number */ + AND D1Ar3,D1Ar3,#TBID_SIGNUM_SW3-TBID_SIGNUM_SW0 + LSL D0Re0,D1Ar3,#TBID_SIGNUM_S /* Generate offset from SigNum */ + B ___TBIBoingVec /* Jump to switch handler */ +/* + * Exit from TBIASyncTrigger call + */ +___TBIBoingExit: + GETL D0FrT,D1RtP,[A0FrP++] /* Restore state from frame */ + SUB A0StP,A0FrP,#8 /* Unwind stack */ + MOV A0FrP,D0FrT /* Last memory read completes */ + MOV PC,D1RtP /* Return to caller */ +#endif /* ifdef CODE_USES_BOOTROM */ + .size ___TBIResume,.-___TBIResume + +#ifndef BOOTROM +/* + * void __TBIASyncResume( TBIRES State ) + */ + .text + .balign 4 + .global ___TBIASyncResume + .type ___TBIASyncResume,function +___TBIASyncResume: +/* + * Perform CRIT|SOFT state restore and execute background thread. + */ + MOV D1Ar3,D1Ar1 /* Restore this context */ + MOV D0Re0,D0Ar2 /* Carry in additional triggers */ + /* Reserve space for TBICTX */ + ADD D1Ar3,D1Ar3,#TBICTX_BYTES+(CATCH_ENTRY_BYTES*CATCH_ENTRIES) + MOV A0StP,D1Ar3 /* Enter with protection of */ + MOV A0FrP,D1Ar1 /* TBICTX on our stack */ +#ifdef CODE_USES_BOOTROM + MOVT D1Ar1,#HI(LINCORE_BASE) + JUMP D1Ar1,#0xA4 +#else + B ___TBIResume +#endif + .size ___TBIASyncResume,.-___TBIASyncResume +#endif /* ifndef BOOTROM */ + +/* + * End of tbipcx.S + */ diff --git a/arch/metag/tbx/tbiroot.S b/arch/metag/tbx/tbiroot.S new file mode 100644 index 000000000000..7d84daf1340b --- /dev/null +++ b/arch/metag/tbx/tbiroot.S @@ -0,0 +1,87 @@ +/* + * tbiroot.S + * + * Copyright (C) 2001, 2002, 2012 Imagination Technologies. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + * + * Module that creates and via ___TBI function returns a TBI Root Block for + * interrupt and background processing on the current thread. + */ + + .file "tbiroot.S" +#include <asm/metag_regs.h> + +/* + * Get data structures and defines from the TBI C header + */ +#include <asm/tbx.h> + + +/* If signals need to be exchanged we must create a TBI Root Block */ + + .data + .balign 8 + .global ___pTBIs + .type ___pTBIs,object +___pTBIs: + .long 0 /* Bgnd+Int root block ptrs */ + .long 0 + .size ___pTBIs,.-___pTBIs + + +/* + * Return ___pTBIs value specific to execution level with promotion/demotion + * + * Register Usage: D1Ar1 is Id, D0Re0 is the primary result + * D1Re0 is secondary result (___pTBIs for other exec level) + */ + .text + .balign 4 + .global ___TBI + .type ___TBI,function +___TBI: + TSTT D1Ar1,#HI(TBID_ISTAT_BIT) /* Bgnd or Int level? */ + MOVT A1LbP,#HI(___pTBIs) + ADD A1LbP,A1LbP,#LO(___pTBIs) + GETL D0Re0,D1Re0,[A1LbP] /* Base of root block table */ + SWAPNZ D0Re0,D1Re0 /* Swap if asked */ + MOV PC,D1RtP + .size ___TBI,.-___TBI + + +/* + * Return identifier of the current thread in TBI segment or signal format with + * secondary mask to indicate privilege and interrupt level of thread + */ + .text + .balign 4 + .global ___TBIThrdPrivId + .type ___TBIThrdPrivId,function +___TBIThrdPrivId: + .global ___TBIThreadId + .type ___TBIThreadId,function +___TBIThreadId: +#ifndef METAC_0_1 + MOV D1Re0,TXSTATUS /* Are we privileged or int? */ + MOV D0Re0,TXENABLE /* Which thread are we? */ +/* Disable privilege adaption for now */ + ANDT D1Re0,D1Re0,#HI(TXSTATUS_ISTAT_BIT) /* +TXSTATUS_PSTAT_BIT) */ + LSL D1Re0,D1Re0,#TBID_ISTAT_S-TXSTATUS_ISTAT_S + AND D0Re0,D0Re0,#TXENABLE_THREAD_BITS + LSL D0Re0,D0Re0,#TBID_THREAD_S-TXENABLE_THREAD_S +#else +/* Thread 0 only */ + XOR D0Re0,D0Re0,D0Re0 + XOR D1Re0,D1Re0,D1Re0 +#endif + MOV PC,D1RtP /* Return */ + .size ___TBIThrdPrivId,.-___TBIThrdPrivId + .size ___TBIThreadId,.-___TBIThreadId + + +/* + * End of tbiroot.S + */ diff --git a/arch/metag/tbx/tbisoft.S b/arch/metag/tbx/tbisoft.S new file mode 100644 index 000000000000..0346fe8a53b1 --- /dev/null +++ b/arch/metag/tbx/tbisoft.S @@ -0,0 +1,237 @@ +/* + * tbisoft.S + * + * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + * + * Support for soft threads and soft context switches + */ + + .file "tbisoft.S" + +#include <asm/tbx.h> + +#ifdef METAC_1_0 +/* Ax.4 is saved in TBICTX */ +#define A0_4 ,A0.4 +#define D0_5 ,D0.5 +#else +/* Ax.4 is NOT saved in TBICTX */ +#define A0_4 +#define D0_5 +#endif + +/* Size of the TBICTX structure */ +#define TBICTX_BYTES ((TBICTX_AX_REGS*8)+TBICTX_AX) + + .text + .balign 4 + .global ___TBISwitchTail + .type ___TBISwitchTail,function +___TBISwitchTail: + B $LSwitchTail + .size ___TBISwitchTail,.-___TBISwitchTail + +/* + * TBIRES __TBIJumpX( TBIX64 ArgsA, PTBICTX *rpSaveCtx, int TrigsMask, + * void (*fnMain)(), void *pStack ); + * + * This is a combination of __TBISwitch and __TBIJump with the context of + * the calling thread being saved in the rpSaveCtx location with a drop-thru + * effect into the __TBIJump logic. ArgsB passes via __TBIJump to the + * routine eventually invoked will reflect the rpSaveCtx value specified. + */ + .text + .balign 4 + .global ___TBIJumpX + .type ___TBIJumpX,function +___TBIJumpX: + CMP D1RtP,#-1 + B $LSwitchStart + .size ___TBIJumpX,.-___TBIJumpX + +/* + * TBIRES __TBISwitch( TBIRES Switch, PTBICTX *rpSaveCtx ) + * + * Software syncronous context switch between soft threads, save only the + * registers which are actually valid on call entry. + * + * A0FrP, D0RtP, D0.5, D0.6, D0.7 - Saved on stack + * A1GbP is global to all soft threads so not virtualised + * A0StP is then saved as the base of the TBICTX of the thread + * + */ + .text + .balign 4 + .global ___TBISwitch + .type ___TBISwitch,function +___TBISwitch: + XORS D0Re0,D0Re0,D0Re0 /* Set ZERO flag */ +$LSwitchStart: + MOV D0FrT,A0FrP /* Boing entry sequence */ + ADD A0FrP,A0StP,#0 + SETL [A0StP+#8++],D0FrT,D1RtP +/* + * Save current frame state - we save all regs because we don't want + * uninitialised crap in the TBICTX structure that the asyncronous resumption + * of a thread will restore. + */ + MOVT D1Re0,#HI($LSwitchExit) /* ASync resume point here */ + ADD D1Re0,D1Re0,#LO($LSwitchExit) + SETD [D1Ar3],A0StP /* Record pCtx of this thread */ + MOVT D0Re0,#TBICTX_SOFT_BIT /* Only soft thread state */ + SETL [A0StP++],D0Re0,D1Re0 /* Push header fields */ + ADD D0FrT,A0StP,#TBICTX_AX-TBICTX_DX /* Address AX save area */ + MOV D0Re0,#0 /* Setup 0:0 result for ASync */ + MOV D1Re0,#0 /* resume of the thread */ + MSETL [A0StP],D0Re0,D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7 + SETL [A0StP++],D0Re0,D1Re0 /* Zero CurrRPT, CurrBPOBITS, */ + SETL [A0StP++],D0Re0,D1Re0 /* Zero CurrMODE, CurrDIVTIME */ + ADD A0StP,A0StP,#(TBICTX_AX_REGS*8) /* Reserve AX save space */ + MSETL [D0FrT],A0StP,A0FrP,A0.2,A0.3 A0_4 /* Save AX regs */ + BNZ ___TBIJump +/* + * NextThread MUST be in TBICTX_SOFT_BIT state! + */ +$LSwitchTail: + MOV D0Re0,D0Ar2 /* Result from args */ + MOV D1Re0,D1Ar1 + ADD D1RtP,D1Ar1,#TBICTX_AX + MGETL A0StP,A0FrP,[D1RtP] /* Get frame values */ +$LSwitchCmn: + ADD A0.2,D1Ar1,#TBICTX_DX+(8*5) + MGETL D0.5,D0.6,D0.7,[A0.2] /* Get caller-saved DX regs */ +$LSwitchExit: + GETL D0FrT,D1RtP,[A0FrP++] /* Restore state from frame */ + SUB A0StP,A0FrP,#8 /* Unwind stack */ + MOV A0FrP,D0FrT /* Last memory read completes */ + MOV PC,D1RtP /* Return to caller */ + .size ___TBISwitch,.-___TBISwitch + +/* + * void __TBISyncResume( TBIRES State, int TrigMask ); + * + * This routine causes the TBICTX structure specified in State.Sig.pCtx to + * be restored. This implies that execution will not return to the caller. + * The State.Sig.TrigMask field will be ored into TXMASKI during the + * context switch such that any immediately occuring interrupts occur in + * the context of the newly specified task. The State.Sig.SaveMask parameter + * is ignored. + */ + .text + .balign 4 + .global ___TBISyncResume + .type ___TBISyncResume,function +___TBISyncResume: + MOV D0Re0,D0Ar2 /* Result from args */ + MOV D1Re0,D1Ar1 + XOR D1Ar5,D1Ar5,D1Ar5 /* D1Ar5 = 0 */ + ADD D1RtP,D1Ar1,#TBICTX_AX + SWAP D1Ar5,TXMASKI /* D1Ar5 <-> TXMASKI */ + MGETL A0StP,A0FrP,[D1RtP] /* Get frame values */ + OR TXMASKI,D1Ar5,D1Ar3 /* New TXMASKI */ + B $LSwitchCmn + .size ___TBISyncResume,.-___TBISyncResume + +/* + * void __TBIJump( TBIX64 ArgsA, TBIX32 ArgsB, int TrigsMask, + * void (*fnMain)(), void *pStack ); + * + * Jump directly to a new routine on an arbitrary stack with arbitrary args + * oring bits back into TXMASKI on route. + */ + .text + .balign 4 + .global ___TBIJump + .type ___TBIJump,function +___TBIJump: + XOR D0Re0,D0Re0,D0Re0 /* D0Re0 = 0 */ + MOV A0StP,D0Ar6 /* Stack = Frame */ + SWAP D0Re0,TXMASKI /* D0Re0 <-> TXMASKI */ + MOV A0FrP,D0Ar6 + MOVT A1LbP,#HI(__exit) + ADD A1LbP,A1LbP,#LO(__exit) + MOV D1RtP,A1LbP /* D1RtP = __exit */ + OR TXMASKI,D0Re0,D0Ar4 /* New TXMASKI */ + MOV PC,D1Ar5 /* Jump to fnMain */ + .size ___TBIJump,.-___TBIJump + +/* + * PTBICTX __TBISwitchInit( void *pStack, int (*fnMain)(), + * .... 4 extra 32-bit args .... ); + * + * Generate a new soft thread context ready for it's first outing. + * + * D1Ar1 - Region of memory to be used as the new soft thread stack + * D0Ar2 - Main line routine for new soft thread + * D1Ar3, D0Ar4, D1Ar5, D0Ar6 - arguments to be passed on stack + * The routine returns the initial PTBICTX value for the new thread + */ + .text + .balign 4 + .global ___TBISwitchInit + .type ___TBISwitchInit,function +___TBISwitchInit: + MOV D0FrT,A0FrP /* Need save return point */ + ADD A0FrP,A0StP,#0 + SETL [A0StP++],D0FrT,D1RtP /* Save return to caller */ + MOVT A1LbP,#HI(__exit) + ADD A1LbP,A1LbP,#LO(__exit) + MOV D1RtP,A1LbP /* Get address of __exit */ + ADD D1Ar1,D1Ar1,#7 /* Align stack to 64-bits */ + ANDMB D1Ar1,D1Ar1,#0xfff8 /* by rounding base up */ + MOV A0.2,D1Ar1 /* A0.2 is new stack */ + MOV D0FrT,D1Ar1 /* Initial puesdo-frame pointer */ + SETL [A0.2++],D0FrT,D1RtP /* Save return to __exit */ + MOV D1RtP,D0Ar2 + SETL [A0.2++],D0FrT,D1RtP /* Save return to fnMain */ + ADD D0FrT,D0FrT,#8 /* Advance puesdo-frame pointer */ + MSETL [A0.2],D0Ar6,D0Ar4 /* Save extra initial args */ + MOVT D1RtP,#HI(___TBIStart) /* Start up code for new stack */ + ADD D1RtP,D1RtP,#LO(___TBIStart) + SETL [A0.2++],D0FrT,D1RtP /* Save return to ___TBIStart */ + ADD D0FrT,D0FrT,#(8*3) /* Advance puesdo-frame pointer */ + MOV D0Re0,A0.2 /* Return pCtx for new thread */ + MOV D1Re0,#0 /* pCtx:0 is default Arg1:Arg2 */ +/* + * Generate initial TBICTX state + */ + MOVT D1Ar1,#HI($LSwitchExit) /* Async restore code */ + ADD D1Ar1,D1Ar1,#LO($LSwitchExit) + MOVT D0Ar2,#TBICTX_SOFT_BIT /* Only soft thread state */ + ADD D0Ar6,A0.2,#TBICTX_BYTES /* New A0StP */ + MOV D1Ar5,A1GbP /* Same A1GbP */ + MOV D0Ar4,D0FrT /* Initial A0FrP */ + MOV D1Ar3,A1LbP /* Same A1LbP */ + SETL [A0.2++],D0Ar2,D1Ar1 /* Set header fields */ + MSETL [A0.2],D0Re0,D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7 + MOV D0Ar2,#0 /* Zero values */ + MOV D1Ar1,#0 + SETL [A0.2++],D0Ar2,D1Ar1 /* Zero CurrRPT, CurrBPOBITS, */ + SETL [A0.2++],D0Ar2,D1Ar1 /* CurrMODE, and pCurrCBuf */ + MSETL [A0.2],D0Ar6,D0Ar4,D0Ar2,D0FrT D0_5 /* Set DX and then AX regs */ + B $LSwitchExit /* All done! */ + .size ___TBISwitchInit,.-___TBISwitchInit + + .text + .balign 4 + .global ___TBIStart + .type ___TBIStart,function +___TBIStart: + MOV D1Ar1,D1Re0 /* Pass TBIRES args to call */ + MOV D0Ar2,D0Re0 + MGETL D0Re0,D0Ar6,D0Ar4,[A0FrP] /* Get hidden args */ + SUB A0StP,A0FrP,#(8*3) /* Entry stack pointer */ + MOV A0FrP,D0Re0 /* Entry frame pointer */ + MOVT A1LbP,#HI(__exit) + ADD A1LbP,A1LbP,#LO(__exit) + MOV D1RtP,A1LbP /* D1RtP = __exit */ + MOV PC,D1Re0 /* Jump into fnMain */ + .size ___TBIStart,.-___TBIStart + +/* + * End of tbisoft.S + */ diff --git a/arch/metag/tbx/tbistring.c b/arch/metag/tbx/tbistring.c new file mode 100644 index 000000000000..f90cd0822065 --- /dev/null +++ b/arch/metag/tbx/tbistring.c @@ -0,0 +1,114 @@ +/* + * tbistring.c + * + * Copyright (C) 2001, 2002, 2003, 2005, 2007, 2012 Imagination Technologies. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + * + * String table functions provided as part of the thread binary interface for + * Meta processors + */ + +#include <linux/export.h> +#include <linux/string.h> +#include <asm/tbx.h> + +/* + * There are not any functions to modify the string table currently, if these + * are required at some later point I suggest having a seperate module and + * ensuring that creating new entries does not interfere with reading old + * entries in any way. + */ + +const TBISTR *__TBIFindStr(const TBISTR *start, + const char *str, int match_len) +{ + const TBISTR *search = start; + bool exact = true; + const TBISEG *seg; + + if (match_len < 0) { + /* Make match_len always positive for the inner loop */ + match_len = -match_len; + exact = false; + } else { + /* + * Also support historic behaviour, which expected match_len to + * include null terminator + */ + if (match_len && str[match_len-1] == '\0') + match_len--; + } + + if (!search) { + /* Find global string table segment */ + seg = __TBIFindSeg(NULL, TBID_SEG(TBID_THREAD_GLOBAL, + TBID_SEGSCOPE_GLOBAL, + TBID_SEGTYPE_STRING)); + + if (!seg || seg->Bytes < sizeof(TBISTR)) + /* No string table! */ + return NULL; + + /* Start of string table */ + search = seg->pGAddr; + } + + for (;;) { + while (!search->Tag) + /* Allow simple gaps which are just zero initialised */ + search = (const TBISTR *)((const char *)search + 8); + + if (search->Tag == METAG_TBI_STRE) { + /* Reached the end of the table */ + search = NULL; + break; + } + + if ((search->Len >= match_len) && + (!exact || (search->Len == match_len + 1)) && + (search->Tag != METAG_TBI_STRG)) { + /* Worth searching */ + if (!strncmp(str, (const char *)search->String, + match_len)) + break; + } + + /* Next entry */ + search = (const TBISTR *)((const char *)search + search->Bytes); + } + + return search; +} + +const void *__TBITransStr(const char *str, int len) +{ + const TBISTR *search = NULL; + const void *res = NULL; + + for (;;) { + /* Search onwards */ + search = __TBIFindStr(search, str, len); + + /* No translation returns NULL */ + if (!search) + break; + + /* Skip matching entries with no translation data */ + if (search->TransLen != METAG_TBI_STRX) { + /* Calculate base of translation string */ + res = (const char *)search->String + + ((search->Len + 7) & ~7); + break; + } + + /* Next entry */ + search = (const TBISTR *)((const char *)search + search->Bytes); + } + + /* Return base address of translation data or NULL */ + return res; +} +EXPORT_SYMBOL(__TBITransStr); diff --git a/arch/metag/tbx/tbitimer.S b/arch/metag/tbx/tbitimer.S new file mode 100644 index 000000000000..5dbeddeee7ba --- /dev/null +++ b/arch/metag/tbx/tbitimer.S @@ -0,0 +1,207 @@ +/* + * tbitimer.S + * + * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + * + * TBI timer support routines and data values + */ + + .file "tbitimer.S" +/* + * Get data structures and defines from the main C header + */ +#include <asm/tbx.h> + + .data + .balign 8 + .global ___TBITimeB + .type ___TBITimeB,object +___TBITimeB: + .quad 0 /* Background 'lost' ticks */ + .size ___TBITimeB,.-___TBITimeB + + .data + .balign 8 + .global ___TBITimeI + .type ___TBITimeI,object +___TBITimeI: + .quad 0 /* Interrupt 'lost' ticks */ + .size ___TBITimeI,.-___TBITimeI + + .data + .balign 8 + .global ___TBITimes + .type ___TBITimes,object +___TBITimes: + .long ___TBITimeB /* Table of 'lost' tick values */ + .long ___TBITimeI + .size ___TBITimes,.-___TBITimes + +/* + * Flag bits for control of ___TBITimeCore + */ +#define TIMER_SET_BIT 1 +#define TIMER_ADD_BIT 2 + +/* + * Initialise or stop timer support + * + * Register Usage: D1Ar1 holds Id, D1Ar2 is initial delay or 0 + * D0FrT is used to call ___TBITimeCore + * D0Re0 is used for the result which is TXSTAT_TIMER_BIT + * D0Ar4, D1Ar5, D0Ar6 are all used as scratch + * Other registers are those set by ___TBITimeCore + * A0.3 is assumed to point at ___TBITime(I/B) + */ + .text + .balign 4 + .global ___TBITimerCtrl + .type ___TBITimerCtrl,function +___TBITimerCtrl: + MOV D1Ar5,#TIMER_SET_BIT /* Timer SET request */ + MOVT D0FrT,#HI(___TBITimeCore) /* Get timer core reg values */ + CALL D0FrT,#LO(___TBITimeCore) /* and perform register update */ + NEGS D0Ar6,D0Ar2 /* Set flags from time-stamp */ + ASR D1Ar5,D0Ar6,#31 /* Sign extend D0Ar6 into D1Ar5 */ + SETLNZ [A0.3],D0Ar6,D1Ar5 /* ___TBITime(B/I)=-Start if enable */ + MOV PC,D1RtP /* Return */ + .size ___TBITimerCtrl,.-___TBITimerCtrl + +/* + * Return ___TBITimeStamp value + * + * Register Usage: D1Ar1 holds Id + * D0FrT is used to call ___TBITimeCore + * D0Re0, D1Re0 is used for the result + * D1Ar3, D0Ar4, D1Ar5 + * Other registers are those set by ___TBITimeCore + * D0Ar6 is assumed to be the timer value read + * A0.3 is assumed to point at ___TBITime(I/B) + */ + .text + .balign 4 + .global ___TBITimeStamp + .type ___TBITimeStamp,function +___TBITimeStamp: + MOV D1Ar5,#0 /* Timer GET request */ + MOVT D0FrT,#HI(___TBITimeCore) /* Get timer core reg values */ + CALL D0FrT,#LO(___TBITimeCore) /* with no register update */ + ADDS D0Re0,D0Ar4,D0Ar6 /* Add current time value */ + ADD D1Re0,D1Ar3,D1Ar5 /* to 64-bit signed extend time */ + ADDCS D1Re0,D1Re0,#1 /* Support borrow too */ + MOV PC,D1RtP /* Return */ + .size ___TBITimeStamp,.-___TBITimeStamp + +/* + * Perform ___TBITimerAdd logic + * + * Register Usage: D1Ar1 holds Id, D0Ar2 holds value to be added to the timer + * D0Re0 is used for the result - new TIMER value + * D1Ar5, D0Ar6 are used as scratch + * Other registers are those set by ___TBITimeCore + * D0Ar6 is assumed to be the timer value read + * D0Ar4, D1Ar3 is the current value of ___TBITime(B/I) + */ + .text + .balign 4 + .global ___TBITimerAdd + .type ___TBITimerAdd,function +___TBITimerAdd: + MOV D1Ar5,#TIMER_ADD_BIT /* Timer ADD request */ + MOVT D0FrT,#HI(___TBITimeCore) /* Get timer core reg values */ + CALL D0FrT,#LO(___TBITimeCore) /* with no register update */ + ADD D0Re0,D0Ar2,D0Ar6 /* Regenerate new value = result */ + NEG D0Ar2,D0Ar2 /* Negate delta */ + ASR D1Re0,D0Ar2,#31 /* Sign extend negated delta */ + ADDS D0Ar4,D0Ar4,D0Ar2 /* Add time added to ... */ + ADD D1Ar3,D1Ar3,D1Re0 /* ... real timer ... */ + ADDCS D1Ar3,D1Ar3,#1 /* ... with carry */ + SETL [A0.3],D0Ar4,D1Ar3 /* Update ___TBITime(B/I) */ + MOV PC,D1RtP /* Return */ + .size ___TBITimerAdd,.-___TBITimerAdd + +#ifdef TBI_1_4 +/* + * Perform ___TBITimerDeadline logic + * NB: Delays are positive compared to the Wait values which are -ive + * + * Register Usage: D1Ar1 holds Id + * D0Ar2 holds Delay requested + * D0Re0 is used for the result - old TIMER Delay value + * D1Ar5, D0Ar6 are used as scratch + * Other registers are those set by ___TBITimeCore + * D0Ar6 is assumed to be the timer value read + * D0Ar4, D1Ar3 is the current value of ___TBITime(B/I) + * + */ + .text + .type ___TBITimerDeadline,function + .global ___TBITimerDeadline + .align 2 +___TBITimerDeadline: + MOV D1Ar5,#TIMER_SET_BIT /* Timer SET request */ + MOVT D0FrT,#HI(___TBITimeCore) /* Get timer core reg values */ + CALL D0FrT,#LO(___TBITimeCore) /* with no register update */ + MOV D0Re0,D0Ar6 /* Old value read = result */ + SUB D0Ar2,D0Ar6,D0Ar2 /* Delta from (old - new) */ + ASR D1Re0,D0Ar2,#31 /* Sign extend delta */ + ADDS D0Ar4,D0Ar4,D0Ar2 /* Add time added to ... */ + ADD D1Ar3,D1Ar3,D1Re0 /* ... real timer ... */ + ADDCS D1Ar3,D1Ar3,#1 /* ... with carry */ + SETL [A0.3],D0Ar4,D1Ar3 /* Update ___TBITime(B/I) */ + MOV PC,D1RtP /* Return */ + .size ___TBITimerDeadline,.-___TBITimerDeadline +#endif /* TBI_1_4 */ + +/* + * Perform core timer access logic + * + * Register Usage: D1Ar1 holds Id, D0Ar2 holds input value for SET and + * input value for ADD + * D1Ar5 controls op as SET or ADD as bit values + * On return D0Ar6, D1Ar5 holds the old 64-bit timer value + * A0.3 is setup to point at ___TBITime(I/B) + * A1.3 is setup to point at ___TBITimes + * D0Ar4, D1Ar3 is setup to value of ___TBITime(I/B) + */ + .text + .balign 4 + .global ___TBITimeCore + .type ___TBITimeCore,function +___TBITimeCore: +#ifndef METAC_0_1 + TSTT D1Ar1,#HI(TBID_ISTAT_BIT) /* Interrupt level timer? */ +#endif + MOVT A1LbP,#HI(___TBITimes) + ADD A1LbP,A1LbP,#LO(___TBITimes) + MOV A1.3,A1LbP /* Get ___TBITimes address */ +#ifndef METAC_0_1 + BNZ $LTimeCoreI /* Yes: Service TXTIMERI! */ +#endif + LSRS D1Ar5,D1Ar5,#1 /* Carry = SET, Zero = !ADD */ + GETD A0.3,[A1.3+#0] /* A0.3 == &___TBITimeB */ + MOV D0Ar6,TXTIMER /* Always GET old value */ + MOVCS TXTIMER,D0Ar2 /* Conditional SET operation */ + ADDNZ TXTIMER,D0Ar2,D0Ar6 /* Conditional ADD operation */ +#ifndef METAC_0_1 + B $LTimeCoreEnd +$LTimeCoreI: + LSRS D1Ar5,D1Ar5,#1 /* Carry = SET, Zero = !ADD */ + GETD A0.3,[A1.3+#4] /* A0.3 == &___TBITimeI */ + MOV D0Ar6,TXTIMERI /* Always GET old value */ + MOVCS TXTIMERI,D0Ar2 /* Conditional SET operation */ + ADDNZ TXTIMERI,D0Ar2,D0Ar6 /* Conditional ADD operation */ +$LTimeCoreEnd: +#endif + ASR D1Ar5,D0Ar6,#31 /* Sign extend D0Ar6 into D1Ar5 */ + GETL D0Ar4,D1Ar3,[A0.3] /* Read ___TBITime(B/I) */ + MOV PC,D0FrT /* Return quickly */ + .size ___TBITimeCore,.-___TBITimeCore + +/* + * End of tbitimer.S + */ diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index e920cbe519fa..e507ab7df60b 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -62,3 +62,8 @@ config CLKSRC_DBX500_PRCMU_SCHED_CLOCK config ARM_ARCH_TIMER bool + +config CLKSRC_METAG_GENERIC + def_bool y if METAG + help + This option enables support for the Meta per-thread timers. diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 7d671b85a98e..4d8283aec5b5 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -21,3 +21,4 @@ obj-$(CONFIG_ARCH_TEGRA) += tegra20_timer.o obj-$(CONFIG_VT8500_TIMER) += vt8500_timer.o obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o +obj-$(CONFIG_CLKSRC_METAG_GENERIC) += metag_generic.o diff --git a/drivers/clocksource/metag_generic.c b/drivers/clocksource/metag_generic.c new file mode 100644 index 000000000000..ade7513a11d1 --- /dev/null +++ b/drivers/clocksource/metag_generic.c @@ -0,0 +1,198 @@ +/* + * Copyright (C) 2005-2013 Imagination Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Support for Meta per-thread timers. + * + * Meta hardware threads have 2 timers. The background timer (TXTIMER) is used + * as a free-running time base (hz clocksource), and the interrupt timer + * (TXTIMERI) is used for the timer interrupt (clock event). Both counters + * traditionally count at approximately 1MHz. + */ + +#include <clocksource/metag_generic.h> +#include <linux/cpu.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/time.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/clocksource.h> +#include <linux/clockchips.h> +#include <linux/interrupt.h> + +#include <asm/clock.h> +#include <asm/hwthread.h> +#include <asm/core_reg.h> +#include <asm/metag_mem.h> +#include <asm/tbx.h> + +#define HARDWARE_FREQ 1000000 /* 1MHz */ +#define HARDWARE_DIV 1 /* divide by 1 = 1MHz clock */ +#define HARDWARE_TO_NS_SHIFT 10 /* convert ticks to ns */ + +static unsigned int hwtimer_freq = HARDWARE_FREQ; +static DEFINE_PER_CPU(struct clock_event_device, local_clockevent); +static DEFINE_PER_CPU(char [11], local_clockevent_name); + +static int metag_timer_set_next_event(unsigned long delta, + struct clock_event_device *dev) +{ + __core_reg_set(TXTIMERI, -delta); + return 0; +} + +static void metag_timer_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + switch (mode) { + case CLOCK_EVT_MODE_ONESHOT: + case CLOCK_EVT_MODE_RESUME: + break; + + case CLOCK_EVT_MODE_SHUTDOWN: + /* We should disable the IRQ here */ + break; + + case CLOCK_EVT_MODE_PERIODIC: + case CLOCK_EVT_MODE_UNUSED: + WARN_ON(1); + break; + }; +} + +static cycle_t metag_clocksource_read(struct clocksource *cs) +{ + return __core_reg_get(TXTIMER); +} + +static struct clocksource clocksource_metag = { + .name = "META", + .rating = 200, + .mask = CLOCKSOURCE_MASK(32), + .read = metag_clocksource_read, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static irqreturn_t metag_timer_interrupt(int irq, void *dummy) +{ + struct clock_event_device *evt = &__get_cpu_var(local_clockevent); + + evt->event_handler(evt); + + return IRQ_HANDLED; +} + +static struct irqaction metag_timer_irq = { + .name = "META core timer", + .handler = metag_timer_interrupt, + .flags = IRQF_TIMER | IRQF_IRQPOLL | IRQF_PERCPU, +}; + +unsigned long long sched_clock(void) +{ + unsigned long long ticks = __core_reg_get(TXTIMER); + return ticks << HARDWARE_TO_NS_SHIFT; +} + +static void __cpuinit arch_timer_setup(unsigned int cpu) +{ + unsigned int txdivtime; + struct clock_event_device *clk = &per_cpu(local_clockevent, cpu); + char *name = per_cpu(local_clockevent_name, cpu); + + txdivtime = __core_reg_get(TXDIVTIME); + + txdivtime &= ~TXDIVTIME_DIV_BITS; + txdivtime |= (HARDWARE_DIV & TXDIVTIME_DIV_BITS); + + __core_reg_set(TXDIVTIME, txdivtime); + + sprintf(name, "META %d", cpu); + clk->name = name; + clk->features = CLOCK_EVT_FEAT_ONESHOT, + + clk->rating = 200, + clk->shift = 12, + clk->irq = tbisig_map(TBID_SIGNUM_TRT), + clk->set_mode = metag_timer_set_mode, + clk->set_next_event = metag_timer_set_next_event, + + clk->mult = div_sc(hwtimer_freq, NSEC_PER_SEC, clk->shift); + clk->max_delta_ns = clockevent_delta2ns(0x7fffffff, clk); + clk->min_delta_ns = clockevent_delta2ns(0xf, clk); + clk->cpumask = cpumask_of(cpu); + + clockevents_register_device(clk); + + /* + * For all non-boot CPUs we need to synchronize our free + * running clock (TXTIMER) with the boot CPU's clock. + * + * While this won't be accurate, it should be close enough. + */ + if (cpu) { + unsigned int thread0 = cpu_2_hwthread_id[0]; + unsigned long val; + + val = core_reg_read(TXUCT_ID, TXTIMER_REGNUM, thread0); + __core_reg_set(TXTIMER, val); + } +} + +static int __cpuinit arch_timer_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + + switch (action) { + case CPU_STARTING: + case CPU_STARTING_FROZEN: + arch_timer_setup(cpu); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata arch_timer_cpu_nb = { + .notifier_call = arch_timer_cpu_notify, +}; + +int __init metag_generic_timer_init(void) +{ + /* + * On Meta 2 SoCs, the actual frequency of the timer is based on the + * Meta core clock speed divided by an integer, so it is only + * approximately 1MHz. Calculating the real frequency here drastically + * reduces clock skew on these SoCs. + */ +#ifdef CONFIG_METAG_META21 + hwtimer_freq = get_coreclock() / (metag_in32(EXPAND_TIMER_DIV) + 1); +#endif + clocksource_register_hz(&clocksource_metag, hwtimer_freq); + + setup_irq(tbisig_map(TBID_SIGNUM_TRT), &metag_timer_irq); + + /* Configure timer on boot CPU */ + arch_timer_setup(smp_processor_id()); + + /* Hook cpu boot to configure other CPU's timers */ + register_cpu_notifier(&arch_timer_cpu_nb); + + return 0; +} diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index e65fbf2cdf71..98e3b87bdf1b 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -2,6 +2,8 @@ obj-$(CONFIG_IRQCHIP) += irqchip.o obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2835.o obj-$(CONFIG_ARCH_EXYNOS) += exynos-combiner.o +obj-$(CONFIG_METAG) += irq-metag-ext.o +obj-$(CONFIG_METAG_PERFCOUNTER_IRQS) += irq-metag.o obj-$(CONFIG_ARCH_SUNXI) += irq-sunxi.o obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o obj-$(CONFIG_ARM_GIC) += irq-gic.o diff --git a/drivers/irqchip/irq-metag-ext.c b/drivers/irqchip/irq-metag-ext.c new file mode 100644 index 000000000000..92c41ab4dbfd --- /dev/null +++ b/drivers/irqchip/irq-metag-ext.c @@ -0,0 +1,868 @@ +/* + * Meta External interrupt code. + * + * Copyright (C) 2005-2012 Imagination Technologies Ltd. + * + * External interrupts on Meta are configured at two-levels, in the CPU core and + * in the external trigger block. Interrupts from SoC peripherals are + * multiplexed onto a single Meta CPU "trigger" - traditionally it has always + * been trigger 2 (TR2). For info on how de-multiplexing happens check out + * meta_intc_irq_demux(). + */ + +#include <linux/interrupt.h> +#include <linux/irqchip/metag-ext.h> +#include <linux/irqdomain.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/syscore_ops.h> + +#include <asm/irq.h> +#include <asm/hwthread.h> + +#define HWSTAT_STRIDE 8 +#define HWVEC_BLK_STRIDE 0x1000 + +/** + * struct meta_intc_priv - private meta external interrupt data + * @nr_banks: Number of interrupt banks + * @domain: IRQ domain for all banks of external IRQs + * @unmasked: Record of unmasked IRQs + * @levels_altered: Record of altered level bits + */ +struct meta_intc_priv { + unsigned int nr_banks; + struct irq_domain *domain; + + unsigned long unmasked[4]; + +#ifdef CONFIG_METAG_SUSPEND_MEM + unsigned long levels_altered[4]; +#endif +}; + +/* Private data for the one and only external interrupt controller */ +static struct meta_intc_priv meta_intc_priv; + +/** + * meta_intc_offset() - Get the offset into the bank of a hardware IRQ number + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Bit offset into the IRQ's bank registers + */ +static unsigned int meta_intc_offset(irq_hw_number_t hw) +{ + return hw & 0x1f; +} + +/** + * meta_intc_bank() - Get the bank number of a hardware IRQ number + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Bank number indicating which register the IRQ's bits are + */ +static unsigned int meta_intc_bank(irq_hw_number_t hw) +{ + return hw >> 5; +} + +/** + * meta_intc_stat_addr() - Get the address of a HWSTATEXT register + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Address of a HWSTATEXT register containing the status bit for + * the specified hardware IRQ number + */ +static void __iomem *meta_intc_stat_addr(irq_hw_number_t hw) +{ + return (void __iomem *)(HWSTATEXT + + HWSTAT_STRIDE * meta_intc_bank(hw)); +} + +/** + * meta_intc_level_addr() - Get the address of a HWLEVELEXT register + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Address of a HWLEVELEXT register containing the sense bit for + * the specified hardware IRQ number + */ +static void __iomem *meta_intc_level_addr(irq_hw_number_t hw) +{ + return (void __iomem *)(HWLEVELEXT + + HWSTAT_STRIDE * meta_intc_bank(hw)); +} + +/** + * meta_intc_mask_addr() - Get the address of a HWMASKEXT register + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Address of a HWMASKEXT register containing the mask bit for the + * specified hardware IRQ number + */ +static void __iomem *meta_intc_mask_addr(irq_hw_number_t hw) +{ + return (void __iomem *)(HWMASKEXT + + HWSTAT_STRIDE * meta_intc_bank(hw)); +} + +/** + * meta_intc_vec_addr() - Get the vector address of a hardware interrupt + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Address of a HWVECEXT register controlling the core trigger to + * vector the IRQ onto + */ +static inline void __iomem *meta_intc_vec_addr(irq_hw_number_t hw) +{ + return (void __iomem *)(HWVEC0EXT + + HWVEC_BLK_STRIDE * meta_intc_bank(hw) + + HWVECnEXT_STRIDE * meta_intc_offset(hw)); +} + +/** + * meta_intc_startup_irq() - set up an external irq + * @data: data for the external irq to start up + * + * Multiplex interrupts for irq onto TR2. Clear any pending interrupts and + * unmask irq, both using the appropriate callbacks. + */ +static unsigned int meta_intc_startup_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = meta_intc_vec_addr(hw); + int thread = hard_processor_id(); + + /* Perform any necessary acking. */ + if (data->chip->irq_ack) + data->chip->irq_ack(data); + + /* Wire up this interrupt to the core with HWVECxEXT. */ + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr); + + /* Perform any necessary unmasking. */ + data->chip->irq_unmask(data); + + return 0; +} + +/** + * meta_intc_shutdown_irq() - turn off an external irq + * @data: data for the external irq to turn off + * + * Mask irq using the appropriate callback and stop muxing it onto TR2. + */ +static void meta_intc_shutdown_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = meta_intc_vec_addr(hw); + + /* Mask the IRQ */ + data->chip->irq_mask(data); + + /* + * Disable the IRQ at the core by removing the interrupt from + * the HW vector mapping. + */ + metag_out32(0, vec_addr); +} + +/** + * meta_intc_ack_irq() - acknowledge an external irq + * @data: data for the external irq to ack + * + * Clear down an edge interrupt in the status register. + */ +static void meta_intc_ack_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *stat_addr = meta_intc_stat_addr(hw); + + /* Ack the int, if it is still 'on'. + * NOTE - this only works for edge triggered interrupts. + */ + if (metag_in32(stat_addr) & bit) + metag_out32(bit, stat_addr); +} + +/** + * record_irq_is_masked() - record the IRQ masked so it doesn't get handled + * @data: data for the external irq to record + * + * This should get called whenever an external IRQ is masked (by whichever + * callback is used). It records the IRQ masked so that it doesn't get handled + * if it still shows up in the status register. + */ +static void record_irq_is_masked(struct irq_data *data) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + irq_hw_number_t hw = data->hwirq; + + clear_bit(meta_intc_offset(hw), &priv->unmasked[meta_intc_bank(hw)]); +} + +/** + * record_irq_is_unmasked() - record the IRQ unmasked so it can be handled + * @data: data for the external irq to record + * + * This should get called whenever an external IRQ is unmasked (by whichever + * callback is used). It records the IRQ unmasked so that it gets handled if it + * shows up in the status register. + */ +static void record_irq_is_unmasked(struct irq_data *data) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + irq_hw_number_t hw = data->hwirq; + + set_bit(meta_intc_offset(hw), &priv->unmasked[meta_intc_bank(hw)]); +} + +/* + * For use by wrapper IRQ drivers + */ + +/** + * meta_intc_mask_irq_simple() - minimal mask used by wrapper IRQ drivers + * @data: data for the external irq being masked + * + * This should be called by any wrapper IRQ driver mask functions. it doesn't do + * any masking but records the IRQ as masked so that the core code knows the + * mask has taken place. It is the callers responsibility to ensure that the IRQ + * won't trigger an interrupt to the core. + */ +void meta_intc_mask_irq_simple(struct irq_data *data) +{ + record_irq_is_masked(data); +} + +/** + * meta_intc_unmask_irq_simple() - minimal unmask used by wrapper IRQ drivers + * @data: data for the external irq being unmasked + * + * This should be called by any wrapper IRQ driver unmask functions. it doesn't + * do any unmasking but records the IRQ as unmasked so that the core code knows + * the unmask has taken place. It is the callers responsibility to ensure that + * the IRQ can now trigger an interrupt to the core. + */ +void meta_intc_unmask_irq_simple(struct irq_data *data) +{ + record_irq_is_unmasked(data); +} + + +/** + * meta_intc_mask_irq() - mask an external irq using HWMASKEXT + * @data: data for the external irq to mask + * + * This is a default implementation of a mask function which makes use of the + * HWMASKEXT registers available in newer versions. + * + * Earlier versions without these registers should use SoC level IRQ masking + * which call the meta_intc_*_simple() functions above, or if that isn't + * available should use the fallback meta_intc_*_nomask() functions below. + */ +static void meta_intc_mask_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *mask_addr = meta_intc_mask_addr(hw); + unsigned long flags; + + record_irq_is_masked(data); + + /* update the interrupt mask */ + __global_lock2(flags); + metag_out32(metag_in32(mask_addr) & ~bit, mask_addr); + __global_unlock2(flags); +} + +/** + * meta_intc_unmask_irq() - unmask an external irq using HWMASKEXT + * @data: data for the external irq to unmask + * + * This is a default implementation of an unmask function which makes use of the + * HWMASKEXT registers available on new versions. It should be paired with + * meta_intc_mask_irq() above. + */ +static void meta_intc_unmask_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *mask_addr = meta_intc_mask_addr(hw); + unsigned long flags; + + record_irq_is_unmasked(data); + + /* update the interrupt mask */ + __global_lock2(flags); + metag_out32(metag_in32(mask_addr) | bit, mask_addr); + __global_unlock2(flags); +} + +/** + * meta_intc_mask_irq_nomask() - mask an external irq by unvectoring + * @data: data for the external irq to mask + * + * This is the version of the mask function for older versions which don't have + * HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the IRQ is + * unvectored from the core and retriggered if necessary later. + */ +static void meta_intc_mask_irq_nomask(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = meta_intc_vec_addr(hw); + + record_irq_is_masked(data); + + /* there is no interrupt mask, so unvector the interrupt */ + metag_out32(0, vec_addr); +} + +/** + * meta_intc_unmask_edge_irq_nomask() - unmask an edge irq by revectoring + * @data: data for the external irq to unmask + * + * This is the version of the unmask function for older versions which don't + * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the + * IRQ is revectored back to the core and retriggered if necessary. + * + * The retriggering done by this function is specific to edge interrupts. + */ +static void meta_intc_unmask_edge_irq_nomask(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *stat_addr = meta_intc_stat_addr(hw); + void __iomem *vec_addr = meta_intc_vec_addr(hw); + unsigned int thread = hard_processor_id(); + + record_irq_is_unmasked(data); + + /* there is no interrupt mask, so revector the interrupt */ + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr); + + /* + * Re-trigger interrupt + * + * Writing a 1 toggles, and a 0->1 transition triggers. We only + * retrigger if the status bit is already set, which means we + * need to clear it first. Retriggering is fundamentally racy + * because if the interrupt fires again after we clear it we + * could end up clearing it again and the interrupt handler + * thinking it hasn't fired. Therefore we need to keep trying to + * retrigger until the bit is set. + */ + if (metag_in32(stat_addr) & bit) { + metag_out32(bit, stat_addr); + while (!(metag_in32(stat_addr) & bit)) + metag_out32(bit, stat_addr); + } +} + +/** + * meta_intc_unmask_level_irq_nomask() - unmask a level irq by revectoring + * @data: data for the external irq to unmask + * + * This is the version of the unmask function for older versions which don't + * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the + * IRQ is revectored back to the core and retriggered if necessary. + * + * The retriggering done by this function is specific to level interrupts. + */ +static void meta_intc_unmask_level_irq_nomask(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *stat_addr = meta_intc_stat_addr(hw); + void __iomem *vec_addr = meta_intc_vec_addr(hw); + unsigned int thread = hard_processor_id(); + + record_irq_is_unmasked(data); + + /* there is no interrupt mask, so revector the interrupt */ + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr); + + /* Re-trigger interrupt */ + /* Writing a 1 triggers interrupt */ + if (metag_in32(stat_addr) & bit) + metag_out32(bit, stat_addr); +} + +/** + * meta_intc_irq_set_type() - set the type of an external irq + * @data: data for the external irq to set the type of + * @flow_type: new irq flow type + * + * Set the flow type of an external interrupt. This updates the irq chip and irq + * handler depending on whether the irq is edge or level sensitive (the polarity + * is ignored), and also sets up the bit in HWLEVELEXT so the hardware knows + * when to trigger. + */ +static int meta_intc_irq_set_type(struct irq_data *data, unsigned int flow_type) +{ +#ifdef CONFIG_METAG_SUSPEND_MEM + struct meta_intc_priv *priv = &meta_intc_priv; +#endif + unsigned int irq = data->irq; + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *level_addr = meta_intc_level_addr(hw); + unsigned long flags; + unsigned int level; + + /* update the chip/handler */ + if (flow_type & IRQ_TYPE_LEVEL_MASK) + __irq_set_chip_handler_name_locked(irq, &meta_intc_level_chip, + handle_level_irq, NULL); + else + __irq_set_chip_handler_name_locked(irq, &meta_intc_edge_chip, + handle_edge_irq, NULL); + + /* and clear/set the bit in HWLEVELEXT */ + __global_lock2(flags); + level = metag_in32(level_addr); + if (flow_type & IRQ_TYPE_LEVEL_MASK) + level |= bit; + else + level &= ~bit; + metag_out32(level, level_addr); +#ifdef CONFIG_METAG_SUSPEND_MEM + priv->levels_altered[meta_intc_bank(hw)] |= bit; +#endif + __global_unlock2(flags); + + return 0; +} + +/** + * meta_intc_irq_demux() - external irq de-multiplexer + * @irq: the virtual interrupt number + * @desc: the interrupt description structure for this irq + * + * The cpu receives an interrupt on TR2 when a SoC interrupt has occurred. It is + * this function's job to demux this irq and figure out exactly which external + * irq needs servicing. + * + * Whilst using TR2 to detect external interrupts is a software convention it is + * (hopefully) unlikely to change. + */ +static void meta_intc_irq_demux(unsigned int irq, struct irq_desc *desc) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + irq_hw_number_t hw; + unsigned int bank, irq_no, status; + void __iomem *stat_addr = meta_intc_stat_addr(0); + + /* + * Locate which interrupt has caused our handler to run. + */ + for (bank = 0; bank < priv->nr_banks; ++bank) { + /* Which interrupts are currently pending in this bank? */ +recalculate: + status = metag_in32(stat_addr) & priv->unmasked[bank]; + + for (hw = bank*32; status; status >>= 1, ++hw) { + if (status & 0x1) { + /* + * Map the hardware IRQ number to a virtual + * Linux IRQ number. + */ + irq_no = irq_linear_revmap(priv->domain, hw); + + /* + * Only fire off external interrupts that are + * registered to be handled by the kernel. + * Other external interrupts are probably being + * handled by other Meta hardware threads. + */ + generic_handle_irq(irq_no); + + /* + * The handler may have re-enabled interrupts + * which could have caused a nested invocation + * of this code and make the copy of the + * status register we are using invalid. + */ + goto recalculate; + } + } + stat_addr += HWSTAT_STRIDE; + } +} + +#ifdef CONFIG_SMP +/** + * meta_intc_set_affinity() - set the affinity for an interrupt + * @data: data for the external irq to set the affinity of + * @cpumask: cpu mask representing cpus which can handle the interrupt + * @force: whether to force (ignored) + * + * Revector the specified external irq onto a specific cpu's TR2 trigger, so + * that that cpu tends to be the one who handles it. + */ +static int meta_intc_set_affinity(struct irq_data *data, + const struct cpumask *cpumask, bool force) +{ + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = meta_intc_vec_addr(hw); + unsigned int cpu, thread; + + /* + * Wire up this interrupt from HWVECxEXT to the Meta core. + * + * Note that we can't wire up HWVECxEXT to interrupt more than + * one cpu (the interrupt code doesn't support it), so we just + * pick the first cpu we find in 'cpumask'. + */ + cpu = cpumask_any(cpumask); + thread = cpu_2_hwthread_id[cpu]; + + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr); + + return 0; +} +#else +#define meta_intc_set_affinity NULL +#endif + +#ifdef CONFIG_PM_SLEEP +#define META_INTC_CHIP_FLAGS (IRQCHIP_MASK_ON_SUSPEND \ + | IRQCHIP_SKIP_SET_WAKE) +#else +#define META_INTC_CHIP_FLAGS 0 +#endif + +/* public edge/level irq chips which SoCs can override */ + +struct irq_chip meta_intc_edge_chip = { + .irq_startup = meta_intc_startup_irq, + .irq_shutdown = meta_intc_shutdown_irq, + .irq_ack = meta_intc_ack_irq, + .irq_mask = meta_intc_mask_irq, + .irq_unmask = meta_intc_unmask_irq, + .irq_set_type = meta_intc_irq_set_type, + .irq_set_affinity = meta_intc_set_affinity, + .flags = META_INTC_CHIP_FLAGS, +}; + +struct irq_chip meta_intc_level_chip = { + .irq_startup = meta_intc_startup_irq, + .irq_shutdown = meta_intc_shutdown_irq, + .irq_set_type = meta_intc_irq_set_type, + .irq_mask = meta_intc_mask_irq, + .irq_unmask = meta_intc_unmask_irq, + .irq_set_affinity = meta_intc_set_affinity, + .flags = META_INTC_CHIP_FLAGS, +}; + +/** + * meta_intc_map() - map an external irq + * @d: irq domain of external trigger block + * @irq: virtual irq number + * @hw: hardware irq number within external trigger block + * + * This sets up a virtual irq for a specified hardware interrupt. The irq chip + * and handler is configured, using the HWLEVELEXT registers to determine + * edge/level flow type. These registers will have been set when the irq type is + * set (or set to a default at init time). + */ +static int meta_intc_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *level_addr = meta_intc_level_addr(hw); + + /* Go by the current sense in the HWLEVELEXT register */ + if (metag_in32(level_addr) & bit) + irq_set_chip_and_handler(irq, &meta_intc_level_chip, + handle_level_irq); + else + irq_set_chip_and_handler(irq, &meta_intc_edge_chip, + handle_edge_irq); + return 0; +} + +static const struct irq_domain_ops meta_intc_domain_ops = { + .map = meta_intc_map, + .xlate = irq_domain_xlate_twocell, +}; + +#ifdef CONFIG_METAG_SUSPEND_MEM + +/** + * struct meta_intc_context - suspend context + * @levels: State of HWLEVELEXT registers + * @masks: State of HWMASKEXT registers + * @vectors: State of HWVECEXT registers + * @txvecint: State of TxVECINT registers + * + * This structure stores the IRQ state across suspend. + */ +struct meta_intc_context { + u32 levels[4]; + u32 masks[4]; + u8 vectors[4*32]; + + u8 txvecint[4][4]; +}; + +/* suspend context */ +static struct meta_intc_context *meta_intc_context; + +/** + * meta_intc_suspend() - store irq state + * + * To avoid interfering with other threads we only save the IRQ state of IRQs in + * use by Linux. + */ +static int meta_intc_suspend(void) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + int i, j; + irq_hw_number_t hw; + unsigned int bank; + unsigned long flags; + struct meta_intc_context *context; + void __iomem *level_addr, *mask_addr, *vec_addr; + u32 mask, bit; + + context = kzalloc(sizeof(*context), GFP_ATOMIC); + if (!context) + return -ENOMEM; + + hw = 0; + level_addr = meta_intc_level_addr(0); + mask_addr = meta_intc_mask_addr(0); + for (bank = 0; bank < priv->nr_banks; ++bank) { + vec_addr = meta_intc_vec_addr(hw); + + /* create mask of interrupts in use */ + mask = 0; + for (bit = 1; bit; bit <<= 1) { + i = irq_linear_revmap(priv->domain, hw); + /* save mapped irqs which are enabled or have actions */ + if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) || + irq_has_action(i))) { + mask |= bit; + + /* save trigger vector */ + context->vectors[hw] = metag_in32(vec_addr); + } + + ++hw; + vec_addr += HWVECnEXT_STRIDE; + } + + /* save level state if any IRQ levels altered */ + if (priv->levels_altered[bank]) + context->levels[bank] = metag_in32(level_addr); + /* save mask state if any IRQs in use */ + if (mask) + context->masks[bank] = metag_in32(mask_addr); + + level_addr += HWSTAT_STRIDE; + mask_addr += HWSTAT_STRIDE; + } + + /* save trigger matrixing */ + __global_lock2(flags); + for (i = 0; i < 4; ++i) + for (j = 0; j < 4; ++j) + context->txvecint[i][j] = metag_in32(T0VECINT_BHALT + + TnVECINT_STRIDE*i + + 8*j); + __global_unlock2(flags); + + meta_intc_context = context; + return 0; +} + +/** + * meta_intc_resume() - restore saved irq state + * + * Restore the saved IRQ state and drop it. + */ +static void meta_intc_resume(void) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + int i, j; + irq_hw_number_t hw; + unsigned int bank; + unsigned long flags; + struct meta_intc_context *context = meta_intc_context; + void __iomem *level_addr, *mask_addr, *vec_addr; + u32 mask, bit, tmp; + + meta_intc_context = NULL; + + hw = 0; + level_addr = meta_intc_level_addr(0); + mask_addr = meta_intc_mask_addr(0); + for (bank = 0; bank < priv->nr_banks; ++bank) { + vec_addr = meta_intc_vec_addr(hw); + + /* create mask of interrupts in use */ + mask = 0; + for (bit = 1; bit; bit <<= 1) { + i = irq_linear_revmap(priv->domain, hw); + /* restore mapped irqs, enabled or with actions */ + if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) || + irq_has_action(i))) { + mask |= bit; + + /* restore trigger vector */ + metag_out32(context->vectors[hw], vec_addr); + } + + ++hw; + vec_addr += HWVECnEXT_STRIDE; + } + + if (mask) { + /* restore mask state */ + __global_lock2(flags); + tmp = metag_in32(mask_addr); + tmp = (tmp & ~mask) | (context->masks[bank] & mask); + metag_out32(tmp, mask_addr); + __global_unlock2(flags); + } + + mask = priv->levels_altered[bank]; + if (mask) { + /* restore level state */ + __global_lock2(flags); + tmp = metag_in32(level_addr); + tmp = (tmp & ~mask) | (context->levels[bank] & mask); + metag_out32(tmp, level_addr); + __global_unlock2(flags); + } + + level_addr += HWSTAT_STRIDE; + mask_addr += HWSTAT_STRIDE; + } + + /* restore trigger matrixing */ + __global_lock2(flags); + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { + metag_out32(context->txvecint[i][j], + T0VECINT_BHALT + + TnVECINT_STRIDE*i + + 8*j); + } + } + __global_unlock2(flags); + + kfree(context); +} + +static struct syscore_ops meta_intc_syscore_ops = { + .suspend = meta_intc_suspend, + .resume = meta_intc_resume, +}; + +static void __init meta_intc_init_syscore_ops(struct meta_intc_priv *priv) +{ + register_syscore_ops(&meta_intc_syscore_ops); +} +#else +#define meta_intc_init_syscore_ops(priv) do {} while (0) +#endif + +/** + * meta_intc_init_cpu() - register with a Meta cpu + * @priv: private interrupt controller data + * @cpu: the CPU to register on + * + * Configure @cpu's TR2 irq so that we can demux external irqs. + */ +static void __init meta_intc_init_cpu(struct meta_intc_priv *priv, int cpu) +{ + unsigned int thread = cpu_2_hwthread_id[cpu]; + unsigned int signum = TBID_SIGNUM_TR2(thread); + int irq = tbisig_map(signum); + + /* Register the multiplexed IRQ handler */ + irq_set_chained_handler(irq, meta_intc_irq_demux); + irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); +} + +/** + * meta_intc_no_mask() - indicate lack of HWMASKEXT registers + * + * Called from SoC code (or init code below) to dynamically indicate the lack of + * HWMASKEXT registers (for example depending on some SoC revision register). + * This alters the irq mask and unmask callbacks to use the fallback + * unvectoring/retriggering technique instead of using HWMASKEXT registers. + */ +void __init meta_intc_no_mask(void) +{ + meta_intc_edge_chip.irq_mask = meta_intc_mask_irq_nomask; + meta_intc_edge_chip.irq_unmask = meta_intc_unmask_edge_irq_nomask; + meta_intc_level_chip.irq_mask = meta_intc_mask_irq_nomask; + meta_intc_level_chip.irq_unmask = meta_intc_unmask_level_irq_nomask; +} + +/** + * init_external_IRQ() - initialise the external irq controller + * + * Set up the external irq controller using device tree properties. This is + * called from init_IRQ(). + */ +int __init init_external_IRQ(void) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + struct device_node *node; + int ret, cpu; + u32 val; + bool no_masks = false; + + node = of_find_compatible_node(NULL, NULL, "img,meta-intc"); + if (!node) + return -ENOENT; + + /* Get number of banks */ + ret = of_property_read_u32(node, "num-banks", &val); + if (ret) { + pr_err("meta-intc: No num-banks property found\n"); + return ret; + } + if (val < 1 || val > 4) { + pr_err("meta-intc: num-banks (%u) out of range\n", val); + return -EINVAL; + } + priv->nr_banks = val; + + /* Are any mask registers present? */ + if (of_get_property(node, "no-mask", NULL)) + no_masks = true; + + /* No HWMASKEXT registers present? */ + if (no_masks) + meta_intc_no_mask(); + + /* Set up an IRQ domain */ + /* + * This is a legacy IRQ domain for now until all the platform setup code + * has been converted to devicetree. + */ + priv->domain = irq_domain_add_linear(node, priv->nr_banks*32, + &meta_intc_domain_ops, priv); + if (unlikely(!priv->domain)) { + pr_err("meta-intc: cannot add IRQ domain\n"); + return -ENOMEM; + } + + /* Setup TR2 for all cpus. */ + for_each_possible_cpu(cpu) + meta_intc_init_cpu(priv, cpu); + + /* Set up system suspend/resume callbacks */ + meta_intc_init_syscore_ops(priv); + + pr_info("meta-intc: External IRQ controller initialised (%u IRQs)\n", + priv->nr_banks*32); + + return 0; +} diff --git a/drivers/irqchip/irq-metag.c b/drivers/irqchip/irq-metag.c new file mode 100644 index 000000000000..8e94d7a3b20d --- /dev/null +++ b/drivers/irqchip/irq-metag.c @@ -0,0 +1,343 @@ +/* + * Meta internal (HWSTATMETA) interrupt code. + * + * Copyright (C) 2011-2012 Imagination Technologies Ltd. + * + * This code is based on the code in SoC/common/irq.c and SoC/comet/irq.c + * The code base could be generalised/merged as a lot of the functionality is + * similar. Until this is done, we try to keep the code simple here. + */ + +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/irqdomain.h> + +#include <asm/irq.h> +#include <asm/hwthread.h> + +#define PERF0VECINT 0x04820580 +#define PERF1VECINT 0x04820588 +#define PERF0TRIG_OFFSET 16 +#define PERF1TRIG_OFFSET 17 + +/** + * struct metag_internal_irq_priv - private meta internal interrupt data + * @domain: IRQ domain for all internal Meta IRQs (HWSTATMETA) + * @unmasked: Record of unmasked IRQs + */ +struct metag_internal_irq_priv { + struct irq_domain *domain; + + unsigned long unmasked; +}; + +/* Private data for the one and only internal interrupt controller */ +static struct metag_internal_irq_priv metag_internal_irq_priv; + +static unsigned int metag_internal_irq_startup(struct irq_data *data); +static void metag_internal_irq_shutdown(struct irq_data *data); +static void metag_internal_irq_ack(struct irq_data *data); +static void metag_internal_irq_mask(struct irq_data *data); +static void metag_internal_irq_unmask(struct irq_data *data); +#ifdef CONFIG_SMP +static int metag_internal_irq_set_affinity(struct irq_data *data, + const struct cpumask *cpumask, bool force); +#endif + +static struct irq_chip internal_irq_edge_chip = { + .name = "HWSTATMETA-IRQ", + .irq_startup = metag_internal_irq_startup, + .irq_shutdown = metag_internal_irq_shutdown, + .irq_ack = metag_internal_irq_ack, + .irq_mask = metag_internal_irq_mask, + .irq_unmask = metag_internal_irq_unmask, +#ifdef CONFIG_SMP + .irq_set_affinity = metag_internal_irq_set_affinity, +#endif +}; + +/* + * metag_hwvec_addr - get the address of *VECINT regs of irq + * + * This function is a table of supported triggers on HWSTATMETA + * Could do with a structure, but better keep it simple. Changes + * in this code should be rare. + */ +static inline void __iomem *metag_hwvec_addr(irq_hw_number_t hw) +{ + void __iomem *addr; + + switch (hw) { + case PERF0TRIG_OFFSET: + addr = (void __iomem *)PERF0VECINT; + break; + case PERF1TRIG_OFFSET: + addr = (void __iomem *)PERF1VECINT; + break; + default: + addr = NULL; + break; + } + return addr; +} + +/* + * metag_internal_startup - setup an internal irq + * @irq: the irq to startup + * + * Multiplex interrupts for @irq onto TR1. Clear any pending + * interrupts. + */ +static unsigned int metag_internal_irq_startup(struct irq_data *data) +{ + /* Clear (toggle) the bit in HWSTATMETA for our interrupt. */ + metag_internal_irq_ack(data); + + /* Enable the interrupt by unmasking it */ + metag_internal_irq_unmask(data); + + return 0; +} + +/* + * metag_internal_irq_shutdown - turn off the irq + * @irq: the irq number to turn off + * + * Mask @irq and clear any pending interrupts. + * Stop muxing @irq onto TR1. + */ +static void metag_internal_irq_shutdown(struct irq_data *data) +{ + /* Disable the IRQ at the core by masking it. */ + metag_internal_irq_mask(data); + + /* Clear (toggle) the bit in HWSTATMETA for our interrupt. */ + metag_internal_irq_ack(data); +} + +/* + * metag_internal_irq_ack - acknowledge irq + * @irq: the irq to ack + */ +static void metag_internal_irq_ack(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << hw; + + if (metag_in32(HWSTATMETA) & bit) + metag_out32(bit, HWSTATMETA); +} + +/** + * metag_internal_irq_mask() - mask an internal irq by unvectoring + * @data: data for the internal irq to mask + * + * HWSTATMETA has no mask register. Instead the IRQ is unvectored from the core + * and retriggered if necessary later. + */ +static void metag_internal_irq_mask(struct irq_data *data) +{ + struct metag_internal_irq_priv *priv = &metag_internal_irq_priv; + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = metag_hwvec_addr(hw); + + clear_bit(hw, &priv->unmasked); + + /* there is no interrupt mask, so unvector the interrupt */ + metag_out32(0, vec_addr); +} + +/** + * meta_intc_unmask_edge_irq_nomask() - unmask an edge irq by revectoring + * @data: data for the internal irq to unmask + * + * HWSTATMETA has no mask register. Instead the IRQ is revectored back to the + * core and retriggered if necessary. + */ +static void metag_internal_irq_unmask(struct irq_data *data) +{ + struct metag_internal_irq_priv *priv = &metag_internal_irq_priv; + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << hw; + void __iomem *vec_addr = metag_hwvec_addr(hw); + unsigned int thread = hard_processor_id(); + + set_bit(hw, &priv->unmasked); + + /* there is no interrupt mask, so revector the interrupt */ + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)), vec_addr); + + /* + * Re-trigger interrupt + * + * Writing a 1 toggles, and a 0->1 transition triggers. We only + * retrigger if the status bit is already set, which means we + * need to clear it first. Retriggering is fundamentally racy + * because if the interrupt fires again after we clear it we + * could end up clearing it again and the interrupt handler + * thinking it hasn't fired. Therefore we need to keep trying to + * retrigger until the bit is set. + */ + if (metag_in32(HWSTATMETA) & bit) { + metag_out32(bit, HWSTATMETA); + while (!(metag_in32(HWSTATMETA) & bit)) + metag_out32(bit, HWSTATMETA); + } +} + +#ifdef CONFIG_SMP +/* + * metag_internal_irq_set_affinity - set the affinity for an interrupt + */ +static int metag_internal_irq_set_affinity(struct irq_data *data, + const struct cpumask *cpumask, bool force) +{ + unsigned int cpu, thread; + irq_hw_number_t hw = data->hwirq; + /* + * Wire up this interrupt from *VECINT to the Meta core. + * + * Note that we can't wire up *VECINT to interrupt more than + * one cpu (the interrupt code doesn't support it), so we just + * pick the first cpu we find in 'cpumask'. + */ + cpu = cpumask_any(cpumask); + thread = cpu_2_hwthread_id[cpu]; + + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)), + metag_hwvec_addr(hw)); + + return 0; +} +#endif + +/* + * metag_internal_irq_demux - irq de-multiplexer + * @irq: the interrupt number + * @desc: the interrupt description structure for this irq + * + * The cpu receives an interrupt on TR1 when an interrupt has + * occurred. It is this function's job to demux this irq and + * figure out exactly which trigger needs servicing. + */ +static void metag_internal_irq_demux(unsigned int irq, struct irq_desc *desc) +{ + struct metag_internal_irq_priv *priv = irq_desc_get_handler_data(desc); + irq_hw_number_t hw; + unsigned int irq_no; + u32 status; + +recalculate: + status = metag_in32(HWSTATMETA) & priv->unmasked; + + for (hw = 0; status != 0; status >>= 1, ++hw) { + if (status & 0x1) { + /* + * Map the hardware IRQ number to a virtual Linux IRQ + * number. + */ + irq_no = irq_linear_revmap(priv->domain, hw); + + /* + * Only fire off interrupts that are + * registered to be handled by the kernel. + * Other interrupts are probably being + * handled by other Meta hardware threads. + */ + generic_handle_irq(irq_no); + + /* + * The handler may have re-enabled interrupts + * which could have caused a nested invocation + * of this code and make the copy of the + * status register we are using invalid. + */ + goto recalculate; + } + } +} + +/** + * internal_irq_map() - Map an internal meta IRQ to a virtual IRQ number. + * @hw: Number of the internal IRQ. Must be in range. + * + * Returns: The virtual IRQ number of the Meta internal IRQ specified by + * @hw. + */ +int internal_irq_map(unsigned int hw) +{ + struct metag_internal_irq_priv *priv = &metag_internal_irq_priv; + if (!priv->domain) + return -ENODEV; + return irq_create_mapping(priv->domain, hw); +} + +/** + * metag_internal_irq_init_cpu - regsister with the Meta cpu + * @cpu: the CPU to register on + * + * Configure @cpu's TR1 irq so that we can demux irqs. + */ +static void metag_internal_irq_init_cpu(struct metag_internal_irq_priv *priv, + int cpu) +{ + unsigned int thread = cpu_2_hwthread_id[cpu]; + unsigned int signum = TBID_SIGNUM_TR1(thread); + int irq = tbisig_map(signum); + + /* Register the multiplexed IRQ handler */ + irq_set_handler_data(irq, priv); + irq_set_chained_handler(irq, metag_internal_irq_demux); + irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); +} + +/** + * metag_internal_intc_map() - map an internal irq + * @d: irq domain of internal trigger block + * @irq: virtual irq number + * @hw: hardware irq number within internal trigger block + * + * This sets up a virtual irq for a specified hardware interrupt. The irq chip + * and handler is configured. + */ +static int metag_internal_intc_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + /* only register interrupt if it is mapped */ + if (!metag_hwvec_addr(hw)) + return -EINVAL; + + irq_set_chip_and_handler(irq, &internal_irq_edge_chip, + handle_edge_irq); + return 0; +} + +static const struct irq_domain_ops metag_internal_intc_domain_ops = { + .map = metag_internal_intc_map, +}; + +/** + * metag_internal_irq_register - register internal IRQs + * + * Register the irq chip and handler function for all internal IRQs + */ +int __init init_internal_IRQ(void) +{ + struct metag_internal_irq_priv *priv = &metag_internal_irq_priv; + unsigned int cpu; + + /* Set up an IRQ domain */ + priv->domain = irq_domain_add_linear(NULL, 32, + &metag_internal_intc_domain_ops, + priv); + if (unlikely(!priv->domain)) { + pr_err("meta-internal-intc: cannot add IRQ domain\n"); + return -ENOMEM; + } + + /* Setup TR1 for all cpus. */ + for_each_possible_cpu(cpu) + metag_internal_irq_init_cpu(priv, cpu); + + return 0; +}; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index a5702d74d2bd..3939829f6c5c 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -322,6 +322,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, return 0; } +#ifndef elf_map + static unsigned long elf_map(struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long total_size) @@ -356,6 +358,8 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, return(map_addr); } +#endif /* !elf_map */ + static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr) { int i, first_idx = -1, last_idx = -1; diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index aba53083297d..ac9da00e9f2c 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -346,6 +346,7 @@ extern void ioport_unmap(void __iomem *p); #define xlate_dev_kmem_ptr(p) p #define xlate_dev_mem_ptr(p) __va(p) +#ifdef CONFIG_VIRT_TO_BUS #ifndef virt_to_bus static inline unsigned long virt_to_bus(volatile void *address) { @@ -357,6 +358,7 @@ static inline void *bus_to_virt(unsigned long address) return (void *) address; } #endif +#endif #ifndef memset_io #define memset_io(a, b, c) memset(__io_virt(a), (b), (c)) diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index 257c55ec4f77..4077b5d9ff81 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -17,5 +17,12 @@ * but it doesn't work on all toolchains, so we just do it by hand */ #ifndef cond_syscall -#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") +#ifdef CONFIG_SYMBOL_PREFIX +#define __SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX +#else +#define __SYMBOL_PREFIX +#endif +#define cond_syscall(x) asm(".weak\t" __SYMBOL_PREFIX #x "\n\t" \ + ".set\t" __SYMBOL_PREFIX #x "," \ + __SYMBOL_PREFIX "sys_ni_syscall") #endif diff --git a/include/clocksource/metag_generic.h b/include/clocksource/metag_generic.h new file mode 100644 index 000000000000..ac17e7d06cfb --- /dev/null +++ b/include/clocksource/metag_generic.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2013 Imaginaton Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __CLKSOURCE_METAG_GENERIC_H +#define __CLKSOURCE_METAG_GENERIC_H + +extern int metag_generic_timer_init(void); + +#endif /* __CLKSOURCE_METAG_GENERIC_H */ diff --git a/include/linux/irqchip/metag-ext.h b/include/linux/irqchip/metag-ext.h new file mode 100644 index 000000000000..697af0fe7c5a --- /dev/null +++ b/include/linux/irqchip/metag-ext.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2012 Imagination Technologies + */ + +#ifndef _LINUX_IRQCHIP_METAG_EXT_H_ +#define _LINUX_IRQCHIP_METAG_EXT_H_ + +struct irq_data; +struct platform_device; + +/* called from core irq code at init */ +int init_external_IRQ(void); + +/* + * called from SoC init_irq() callback to dynamically indicate the lack of + * HWMASKEXT registers. + */ +void meta_intc_no_mask(void); + +/* + * These allow SoCs to specialise the interrupt controller from their init_irq + * callbacks. + */ + +extern struct irq_chip meta_intc_edge_chip; +extern struct irq_chip meta_intc_level_chip; + +/* this should be called in the mask callback */ +void meta_intc_mask_irq_simple(struct irq_data *data); +/* this should be called in the unmask callback */ +void meta_intc_unmask_irq_simple(struct irq_data *data); + +#endif /* _LINUX_IRQCHIP_METAG_EXT_H_ */ diff --git a/include/linux/irqchip/metag.h b/include/linux/irqchip/metag.h new file mode 100644 index 000000000000..4ebdfb3101ab --- /dev/null +++ b/include/linux/irqchip/metag.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2011 Imagination Technologies + */ + +#ifndef _LINUX_IRQCHIP_METAG_H_ +#define _LINUX_IRQCHIP_METAG_H_ + +#include <linux/errno.h> + +#ifdef CONFIG_METAG_PERFCOUNTER_IRQS +extern int init_internal_IRQ(void); +extern int internal_irq_map(unsigned int hw); +#else +static inline int init_internal_IRQ(void) +{ + return 0; +} +static inline int internal_irq_map(unsigned int hw) +{ + return -EINVAL; +} +#endif + +#endif /* _LINUX_IRQCHIP_METAG_H_ */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 1ede55f292c2..7acc9dc73c9f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -115,6 +115,8 @@ extern unsigned int kobjsize(const void *objp); # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 +#elif defined(CONFIG_METAG) +# define VM_GROWSUP VM_ARCH_1 #elif defined(CONFIG_IA64) # define VM_GROWSUP VM_ARCH_1 #elif !defined(CONFIG_MMU) diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 900b9484445b..8072d352b98f 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -395,6 +395,8 @@ typedef struct elf64_shdr { #define NT_ARM_TLS 0x401 /* ARM TLS register */ #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ +#define NT_METAG_CBUF 0x500 /* Metag catch buffer registers */ +#define NT_METAG_RPIPE 0x501 /* Metag read pipeline state */ /* Note header in a PT_NOTE section */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 7244acde77b0..6989df2ba194 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -178,7 +178,7 @@ void tracing_off_permanent(void) #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ -#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) +#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS # define RB_FORCE_8BYTE_ALIGNMENT 0 # define RB_ARCH_ALIGNMENT RB_ALIGNMENT #else @@ -186,6 +186,8 @@ void tracing_off_permanent(void) # define RB_ARCH_ALIGNMENT 8U #endif +#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT) + /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX @@ -334,7 +336,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); struct buffer_data_page { u64 time_stamp; /* page time stamp */ local_t commit; /* write committed index */ - unsigned char data[]; /* data of buffer page */ + unsigned char data[] RB_ALIGN_DATA; /* data of buffer page */ }; /* diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e4a7f808fa06..28be08c09bab 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -674,7 +674,7 @@ config STACKTRACE config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" - depends on DEBUG_KERNEL && !IA64 && !PARISC + depends on DEBUG_KERNEL && !IA64 && !PARISC && !METAG help Enables the display of the minimum amount of free stack which each task has ever had available in the sysrq-T and sysrq-P debug output. @@ -855,7 +855,7 @@ config FRAME_POINTER bool "Compile the kernel with frame pointers" depends on DEBUG_KERNEL && \ (CRIS || M68K || FRV || UML || \ - AVR32 || SUPERH || BLACKFIN || MN10300) || \ + AVR32 || SUPERH || BLACKFIN || MN10300 || METAG) || \ ARCH_WANT_FRAME_POINTERS default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS help diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl index 17e384396705..544aa56b6200 100755 --- a/scripts/checkstack.pl +++ b/scripts/checkstack.pl @@ -34,7 +34,7 @@ use strict; # $1 (first bracket) matches the dynamic amount of the stack growth # # use anything else and feel the pain ;) -my (@stack, $re, $dre, $x, $xs); +my (@stack, $re, $dre, $x, $xs, $funcre); { my $arch = shift; if ($arch eq "") { @@ -44,6 +44,7 @@ my (@stack, $re, $dre, $x, $xs); $x = "[0-9a-f]"; # hex character $xs = "[0-9a-f ]"; # hex character or space + $funcre = qr/^$x* <(.*)>:$/; if ($arch eq 'arm') { #c0008ffc: e24dd064 sub sp, sp, #100 ; 0x64 $re = qr/.*sub.*sp, sp, #(([0-9]{2}|[3-9])[0-9]{2})/o; @@ -66,6 +67,10 @@ my (@stack, $re, $dre, $x, $xs); # 2b6c: 4e56 fb70 linkw %fp,#-1168 # 1df770: defc ffe4 addaw #-28,%sp $re = qr/.*(?:linkw %fp,|addaw )#-([0-9]{1,4})(?:,%sp)?$/o; + } elsif ($arch eq 'metag') { + #400026fc: 40 00 00 82 ADD A0StP,A0StP,#0x8 + $re = qr/.*ADD.*A0StP,A0StP,\#(0x$x{1,8})/o; + $funcre = qr/^$x* <[^\$](.*)>:$/; } elsif ($arch eq 'mips64') { #8800402c: 67bdfff0 daddiu sp,sp,-16 $re = qr/.*daddiu.*sp,sp,-(([0-9]{2}|[3-9])[0-9]{2})/o; @@ -109,7 +114,6 @@ my (@stack, $re, $dre, $x, $xs); # # main() # -my $funcre = qr/^$x* <(.*)>:$/; my ($func, $file, $lastslash); while (my $line = <STDIN>) { diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c index 8a106499ec4f..d25e4a118d37 100644 --- a/scripts/genksyms/genksyms.c +++ b/scripts/genksyms/genksyms.c @@ -826,7 +826,8 @@ int main(int argc, char **argv) genksyms_usage(); return 1; } - if ((strcmp(arch, "h8300") == 0) || (strcmp(arch, "blackfin") == 0)) + if ((strcmp(arch, "h8300") == 0) || (strcmp(arch, "blackfin") == 0) || + (strcmp(arch, "metag") == 0)) mod_prefix = "_"; { extern int yydebug; diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index ee52cb8e17ad..9c22317778eb 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -33,6 +33,13 @@ #include <string.h> #include <unistd.h> +#ifndef EM_METAG +/* Remove this when these make it to the standard system elf.h. */ +#define EM_METAG 174 +#define R_METAG_ADDR32 2 +#define R_METAG_NONE 3 +#endif + static int fd_map; /* File descriptor for file being modified. */ static int mmap_failed; /* Boolean flag. */ static void *ehdr_curr; /* current ElfXX_Ehdr * for resource cleanup */ @@ -341,6 +348,12 @@ do_file(char const *const fname) altmcount = "__gnu_mcount_nc"; break; case EM_IA_64: reltype = R_IA64_IMM64; gpfx = '_'; break; + case EM_METAG: reltype = R_METAG_ADDR32; + altmcount = "_mcount_wrapper"; + rel_type_nop = R_METAG_NONE; + /* We happen to have the same requirement as MIPS */ + is_fake_mcount32 = MIPS32_is_fake_mcount; + break; case EM_MIPS: /* reltype: e_class */ gpfx = '_'; break; case EM_PPC: reltype = R_PPC_ADDR32; gpfx = '_'; break; case EM_PPC64: reltype = R_PPC64_ADDR64; gpfx = '_'; break; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index d5818c98d051..74659ecf93e0 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -100,6 +100,12 @@ #define CPUINFO_PROC "Processor" #endif +#ifdef __metag__ +#define rmb() asm volatile("" ::: "memory") +#define cpu_relax() asm volatile("" ::: "memory") +#define CPUINFO_PROC "CPU" +#endif + #include <time.h> #include <unistd.h> #include <sys/types.h> |