From 25d3584797a39f57b69cd835722ac7c41113fb9a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 16 Aug 2010 15:15:14 +0100 Subject: ARM: 6330/1: perf: reword comments relating to perf_event_do_pending This is purely a cosmetic change to the ARM perf backend because the current comments about the relationship between NMIs, interrupt context and perf_event_do_pending are misleading. This patch updates the comments so that they reflect what the code actually does (which is in line with other architectures). Acked-by: Jamie Iles Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index 48837e6d8887..b5799a3b7117 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -17,7 +17,7 @@ * counter interrupts are regular interrupts and not an NMI. This * means that when we receive the interrupt we can call * perf_event_do_pending() that handles all of the work with - * interrupts enabled. + * interrupts disabled. */ static inline void set_perf_event_pending(void) -- cgit v1.2.3 From 418cf646c9999f2f7d216b1a693c2c575ab8094c Mon Sep 17 00:00:00 2001 From: Mikael Pettersson Date: Wed, 25 Aug 2010 20:49:01 +0100 Subject: ARM: 6343/1: wire up fanotify and prlimit64 syscalls on ARM The 2.6.36-rc kernel added three new system calls: fanotify_init, fanotify_mark, and prlimit64. This patch wires them up on ARM. The only non-trivial issue here is the u64 argument to sys_fanotify_mark(), but it is the 3rd argument and thus passed in r2/r3 in both kernel and user space, so it causes no problems. Tested with a 2.6.36-rc2 EABI kernel on an ixp4xx machine. Tested-by: Anand Gadiyar Signed-off-by: Mikael Pettersson Signed-off-by: Russell King --- arch/arm/include/asm/unistd.h | 3 +++ arch/arm/kernel/calls.S | 3 +++ 2 files changed, 6 insertions(+) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index d02cfb683487..c891eb76c0e3 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -393,6 +393,9 @@ #define __NR_perf_event_open (__NR_SYSCALL_BASE+364) #define __NR_recvmmsg (__NR_SYSCALL_BASE+365) #define __NR_accept4 (__NR_SYSCALL_BASE+366) +#define __NR_fanotify_init (__NR_SYSCALL_BASE+367) +#define __NR_fanotify_mark (__NR_SYSCALL_BASE+368) +#define __NR_prlimit64 (__NR_SYSCALL_BASE+369) /* * The following SWIs are ARM private. diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index afeb71fa72cb..5c26eccef998 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -376,6 +376,9 @@ CALL(sys_perf_event_open) /* 365 */ CALL(sys_recvmmsg) CALL(sys_accept4) + CALL(sys_fanotify_init) + CALL(sys_fanotify_mark) + CALL(sys_prlimit64) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted -- cgit v1.2.3 From 3b6c223b1b97ad60bbb0f4efda57d649414ac2a2 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 10 Aug 2010 19:43:28 +0100 Subject: ARM: 6318/1: ftrace: fix and update dynamic ftrace This adds mcount recording and updates dynamic ftrace for ARM to work with the new ftrace dyamic tracing implementation. It also adds support for the mcount format used by newer ARM compilers. With dynamic tracing, mcount() is implemented as a nop. 
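Concretely, the "nop" differs per mcount flavour, because a __gnu_mcount_nc call site first pushes lr; the two encodings below are quoted from the ftrace.c changes in this patch:

	#define NOP	0xe8bd4000	/* pop {lr} - __gnu_mcount_nc call sites, undoes the push */
	#define OLD_NOP	0xe1a00000	/* mov r0, r0 - old-style mcount call sites */
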
Callsites are patched on startup with nops, and dynamically patched to call to the ftrace_caller() routine as needed. Acked-by: Steven Rostedt [recordmcount.pl change] Signed-off-by: Rabin Vincent Signed-off-by: Russell King --- arch/arm/include/asm/ftrace.h | 19 ++++- arch/arm/kernel/entry-common.S | 37 +++++++--- arch/arm/kernel/ftrace.c | 155 ++++++++++++++++++++++++++++------------- scripts/recordmcount.pl | 2 + 4 files changed, 155 insertions(+), 58 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index 103f7ee97313..4a56a2ee067c 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -2,12 +2,29 @@ #define _ASM_ARM_FTRACE #ifdef CONFIG_FUNCTION_TRACER -#define MCOUNT_ADDR ((long)(mcount)) +#define MCOUNT_ADDR ((unsigned long)(__gnu_mcount_nc)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ #ifndef __ASSEMBLY__ extern void mcount(void); extern void __gnu_mcount_nc(void); + +#ifdef CONFIG_DYNAMIC_FTRACE +struct dyn_arch_ftrace { +#ifdef CONFIG_OLD_MCOUNT + bool old_mcount; +#endif +}; + +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} + +extern void ftrace_caller_old(void); +extern void ftrace_call_old(void); +#endif + #endif #endif diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index f5e75de0203e..e02790f28879 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -127,6 +127,10 @@ ENDPROC(ret_from_fork) * clobber the ip register. This is OK because the ARM calling convention * allows it to be clobbered in subroutines and doesn't use it to hold * parameters.) + * + * When using dynamic ftrace, we patch out the mcount call by a "mov r0, r0" + * for the mcount case, and a "pop {lr}" for the __gnu_mcount_nc case (see + * arch/arm/kernel/ftrace.c). */ #ifndef CONFIG_OLD_MCOUNT @@ -136,30 +140,45 @@ ENDPROC(ret_from_fork) #endif #ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(mcount) +ENTRY(__gnu_mcount_nc) + mov ip, lr + ldmia sp!, {lr} + mov pc, ip +ENDPROC(__gnu_mcount_nc) + +ENTRY(ftrace_caller) stmdb sp!, {r0-r3, lr} mov r0, lr sub r0, r0, #MCOUNT_INSN_SIZE + ldr r1, [sp, #20] - .globl mcount_call -mcount_call: + .global ftrace_call +ftrace_call: bl ftrace_stub - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} + ldmia sp!, {r0-r3, ip, lr} + mov pc, ip +ENDPROC(ftrace_caller) + +#ifdef CONFIG_OLD_MCOUNT +ENTRY(mcount) + stmdb sp!, {lr} + ldr lr, [fp, #-4] + ldmia sp!, {pc} ENDPROC(mcount) -ENTRY(ftrace_caller) +ENTRY(ftrace_caller_old) stmdb sp!, {r0-r3, lr} ldr r1, [fp, #-4] mov r0, lr sub r0, r0, #MCOUNT_INSN_SIZE - .globl ftrace_call -ftrace_call: + .globl ftrace_call_old +ftrace_call_old: bl ftrace_stub ldr lr, [fp, #-4] @ restore lr ldmia sp!, {r0-r3, pc} -ENDPROC(ftrace_caller) +ENDPROC(ftrace_caller_old) +#endif #else diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 0298286ad4ad..f09014cfbf2c 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -2,102 +2,161 @@ * Dynamic function tracing support. * * Copyright (C) 2008 Abhishek Sagar + * Copyright (C) 2010 Rabin Vincent * * For licencing details, see COPYING. * * Defines low-level handling of mcount calls when the kernel * is compiled with the -pg flag. When using dynamic ftrace, the - * mcount call-sites get patched lazily with NOP till they are - * enabled. All code mutation routines here take effect atomically. + * mcount call-sites get patched with NOP till they are enabled. 
+ * All code mutation routines here are called under stop_machine(). */ #include +#include #include #include -#define PC_OFFSET 8 -#define BL_OPCODE 0xeb000000 -#define BL_OFFSET_MASK 0x00ffffff +#define NOP 0xe8bd4000 /* pop {lr} */ -static unsigned long bl_insn; -static const unsigned long NOP = 0xe1a00000; /* mov r0, r0 */ +#ifdef CONFIG_OLD_MCOUNT +#define OLD_MCOUNT_ADDR ((unsigned long) mcount) +#define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old) -unsigned char *ftrace_nop_replace(void) +#define OLD_NOP 0xe1a00000 /* mov r0, r0 */ + +static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec) +{ + return rec->arch.old_mcount ? OLD_NOP : NOP; +} + +static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr) +{ + if (!rec->arch.old_mcount) + return addr; + + if (addr == MCOUNT_ADDR) + addr = OLD_MCOUNT_ADDR; + else if (addr == FTRACE_ADDR) + addr = OLD_FTRACE_ADDR; + + return addr; +} +#else +static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec) +{ + return NOP; +} + +static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr) { - return (char *)&NOP; + return addr; } +#endif /* construct a branch (BL) instruction to addr */ -unsigned char *ftrace_call_replace(unsigned long pc, unsigned long addr) +static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) { long offset; - offset = (long)addr - (long)(pc + PC_OFFSET); + offset = (long)addr - (long)(pc + 8); if (unlikely(offset < -33554432 || offset > 33554428)) { /* Can't generate branches that far (from ARM ARM). Ftrace * doesn't generate branches outside of kernel text. */ WARN_ON_ONCE(1); - return NULL; + return 0; } - offset = (offset >> 2) & BL_OFFSET_MASK; - bl_insn = BL_OPCODE | offset; - return (unsigned char *)&bl_insn; -} -int ftrace_modify_code(unsigned long pc, unsigned char *old_code, - unsigned char *new_code) -{ - unsigned long err = 0, replaced = 0, old, new; + offset = (offset >> 2) & 0x00ffffff; - old = *(unsigned long *)old_code; - new = *(unsigned long *)new_code; + return 0xeb000000 | offset; +} - __asm__ __volatile__ ( - "1: ldr %1, [%2] \n" - " cmp %1, %4 \n" - "2: streq %3, [%2] \n" - " cmpne %1, %3 \n" - " movne %0, #2 \n" - "3:\n" +static int ftrace_modify_code(unsigned long pc, unsigned long old, + unsigned long new) +{ + unsigned long replaced; - ".pushsection .fixup, \"ax\"\n" - "4: mov %0, #1 \n" - " b 3b \n" - ".popsection\n" + if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE)) + return -EFAULT; - ".pushsection __ex_table, \"a\"\n" - " .long 1b, 4b \n" - " .long 2b, 4b \n" - ".popsection\n" + if (replaced != old) + return -EINVAL; - : "=r"(err), "=r"(replaced) - : "r"(pc), "r"(new), "r"(old), "0"(err), "1"(replaced) - : "memory"); + if (probe_kernel_write((void *)pc, &new, MCOUNT_INSN_SIZE)) + return -EPERM; - if (!err && (replaced == old)) - flush_icache_range(pc, pc + MCOUNT_INSN_SIZE); + flush_icache_range(pc, pc + MCOUNT_INSN_SIZE); - return err; + return 0; } int ftrace_update_ftrace_func(ftrace_func_t func) { - int ret; unsigned long pc, old; - unsigned char *new; + unsigned long new; + int ret; pc = (unsigned long)&ftrace_call; memcpy(&old, &ftrace_call, MCOUNT_INSN_SIZE); new = ftrace_call_replace(pc, (unsigned long)func); - ret = ftrace_modify_code(pc, (unsigned char *)&old, new); + + ret = ftrace_modify_code(pc, old, new); + +#ifdef CONFIG_OLD_MCOUNT + if (!ret) { + pc = (unsigned long)&ftrace_call_old; + memcpy(&old, &ftrace_call_old, MCOUNT_INSN_SIZE); + new = ftrace_call_replace(pc, (unsigned 
long)func); + + ret = ftrace_modify_code(pc, old, new); + } +#endif + + return ret; +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long new, old; + unsigned long ip = rec->ip; + + old = ftrace_nop_replace(rec); + new = ftrace_call_replace(ip, adjust_address(rec, addr)); + + return ftrace_modify_code(rec->ip, old, new); +} + +int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long ip = rec->ip; + unsigned long old; + unsigned long new; + int ret; + + old = ftrace_call_replace(ip, adjust_address(rec, addr)); + new = ftrace_nop_replace(rec); + ret = ftrace_modify_code(ip, old, new); + +#ifdef CONFIG_OLD_MCOUNT + if (ret == -EINVAL && addr == MCOUNT_ADDR) { + rec->arch.old_mcount = true; + + old = ftrace_call_replace(ip, adjust_address(rec, addr)); + new = ftrace_nop_replace(rec); + ret = ftrace_modify_code(ip, old, new); + } +#endif + return ret; } -/* run from ftrace_init with irqs disabled */ int __init ftrace_dyn_arch_init(void *data) { - ftrace_mcount_set(data); + *(unsigned long *)data = 0; + return 0; } diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index e67f05486087..022d4679b1b3 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -270,6 +270,8 @@ if ($arch eq "x86_64") { } elsif ($arch eq "arm") { $alignment = 2; $section_type = '%progbits'; + $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_ARM_(CALL|PC24)" . + "\\s+(__gnu_mcount_nc|mcount)\$"; } elsif ($arch eq "ia64") { $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$"; -- cgit v1.2.3 From 72dc43a9eb123d2742bd413c80dbeab0c588f622 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 10 Aug 2010 19:52:35 +0100 Subject: ARM: 6319/1: ftrace: add Thumb-2 support to dynamic ftrace Handle the different nop and call instructions for Thumb-2. Also, we need to adjust the recorded mcount_loc addresses because they have the lsb set. 
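For illustration only, here is a minimal user-space sketch of the Thumb-2 BL immediate packing performed by the new ftrace_call_replace() below, together with the lsb fix-up from ftrace_call_adjust(); the addresses in main() are made up, and the returned value reads as the two halfwords do in little-endian memory (first halfword in the low 16 bits):

	#include <stdio.h>

	/* Same field packing as the kernel's Thumb-2 ftrace_call_replace(). */
	static unsigned long thumb2_bl(unsigned long pc, unsigned long addr)
	{
		unsigned long s, j1, j2, i1, i2, imm10, imm11, first, second;
		long offset = (long)addr - (long)(pc + 4);

		if (offset < -16777216 || offset > 16777214)
			return 0;	/* out of BL range */

		s     = (offset >> 24) & 0x1;
		i1    = (offset >> 23) & 0x1;
		i2    = (offset >> 22) & 0x1;
		imm10 = (offset >> 12) & 0x3ff;
		imm11 = (offset >> 1) & 0x7ff;

		j1 = (!i1) ^ s;
		j2 = (!i2) ^ s;

		first  = 0xf000 | (s << 10) | imm10;
		second = 0xd000 | (j1 << 13) | (j2 << 11) | imm11;

		return (second << 16) | first;
	}

	int main(void)
	{
		unsigned long rec = 0xc0008001UL;	/* mcount_loc entry: lsb set */
		unsigned long pc  = rec & ~1UL;		/* ftrace_call_adjust() */

		printf("BL at %#lx -> %#lx: %#010lx\n",
		       pc, 0xc0010000UL, thumb2_bl(pc, 0xc0010000UL));
		return 0;
	}
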
Cc: Catalin Marinas Acked-by: Steven Rostedt [recordmcount.pl change] Signed-off-by: Rabin Vincent Signed-off-by: Russell King --- arch/arm/include/asm/ftrace.h | 3 ++- arch/arm/kernel/ftrace.c | 33 +++++++++++++++++++++++++++++++++ scripts/recordmcount.pl | 2 +- 3 files changed, 36 insertions(+), 2 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index 4a56a2ee067c..f89515adac60 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -18,7 +18,8 @@ struct dyn_arch_ftrace { static inline unsigned long ftrace_call_adjust(unsigned long addr) { - return addr; + /* With Thumb-2, the recorded addresses have the lsb set */ + return addr & ~1; } extern void ftrace_caller_old(void); diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index f09014cfbf2c..971ac8c36ea7 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -18,7 +18,11 @@ #include #include +#ifdef CONFIG_THUMB2_KERNEL +#define NOP 0xeb04f85d /* pop.w {lr} */ +#else #define NOP 0xe8bd4000 /* pop {lr} */ +#endif #ifdef CONFIG_OLD_MCOUNT #define OLD_MCOUNT_ADDR ((unsigned long) mcount) @@ -56,6 +60,34 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr) #endif /* construct a branch (BL) instruction to addr */ +#ifdef CONFIG_THUMB2_KERNEL +static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +{ + unsigned long s, j1, j2, i1, i2, imm10, imm11; + unsigned long first, second; + long offset; + + offset = (long)addr - (long)(pc + 4); + if (offset < -16777216 || offset > 16777214) { + WARN_ON_ONCE(1); + return 0; + } + + s = (offset >> 24) & 0x1; + i1 = (offset >> 23) & 0x1; + i2 = (offset >> 22) & 0x1; + imm10 = (offset >> 12) & 0x3ff; + imm11 = (offset >> 1) & 0x7ff; + + j1 = (!i1) ^ s; + j2 = (!i2) ^ s; + + first = 0xf000 | (s << 10) | imm10; + second = 0xd000 | (j1 << 13) | (j2 << 11) | imm11; + + return (second << 16) | first; +} +#else static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) { long offset; @@ -73,6 +105,7 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) return 0xeb000000 | offset; } +#endif static int ftrace_modify_code(unsigned long pc, unsigned long old, unsigned long new) diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 022d4679b1b3..1d7963f4ee79 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -270,7 +270,7 @@ if ($arch eq "x86_64") { } elsif ($arch eq "arm") { $alignment = 2; $section_type = '%progbits'; - $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_ARM_(CALL|PC24)" . + $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_ARM_(CALL|PC24|THM_CALL)" . "\\s+(__gnu_mcount_nc|mcount)\$"; } elsif ($arch eq "ia64") { -- cgit v1.2.3 From e5f7772eec3ec342ecfe686ab8330ef538af134b Mon Sep 17 00:00:00 2001 From: Phil Carmody Date: Thu, 19 Aug 2010 15:16:37 +0100 Subject: ARM: 6339/1: module - simplify unwind table handling The various sections are all dealt with similarly, so factor out that common behaviour. (Incorporating Peter Huewe's fix.) 
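A reduced sketch of the shape of this refactoring, with the kernel's Elf_Shdr and unwind_table pointers simplified to void * and a stub standing in for unwind_table_add() (names otherwise follow the patch): the per-kind field triples become one array indexed by a section enum, so registration collapses to a loop and new section kinds only need new enum entries plus name matches, as the following exit/devexit patch does.

	#include <stdio.h>

	struct arm_unwind_mapping {
		void *unw_sec;	/* .ARM.exidx* section, if present   */
		void *sec_text;	/* matching text section, if present */
		void *unwind;	/* table registered from the pair    */
	};

	enum { ARM_SEC_INIT, ARM_SEC_DEVINIT, ARM_SEC_CORE, ARM_SEC_MAX };

	static void *unwind_table_add_stub(void *unw_sec, void *sec_text)
	{
		(void)sec_text;
		return unw_sec;	/* stand-in for the kernel's unwind_table_add() */
	}

	static void register_unwind_tables(struct arm_unwind_mapping *maps)
	{
		int i;

		/* one loop replaces three copy-pasted if-blocks */
		for (i = 0; i < ARM_SEC_MAX; ++i)
			if (maps[i].unw_sec && maps[i].sec_text)
				maps[i].unwind = unwind_table_add_stub(maps[i].unw_sec,
								       maps[i].sec_text);
	}

	int main(void)
	{
		struct arm_unwind_mapping maps[ARM_SEC_MAX] = {
			[ARM_SEC_INIT] = { "exidx.init", "init.text", NULL },
		};

		register_unwind_tables(maps);
		printf("init unwind table %s\n",
		       maps[ARM_SEC_INIT].unwind ? "registered" : "absent");
		return 0;
	}
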
Cc: Peter Huewe Signed-off-by: Phil Carmody Acked-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/include/asm/module.h | 29 ++++++++++++++++----------- arch/arm/kernel/module.c | 46 ++++++++++++++++++------------------------- 2 files changed, 36 insertions(+), 39 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index e4dfa69abb68..6dcff0f7f8d7 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -7,20 +7,25 @@ struct unwind_table; -struct mod_arch_specific -{ #ifdef CONFIG_ARM_UNWIND - Elf_Shdr *unw_sec_init; - Elf_Shdr *unw_sec_devinit; - Elf_Shdr *unw_sec_core; - Elf_Shdr *sec_init_text; - Elf_Shdr *sec_devinit_text; - Elf_Shdr *sec_core_text; - struct unwind_table *unwind_init; - struct unwind_table *unwind_devinit; - struct unwind_table *unwind_core; -#endif +struct arm_unwind_mapping { + Elf_Shdr *unw_sec; + Elf_Shdr *sec_text; + struct unwind_table *unwind; +}; +enum { + ARM_SEC_INIT, + ARM_SEC_DEVINIT, + ARM_SEC_CORE, + ARM_SEC_MAX, +}; +struct mod_arch_specific { + struct arm_unwind_mapping map[ARM_SEC_MAX]; }; +#else +struct mod_arch_specific { +}; +#endif /* * Include the ARM architecture version. diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 1dae0468677a..0aa622e84b24 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -69,22 +69,23 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, { #ifdef CONFIG_ARM_UNWIND Elf_Shdr *s, *sechdrs_end = sechdrs + hdr->e_shnum; + struct arm_unwind_mapping *maps = mod->arch.map; for (s = sechdrs; s < sechdrs_end; s++) { char const *secname = secstrings + s->sh_name; if (strcmp(".ARM.exidx.init.text", secname) == 0) - mod->arch.unw_sec_init = s; + maps[ARM_SEC_INIT].unw_sec = s; else if (strcmp(".ARM.exidx.devinit.text", secname) == 0) - mod->arch.unw_sec_devinit = s; + maps[ARM_SEC_DEVINIT].unw_sec = s; else if (strcmp(".ARM.exidx", secname) == 0) - mod->arch.unw_sec_core = s; + maps[ARM_SEC_CORE].unw_sec = s; else if (strcmp(".init.text", secname) == 0) - mod->arch.sec_init_text = s; + maps[ARM_SEC_INIT].sec_text = s; else if (strcmp(".devinit.text", secname) == 0) - mod->arch.sec_devinit_text = s; + maps[ARM_SEC_DEVINIT].sec_text = s; else if (strcmp(".text", secname) == 0) - mod->arch.sec_core_text = s; + maps[ARM_SEC_CORE].sec_text = s; } #endif return 0; @@ -294,31 +295,22 @@ apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, #ifdef CONFIG_ARM_UNWIND static void register_unwind_tables(struct module *mod) { - if (mod->arch.unw_sec_init && mod->arch.sec_init_text) - mod->arch.unwind_init = - unwind_table_add(mod->arch.unw_sec_init->sh_addr, - mod->arch.unw_sec_init->sh_size, - mod->arch.sec_init_text->sh_addr, - mod->arch.sec_init_text->sh_size); - if (mod->arch.unw_sec_devinit && mod->arch.sec_devinit_text) - mod->arch.unwind_devinit = - unwind_table_add(mod->arch.unw_sec_devinit->sh_addr, - mod->arch.unw_sec_devinit->sh_size, - mod->arch.sec_devinit_text->sh_addr, - mod->arch.sec_devinit_text->sh_size); - if (mod->arch.unw_sec_core && mod->arch.sec_core_text) - mod->arch.unwind_core = - unwind_table_add(mod->arch.unw_sec_core->sh_addr, - mod->arch.unw_sec_core->sh_size, - mod->arch.sec_core_text->sh_addr, - mod->arch.sec_core_text->sh_size); + int i; + for (i = 0; i < ARM_SEC_MAX; ++i) { + struct arm_unwind_mapping *map = &mod->arch.map[i]; + if (map->unw_sec && map->sec_text) + map->unwind = unwind_table_add(map->unw_sec->sh_addr, + map->unw_sec->sh_size, + 
map->sec_text->sh_addr, + map->sec_text->sh_size); + } } static void unregister_unwind_tables(struct module *mod) { - unwind_table_del(mod->arch.unwind_init); - unwind_table_del(mod->arch.unwind_devinit); - unwind_table_del(mod->arch.unwind_core); + int i = ARM_SEC_MAX; + while (--i >= 0) + unwind_table_del(mod->arch.map[i].unwind); } #else static inline void register_unwind_tables(struct module *mod) { } -- cgit v1.2.3 From 09e56a2d076c2afb2a1932ae4283e10ef2d26ef3 Mon Sep 17 00:00:00 2001 From: Phil Carmody Date: Thu, 19 Aug 2010 15:19:04 +0100 Subject: ARM: 6340/1: module - additional unwind tables for exit/devexit sections Without these, exit functions cannot be stack-traced, so to speak. This implies that module unloads that perform allocations (don't laugh) will cause noisy warnings on the console when kmemleak is enabled, as it presumes that all code's call chains are traceable. Similarly, BUGs and WARN_ONs will give additional console spam. Signed-off-by: Phil Carmody Acked-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/include/asm/module.h | 2 ++ arch/arm/kernel/module.c | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index 6dcff0f7f8d7..cbb0bc295d2b 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -17,6 +17,8 @@ enum { ARM_SEC_INIT, ARM_SEC_DEVINIT, ARM_SEC_CORE, + ARM_SEC_EXIT, + ARM_SEC_DEVEXIT, ARM_SEC_MAX, }; struct mod_arch_specific { diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 0aa622e84b24..d9bd786ce23d 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -80,12 +80,20 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, maps[ARM_SEC_DEVINIT].unw_sec = s; else if (strcmp(".ARM.exidx", secname) == 0) maps[ARM_SEC_CORE].unw_sec = s; + else if (strcmp(".ARM.exidx.exit.text", secname) == 0) + maps[ARM_SEC_EXIT].unw_sec = s; + else if (strcmp(".ARM.exidx.devexit.text", secname) == 0) + maps[ARM_SEC_DEVEXIT].unw_sec = s; else if (strcmp(".init.text", secname) == 0) maps[ARM_SEC_INIT].sec_text = s; else if (strcmp(".devinit.text", secname) == 0) maps[ARM_SEC_DEVINIT].sec_text = s; else if (strcmp(".text", secname) == 0) maps[ARM_SEC_CORE].sec_text = s; + else if (strcmp(".exit.text", secname) == 0) + maps[ARM_SEC_EXIT].sec_text = s; + else if (strcmp(".devexit.text", secname) == 0) + maps[ARM_SEC_DEVEXIT].sec_text = s; } #endif return 0; -- cgit v1.2.3 From 3a4b5dca53aecb16db9e007d782b2d1e757e941a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 3 Sep 2010 10:39:59 +0100 Subject: ARM: 6355/1: hw-breakpoint: add mechanism for hooking into prefetch aborts On ARM processors with hardware breakpoint and watchpoint support, triggering these events results in a debug exception. These manifest as prefetch and data aborts respectively. arch/arm/mm/fault.c already provides hook_fault_code for hooking into data aborts dependent on the DFSR. This patch adds a new function, hook_ifault_code for hooking into prefetch aborts in the same manner. Cc: Frederic Weisbecker Cc: S. 
Karthikeyan Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/system.h | 4 ++++ arch/arm/mm/fault.c | 13 +++++++++++++ 2 files changed, 17 insertions(+) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 8ba1ccf82a02..b516cdea5ae2 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -85,6 +85,10 @@ void hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), int sig, int code, const char *name); +void hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, + struct pt_regs *), + int sig, int code, const char *name); + #define xchg(ptr,x) \ ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 23b0b03af5ea..1e21e125fe3a 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -581,6 +581,19 @@ static struct fsr_info ifsr_info[] = { { do_bad, SIGBUS, 0, "unknown 31" }, }; +void __init +hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), + int sig, int code, const char *name) +{ + if (nr < 0 || nr >= ARRAY_SIZE(ifsr_info)) + BUG(); + + ifsr_info[nr].fn = fn; + ifsr_info[nr].sig = sig; + ifsr_info[nr].code = code; + ifsr_info[nr].name = name; +} + asmlinkage void __exception do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) { -- cgit v1.2.3 From f81ef4a920c8e1af75adf9f15042c2daa49d3cb3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 3 Sep 2010 10:41:08 +0100 Subject: ARM: 6356/1: hw-breakpoint: add ARM backend for the hw-breakpoint framework The hw-breakpoint framework in the kernel requires architecture-specific support in order to install, remove, validate and manage hardware breakpoints. This patch adds initial support for this framework to the ARM architecture, but restricts the number of watchpoints to a single resource to get around the fact that the Data Fault Address Register is unknown when a watchpoint debug exception is taken. On cores with v7 debug, the Kernel can handle breakpoint and watchpoint exceptions occuring from userspace. Older cores require clients to handle the exception themselves by registering an appropriate overflow handler or, in the case of ptrace, handling the raised SIGTRAP. The memory-mapped extended debug interface is unsupported due to its unreliability in real implementations. Cc: Frederic Weisbecker Cc: S. 
Karthikeyan Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/hw_breakpoint.h | 123 +++++ arch/arm/kernel/hw_breakpoint.c | 849 +++++++++++++++++++++++++++++++++++ 2 files changed, 972 insertions(+) create mode 100644 arch/arm/include/asm/hw_breakpoint.h create mode 100644 arch/arm/kernel/hw_breakpoint.c (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h new file mode 100644 index 000000000000..33048c700caa --- /dev/null +++ b/arch/arm/include/asm/hw_breakpoint.h @@ -0,0 +1,123 @@ +#ifndef _ARM_HW_BREAKPOINT_H +#define _ARM_HW_BREAKPOINT_H + +#ifdef __KERNEL__ +struct arch_hw_breakpoint_ctrl { + u32 __reserved : 9, + mismatch : 1, + : 9, + len : 8, + type : 2, + privilege : 2, + enabled : 1; +}; + +struct arch_hw_breakpoint { + u32 address; + u32 trigger; + struct perf_event *suspended_wp; + struct arch_hw_breakpoint_ctrl ctrl; +}; + +static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl) +{ + return (ctrl.mismatch << 22) | (ctrl.len << 5) | (ctrl.type << 3) | + (ctrl.privilege << 1) | ctrl.enabled; +} + +static inline void decode_ctrl_reg(u32 reg, + struct arch_hw_breakpoint_ctrl *ctrl) +{ + ctrl->enabled = reg & 0x1; + reg >>= 1; + ctrl->privilege = reg & 0x3; + reg >>= 2; + ctrl->type = reg & 0x3; + reg >>= 2; + ctrl->len = reg & 0xff; + reg >>= 17; + ctrl->mismatch = reg & 0x1; +} + +/* Debug architecture numbers. */ +#define ARM_DEBUG_ARCH_RESERVED 0 /* In case of ptrace ABI updates. */ +#define ARM_DEBUG_ARCH_V6 1 +#define ARM_DEBUG_ARCH_V6_1 2 +#define ARM_DEBUG_ARCH_V7_ECP14 3 +#define ARM_DEBUG_ARCH_V7_MM 4 + +/* Breakpoint */ +#define ARM_BREAKPOINT_EXECUTE 0 + +/* Watchpoints */ +#define ARM_BREAKPOINT_LOAD 1 +#define ARM_BREAKPOINT_STORE 2 + +/* Privilege Levels */ +#define ARM_BREAKPOINT_PRIV 1 +#define ARM_BREAKPOINT_USER 2 + +/* Lengths */ +#define ARM_BREAKPOINT_LEN_1 0x1 +#define ARM_BREAKPOINT_LEN_2 0x3 +#define ARM_BREAKPOINT_LEN_4 0xf +#define ARM_BREAKPOINT_LEN_8 0xff + +/* Limits */ +#define ARM_MAX_BRP 16 +#define ARM_MAX_WRP 16 +#define ARM_MAX_HBP_SLOTS (ARM_MAX_BRP + ARM_MAX_WRP) + +/* DSCR method of entry bits. */ +#define ARM_DSCR_MOE(x) ((x >> 2) & 0xf) +#define ARM_ENTRY_BREAKPOINT 0x1 +#define ARM_ENTRY_ASYNC_WATCHPOINT 0x2 +#define ARM_ENTRY_SYNC_WATCHPOINT 0xa + +/* DSCR monitor/halting bits. */ +#define ARM_DSCR_HDBGEN (1 << 14) +#define ARM_DSCR_MDBGEN (1 << 15) + +/* opcode2 numbers for the co-processor instructions. */ +#define ARM_OP2_BVR 4 +#define ARM_OP2_BCR 5 +#define ARM_OP2_WVR 6 +#define ARM_OP2_WCR 7 + +/* Base register numbers for the debug registers. */ +#define ARM_BASE_BVR 64 +#define ARM_BASE_BCR 80 +#define ARM_BASE_WVR 96 +#define ARM_BASE_WCR 112 + +/* Accessor macros for the debug registers. 
*/ +#define ARM_DBG_READ(M, OP2, VAL) do {\ + asm volatile("mrc p14, 0, %0, c0," #M ", " #OP2 : "=r" (VAL));\ +} while (0) + +#define ARM_DBG_WRITE(M, OP2, VAL) do {\ + asm volatile("mcr p14, 0, %0, c0," #M ", " #OP2 : : "r" (VAL));\ +} while (0) + +struct notifier_block; +struct perf_event; +struct pmu; +struct task_struct; + +extern struct pmu perf_ops_bp; +extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, + int *gen_len, int *gen_type); +extern int arch_check_bp_in_kernelspace(struct perf_event *bp); +extern int arch_validate_hwbkpt_settings(struct perf_event *bp); +extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, + unsigned long val, void *data); +extern u8 arch_get_debug_arch(void); +extern u8 arch_get_max_wp_len(void); + +int arch_install_hw_breakpoint(struct perf_event *bp); +void arch_uninstall_hw_breakpoint(struct perf_event *bp); +void hw_breakpoint_pmu_read(struct perf_event *bp); +int hw_breakpoint_slots(int type); + +#endif /* __KERNEL__ */ +#endif /* _ARM_HW_BREAKPOINT_H */ diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c new file mode 100644 index 000000000000..54593b0c241b --- /dev/null +++ b/arch/arm/kernel/hw_breakpoint.c @@ -0,0 +1,849 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2009, 2010 ARM Limited + * + * Author: Will Deacon + */ + +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. + */ +#define pr_fmt(fmt) "hw-breakpoint: " fmt + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* Breakpoint currently in use for each BRP. */ +static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]); + +/* Watchpoint currently in use for each WRP. */ +static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]); + +/* Number of BRP/WRP registers on this CPU. */ +static int core_num_brps; +static int core_num_wrps; + +/* Debug architecture version. */ +static u8 debug_arch; + +/* Maximum supported watchpoint length. */ +static u8 max_watchpoint_len; + +/* Determine number of BRP registers available. */ +static int get_num_brps(void) +{ + u32 didr; + ARM_DBG_READ(c0, 0, didr); + return ((didr >> 24) & 0xf) + 1; +} + +/* Determine number of WRP registers available. */ +static int get_num_wrps(void) +{ + /* + * FIXME: When a watchpoint fires, the only way to work out which + * watchpoint it was is by disassembling the faulting instruction + * and working out the address of the memory access. + * + * Furthermore, we can only do this if the watchpoint was precise + * since imprecise watchpoints prevent us from calculating register + * based addresses. + * + * For the time being, we only report 1 watchpoint register so we + * always know which watchpoint fired. 
In the future we can either + * add a disassembler and address generation emulator, or we can + * insert a check to see if the DFAR is set on watchpoint exception + * entry [the ARM ARM states that the DFAR is UNKNOWN, but + * experience shows that it is set on some implementations]. + */ + +#if 0 + u32 didr, wrps; + ARM_DBG_READ(c0, 0, didr); + return ((didr >> 28) & 0xf) + 1; +#endif + + return 1; +} + +int hw_breakpoint_slots(int type) +{ + /* + * We can be called early, so don't rely on + * our static variables being initialised. + */ + switch (type) { + case TYPE_INST: + return get_num_brps(); + case TYPE_DATA: + return get_num_wrps(); + default: + pr_warning("unknown slot type: %d\n", type); + return 0; + } +} + +/* Determine debug architecture. */ +static u8 get_debug_arch(void) +{ + u32 didr; + + /* Do we implement the extended CPUID interface? */ + if (((read_cpuid_id() >> 16) & 0xf) != 0xf) { + pr_warning("CPUID feature registers not supported. " + "Assuming v6 debug is present.\n"); + return ARM_DEBUG_ARCH_V6; + } + + ARM_DBG_READ(c0, 0, didr); + return (didr >> 16) & 0xf; +} + +/* Does this core support mismatch breakpoints? */ +static int core_has_mismatch_bps(void) +{ + return debug_arch >= ARM_DEBUG_ARCH_V7_ECP14 && core_num_brps > 1; +} + +u8 arch_get_debug_arch(void) +{ + return debug_arch; +} + +#define READ_WB_REG_CASE(OP2, M, VAL) \ + case ((OP2 << 4) + M): \ + ARM_DBG_READ(c ## M, OP2, VAL); \ + break + +#define WRITE_WB_REG_CASE(OP2, M, VAL) \ + case ((OP2 << 4) + M): \ + ARM_DBG_WRITE(c ## M, OP2, VAL);\ + break + +#define GEN_READ_WB_REG_CASES(OP2, VAL) \ + READ_WB_REG_CASE(OP2, 0, VAL); \ + READ_WB_REG_CASE(OP2, 1, VAL); \ + READ_WB_REG_CASE(OP2, 2, VAL); \ + READ_WB_REG_CASE(OP2, 3, VAL); \ + READ_WB_REG_CASE(OP2, 4, VAL); \ + READ_WB_REG_CASE(OP2, 5, VAL); \ + READ_WB_REG_CASE(OP2, 6, VAL); \ + READ_WB_REG_CASE(OP2, 7, VAL); \ + READ_WB_REG_CASE(OP2, 8, VAL); \ + READ_WB_REG_CASE(OP2, 9, VAL); \ + READ_WB_REG_CASE(OP2, 10, VAL); \ + READ_WB_REG_CASE(OP2, 11, VAL); \ + READ_WB_REG_CASE(OP2, 12, VAL); \ + READ_WB_REG_CASE(OP2, 13, VAL); \ + READ_WB_REG_CASE(OP2, 14, VAL); \ + READ_WB_REG_CASE(OP2, 15, VAL) + +#define GEN_WRITE_WB_REG_CASES(OP2, VAL) \ + WRITE_WB_REG_CASE(OP2, 0, VAL); \ + WRITE_WB_REG_CASE(OP2, 1, VAL); \ + WRITE_WB_REG_CASE(OP2, 2, VAL); \ + WRITE_WB_REG_CASE(OP2, 3, VAL); \ + WRITE_WB_REG_CASE(OP2, 4, VAL); \ + WRITE_WB_REG_CASE(OP2, 5, VAL); \ + WRITE_WB_REG_CASE(OP2, 6, VAL); \ + WRITE_WB_REG_CASE(OP2, 7, VAL); \ + WRITE_WB_REG_CASE(OP2, 8, VAL); \ + WRITE_WB_REG_CASE(OP2, 9, VAL); \ + WRITE_WB_REG_CASE(OP2, 10, VAL); \ + WRITE_WB_REG_CASE(OP2, 11, VAL); \ + WRITE_WB_REG_CASE(OP2, 12, VAL); \ + WRITE_WB_REG_CASE(OP2, 13, VAL); \ + WRITE_WB_REG_CASE(OP2, 14, VAL); \ + WRITE_WB_REG_CASE(OP2, 15, VAL) + +static u32 read_wb_reg(int n) +{ + u32 val = 0; + + switch (n) { + GEN_READ_WB_REG_CASES(ARM_OP2_BVR, val); + GEN_READ_WB_REG_CASES(ARM_OP2_BCR, val); + GEN_READ_WB_REG_CASES(ARM_OP2_WVR, val); + GEN_READ_WB_REG_CASES(ARM_OP2_WCR, val); + default: + pr_warning("attempt to read from unknown breakpoint " + "register %d\n", n); + } + + return val; +} + +static void write_wb_reg(int n, u32 val) +{ + switch (n) { + GEN_WRITE_WB_REG_CASES(ARM_OP2_BVR, val); + GEN_WRITE_WB_REG_CASES(ARM_OP2_BCR, val); + GEN_WRITE_WB_REG_CASES(ARM_OP2_WVR, val); + GEN_WRITE_WB_REG_CASES(ARM_OP2_WCR, val); + default: + pr_warning("attempt to write to unknown breakpoint " + "register %d\n", n); + } + isb(); +} + +/* + * In order to access the breakpoint/watchpoint control 
registers, + * we must be running in debug monitor mode. Unfortunately, we can + * be put into halting debug mode at any time by an external debugger + * but there is nothing we can do to prevent that. + */ +static int enable_monitor_mode(void) +{ + u32 dscr; + int ret = 0; + + ARM_DBG_READ(c1, 0, dscr); + + /* Ensure that halting mode is disabled. */ + if (WARN_ONCE(dscr & ARM_DSCR_HDBGEN, "halting debug mode enabled." + "Unable to access hardware resources.")) { + ret = -EPERM; + goto out; + } + + /* Write to the corresponding DSCR. */ + switch (debug_arch) { + case ARM_DEBUG_ARCH_V6: + case ARM_DEBUG_ARCH_V6_1: + ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN)); + break; + case ARM_DEBUG_ARCH_V7_ECP14: + ARM_DBG_WRITE(c2, 2, (dscr | ARM_DSCR_MDBGEN)); + break; + default: + ret = -ENODEV; + goto out; + } + + /* Check that the write made it through. */ + ARM_DBG_READ(c1, 0, dscr); + if (WARN_ONCE(!(dscr & ARM_DSCR_MDBGEN), + "failed to enable monitor mode.")) { + ret = -EPERM; + } + +out: + return ret; +} + +/* + * Check if 8-bit byte-address select is available. + * This clobbers WRP 0. + */ +static u8 get_max_wp_len(void) +{ + u32 ctrl_reg; + struct arch_hw_breakpoint_ctrl ctrl; + u8 size = 4; + + if (debug_arch < ARM_DEBUG_ARCH_V7_ECP14) + goto out; + + if (enable_monitor_mode()) + goto out; + + memset(&ctrl, 0, sizeof(ctrl)); + ctrl.len = ARM_BREAKPOINT_LEN_8; + ctrl_reg = encode_ctrl_reg(ctrl); + + write_wb_reg(ARM_BASE_WVR, 0); + write_wb_reg(ARM_BASE_WCR, ctrl_reg); + if ((read_wb_reg(ARM_BASE_WCR) & ctrl_reg) == ctrl_reg) + size = 8; + +out: + return size; +} + +u8 arch_get_max_wp_len(void) +{ + return max_watchpoint_len; +} + +/* + * Handler for reactivating a suspended watchpoint when the single + * step `mismatch' breakpoint is triggered. + */ +static void wp_single_step_handler(struct perf_event *bp, int unused, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + perf_event_enable(counter_arch_bp(bp)->suspended_wp); + unregister_hw_breakpoint(bp); +} + +static int bp_is_single_step(struct perf_event *bp) +{ + return bp->overflow_handler == wp_single_step_handler; +} + +/* + * Install a perf counter breakpoint. + */ +int arch_install_hw_breakpoint(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + struct perf_event **slot, **slots; + int i, max_slots, ctrl_base, val_base, ret = 0; + + /* Ensure that we are in monitor mode and halting mode is disabled. */ + ret = enable_monitor_mode(); + if (ret) + goto out; + + if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { + /* Breakpoint */ + ctrl_base = ARM_BASE_BCR; + val_base = ARM_BASE_BVR; + slots = __get_cpu_var(bp_on_reg); + max_slots = core_num_brps - 1; + + if (bp_is_single_step(bp)) { + info->ctrl.mismatch = 1; + i = max_slots; + slots[i] = bp; + goto setup; + } + } else { + /* Watchpoint */ + ctrl_base = ARM_BASE_WCR; + val_base = ARM_BASE_WVR; + slots = __get_cpu_var(wp_on_reg); + max_slots = core_num_wrps; + } + + for (i = 0; i < max_slots; ++i) { + slot = &slots[i]; + + if (!*slot) { + *slot = bp; + break; + } + } + + if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) { + ret = -EBUSY; + goto out; + } + +setup: + /* Setup the address register. */ + write_wb_reg(val_base + i, info->address); + + /* Setup the control register. 
*/ + write_wb_reg(ctrl_base + i, encode_ctrl_reg(info->ctrl) | 0x1); + +out: + return ret; +} + +void arch_uninstall_hw_breakpoint(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + struct perf_event **slot, **slots; + int i, max_slots, base; + + if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { + /* Breakpoint */ + base = ARM_BASE_BCR; + slots = __get_cpu_var(bp_on_reg); + max_slots = core_num_brps - 1; + + if (bp_is_single_step(bp)) { + i = max_slots; + slots[i] = NULL; + goto reset; + } + } else { + /* Watchpoint */ + base = ARM_BASE_WCR; + slots = __get_cpu_var(wp_on_reg); + max_slots = core_num_wrps; + } + + /* Remove the breakpoint. */ + for (i = 0; i < max_slots; ++i) { + slot = &slots[i]; + + if (*slot == bp) { + *slot = NULL; + break; + } + } + + if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) + return; + +reset: + /* Reset the control register. */ + write_wb_reg(base + i, 0); +} + +static int get_hbp_len(u8 hbp_len) +{ + unsigned int len_in_bytes = 0; + + switch (hbp_len) { + case ARM_BREAKPOINT_LEN_1: + len_in_bytes = 1; + break; + case ARM_BREAKPOINT_LEN_2: + len_in_bytes = 2; + break; + case ARM_BREAKPOINT_LEN_4: + len_in_bytes = 4; + break; + case ARM_BREAKPOINT_LEN_8: + len_in_bytes = 8; + break; + } + + return len_in_bytes; +} + +/* + * Check whether bp virtual address is in kernel space. + */ +int arch_check_bp_in_kernelspace(struct perf_event *bp) +{ + unsigned int len; + unsigned long va; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + + va = info->address; + len = get_hbp_len(info->ctrl.len); + + return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); +} + +/* + * Extract generic type and length encodings from an arch_hw_breakpoint_ctrl. + * Hopefully this will disappear when ptrace can bypass the conversion + * to generic breakpoint descriptions. + */ +int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, + int *gen_len, int *gen_type) +{ + /* Type */ + switch (ctrl.type) { + case ARM_BREAKPOINT_EXECUTE: + *gen_type = HW_BREAKPOINT_X; + break; + case ARM_BREAKPOINT_LOAD: + *gen_type = HW_BREAKPOINT_R; + break; + case ARM_BREAKPOINT_STORE: + *gen_type = HW_BREAKPOINT_W; + break; + case ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE: + *gen_type = HW_BREAKPOINT_RW; + break; + default: + return -EINVAL; + } + + /* Len */ + switch (ctrl.len) { + case ARM_BREAKPOINT_LEN_1: + *gen_len = HW_BREAKPOINT_LEN_1; + break; + case ARM_BREAKPOINT_LEN_2: + *gen_len = HW_BREAKPOINT_LEN_2; + break; + case ARM_BREAKPOINT_LEN_4: + *gen_len = HW_BREAKPOINT_LEN_4; + break; + case ARM_BREAKPOINT_LEN_8: + *gen_len = HW_BREAKPOINT_LEN_8; + break; + default: + return -EINVAL; + } + + return 0; +} + +/* + * Construct an arch_hw_breakpoint from a perf_event. 
+ */ +static int arch_build_bp_info(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + + /* Type */ + switch (bp->attr.bp_type) { + case HW_BREAKPOINT_X: + info->ctrl.type = ARM_BREAKPOINT_EXECUTE; + break; + case HW_BREAKPOINT_R: + info->ctrl.type = ARM_BREAKPOINT_LOAD; + break; + case HW_BREAKPOINT_W: + info->ctrl.type = ARM_BREAKPOINT_STORE; + break; + case HW_BREAKPOINT_RW: + info->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE; + break; + default: + return -EINVAL; + } + + /* Len */ + switch (bp->attr.bp_len) { + case HW_BREAKPOINT_LEN_1: + info->ctrl.len = ARM_BREAKPOINT_LEN_1; + break; + case HW_BREAKPOINT_LEN_2: + info->ctrl.len = ARM_BREAKPOINT_LEN_2; + break; + case HW_BREAKPOINT_LEN_4: + info->ctrl.len = ARM_BREAKPOINT_LEN_4; + break; + case HW_BREAKPOINT_LEN_8: + info->ctrl.len = ARM_BREAKPOINT_LEN_8; + if ((info->ctrl.type != ARM_BREAKPOINT_EXECUTE) + && max_watchpoint_len >= 8) + break; + default: + return -EINVAL; + } + + /* Address */ + info->address = bp->attr.bp_addr; + + /* Privilege */ + info->ctrl.privilege = ARM_BREAKPOINT_USER; + if (arch_check_bp_in_kernelspace(bp) && !bp_is_single_step(bp)) + info->ctrl.privilege |= ARM_BREAKPOINT_PRIV; + + /* Enabled? */ + info->ctrl.enabled = !bp->attr.disabled; + + /* Mismatch */ + info->ctrl.mismatch = 0; + + return 0; +} + +/* + * Validate the arch-specific HW Breakpoint register settings. + */ +int arch_validate_hwbkpt_settings(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + int ret = 0; + u32 bytelen, max_len, offset, alignment_mask = 0x3; + + /* Build the arch_hw_breakpoint. */ + ret = arch_build_bp_info(bp); + if (ret) + goto out; + + /* Check address alignment. */ + if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) + alignment_mask = 0x7; + if (info->address & alignment_mask) { + /* + * Try to fix the alignment. This may result in a length + * that is too large, so we must check for that. + */ + bytelen = get_hbp_len(info->ctrl.len); + max_len = info->ctrl.type == ARM_BREAKPOINT_EXECUTE ? 4 : + max_watchpoint_len; + + if (max_len >= 8) + offset = info->address & 0x7; + else + offset = info->address & 0x3; + + if (bytelen > (1 << ((max_len - (offset + 1)) >> 1))) { + ret = -EFBIG; + goto out; + } + + info->ctrl.len <<= offset; + info->address &= ~offset; + + pr_debug("breakpoint alignment fixup: length = 0x%x, " + "address = 0x%x\n", info->ctrl.len, info->address); + } + + /* + * Currently we rely on an overflow handler to take + * care of single-stepping the breakpoint when it fires. + * In the case of userspace breakpoints on a core with V7 debug, + * we can use the mismatch feature as a poor-man's hardware single-step. 
+ */ + if (WARN_ONCE(!bp->overflow_handler && + (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_bps()), + "overflow handler required but none found")) { + ret = -EINVAL; + goto out; + } +out: + return ret; +} + +static void update_mismatch_flag(int idx, int flag) +{ + struct perf_event *bp = __get_cpu_var(bp_on_reg[idx]); + struct arch_hw_breakpoint *info; + + if (bp == NULL) + return; + + info = counter_arch_bp(bp); + + /* Update the mismatch field to enter/exit `single-step' mode */ + if (!bp->overflow_handler && info->ctrl.mismatch != flag) { + info->ctrl.mismatch = flag; + write_wb_reg(ARM_BASE_BCR + idx, encode_ctrl_reg(info->ctrl) | 0x1); + } +} + +static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) +{ + int i; + struct perf_event *bp, **slots = __get_cpu_var(wp_on_reg); + struct arch_hw_breakpoint *info; + struct perf_event_attr attr; + + /* Without a disassembler, we can only handle 1 watchpoint. */ + BUG_ON(core_num_wrps > 1); + + hw_breakpoint_init(&attr); + attr.bp_addr = regs->ARM_pc & ~0x3; + attr.bp_len = HW_BREAKPOINT_LEN_4; + attr.bp_type = HW_BREAKPOINT_X; + + for (i = 0; i < core_num_wrps; ++i) { + rcu_read_lock(); + + if (slots[i] == NULL) { + rcu_read_unlock(); + continue; + } + + /* + * The DFAR is an unknown value. Since we only allow a + * single watchpoint, we can set the trigger to the lowest + * possible faulting address. + */ + info = counter_arch_bp(slots[i]); + info->trigger = slots[i]->attr.bp_addr; + pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); + perf_bp_event(slots[i], regs); + + /* + * If no overflow handler is present, insert a temporary + * mismatch breakpoint so we can single-step over the + * watchpoint trigger. + */ + if (!slots[i]->overflow_handler) { + bp = register_user_hw_breakpoint(&attr, + wp_single_step_handler, + current); + counter_arch_bp(bp)->suspended_wp = slots[i]; + perf_event_disable(slots[i]); + } + + rcu_read_unlock(); + } +} + +static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) +{ + int i; + int mismatch; + u32 ctrl_reg, val, addr; + struct perf_event *bp, **slots = __get_cpu_var(bp_on_reg); + struct arch_hw_breakpoint *info; + struct arch_hw_breakpoint_ctrl ctrl; + + /* The exception entry code places the amended lr in the PC. */ + addr = regs->ARM_pc; + + for (i = 0; i < core_num_brps; ++i) { + rcu_read_lock(); + + bp = slots[i]; + + if (bp == NULL) { + rcu_read_unlock(); + continue; + } + + mismatch = 0; + + /* Check if the breakpoint value matches. */ + val = read_wb_reg(ARM_BASE_BVR + i); + if (val != (addr & ~0x3)) + goto unlock; + + /* Possible match, check the byte address select to confirm. */ + ctrl_reg = read_wb_reg(ARM_BASE_BCR + i); + decode_ctrl_reg(ctrl_reg, &ctrl); + if ((1 << (addr & 0x3)) & ctrl.len) { + mismatch = 1; + info = counter_arch_bp(bp); + info->trigger = addr; + } + +unlock: + if ((mismatch && !info->ctrl.mismatch) || bp_is_single_step(bp)) { + pr_debug("breakpoint fired: address = 0x%x\n", addr); + perf_bp_event(bp, regs); + } + + update_mismatch_flag(i, mismatch); + rcu_read_unlock(); + } +} + +/* + * Called from either the Data Abort Handler [watchpoint] or the + * Prefetch Abort Handler [breakpoint]. + */ +static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, + struct pt_regs *regs) +{ + int ret = 1; /* Unhandled fault. */ + u32 dscr; + + /* We only handle watchpoints and hardware breakpoints. */ + ARM_DBG_READ(c1, 0, dscr); + + /* Perform perf callbacks. 
*/ + switch (ARM_DSCR_MOE(dscr)) { + case ARM_ENTRY_BREAKPOINT: + breakpoint_handler(addr, regs); + break; + case ARM_ENTRY_ASYNC_WATCHPOINT: + WARN_ON("Asynchronous watchpoint exception taken. " + "Debugging results may be unreliable"); + case ARM_ENTRY_SYNC_WATCHPOINT: + watchpoint_handler(addr, regs); + break; + default: + goto out; + } + + ret = 0; +out: + return ret; +} + +/* + * One-time initialisation. + */ +static void __init reset_ctrl_regs(void *unused) +{ + int i; + + if (enable_monitor_mode()) + return; + + for (i = 0; i < core_num_brps; ++i) { + write_wb_reg(ARM_BASE_BCR + i, 0UL); + write_wb_reg(ARM_BASE_BVR + i, 0UL); + } + + for (i = 0; i < core_num_wrps; ++i) { + write_wb_reg(ARM_BASE_WCR + i, 0UL); + write_wb_reg(ARM_BASE_WVR + i, 0UL); + } +} + +static int __init arch_hw_breakpoint_init(void) +{ + int ret = 0; + u32 dscr; + + debug_arch = get_debug_arch(); + + if (debug_arch > ARM_DEBUG_ARCH_V7_ECP14) { + pr_info("debug architecture 0x%x unsupported.\n", debug_arch); + ret = -ENODEV; + goto out; + } + + /* Determine how many BRPs/WRPs are available. */ + core_num_brps = get_num_brps(); + core_num_wrps = get_num_wrps(); + + pr_info("found %d breakpoint and %d watchpoint registers.\n", + core_num_brps, core_num_wrps); + + if (core_has_mismatch_bps()) + pr_info("1 breakpoint reserved for watchpoint single-step.\n"); + + ARM_DBG_READ(c1, 0, dscr); + if (dscr & ARM_DSCR_HDBGEN) { + pr_warning("halting debug mode enabled. Assuming maximum " + "watchpoint size of 4 bytes."); + } else { + /* Work out the maximum supported watchpoint length. */ + max_watchpoint_len = get_max_wp_len(); + pr_info("maximum watchpoint size is %u bytes.\n", + max_watchpoint_len); + + /* + * Reset the breakpoint resources. We assume that a halting + * debugger will leave the world in a nice state for us. + */ + smp_call_function(reset_ctrl_regs, NULL, 1); + reset_ctrl_regs(NULL); + } + + /* Register debug fault handler. */ + hook_fault_code(2, hw_breakpoint_pending, SIGTRAP, TRAP_HWBKPT, + "watchpoint debug exception"); + hook_ifault_code(2, hw_breakpoint_pending, SIGTRAP, TRAP_HWBKPT, + "breakpoint debug exception"); + +out: + return ret; +} +arch_initcall(arch_hw_breakpoint_init); + +void hw_breakpoint_pmu_read(struct perf_event *bp) +{ +} + +/* + * Dummy function to register with die_notifier. + */ +int hw_breakpoint_exceptions_notify(struct notifier_block *unused, + unsigned long val, void *data) +{ + return NOTIFY_DONE; +} -- cgit v1.2.3 From 864232fa1a2f8dfe003438ef0851a56722740f3e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 3 Sep 2010 10:42:55 +0100 Subject: ARM: 6357/1: hw-breakpoint: add new ptrace requests for hw-breakpoint interaction For debuggers to take advantage of the hw-breakpoint framework in the kernel, it is necessary to expose the API calls via a ptrace interface. This patch exposes the hardware breakpoints framework as a collection of virtual registers, accesible using PTRACE_SETHBPREGS and PTRACE_GETHBPREGS requests. The breakpoints are stored in the debug_info struct of the running thread. Cc: Frederic Weisbecker Cc: S. 
Karthikeyan Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/hw_breakpoint.h | 12 +- arch/arm/include/asm/processor.h | 4 + arch/arm/include/asm/ptrace.h | 2 + arch/arm/kernel/process.c | 5 + arch/arm/kernel/ptrace.c | 239 +++++++++++++++++++++++++++++++++++ 5 files changed, 261 insertions(+), 1 deletion(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h index 33048c700caa..4d8ae9d67abe 100644 --- a/arch/arm/include/asm/hw_breakpoint.h +++ b/arch/arm/include/asm/hw_breakpoint.h @@ -2,6 +2,11 @@ #define _ARM_HW_BREAKPOINT_H #ifdef __KERNEL__ + +struct task_struct; + +#ifdef CONFIG_HAVE_HW_BREAKPOINT + struct arch_hw_breakpoint_ctrl { u32 __reserved : 9, mismatch : 1, @@ -102,7 +107,6 @@ static inline void decode_ctrl_reg(u32 reg, struct notifier_block; struct perf_event; struct pmu; -struct task_struct; extern struct pmu perf_ops_bp; extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, @@ -111,13 +115,19 @@ extern int arch_check_bp_in_kernelspace(struct perf_event *bp); extern int arch_validate_hwbkpt_settings(struct perf_event *bp); extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, unsigned long val, void *data); + extern u8 arch_get_debug_arch(void); extern u8 arch_get_max_wp_len(void); +extern void clear_ptrace_hw_breakpoint(struct task_struct *tsk); int arch_install_hw_breakpoint(struct perf_event *bp); void arch_uninstall_hw_breakpoint(struct perf_event *bp); void hw_breakpoint_pmu_read(struct perf_event *bp); int hw_breakpoint_slots(int type); +#else +static inline void clear_ptrace_hw_breakpoint(struct task_struct *tsk) {} + +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif /* __KERNEL__ */ #endif /* _ARM_HW_BREAKPOINT_H */ diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 7bed3daf83b8..67357baaeeeb 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -19,6 +19,7 @@ #ifdef __KERNEL__ +#include #include #include @@ -41,6 +42,9 @@ struct debug_entry { struct debug_info { int nsaved; struct debug_entry bp[2]; +#ifdef CONFIG_HAVE_HW_BREAKPOINT + struct perf_event *hbp[ARM_MAX_HBP_SLOTS]; +#endif }; struct thread_struct { diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 7ce15eb15f72..783d50f32618 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -29,6 +29,8 @@ #define PTRACE_SETCRUNCHREGS 26 #define PTRACE_GETVFPREGS 27 #define PTRACE_SETVFPREGS 28 +#define PTRACE_GETHBPREGS 29 +#define PTRACE_SETHBPREGS 30 /* * PSR bits diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 401e38be1f78..974af1c3eb1d 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -317,6 +318,8 @@ void flush_thread(void) struct thread_info *thread = current_thread_info(); struct task_struct *tsk = current; + flush_ptrace_hw_breakpoint(tsk); + memset(thread->used_cp, 0, sizeof(thread->used_cp)); memset(&tsk->thread.debug, 0, sizeof(struct debug_info)); memset(&thread->fpstate, 0, sizeof(union fp_state)); @@ -345,6 +348,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, thread->cpu_context.sp = (unsigned long)childregs; thread->cpu_context.pc = (unsigned long)ret_from_fork; + clear_ptrace_hw_breakpoint(p); + if (clone_flags & CLONE_SETTLS) thread->tp_value = regs->ARM_r3; diff --git a/arch/arm/kernel/ptrace.c 
b/arch/arm/kernel/ptrace.c index f99d489822d5..e0cb6370ed14 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -847,6 +849,232 @@ static int ptrace_setvfpregs(struct task_struct *tsk, void __user *data) } #endif +#ifdef CONFIG_HAVE_HW_BREAKPOINT +/* + * Convert a virtual register number into an index for a thread_info + * breakpoint array. Breakpoints are identified using positive numbers + * whilst watchpoints are negative. The registers are laid out as pairs + * of (address, control), each pair mapping to a unique hw_breakpoint struct. + * Register 0 is reserved for describing resource information. + */ +static int ptrace_hbp_num_to_idx(long num) +{ + if (num < 0) + num = (ARM_MAX_BRP << 1) - num; + return (num - 1) >> 1; +} + +/* + * Returns the virtual register number for the address of the + * breakpoint at index idx. + */ +static long ptrace_hbp_idx_to_num(int idx) +{ + long mid = ARM_MAX_BRP << 1; + long num = (idx << 1) + 1; + return num > mid ? mid - num : num; +} + +/* + * Handle hitting a HW-breakpoint. + */ +static void ptrace_hbptriggered(struct perf_event *bp, int unused, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct arch_hw_breakpoint *bkpt = counter_arch_bp(bp); + long num; + int i; + siginfo_t info; + + for (i = 0; i < ARM_MAX_HBP_SLOTS; ++i) + if (current->thread.debug.hbp[i] == bp) + break; + + num = (i == ARM_MAX_HBP_SLOTS) ? 0 : ptrace_hbp_idx_to_num(i); + + info.si_signo = SIGTRAP; + info.si_errno = (int)num; + info.si_code = TRAP_HWBKPT; + info.si_addr = (void __user *)(bkpt->trigger); + + force_sig_info(SIGTRAP, &info, current); +} + +/* + * Set ptrace breakpoint pointers to zero for this task. + * This is required in order to prevent child processes from unregistering + * breakpoints held by their parent. + */ +void clear_ptrace_hw_breakpoint(struct task_struct *tsk) +{ + memset(tsk->thread.debug.hbp, 0, sizeof(tsk->thread.debug.hbp)); +} + +/* + * Unregister breakpoints from this task and reset the pointers in + * the thread_struct. + */ +void flush_ptrace_hw_breakpoint(struct task_struct *tsk) +{ + int i; + struct thread_struct *t = &tsk->thread; + + for (i = 0; i < ARM_MAX_HBP_SLOTS; i++) { + if (t->debug.hbp[i]) { + unregister_hw_breakpoint(t->debug.hbp[i]); + t->debug.hbp[i] = NULL; + } + } +} + +static u32 ptrace_get_hbp_resource_info(void) +{ + u8 num_brps, num_wrps, debug_arch, wp_len; + u32 reg = 0; + + num_brps = hw_breakpoint_slots(TYPE_INST); + num_wrps = hw_breakpoint_slots(TYPE_DATA); + debug_arch = arch_get_debug_arch(); + wp_len = arch_get_max_wp_len(); + + reg |= debug_arch; + reg <<= 8; + reg |= wp_len; + reg <<= 8; + reg |= num_wrps; + reg <<= 8; + reg |= num_brps; + + return reg; +} + +static struct perf_event *ptrace_hbp_create(struct task_struct *tsk, int type) +{ + struct perf_event_attr attr; + + ptrace_breakpoint_init(&attr); + + /* Initialise fields to sane defaults. 
*/ + attr.bp_addr = 0; + attr.bp_len = HW_BREAKPOINT_LEN_4; + attr.bp_type = type; + attr.disabled = 1; + + return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, tsk); +} + +static int ptrace_gethbpregs(struct task_struct *tsk, long num, + unsigned long __user *data) +{ + u32 reg; + int idx, ret = 0; + struct perf_event *bp; + struct arch_hw_breakpoint_ctrl arch_ctrl; + + if (num == 0) { + reg = ptrace_get_hbp_resource_info(); + } else { + idx = ptrace_hbp_num_to_idx(num); + if (idx < 0 || idx >= ARM_MAX_HBP_SLOTS) { + ret = -EINVAL; + goto out; + } + + bp = tsk->thread.debug.hbp[idx]; + if (!bp) { + reg = 0; + goto put; + } + + arch_ctrl = counter_arch_bp(bp)->ctrl; + + /* + * Fix up the len because we may have adjusted it + * to compensate for an unaligned address. + */ + while (!(arch_ctrl.len & 0x1)) + arch_ctrl.len >>= 1; + + if (idx & 0x1) + reg = encode_ctrl_reg(arch_ctrl); + else + reg = bp->attr.bp_addr; + } + +put: + if (put_user(reg, data)) + ret = -EFAULT; + +out: + return ret; +} + +static int ptrace_sethbpregs(struct task_struct *tsk, long num, + unsigned long __user *data) +{ + int idx, gen_len, gen_type, implied_type, ret = 0; + u32 user_val; + struct perf_event *bp; + struct arch_hw_breakpoint_ctrl ctrl; + struct perf_event_attr attr; + + if (num == 0) + goto out; + else if (num < 0) + implied_type = HW_BREAKPOINT_RW; + else + implied_type = HW_BREAKPOINT_X; + + idx = ptrace_hbp_num_to_idx(num); + if (idx < 0 || idx >= ARM_MAX_HBP_SLOTS) { + ret = -EINVAL; + goto out; + } + + if (get_user(user_val, data)) { + ret = -EFAULT; + goto out; + } + + bp = tsk->thread.debug.hbp[idx]; + if (!bp) { + bp = ptrace_hbp_create(tsk, implied_type); + if (IS_ERR(bp)) { + ret = PTR_ERR(bp); + goto out; + } + tsk->thread.debug.hbp[idx] = bp; + } + + attr = bp->attr; + + if (num & 0x1) { + /* Address */ + attr.bp_addr = user_val; + } else { + /* Control */ + decode_ctrl_reg(user_val, &ctrl); + ret = arch_bp_generic_fields(ctrl, &gen_len, &gen_type); + if (ret) + goto out; + + if ((gen_type & implied_type) != gen_type) { + ret = -EINVAL; + goto out; + } + + attr.bp_len = gen_len; + attr.bp_type = gen_type; + attr.disabled = !ctrl.enabled; + } + + ret = modify_user_hw_breakpoint(bp, &attr); +out: + return ret; +} +#endif + long arch_ptrace(struct task_struct *child, long request, long addr, long data) { int ret; @@ -916,6 +1144,17 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; #endif +#ifdef CONFIG_HAVE_HW_BREAKPOINT + case PTRACE_GETHBPREGS: + ret = ptrace_gethbpregs(child, addr, + (unsigned long __user *)data); + break; + case PTRACE_SETHBPREGS: + ret = ptrace_sethbpregs(child, addr, + (unsigned long __user *)data); + break; +#endif + default: ret = ptrace_request(child, request, addr, data); break; -- cgit v1.2.3 From 0485e18bc4112a3b548baa314c24bfbece4d156b Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 5 Sep 2010 12:19:50 +0100 Subject: Revert "[ARM] pxa: remove now unnecessary dma_needs_bounce()" This reverts commit 4fa5518, which causes a compilation regression for IXP4xx platforms. 
Reported-by: Richard Cochran Acked-by: Eric Miao Signed-off-by: Russell King --- arch/arm/common/it8152.c | 8 ++++++++ arch/arm/include/asm/dma-mapping.h | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/common/it8152.c b/arch/arm/common/it8152.c index 6c0913562455..7974baacafce 100644 --- a/arch/arm/common/it8152.c +++ b/arch/arm/common/it8152.c @@ -263,6 +263,14 @@ static int it8152_pci_platform_notify_remove(struct device *dev) return 0; } +int dma_needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size) +{ + dev_dbg(dev, "%s: dma_addr %08x, size %08x\n", + __func__, dma_addr, size); + return (dev->bus == &pci_bus_type) && + ((dma_addr + size - PHYS_OFFSET) >= SZ_64M); +} + int __init it8152_pci_setup(int nr, struct pci_sys_data *sys) { it8152_io.start = IT8152_IO_BASE + 0x12000; diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index c226fe10553e..c568da7dcae4 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -288,15 +288,7 @@ extern void dmabounce_unregister_dev(struct device *); * DMA access and 1 if the buffer needs to be bounced. * */ -#ifdef CONFIG_SA1111 extern int dma_needs_bounce(struct device*, dma_addr_t, size_t); -#else -static inline int dma_needs_bounce(struct device *dev, dma_addr_t addr, - size_t size) -{ - return 0; -} -#endif /* * The DMA API, implemented by dmabounce.c. See below for descriptions. -- cgit v1.2.3 From c01778001a4f5ad9c62d882776235f3f31922fdd Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 13 Sep 2010 15:57:36 +0100 Subject: ARM: 6379/1: Assume new page cache pages have dirty D-cache There are places in Linux where writes to newly allocated page cache pages happen without a subsequent call to flush_dcache_page() (several PIO drivers including USB HCD). This patch changes the meaning of PG_arch_1 to be PG_dcache_clean and always flush the D-cache for a newly mapped page in update_mmu_cache(). The patch also sets the PG_arch_1 bit in the DMA cache maintenance function to avoid additional cache flushing in update_mmu_cache(). Tested-by: Rabin Vincent Cc: Nicolas Pitre Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/include/asm/cacheflush.h | 6 +++--- arch/arm/include/asm/tlbflush.h | 2 +- arch/arm/mm/copypage-v4mc.c | 2 +- arch/arm/mm/copypage-v6.c | 2 +- arch/arm/mm/copypage-xscale.c | 2 +- arch/arm/mm/dma-mapping.c | 6 ++++++ arch/arm/mm/fault-armv.c | 4 ++-- arch/arm/mm/flush.c | 3 ++- 8 files changed, 17 insertions(+), 10 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 4656a24058d2..d3730f0f4b50 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -137,10 +137,10 @@ #endif /* - * This flag is used to indicate that the page pointed to by a pte - * is dirty and requires cleaning before returning it to the user. + * This flag is used to indicate that the page pointed to by a pte is clean + * and does not require cleaning before returning it to the user. 
*/ -#define PG_dcache_dirty PG_arch_1 +#define PG_dcache_clean PG_arch_1 /* * MM Cache Management diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index 33b546ae72d4..9ad329ad7458 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -560,7 +560,7 @@ extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); #endif /* - * if PG_dcache_dirty is set for the page, we need to ensure that any + * If PG_dcache_clean is not set for the page, we need to ensure that any * cache entries for the kernels virtual memory range are written * back to the page. */ diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c index 598c51ad5071..b8061519ce77 100644 --- a/arch/arm/mm/copypage-v4mc.c +++ b/arch/arm/mm/copypage-v4mc.c @@ -73,7 +73,7 @@ void v4_mc_copy_user_highpage(struct page *to, struct page *from, { void *kto = kmap_atomic(to, KM_USER1); - if (test_and_clear_bit(PG_dcache_dirty, &from->flags)) + if (!test_and_set_bit(PG_dcache_clean, &from->flags)) __flush_dcache_page(page_mapping(from), from); spin_lock(&minicache_lock); diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c index f55fa1044f72..bdba6c65c901 100644 --- a/arch/arm/mm/copypage-v6.c +++ b/arch/arm/mm/copypage-v6.c @@ -79,7 +79,7 @@ static void v6_copy_user_highpage_aliasing(struct page *to, unsigned int offset = CACHE_COLOUR(vaddr); unsigned long kfrom, kto; - if (test_and_clear_bit(PG_dcache_dirty, &from->flags)) + if (!test_and_set_bit(PG_dcache_clean, &from->flags)) __flush_dcache_page(page_mapping(from), from); /* FIXME: not highmem safe */ diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c index 9920c0ae2096..649bbcd325bf 100644 --- a/arch/arm/mm/copypage-xscale.c +++ b/arch/arm/mm/copypage-xscale.c @@ -95,7 +95,7 @@ void xscale_mc_copy_user_highpage(struct page *to, struct page *from, { void *kto = kmap_atomic(to, KM_USER1); - if (test_and_clear_bit(PG_dcache_dirty, &from->flags)) + if (!test_and_set_bit(PG_dcache_clean, &from->flags)) __flush_dcache_page(page_mapping(from), from); spin_lock(&minicache_lock); diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 4bc43e535d3b..e4dd0646e859 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -523,6 +523,12 @@ void ___dma_page_dev_to_cpu(struct page *page, unsigned long off, outer_inv_range(paddr, paddr + size); dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); + + /* + * Mark the D-cache clean for this page to avoid extra flushing. + */ + if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE) + set_bit(PG_dcache_clean, &page->flags); } EXPORT_SYMBOL(___dma_page_dev_to_cpu); diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 9b906dec1ca1..58846cbd0e0b 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -141,7 +141,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, * a page table, or changing an existing PTE. Basically, there are two * things that we need to take care of: * - * 1. If PG_dcache_dirty is set for the page, we need to ensure + * 1. If PG_dcache_clean is not set for the page, we need to ensure * that any cache entries for the kernels virtual memory * range are written back to the page. * 2. 
If we have multiple shared mappings of the same space in @@ -169,7 +169,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, mapping = page_mapping(page); #ifndef CONFIG_SMP - if (test_and_clear_bit(PG_dcache_dirty, &page->flags)) + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) __flush_dcache_page(mapping, page); #endif if (mapping) { diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 87dd5ffdfa2d..b4efce9b7985 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -248,7 +248,7 @@ void flush_dcache_page(struct page *page) #ifndef CONFIG_SMP if (mapping && !mapping_mapped(mapping)) - set_bit(PG_dcache_dirty, &page->flags); + clear_bit(PG_dcache_clean, &page->flags); else #endif { @@ -257,6 +257,7 @@ void flush_dcache_page(struct page *page) __flush_dcache_aliases(mapping, page); else if (mapping) __flush_icache_all(); + set_bit(PG_dcache_clean, &page->flags); } } EXPORT_SYMBOL(flush_dcache_page); -- cgit v1.2.3 From 6012191aa9c6ffff3a23b81162298318b56d7cb3 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 13 Sep 2010 15:58:06 +0100 Subject: ARM: 6380/1: Introduce __sync_icache_dcache() for VIPT caches On SMP systems, there is a small chance of a PTE becoming visible to a different CPU before the current cache maintenance operations in update_mmu_cache(). To avoid this, cache maintenance must be handled in set_pte_at() (similar to IA-64 and PowerPC). This patch provides a unified VIPT cache handling mechanism and implements the __sync_icache_dcache() function for ARMv6 onwards architectures. It is called from set_pte_at() and replaces the update_mmu_cache(). The latter is still used on VIVT hardware where a vm_area_struct is required. Tested-by: Rabin Vincent Cc: Nicolas Pitre Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/include/asm/pgtable.h | 26 +++++++++++++++++++++++--- arch/arm/include/asm/tlbflush.h | 10 +++++++++- arch/arm/mm/fault-armv.c | 4 ++-- arch/arm/mm/flush.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 64 insertions(+), 6 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index ab68cf1ef80f..42e694f1d58e 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -278,9 +278,24 @@ extern struct page *empty_zero_page; #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext) -#define set_pte_at(mm,addr,ptep,pteval) do { \ - set_pte_ext(ptep, pteval, (addr) >= TASK_SIZE ? 0 : PTE_EXT_NG); \ - } while (0) +#if __LINUX_ARM_ARCH__ < 6 +static inline void __sync_icache_dcache(pte_t pteval) +{ +} +#else +extern void __sync_icache_dcache(pte_t pteval); +#endif + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + if (addr >= TASK_SIZE) + set_pte_ext(ptep, pteval, 0); + else { + __sync_icache_dcache(pteval); + set_pte_ext(ptep, pteval, PTE_EXT_NG); + } +} /* * The following only work if pte_present() is true. 
@@ -290,8 +305,13 @@ extern struct page *empty_zero_page; #define pte_write(pte) (pte_val(pte) & L_PTE_WRITE) #define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY) #define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG) +#define pte_exec(pte) (pte_val(pte) & L_PTE_EXEC) #define pte_special(pte) (0) +#define pte_present_user(pte) \ + ((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \ + (L_PTE_PRESENT | L_PTE_USER)) + #define PTE_BIT_FUNC(fn,op) \ static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index 9ad329ad7458..989c9e57d92b 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -562,10 +562,18 @@ extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); /* * If PG_dcache_clean is not set for the page, we need to ensure that any * cache entries for the kernels virtual memory range are written - * back to the page. + * back to the page. On ARMv6 and later, the cache coherency is handled via + * the set_pte_at() function. */ +#if __LINUX_ARM_ARCH__ < 6 extern void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); +#else +static inline void update_mmu_cache(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ +} +#endif #endif diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 58846cbd0e0b..8440d952ba6d 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -28,6 +28,7 @@ static unsigned long shared_pte_mask = L_PTE_MT_BUFFERABLE; +#if __LINUX_ARM_ARCH__ < 6 /* * We take the easy way out of this problem - we make the * PTE uncacheable. However, we leave the write buffer on. @@ -168,10 +169,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, return; mapping = page_mapping(page); -#ifndef CONFIG_SMP if (!test_and_set_bit(PG_dcache_clean, &page->flags)) __flush_dcache_page(mapping, page); -#endif if (mapping) { if (cache_is_vivt()) make_coherent(mapping, vma, addr, ptep, pfn); @@ -179,6 +178,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, __flush_icache_all(); } } +#endif /* __LINUX_ARM_ARCH__ < 6 */ /* * Check whether the write buffer has physical address aliasing diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index b4efce9b7985..dd5b0120b92e 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -215,6 +215,36 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p flush_dcache_mmap_unlock(mapping); } +#if __LINUX_ARM_ARCH__ >= 6 +void __sync_icache_dcache(pte_t pteval) +{ + unsigned long pfn; + struct page *page; + struct address_space *mapping; + + if (!pte_present_user(pteval)) + return; + if (cache_is_vipt_nonaliasing() && !pte_exec(pteval)) + /* only flush non-aliasing VIPT caches for exec mappings */ + return; + pfn = pte_pfn(pteval); + if (!pfn_valid(pfn)) + return; + + page = pfn_to_page(pfn); + if (cache_is_vipt_aliasing()) + mapping = page_mapping(page); + else + mapping = NULL; + + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + __flush_dcache_page(mapping, page); + /* pte_exec() already checked above for non-aliasing VIPT cache */ + if (cache_is_vipt_nonaliasing() || pte_exec(pteval)) + __flush_icache_all(); +} +#endif + /* * Ensure cache coherency between kernel mapping and userspace mapping * of this page. 
-- cgit v1.2.3 From 85848dd7ab75fce1134856228582a8df522c91d9 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 13 Sep 2010 15:58:37 +0100 Subject: ARM: 6381/1: Use lazy cache flushing on ARMv7 SMP systems ARMv7 processors like Cortex-A9 broadcast the cache maintenance operations in hardware. This patch allows the flush_dcache_page/update_mmu_cache pair to work in lazy flushing mode similar to the UP case. Note that cache flushing on SMP systems now takes place via the set_pte_at() call (__sync_icache_dcache) and there is no race with other CPUs executing code from the new PTE before the cache flushing took place. Tested-by: Rabin Vincent Cc: Nicolas Pitre Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/include/asm/smp_plat.h | 4 ++++ arch/arm/mm/flush.c | 13 ++++--------- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index e6215305544a..963a338d567b 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -13,9 +13,13 @@ static inline int tlb_ops_need_broadcast(void) return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 2; } +#if !defined(CONFIG_SMP) || __LINUX_ARM_ARCH__ >= 7 +#define cache_ops_need_broadcast() 0 +#else static inline int cache_ops_need_broadcast(void) { return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 1; } +#endif #endif diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index dd5b0120b92e..2332b774c6b9 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "mm.h" @@ -93,12 +94,10 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsig #define flush_pfn_alias(pfn,vaddr) do { } while (0) #endif -#ifdef CONFIG_SMP static void flush_ptrace_access_other(void *args) { __flush_icache_all(); } -#endif static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, @@ -122,11 +121,9 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, if (vma->vm_flags & VM_EXEC) { unsigned long addr = (unsigned long)kaddr; __cpuc_coherent_kern_range(addr, addr + len); -#ifdef CONFIG_SMP if (cache_ops_need_broadcast()) smp_call_function(flush_ptrace_access_other, NULL, 1); -#endif } } @@ -276,12 +273,10 @@ void flush_dcache_page(struct page *page) mapping = page_mapping(page); -#ifndef CONFIG_SMP - if (mapping && !mapping_mapped(mapping)) + if (!cache_ops_need_broadcast() && + mapping && !mapping_mapped(mapping)) clear_bit(PG_dcache_clean, &page->flags); - else -#endif - { + else { __flush_dcache_page(mapping, page); if (mapping && cache_is_vivt()) __flush_dcache_aliases(mapping, page); -- cgit v1.2.3 From f8b63c184ad13cc8adc3dadb557d4fbc29f76e4d Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 13 Sep 2010 15:59:07 +0100 Subject: ARM: 6382/1: Remove superfluous flush_kernel_dcache_page() Since page cache pages are now considered 'dirty' by default, the cache flushing is handled via __flush_dcache_page() when a page gets mapped to user space. Highmem pages on VIVT systems are flushed during kunmap() and flush_kernel_dcache_page() was already a no-op in this case. ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE is still defined since ARM needs specific implementations for flush_kernel_vmap_range() and invalidate_kernel_vmap_range(). 
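As an illustrative sketch of the lazy-flush model this and the preceding patches converge on (example_map_to_user() is a hypothetical helper invented here for exposition; the two primitives inside it are the real ones used throughout the series):

/*
 * Sketch only: a page is assumed to have a dirty D-cache
 * (PG_dcache_clean unset) until it is first mapped into user
 * space, where test_and_set_bit() guarantees at most one flush
 * until the flag is cleared again.
 */
static void example_map_to_user(struct page *page,
				struct address_space *mapping)
{
	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
		__flush_dcache_page(mapping, page);
}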
Cc: Nicolas Pitre Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/include/asm/cacheflush.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index d3730f0f4b50..042e13994d38 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -405,9 +405,6 @@ static inline void flush_anon_page(struct vm_area_struct *vma, #define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE static inline void flush_kernel_dcache_page(struct page *page) { - /* highmem pages are always flushed upon kunmap already */ - if ((cache_is_vivt() || cache_is_vipt_aliasing()) && !PageHighMem(page)) - __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); } #define flush_dcache_mmap_lock(mapping) \ -- cgit v1.2.3 From d907387c42e9e39261629890e45a08ef4c3ed3fe Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 13 Sep 2010 16:01:24 +0100 Subject: ARM: 6383/1: Implement phys_mem_access_prot() to avoid attributes aliasing ARMv7 onwards requires that there are no aliases to the same physical location using different memory types (i.e. Normal vs Strongly Ordered). Access to SO mappings when the unaligned accesses are handled in hardware is also Unpredictable (pgprot_noncached() mappings in user space). The /dev/mem driver requires uncached mappings with O_SYNC. The patch implements the phys_mem_access_prot() function which generates Strongly Ordered memory attributes if !pfn_valid() (independent of O_SYNC) and Normal Noncacheable (writecombine) if O_SYNC. Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/include/asm/pgtable.h | 4 ++++ arch/arm/mm/mmu.c | 14 ++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index ab68cf1ef80f..e90b167ea848 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -317,6 +317,10 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; } #ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE #define pgprot_dmacoherent(prot) \ __pgprot_modify(prot, L_PTE_MT_MASK|L_PTE_EXEC, L_PTE_MT_BUFFERABLE) +#define __HAVE_PHYS_MEM_ACCESS_PROT +struct file; +extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); #else #define pgprot_dmacoherent(prot) \ __pgprot_modify(prot, L_PTE_MT_MASK|L_PTE_EXEC, L_PTE_MT_UNCACHED) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 6e1c4f6a2b3f..a486bd0d97dc 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -498,6 +499,19 @@ static void __init build_mem_type_table(void) } } +#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) +{ + if (!pfn_valid(pfn)) + return pgprot_noncached(vma_prot); + else if (file->f_flags & O_SYNC) + return pgprot_writecombine(vma_prot); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); +#endif + #define vectors_base() (vectors_high() ? 
0xffff0000 : 0) static void __init *early_alloc(unsigned long sz) -- cgit v1.2.3 From 8234eaef8002cb8ba30920949b338d692508137a Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 4 Aug 2010 11:22:43 +0100 Subject: ARM: 6291/1: coresight: move struct tracectx inside etm driver This is done so as to be able to make use of the coresight components' registers in assembler code (like omap sleep code). Also, there shouldn't be any users of this structure outside the etm driver. Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Alexander Shishkin Signed-off-by: Russell King --- arch/arm/include/asm/hardware/coresight.h | 12 ------------ arch/arm/kernel/etm.c | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 12 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/hardware/coresight.h b/arch/arm/include/asm/hardware/coresight.h index 212e47828c79..d84605175fc4 100644 --- a/arch/arm/include/asm/hardware/coresight.h +++ b/arch/arm/include/asm/hardware/coresight.h @@ -21,18 +21,6 @@ #define TRACER_RUNNING BIT(TRACER_RUNNING_BIT) #define TRACER_CYCLE_ACC BIT(TRACER_CYCLE_ACC_BIT) -struct tracectx { - unsigned int etb_bufsz; - void __iomem *etb_regs; - void __iomem *etm_regs; - unsigned long flags; - int ncmppairs; - int etm_portsz; - struct device *dev; - struct clk *emu_clk; - struct mutex mutex; -}; - #define TRACER_TIMEOUT 10000 #define etm_writel(t, v, x) \ diff --git a/arch/arm/kernel/etm.c b/arch/arm/kernel/etm.c index 33c7077174db..a48d51257988 100644 --- a/arch/arm/kernel/etm.c +++ b/arch/arm/kernel/etm.c @@ -30,6 +30,21 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alexander Shishkin"); +/* + * ETM tracer state + */ +struct tracectx { + unsigned int etb_bufsz; + void __iomem *etb_regs; + void __iomem *etm_regs; + unsigned long flags; + int ncmppairs; + int etm_portsz; + struct device *dev; + struct clk *emu_clk; + struct mutex mutex; +}; + static struct tracectx tracer; static inline bool trace_isrunning(struct tracectx *t) -- cgit v1.2.3 From 9f5336915bc47947230290a1a384de95803f815a Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 4 Aug 2010 11:26:27 +0100 Subject: ARM: 6293/1: coresight: cosmetic fixes Use BIT() macro whenever it is sensible to do so. 
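For reference (not part of this patch), BIT() is the generic helper from <linux/bitops.h>; assuming its usual definition there, the conversion preserves every mask value and merely promotes it to unsigned long:

#define BIT(nr)		(1UL << (nr))	/* as in <linux/bitops.h> */

/* e.g. ETMST_TRIGGER: (1 << 3) == 0x8 == BIT(3) */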
Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Alexander Shishkin Signed-off-by: Russell King --- arch/arm/include/asm/hardware/coresight.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/hardware/coresight.h b/arch/arm/include/asm/hardware/coresight.h index d84605175fc4..7ecd793b8f5a 100644 --- a/arch/arm/include/asm/hardware/coresight.h +++ b/arch/arm/include/asm/hardware/coresight.h @@ -100,10 +100,10 @@ /* ETM status register, "ETM Architecture", 3.3.2 */ #define ETMR_STATUS (0x10) -#define ETMST_OVERFLOW (1 << 0) -#define ETMST_PROGBIT (1 << 1) -#define ETMST_STARTSTOP (1 << 2) -#define ETMST_TRIGGER (1 << 3) +#define ETMST_OVERFLOW BIT(0) +#define ETMST_PROGBIT BIT(1) +#define ETMST_STARTSTOP BIT(2) +#define ETMST_TRIGGER BIT(3) #define etm_progbit(t) (etm_readl((t), ETMR_STATUS) & ETMST_PROGBIT) #define etm_started(t) (etm_readl((t), ETMR_STATUS) & ETMST_STARTSTOP) @@ -111,7 +111,7 @@ #define ETMR_TRACEENCTRL2 0x1c #define ETMR_TRACEENCTRL 0x24 -#define ETMTE_INCLEXCL (1 << 24) +#define ETMTE_INCLEXCL BIT(24) #define ETMR_TRACEENEVT 0x20 #define ETMCTRL_OPTS (ETMCTRL_DO_CPRT | \ ETMCTRL_DATA_DO_ADDR | \ @@ -134,12 +134,12 @@ #define ETBR_CTRL 0x20 #define ETBR_FORMATTERCTRL 0x304 #define ETBFF_ENFTC 1 -#define ETBFF_ENFCONT (1 << 1) -#define ETBFF_FONFLIN (1 << 4) -#define ETBFF_MANUAL_FLUSH (1 << 6) -#define ETBFF_TRIGIN (1 << 8) -#define ETBFF_TRIGEVT (1 << 9) -#define ETBFF_TRIGFL (1 << 10) +#define ETBFF_ENFCONT BIT(1) +#define ETBFF_FONFLIN BIT(4) +#define ETBFF_MANUAL_FLUSH BIT(6) +#define ETBFF_TRIGIN BIT(8) +#define ETBFF_TRIGEVT BIT(9) +#define ETBFF_TRIGFL BIT(10) #define etb_writel(t, v, x) \ (__raw_writel((v), (t)->etb_regs + (x))) -- cgit v1.2.3 From 067173526c3bbc2eaeefcf6b7b2a9d998b9e8042 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 4 Sep 2010 16:14:20 +0100 Subject: ARM: Provide common header for hard_smp_processor_id() Provide a common header to read the SMP CPU number from the MPIDR. 
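A usage sketch (the function below is hypothetical; the header and macro are the ones added by this patch):

#include <linux/kernel.h>
#include <asm/smp_mpidr.h>

static void example_report_cpu(void)
{
	/* hard_smp_processor_id() returns MPIDR bits [3:0] */
	printk(KERN_INFO "running on physical CPU %u\n",
	       hard_smp_processor_id());
}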
Signed-off-by: Russell King --- arch/arm/include/asm/smp_mpidr.h | 12 ++++++++++++ arch/arm/mach-realview/include/mach/smp.h | 10 +--------- arch/arm/mach-s5pv310/include/mach/smp.h | 9 +-------- arch/arm/mach-tegra/include/mach/smp.h | 10 +--------- arch/arm/mach-ux500/include/mach/smp.h | 9 +-------- arch/arm/mach-vexpress/include/mach/smp.h | 9 +-------- arch/arm/plat-omap/include/plat/smp.h | 12 +----------- 7 files changed, 18 insertions(+), 53 deletions(-) create mode 100644 arch/arm/include/asm/smp_mpidr.h (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/smp_mpidr.h b/arch/arm/include/asm/smp_mpidr.h new file mode 100644 index 000000000000..7da7105e83e5 --- /dev/null +++ b/arch/arm/include/asm/smp_mpidr.h @@ -0,0 +1,12 @@ +#ifndef ASMARM_SMP_MIDR_H +#define ASMARM_SMP_MIDR_H + +#define hard_smp_processor_id() \ + ({ \ + unsigned int cpunum; \ + __asm__("mrc p15, 0, %0, c0, c0, 5\n" \ + : "=r" (cpunum)); \ + cpunum &= 0x0F; \ + }) + +#endif diff --git a/arch/arm/mach-realview/include/mach/smp.h b/arch/arm/mach-realview/include/mach/smp.h index dd53892d44a7..d3cd265cb058 100644 --- a/arch/arm/mach-realview/include/mach/smp.h +++ b/arch/arm/mach-realview/include/mach/smp.h @@ -1,16 +1,8 @@ #ifndef ASMARM_ARCH_SMP_H #define ASMARM_ARCH_SMP_H - #include - -#define hard_smp_processor_id() \ - ({ \ - unsigned int cpunum; \ - __asm__("mrc p15, 0, %0, c0, c0, 5" \ - : "=r" (cpunum)); \ - cpunum &= 0x0F; \ - }) +#include /* * We use IRQ1 as the IPI diff --git a/arch/arm/mach-s5pv310/include/mach/smp.h b/arch/arm/mach-s5pv310/include/mach/smp.h index 990f3ba88a1f..b7ec252384f4 100644 --- a/arch/arm/mach-s5pv310/include/mach/smp.h +++ b/arch/arm/mach-s5pv310/include/mach/smp.h @@ -7,17 +7,10 @@ #define ASM_ARCH_SMP_H __FILE__ #include +#include extern void __iomem *gic_cpu_base_addr; -#define hard_smp_processor_id() \ - ({ \ - unsigned int cpunum; \ - __asm__("mrc p15, 0, %0, c0, c0, 5" \ - : "=r" (cpunum)); \ - cpunum &= 0x03; \ - }) - /* * We use IRQ1 as the IPI */ diff --git a/arch/arm/mach-tegra/include/mach/smp.h b/arch/arm/mach-tegra/include/mach/smp.h index 8b42dab79a70..e4a34a35a544 100644 --- a/arch/arm/mach-tegra/include/mach/smp.h +++ b/arch/arm/mach-tegra/include/mach/smp.h @@ -1,16 +1,8 @@ #ifndef ASMARM_ARCH_SMP_H #define ASMARM_ARCH_SMP_H - #include - -#define hard_smp_processor_id() \ - ({ \ - unsigned int cpunum; \ - __asm__("mrc p15, 0, %0, c0, c0, 5" \ - : "=r" (cpunum)); \ - cpunum &= 0x0F; \ - }) +#include /* * We use IRQ1 as the IPI diff --git a/arch/arm/mach-ux500/include/mach/smp.h b/arch/arm/mach-ux500/include/mach/smp.h index b59f7bc9725d..197e8417375e 100644 --- a/arch/arm/mach-ux500/include/mach/smp.h +++ b/arch/arm/mach-ux500/include/mach/smp.h @@ -10,18 +10,11 @@ #define ASMARM_ARCH_SMP_H #include +#include /* This is required to wakeup the secondary core */ extern void u8500_secondary_startup(void); -#define hard_smp_processor_id() \ - ({ \ - unsigned int cpunum; \ - __asm__("mrc p15, 0, %0, c0, c0, 5" \ - : "=r" (cpunum)); \ - cpunum &= 0x0F; \ - }) - /* * We use IRQ1 as the IPI */ diff --git a/arch/arm/mach-vexpress/include/mach/smp.h b/arch/arm/mach-vexpress/include/mach/smp.h index 72a9621ed087..5a6da4fd247e 100644 --- a/arch/arm/mach-vexpress/include/mach/smp.h +++ b/arch/arm/mach-vexpress/include/mach/smp.h @@ -2,14 +2,7 @@ #define __MACH_SMP_H #include - -#define hard_smp_processor_id() \ - ({ \ - unsigned int cpunum; \ - __asm__("mrc p15, 0, %0, c0, c0, 5" \ - : "=r" (cpunum)); \ - cpunum &= 0x0F; \ - }) +#include /* * We use IRQ1 as 
the IPI diff --git a/arch/arm/plat-omap/include/plat/smp.h b/arch/arm/plat-omap/include/plat/smp.h index 5177a9c5a25a..ecd6a488c497 100644 --- a/arch/arm/plat-omap/include/plat/smp.h +++ b/arch/arm/plat-omap/include/plat/smp.h @@ -18,6 +18,7 @@ #define OMAP_ARCH_SMP_H #include +#include /* Needed for secondary core boot */ extern void omap_secondary_startup(void); @@ -33,15 +34,4 @@ static inline void smp_cross_call(const struct cpumask *mask) gic_raise_softirq(mask, 1); } -/* - * Read MPIDR: Multiprocessor affinity register - */ -#define hard_smp_processor_id() \ - ({ \ - unsigned int cpunum; \ - __asm__("mrc p15, 0, %0, c0, c0, 5" \ - : "=r" (cpunum)); \ - cpunum &= 0x0F; \ - }) - #endif -- cgit v1.2.3 From f00ec48fadf5e37e7889f14cff900aa70d18b644 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 4 Sep 2010 10:47:48 +0100 Subject: ARM: Allow SMP kernels to boot on UP systems UP systems do not implement all the instructions that SMP systems have, so in order to boot a SMP kernel on a UP system, we need to rewrite parts of the kernel. Do this using an 'alternatives' scheme, where the kernel code and data is modified prior to initialization to replace the SMP instructions, thereby rendering the problematical code ineffectual. We use the linker to generate a list of 32-bit word locations and their replacement values, and run through these replacements when we detect a UP system. Signed-off-by: Russell King --- arch/arm/Kconfig | 13 +++++++++++ arch/arm/include/asm/assembler.h | 27 ++++++++++++++++++++-- arch/arm/include/asm/smp_mpidr.h | 7 +++++- arch/arm/include/asm/smp_plat.h | 15 ++++++++++++ arch/arm/include/asm/tlbflush.h | 24 +++++++++++++------ arch/arm/kernel/entry-armv.S | 11 ++++----- arch/arm/kernel/head.S | 50 ++++++++++++++++++++++++++++++++++++++++ arch/arm/kernel/setup.c | 4 +++- arch/arm/kernel/vmlinux.lds.S | 11 +++++++++ arch/arm/mm/cache-v7.S | 14 ++++------- arch/arm/mm/mmu.c | 46 +++++++++++++++++------------------- arch/arm/mm/proc-v6.S | 43 ++++++++++++++++++++++------------ arch/arm/mm/proc-v7.S | 41 +++++++++++++++++++++----------- arch/arm/mm/tlb-v7.S | 33 ++++++++++---------------- 14 files changed, 237 insertions(+), 102 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 6a939e64cbd7..f6cdc21b562c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1191,6 +1191,19 @@ config SMP If you don't know what to do here, say N. +config SMP_ON_UP + bool "Allow booting SMP kernel on uniprocessor systems (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on SMP && !XIP && !THUMB2_KERNEL + default y + help + SMP kernels contain instructions which fail on non-SMP processors. + Enabling this option allows the kernel to modify itself to make + these instructions safe. Disabling it allows about 1K of space + savings. + + If you don't know what to do here, say Y. + config HAVE_ARM_SCU bool depends on SMP diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 6e8f05c8a1c8..062b58c029ab 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -154,16 +154,39 @@ .long 9999b,9001f; \ .popsection +#ifdef CONFIG_SMP +#define ALT_SMP(instr...) \ +9998: instr +#define ALT_UP(instr...) \ + .pushsection ".alt.smp.init", "a" ;\ + .long 9998b ;\ + instr ;\ + .popsection +#define ALT_UP_B(label) \ + .equ up_b_offset, label - 9998b ;\ + .pushsection ".alt.smp.init", "a" ;\ + .long 9998b ;\ + b . + up_b_offset ;\ + .popsection +#else +#define ALT_SMP(instr...) 
+#define ALT_UP(instr...) instr +#define ALT_UP_B(label) b label +#endif + /* * SMP data memory barrier */ .macro smp_dmb #ifdef CONFIG_SMP #if __LINUX_ARM_ARCH__ >= 7 - dmb + ALT_SMP(dmb) #elif __LINUX_ARM_ARCH__ == 6 - mcr p15, 0, r0, c7, c10, 5 @ dmb + ALT_SMP(mcr p15, 0, r0, c7, c10, 5) @ dmb +#else +#error Incompatible SMP platform #endif + ALT_UP(nop) #endif .endm diff --git a/arch/arm/include/asm/smp_mpidr.h b/arch/arm/include/asm/smp_mpidr.h index 7da7105e83e5..6a9307d64900 100644 --- a/arch/arm/include/asm/smp_mpidr.h +++ b/arch/arm/include/asm/smp_mpidr.h @@ -4,7 +4,12 @@ #define hard_smp_processor_id() \ ({ \ unsigned int cpunum; \ - __asm__("mrc p15, 0, %0, c0, c0, 5\n" \ + __asm__("\n" \ + "1: mrc p15, 0, %0, c0, c0, 5\n" \ + " .pushsection \".alt.smp.init\", \"a\"\n"\ + " .long 1b\n" \ + " mov %0, #0\n" \ + " .popsection" \ : "=r" (cpunum)); \ cpunum &= 0x0F; \ }) diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index e6215305544a..7de5aa56c18b 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -18,4 +18,19 @@ static inline int cache_ops_need_broadcast(void) return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 1; } +/* + * Return true if we are running on a SMP platform + */ +static inline bool is_smp(void) +{ +#ifndef CONFIG_SMP + return false; +#elif defined(CONFIG_SMP_ON_UP) + extern unsigned int smp_on_up; + return !!smp_on_up; +#else + return true; +#endif +} + #endif diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index 33b546ae72d4..cf2f018492e0 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -70,6 +70,10 @@ #undef _TLB #undef MULTI_TLB +#ifdef CONFIG_SMP_ON_UP +#define MULTI_TLB 1 +#endif + #define v3_tlb_flags (TLB_V3_FULL | TLB_V3_PAGE) #ifdef CONFIG_CPU_TLB_V3 @@ -185,17 +189,23 @@ # define v6wbi_always_flags (-1UL) #endif -#ifdef CONFIG_SMP -#define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_V7_IS_BTB | \ +#define v7wbi_tlb_flags_smp (TLB_WB | TLB_DCLEAN | TLB_V7_IS_BTB | \ TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID) -#else -#define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \ +#define v7wbi_tlb_flags_up (TLB_WB | TLB_DCLEAN | TLB_BTB | \ TLB_V6_U_FULL | TLB_V6_U_PAGE | TLB_V6_U_ASID) -#endif #ifdef CONFIG_CPU_TLB_V7 -# define v7wbi_possible_flags v7wbi_tlb_flags -# define v7wbi_always_flags v7wbi_tlb_flags + +# ifdef CONFIG_SMP_ON_UP +# define v7wbi_possible_flags (v7wbi_tlb_flags_smp | v7wbi_tlb_flags_up) +# define v7wbi_always_flags (v7wbi_tlb_flags_smp & v7wbi_tlb_flags_up) +# elif defined(CONFIG_SMP) +# define v7wbi_possible_flags v7wbi_tlb_flags_smp +# define v7wbi_always_flags v7wbi_tlb_flags_smp +# else +# define v7wbi_possible_flags v7wbi_tlb_flags_up +# define v7wbi_always_flags v7wbi_tlb_flags_up +# endif # ifdef _TLB # define MULTI_TLB 1 # else diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index bb8e93a76407..c09e3573c5de 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -46,7 +46,8 @@ * this macro assumes that irqstat (r6) and base (r5) are * preserved from get_irqnr_and_base above */ - test_for_ipi r0, r6, r5, lr + ALT_SMP(test_for_ipi r0, r6, r5, lr) + ALT_UP_B(9997f) movne r0, sp adrne lr, BSYM(1b) bne do_IPI @@ -57,6 +58,7 @@ adrne lr, BSYM(1b) bne do_local_timer #endif +9997: #endif .endm @@ -965,11 +967,8 @@ kuser_cmpxchg_fixup: beq 1b rsbs r0, r3, #0 /* beware -- each __kuser slot must be 8 instructions max */ -#ifdef CONFIG_SMP - b 
__kuser_memory_barrier -#else - usr_ret lr -#endif + ALT_SMP(b __kuser_memory_barrier) + ALT_UP(usr_ret lr) #endif diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index eb62bf947212..b44d21e1e344 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -86,6 +86,9 @@ ENTRY(stext) movs r8, r5 @ invalid machine (r5=0)? beq __error_a @ yes, error 'a' bl __vet_atags +#ifdef CONFIG_SMP_ON_UP + bl __fixup_smp +#endif bl __create_page_tables /* @@ -333,4 +336,51 @@ __create_page_tables: ENDPROC(__create_page_tables) .ltorg +#ifdef CONFIG_SMP_ON_UP +__fixup_smp: + mov r7, #0x00070000 + orr r6, r7, #0xff000000 @ mask 0xff070000 + orr r7, r7, #0x41000000 @ val 0x41070000 + and r0, r9, r6 + teq r0, r7 @ ARM CPU and ARMv6/v7? + bne __fixup_smp_on_up @ no, assume UP + + orr r6, r6, #0x0000ff00 + orr r6, r6, #0x000000f0 @ mask 0xff07fff0 + orr r7, r7, #0x0000b000 + orr r7, r7, #0x00000020 @ val 0x4107b020 + and r0, r9, r6 + teq r0, r7 @ ARM 11MPCore? + moveq pc, lr @ yes, assume SMP + + mrc p15, 0, r0, c0, c0, 5 @ read MPIDR + tst r0, #1 << 31 + movne pc, lr @ bit 31 => SMP + +__fixup_smp_on_up: + adr r0, 1f + ldmia r0, {r3, r6, r7} + sub r3, r0, r3 + add r6, r6, r3 + add r7, r7, r3 +2: cmp r6, r7 + ldmia r6!, {r0, r4} + strlo r4, [r0, r3] + blo 2b + mov pc, lr +ENDPROC(__fixup_smp) + +1: .word . + .word __smpalt_begin + .word __smpalt_end + + .pushsection .data + .globl smp_on_up +smp_on_up: + ALT_SMP(.long 1) + ALT_UP(.long 0) + .popsection + +#endif + #include "head-common.S" diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index d5231ae7355a..5a82c39ca85e 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -825,7 +826,8 @@ void __init setup_arch(char **cmdline_p) request_standard_resources(&meminfo, mdesc); #ifdef CONFIG_SMP - smp_init_cpus(); + if (is_smp()) + smp_init_cpus(); #endif reserve_crashkernel(); diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index b16c07914b55..fd5750b8ac84 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -40,6 +40,11 @@ SECTIONS __tagtable_begin = .; *(.taglist.init) __tagtable_end = .; +#ifdef CONFIG_SMP_ON_UP + __smpalt_begin = .; + *(.alt.smp.init) + __smpalt_end = .; +#endif INIT_SETUP(16) @@ -237,6 +242,12 @@ SECTIONS /* Default discards */ DISCARDS + +#ifndef CONFIG_SMP_ON_UP + /DISCARD/ : { + *(.alt.smp.init) + } +#endif } /* diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 37c8157e116e..e8ea1a071f43 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -91,11 +91,8 @@ ENTRY(v7_flush_kern_cache_all) THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) bl v7_flush_dcache_all mov r0, #0 -#ifdef CONFIG_SMP - mcr p15, 0, r0, c7, c1, 0 @ invalidate I-cache inner shareable -#else - mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate -#endif + ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) mov pc, lr @@ -171,11 +168,8 @@ ENTRY(v7_coherent_user_range) cmp r0, r1 blo 1b mov r0, #0 -#ifdef CONFIG_SMP - mcr p15, 0, r0, c7, c1, 6 @ invalidate BTB Inner Shareable -#else - mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB -#endif + ALT_SMP(mcr p15, 0, r0, c7, c1, 6) @ invalidate BTB Inner Shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 6) @ invalidate BTB dsb isb mov pc, lr diff --git a/arch/arm/mm/mmu.c 
b/arch/arm/mm/mmu.c index 6a3a2d0cd6db..e2335811c02e 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -310,9 +310,8 @@ static void __init build_mem_type_table(void) cachepolicy = CPOLICY_WRITEBACK; ecc_mask = 0; } -#ifdef CONFIG_SMP - cachepolicy = CPOLICY_WRITEALLOC; -#endif + if (is_smp()) + cachepolicy = CPOLICY_WRITEALLOC; /* * Strip out features not present on earlier architectures. @@ -406,13 +405,11 @@ static void __init build_mem_type_table(void) cp = &cache_policies[cachepolicy]; vecs_pgprot = kern_pgprot = user_pgprot = cp->pte; -#ifndef CONFIG_SMP /* * Only use write-through for non-SMP systems */ - if (cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH) + if (!is_smp() && cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH) vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte; -#endif /* * Enable CPU-specific coherency if supported. @@ -436,22 +433,23 @@ static void __init build_mem_type_table(void) mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; -#ifdef CONFIG_SMP - /* - * Mark memory with the "shared" attribute for SMP systems - */ - user_pgprot |= L_PTE_SHARED; - kern_pgprot |= L_PTE_SHARED; - vecs_pgprot |= L_PTE_SHARED; - mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S; - mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED; - mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S; - mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; - mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; - mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; - mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; - mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; -#endif + if (is_smp()) { + /* + * Mark memory with the "shared" attribute + * for SMP systems + */ + user_pgprot |= L_PTE_SHARED; + kern_pgprot |= L_PTE_SHARED; + vecs_pgprot |= L_PTE_SHARED; + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S; + mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED; + mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S; + mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; + } } /* @@ -829,8 +827,7 @@ static void __init sanity_check_meminfo(void) * rather difficult. 
*/ reason = "with VIPT aliasing cache"; -#ifdef CONFIG_SMP - } else if (tlb_ops_need_broadcast()) { + } else if (is_smp() && tlb_ops_need_broadcast()) { /* * kmap_high needs to occasionally flush TLB entries, * however, if the TLB entries need to be broadcast @@ -840,7 +837,6 @@ static void __init sanity_check_meminfo(void) * (must not be called with irqs off) */ reason = "without hardware TLB ops broadcasting"; -#endif } if (reason) { printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n", diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 22aac8515196..b95662dedb64 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -30,13 +30,10 @@ #define TTB_RGN_WT (2 << 3) #define TTB_RGN_WB (3 << 3) -#ifndef CONFIG_SMP -#define TTB_FLAGS TTB_RGN_WBWA -#define PMD_FLAGS PMD_SECT_WB -#else -#define TTB_FLAGS TTB_RGN_WBWA|TTB_S -#define PMD_FLAGS PMD_SECT_WBWA|PMD_SECT_S -#endif +#define TTB_FLAGS_UP TTB_RGN_WBWA +#define PMD_FLAGS_UP PMD_SECT_WB +#define TTB_FLAGS_SMP TTB_RGN_WBWA|TTB_S +#define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S ENTRY(cpu_v6_proc_init) mov pc, lr @@ -97,7 +94,8 @@ ENTRY(cpu_v6_switch_mm) #ifdef CONFIG_MMU mov r2, #0 ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id - orr r0, r0, #TTB_FLAGS + ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) + ALT_UP(orr r0, r0, #TTB_FLAGS_UP) mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB mcr p15, 0, r2, c7, c10, 4 @ drain write buffer mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 @@ -156,9 +154,11 @@ cpu_pj4_name: */ __v6_setup: #ifdef CONFIG_SMP - mrc p15, 0, r0, c1, c0, 1 @ Enable SMP/nAMP mode + ALT_SMP(mrc p15, 0, r0, c1, c0, 1) @ Enable SMP/nAMP mode + ALT_UP(nop) orr r0, r0, #0x20 - mcr p15, 0, r0, c1, c0, 1 + ALT_SMP(mcr p15, 0, r0, c1, c0, 1) + ALT_UP(nop) #endif mov r0, #0 @@ -169,7 +169,8 @@ __v6_setup: #ifdef CONFIG_MMU mcr p15, 0, r0, c8, c7, 0 @ invalidate I + D TLBs mcr p15, 0, r0, c2, c0, 2 @ TTB control register - orr r4, r4, #TTB_FLAGS + ALT_SMP(orr r4, r4, #TTB_FLAGS_SMP) + ALT_UP(orr r4, r4, #TTB_FLAGS_UP) mcr p15, 0, r4, c2, c0, 1 @ load TTB1 #endif /* CONFIG_MMU */ adr r5, v6_crval @@ -225,10 +226,16 @@ cpu_elf_name: __v6_proc_info: .long 0x0007b000 .long 0x0007f000 - .long PMD_TYPE_SECT | \ + ALT_SMP(.long \ + PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | \ + PMD_FLAGS_SMP) + ALT_UP(.long \ + PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ | \ - PMD_FLAGS + PMD_FLAGS_UP) .long PMD_TYPE_SECT | \ PMD_SECT_XN | \ PMD_SECT_AP_WRITE | \ @@ -249,10 +256,16 @@ __v6_proc_info: __pj4_v6_proc_info: .long 0x560f5810 .long 0xff0ffff0 - .long PMD_TYPE_SECT | \ + ALT_SMP(.long \ + PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | \ + PMD_FLAGS_SMP) + ALT_UP(.long \ + PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ | \ - PMD_FLAGS + PMD_FLAGS_UP) .long PMD_TYPE_SECT | \ PMD_SECT_XN | \ PMD_SECT_AP_WRITE | \ diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 7563ff0141bd..df422fee1cb6 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -30,15 +30,13 @@ #define TTB_IRGN_WT ((1 << 0) | (0 << 6)) #define TTB_IRGN_WB ((1 << 0) | (1 << 6)) -#ifndef CONFIG_SMP /* PTWs cacheable, inner WB not shareable, outer WB not shareable */ -#define TTB_FLAGS TTB_IRGN_WB|TTB_RGN_OC_WB -#define PMD_FLAGS PMD_SECT_WB -#else +#define TTB_FLAGS_UP TTB_IRGN_WB|TTB_RGN_OC_WB +#define PMD_FLAGS_UP PMD_SECT_WB + /* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */ -#define TTB_FLAGS TTB_IRGN_WBWA|TTB_S|TTB_NOS|TTB_RGN_OC_WBWA -#define PMD_FLAGS 
PMD_SECT_WBWA|PMD_SECT_S -#endif +#define TTB_FLAGS_SMP TTB_IRGN_WBWA|TTB_S|TTB_NOS|TTB_RGN_OC_WBWA +#define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S ENTRY(cpu_v7_proc_init) mov pc, lr @@ -105,7 +103,8 @@ ENTRY(cpu_v7_switch_mm) #ifdef CONFIG_MMU mov r2, #0 ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id - orr r0, r0, #TTB_FLAGS + ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) + ALT_UP(orr r0, r0, #TTB_FLAGS_UP) #ifdef CONFIG_ARM_ERRATA_430973 mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB #endif @@ -188,7 +187,8 @@ cpu_v7_name: */ __v7_ca9mp_setup: #ifdef CONFIG_SMP - mrc p15, 0, r0, c1, c0, 1 + ALT_SMP(mrc p15, 0, r0, c1, c0, 1) + ALT_UP(mov r0, #(1 << 6)) @ fake it for UP tst r0, #(1 << 6) @ SMP/nAMP mode enabled? orreq r0, r0, #(1 << 6) | (1 << 0) @ Enable SMP/nAMP mode and mcreq p15, 0, r0, c1, c0, 1 @ TLB ops broadcasting @@ -262,7 +262,8 @@ __v7_setup: #ifdef CONFIG_MMU mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs mcr p15, 0, r10, c2, c0, 2 @ TTB control register - orr r4, r4, #TTB_FLAGS + ALT_SMP(orr r4, r4, #TTB_FLAGS_SMP) + ALT_UP(orr r4, r4, #TTB_FLAGS_UP) mcr p15, 0, r4, c2, c0, 1 @ load TTB1 mov r10, #0x1f @ domains 0, 1 = manager mcr p15, 0, r10, c3, c0, 0 @ load domain access register @@ -354,10 +355,16 @@ cpu_elf_name: __v7_ca9mp_proc_info: .long 0x410fc090 @ Required ID value .long 0xff0ffff0 @ Mask for ID - .long PMD_TYPE_SECT | \ + ALT_SMP(.long \ + PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | \ + PMD_FLAGS_SMP) + ALT_UP(.long \ + PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ | \ - PMD_FLAGS + PMD_FLAGS_UP) .long PMD_TYPE_SECT | \ PMD_SECT_XN | \ PMD_SECT_AP_WRITE | \ @@ -380,10 +387,16 @@ __v7_ca9mp_proc_info: __v7_proc_info: .long 0x000f0000 @ Required ID value .long 0x000f0000 @ Mask for ID - .long PMD_TYPE_SECT | \ + ALT_SMP(.long \ + PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | \ + PMD_FLAGS_SMP) + ALT_UP(.long \ + PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ | \ - PMD_FLAGS + PMD_FLAGS_UP) .long PMD_TYPE_SECT | \ PMD_SECT_XN | \ PMD_SECT_AP_WRITE | \ diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S index f3f288a9546d..53cd5b454673 100644 --- a/arch/arm/mm/tlb-v7.S +++ b/arch/arm/mm/tlb-v7.S @@ -13,6 +13,7 @@ */ #include #include +#include #include #include #include @@ -41,20 +42,15 @@ ENTRY(v7wbi_flush_user_tlb_range) orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA mov r1, r1, lsl #PAGE_SHIFT 1: -#ifdef CONFIG_SMP - mcr p15, 0, r0, c8, c3, 1 @ TLB invalidate U MVA (shareable) -#else - mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate U MVA -#endif + ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) + ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA + add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b mov ip, #0 -#ifdef CONFIG_SMP - mcr p15, 0, ip, c7, c1, 6 @ flush BTAC/BTB Inner Shareable -#else - mcr p15, 0, ip, c7, c5, 6 @ flush BTAC/BTB -#endif + ALT_SMP(mcr p15, 0, ip, c7, c1, 6) @ flush BTAC/BTB Inner Shareable + ALT_UP(mcr p15, 0, ip, c7, c5, 6) @ flush BTAC/BTB dsb mov pc, lr ENDPROC(v7wbi_flush_user_tlb_range) @@ -74,20 +70,14 @@ ENTRY(v7wbi_flush_kern_tlb_range) mov r0, r0, lsl #PAGE_SHIFT mov r1, r1, lsl #PAGE_SHIFT 1: -#ifdef CONFIG_SMP - mcr p15, 0, r0, c8, c3, 1 @ TLB invalidate U MVA (shareable) -#else - mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate U MVA -#endif + ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) + ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b mov r2, #0 -#ifdef CONFIG_SMP - mcr p15, 0, r2, c7, 
c1, 6 @ flush BTAC/BTB Inner Shareable -#else - mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB -#endif + ALT_SMP(mcr p15, 0, r2, c7, c1, 6) @ flush BTAC/BTB Inner Shareable + ALT_UP(mcr p15, 0, r2, c7, c5, 6) @ flush BTAC/BTB dsb isb mov pc, lr @@ -99,5 +89,6 @@ ENDPROC(v7wbi_flush_kern_tlb_range) ENTRY(v7wbi_tlb_fns) .long v7wbi_flush_user_tlb_range .long v7wbi_flush_kern_tlb_range - .long v7wbi_tlb_flags + ALT_SMP(.long v7wbi_tlb_flags_smp) + ALT_UP(.long v7wbi_tlb_flags_up) .size v7wbi_tlb_fns, . - v7wbi_tlb_fns -- cgit v1.2.3 From 81d11955bf0b5ae25e3adbec930cd84840385dae Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 21 Sep 2010 17:16:40 +0100 Subject: ARM: 6405/1: Handle __flush_icache_all for CONFIG_SMP_ON_UP Do this by adding flush_icache_all to cache_fns for ARMv6 and 7. As flush_icache_all may neeed to be called from flush_kern_cache_all, add it as the first entry in the cache_fns. Note that now we can remove the ARM_ERRATA_411920 dependency to !SMP so it can be selected on UP ARMv6 processors, such as omap2. Signed-off-by: Tony Lindgren Signed-off-by: Anand Gadiyar Signed-off-by: Russell King --- arch/arm/Kconfig | 2 +- arch/arm/include/asm/cacheflush.h | 56 ++++++++++++++++++++++++++++----------- arch/arm/mm/cache-v6.S | 30 +++++++++++++-------- arch/arm/mm/cache-v7.S | 16 +++++++++++ 4 files changed, 77 insertions(+), 27 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index f6cdc21b562c..7562f884f2c5 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1002,7 +1002,7 @@ endif config ARM_ERRATA_411920 bool "ARM errata: Invalidation of the Instruction Cache operation can fail" - depends on CPU_V6 && !SMP + depends on CPU_V6 help Invalidation of the Instruction Cache operation can fail. This erratum is present in 1136 (before r1p4), 1156 and 1176. diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 4656a24058d2..a3db768ee5cc 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -156,6 +156,12 @@ * Please note that the implementation of these, and the required * effects are cache-type (VIVT/VIPT/PIPT) specific. * + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + * Currently only needed for cache-v6.S and cache-v7.S, see + * __flush_icache_all for the generic implementation. + * * flush_kern_all() * * Unconditionally clean and invalidate the entire cache. 
@@ -206,6 +212,7 @@ */ struct cpu_cache_fns { + void (*flush_icache_all)(void); void (*flush_kern_all)(void); void (*flush_user_all)(void); void (*flush_user_range)(unsigned long, unsigned long, unsigned int); @@ -227,6 +234,7 @@ struct cpu_cache_fns { extern struct cpu_cache_fns cpu_cache; +#define __cpuc_flush_icache_all cpu_cache.flush_icache_all #define __cpuc_flush_kern_all cpu_cache.flush_kern_all #define __cpuc_flush_user_all cpu_cache.flush_user_all #define __cpuc_flush_user_range cpu_cache.flush_user_range @@ -246,6 +254,7 @@ extern struct cpu_cache_fns cpu_cache; #else +#define __cpuc_flush_icache_all __glue(_CACHE,_flush_icache_all) #define __cpuc_flush_kern_all __glue(_CACHE,_flush_kern_cache_all) #define __cpuc_flush_user_all __glue(_CACHE,_flush_user_cache_all) #define __cpuc_flush_user_range __glue(_CACHE,_flush_user_cache_range) @@ -253,6 +262,7 @@ extern struct cpu_cache_fns cpu_cache; #define __cpuc_coherent_user_range __glue(_CACHE,_coherent_user_range) #define __cpuc_flush_dcache_area __glue(_CACHE,_flush_kern_dcache_area) +extern void __cpuc_flush_icache_all(void); extern void __cpuc_flush_kern_all(void); extern void __cpuc_flush_user_all(void); extern void __cpuc_flush_user_range(unsigned long, unsigned long, unsigned int); @@ -291,6 +301,37 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *, /* * Convert calls to our calling convention. */ + +/* Invalidate I-cache */ +#define __flush_icache_all_generic() \ + asm("mcr p15, 0, %0, c7, c5, 0" \ + : : "r" (0)); + +/* Invalidate I-cache inner shareable */ +#define __flush_icache_all_v7_smp() \ + asm("mcr p15, 0, %0, c7, c1, 0" \ + : : "r" (0)); + +/* + * Optimized __flush_icache_all for the common cases. Note that UP ARMv7 + * will fall through to use __flush_icache_all_generic. 
+ */ +#if (defined(CONFIG_CPU_V7) && defined(CONFIG_CPU_V6)) || \ + defined(CONFIG_SMP_ON_UP) +#define __flush_icache_preferred __cpuc_flush_icache_all +#elif __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) +#define __flush_icache_preferred __flush_icache_all_v7_smp +#elif __LINUX_ARM_ARCH__ == 6 && defined(CONFIG_ARM_ERRATA_411920) +#define __flush_icache_preferred __cpuc_flush_icache_all +#else +#define __flush_icache_preferred __flush_icache_all_generic +#endif + +static inline void __flush_icache_all(void) +{ + __flush_icache_preferred(); +} + #define flush_cache_all() __cpuc_flush_kern_all() static inline void vivt_flush_cache_mm(struct mm_struct *mm) @@ -366,21 +407,6 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); -static inline void __flush_icache_all(void) -{ -#ifdef CONFIG_ARM_ERRATA_411920 - extern void v6_icache_inval_all(void); - v6_icache_inval_all(); -#elif defined(CONFIG_SMP) && __LINUX_ARM_ARCH__ >= 7 - asm("mcr p15, 0, %0, c7, c1, 0 @ invalidate I-cache inner shareable\n" - : - : "r" (0)); -#else - asm("mcr p15, 0, %0, c7, c5, 0 @ invalidate I-cache\n" - : - : "r" (0)); -#endif -} static inline void flush_kernel_vmap_range(void *addr, int size) { if ((cache_is_vivt() || cache_is_vipt_aliasing())) diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index 86aa689ef1aa..99fa688dfadd 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -21,18 +21,22 @@ #define D_CACHE_LINE_SIZE 32 #define BTB_FLUSH_SIZE 8 -#ifdef CONFIG_ARM_ERRATA_411920 /* - * Invalidate the entire I cache (this code is a workaround for the ARM1136 - * erratum 411920 - Invalidate Instruction Cache operation can fail. This - * erratum is present in 1136, 1156 and 1176. It does not affect the MPCore. + * v6_flush_icache_all() + * + * Flush the whole I-cache. * - * Registers: - * r0 - set to 0 - * r1 - corrupted + * ARM1136 erratum 411920 - Invalidate Instruction Cache operation can fail. + * This erratum is present in 1136, 1156 and 1176. It does not affect the + * MPCore. 
+ * + * Registers: + * r0 - set to 0 + * r1 - corrupted */ -ENTRY(v6_icache_inval_all) +ENTRY(v6_flush_icache_all) mov r0, #0 +#ifdef CONFIG_ARM_ERRATA_411920 mrs r1, cpsr cpsid ifa @ disable interrupts mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache @@ -43,8 +47,11 @@ ENTRY(v6_icache_inval_all) .rept 11 @ ARM Ltd recommends at least nop @ 11 NOPs .endr - mov pc, lr +#else + mcr p15, 0, r0, c7, c5, 0 @ invalidate I-cache #endif + mov pc, lr +ENDPROC(v6_flush_icache_all) /* * v6_flush_cache_all() @@ -60,7 +67,7 @@ ENTRY(v6_flush_kern_cache_all) #ifndef CONFIG_ARM_ERRATA_411920 mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate #else - b v6_icache_inval_all + b v6_flush_icache_all #endif #else mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate @@ -138,7 +145,7 @@ ENTRY(v6_coherent_user_range) #ifndef CONFIG_ARM_ERRATA_411920 mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate #else - b v6_icache_inval_all + b v6_flush_icache_all #endif #else mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB @@ -312,6 +319,7 @@ ENDPROC(v6_dma_unmap_area) .type v6_cache_fns, #object ENTRY(v6_cache_fns) + .long v6_flush_icache_all .long v6_flush_kern_cache_all .long v6_flush_user_cache_all .long v6_flush_user_cache_range diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index e8ea1a071f43..a3ebf7a4f49b 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -17,6 +17,21 @@ #include "proc-macros.S" +/* + * v7_flush_icache_all() + * + * Flush the whole I-cache. + * + * Registers: + * r0 - set to 0 + */ +ENTRY(v7_flush_icache_all) + mov r0, #0 + ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate + mov pc, lr +ENDPROC(v7_flush_icache_all) + /* * v7_flush_dcache_all() * @@ -303,6 +318,7 @@ ENDPROC(v7_dma_unmap_area) .type v7_cache_fns, #object ENTRY(v7_cache_fns) + .long v7_flush_icache_all .long v7_flush_kern_cache_all .long v7_flush_user_cache_all .long v7_flush_user_cache_range -- cgit v1.2.3 From 8925ec4c530094b878e7e28a1fd78e7122afd973 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 13 Sep 2010 16:18:30 +0100 Subject: ARM: 6385/1: setup: detect aliasing I-cache when D-cache is non-aliasing Currently, the Kernel assumes that if a CPU has a non-aliasing D-cache then the I-cache is also non-aliasing. This may not be true on ARM cores from v6 onwards, which may have aliasing I-caches but non-aliasing D-caches. This patch adds a cpu_has_aliasing_icache function, which is called from cacheid_init and adds CACHEID_VIPT_I_ALIASING to the cacheid when appropriate. A utility macro, icache_is_vipt_aliasing(), is also provided. 
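The v7 check boils down to comparing the I-cache way size with the page size; a worked example with assumed (not CPU-specific) CCSIDR values:

/*
 * Example numbers only, following the decoding in the patch:
 *   line_size = 4 << ((CCSIDR & 0x7) + 2)     = 32 bytes
 *   num_sets  = ((CCSIDR >> 13) & 0x7fff) + 1 = 512
 *   way size  = line_size * num_sets = 16K
 * With 4K pages, 16K > 4K means virtual address bits [13:12]
 * index the I-cache, so it is reported as VIPT aliasing.
 */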
Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/cachetype.h | 8 ++++++-- arch/arm/kernel/setup.c | 39 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 42 insertions(+), 5 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/cachetype.h b/arch/arm/include/asm/cachetype.h index d3a4c2cb9f2f..c023db09fcc1 100644 --- a/arch/arm/include/asm/cachetype.h +++ b/arch/arm/include/asm/cachetype.h @@ -6,6 +6,7 @@ #define CACHEID_VIPT_ALIASING (1 << 2) #define CACHEID_VIPT (CACHEID_VIPT_ALIASING|CACHEID_VIPT_NONALIASING) #define CACHEID_ASID_TAGGED (1 << 3) +#define CACHEID_VIPT_I_ALIASING (1 << 4) extern unsigned int cacheid; @@ -14,15 +15,18 @@ extern unsigned int cacheid; #define cache_is_vipt_nonaliasing() cacheid_is(CACHEID_VIPT_NONALIASING) #define cache_is_vipt_aliasing() cacheid_is(CACHEID_VIPT_ALIASING) #define icache_is_vivt_asid_tagged() cacheid_is(CACHEID_ASID_TAGGED) +#define icache_is_vipt_aliasing() cacheid_is(CACHEID_VIPT_I_ALIASING) /* * __LINUX_ARM_ARCH__ is the minimum supported CPU architecture * Mask out support which will never be present on newer CPUs. * - v6+ is never VIVT - * - v7+ VIPT never aliases + * - v7+ VIPT never aliases on D-side */ #if __LINUX_ARM_ARCH__ >= 7 -#define __CACHEID_ARCH_MIN (CACHEID_VIPT_NONALIASING | CACHEID_ASID_TAGGED) +#define __CACHEID_ARCH_MIN (CACHEID_VIPT_NONALIASING |\ + CACHEID_ASID_TAGGED |\ + CACHEID_VIPT_I_ALIASING) #elif __LINUX_ARM_ARCH__ >= 6 #define __CACHEID_ARCH_MIN (~CACHEID_VIVT) #else diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index d5231ae7355a..9fc483393bae 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -238,6 +238,34 @@ int cpu_architecture(void) return cpu_arch; } +static int cpu_has_aliasing_icache(unsigned int arch) +{ + int aliasing_icache; + unsigned int id_reg, num_sets, line_size; + + /* arch specifies the register format */ + switch (arch) { + case CPU_ARCH_ARMv7: + asm("mcr p15, 2, %1, c0, c0, 0 @ set CSSELR\n" + "isb\n" + "mrc p15, 1, %0, c0, c0, 0 @ read CCSIDR" + : "=r" (id_reg) + : "r" (1)); + line_size = 4 << ((id_reg & 0x7) + 2); + num_sets = ((id_reg >> 13) & 0x7fff) + 1; + aliasing_icache = (line_size * num_sets) > PAGE_SIZE; + break; + case CPU_ARCH_ARMv6: + aliasing_icache = read_cpuid_cachetype() & (1 << 11); + break; + default: + /* I-cache aliases will be handled by D-cache aliasing code */ + aliasing_icache = 0; + } + + return aliasing_icache; +} + static void __init cacheid_init(void) { unsigned int cachetype = read_cpuid_cachetype(); @@ -249,10 +277,15 @@ static void __init cacheid_init(void) cacheid = CACHEID_VIPT_NONALIASING; if ((cachetype & (3 << 14)) == 1 << 14) cacheid |= CACHEID_ASID_TAGGED; - } else if (cachetype & (1 << 23)) + else if (cpu_has_aliasing_icache(CPU_ARCH_ARMv7)) + cacheid |= CACHEID_VIPT_I_ALIASING; + } else if (cachetype & (1 << 23)) { cacheid = CACHEID_VIPT_ALIASING; - else + } else { cacheid = CACHEID_VIPT_NONALIASING; + if (cpu_has_aliasing_icache(CPU_ARCH_ARMv6)) + cacheid |= CACHEID_VIPT_I_ALIASING; + } } else { cacheid = CACHEID_VIVT; } @@ -263,7 +296,7 @@ static void __init cacheid_init(void) cache_is_vipt_nonaliasing() ? "VIPT nonaliasing" : "unknown", cache_is_vivt() ? "VIVT" : icache_is_vivt_asid_tagged() ? "VIVT ASID tagged" : - cache_is_vipt_aliasing() ? "VIPT aliasing" : + icache_is_vipt_aliasing() ? "VIPT aliasing" : cache_is_vipt_nonaliasing() ? 
"VIPT nonaliasing" : "unknown"); } -- cgit v1.2.3 From 7511db9d25080d53e5427bf4b8849278c07019bc Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 5 Oct 2010 16:40:13 +0100 Subject: ARM: 6429/1: Check for is_smp for tlb_ops and cache_ops broadcast Broadcast should not be needed when running SMP kernel on UP systems. Also, this fixes an undefined instruction for SMP_ON_UP on earlier ARM cores without the extended CPUID_EXT_MMFR3 register. Signed-off-by: Tony Lindgren Signed-off-by: Russell King --- arch/arm/include/asm/smp_plat.h | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index 7de5aa56c18b..7f4e6633f753 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -7,17 +7,6 @@ #include -/* all SMP configurations have the extended CPUID registers */ -static inline int tlb_ops_need_broadcast(void) -{ - return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 2; -} - -static inline int cache_ops_need_broadcast(void) -{ - return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 1; -} - /* * Return true if we are running on a SMP platform */ @@ -33,4 +22,21 @@ static inline bool is_smp(void) #endif } +/* all SMP configurations have the extended CPUID registers */ +static inline int tlb_ops_need_broadcast(void) +{ + if (!is_smp()) + return 0; + + return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 2; +} + +static inline int cache_ops_need_broadcast(void) +{ + if (!is_smp()) + return 0; + + return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 1; +} + #endif -- cgit v1.2.3 From c7b0aff44a0740eedd31b759fd08d9e25672fa76 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Fri, 1 Oct 2010 22:13:47 +0100 Subject: ARM: 6428/1: add cpu_idle_wait() to support CPUidle on SMP systems. In order for CPUidle to work on SMP systems, an implementation of cpu_idle_wait() is needed. This patch duplicates the x86 implementation of cpu_idle_wait() for ARM. Tested-by: Colin Cross Signed-off-by: Kevin Hilman Signed-off-by: Russell King --- arch/arm/Kconfig | 3 +++ arch/arm/include/asm/system.h | 2 ++ arch/arm/kernel/process.c | 19 +++++++++++++++++++ 3 files changed, 24 insertions(+) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 7562f884f2c5..94360e7a7abc 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -145,6 +145,9 @@ config ARCH_HAS_CPUFREQ and that the relevant menu configurations are displayed for it. 
From c7b0aff44a0740eedd31b759fd08d9e25672fa76 Mon Sep 17 00:00:00 2001
From: Kevin Hilman
Date: Fri, 1 Oct 2010 22:13:47 +0100
Subject: ARM: 6428/1: add cpu_idle_wait() to support CPUidle on SMP systems.

In order for CPUidle to work on SMP systems, an implementation of
cpu_idle_wait() is needed. This patch duplicates the x86 implementation
of cpu_idle_wait() for ARM.

Tested-by: Colin Cross
Signed-off-by: Kevin Hilman
Signed-off-by: Russell King
---
 arch/arm/Kconfig              |  3 +++
 arch/arm/include/asm/system.h |  2 ++
 arch/arm/kernel/process.c     | 19 +++++++++++++++++++
 3 files changed, 24 insertions(+)
(limited to 'arch/arm/include/asm')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 7562f884f2c5..94360e7a7abc 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -145,6 +145,9 @@ config ARCH_HAS_CPUFREQ
 	  and that the relevant menu configurations are displayed for
 	  it.
 
+config ARCH_HAS_CPU_IDLE_WAIT
+	def_bool y
+
 config GENERIC_HWEIGHT
 	bool
 	default y
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 8ba1ccf82a02..48e360c74327 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -325,6 +325,8 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 extern void disable_hlt(void);
 extern void enable_hlt(void);
 
+void cpu_idle_wait(void);
+
 #include <asm-generic/cmpxchg-local.h>
 
 #if __LINUX_ARM_ARCH__ < 6
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 401e38be1f78..23def52d2d24 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -135,6 +135,25 @@ EXPORT_SYMBOL(pm_power_off);
 void (*arm_pm_restart)(char str, const char *cmd) = arm_machine_restart;
 EXPORT_SYMBOL_GPL(arm_pm_restart);
 
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard the old value
+ * of pm_idle and switch to the new one. Required while changing the
+ * pm_idle handler on SMP systems.
+ *
+ * The caller must have changed pm_idle to the new value before the call.
+ * The old pm_idle value will not be used by any CPU after this returns.
+ */
+void cpu_idle_wait(void)
+{
+	smp_mb();
+	/* kick all the CPUs so that they exit out of pm_idle */
+	smp_call_function(do_nothing, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
 /*
  * This is our default idle handler. We need to disable
--
cgit v1.2.3
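The helper assumes a specific calling pattern: publish the new idle handler first, then call cpu_idle_wait() so that no CPU can still be executing the old one. A hedged sketch of a caller (my_idle and install_idle_handler are hypothetical names, not part of the patch):

	extern void (*pm_idle)(void);
	extern void cpu_idle_wait(void);
	extern void cpu_do_idle(void);		/* architecture WFI helper */

	/* hypothetical low-power idle routine, e.g. from a CPUidle driver */
	static void my_idle(void)
	{
		cpu_do_idle();
	}

	static void install_idle_handler(void)
	{
		pm_idle = my_idle;	/* step 1: publish the new handler */
		cpu_idle_wait();	/* step 2: kick every CPU out of the old one */
	}

The smp_mb() orders the pm_idle store ahead of the cross-call, and the empty IPI handler simply forces each idle CPU around its idle loop again, where it picks up the new pointer.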