From 1771c6e1a567ea0ba2cccc0a4ffe68a1419fd8ef Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 20 May 2016 16:59:31 -0700 Subject: x86/kasan: instrument user memory access API Exchange between user and kernel memory is coded in assembly language. Which means that such accesses won't be spotted by KASAN as a compiler instruments only C code. Add explicit KASAN checks to user memory access API to ensure that userspace writes to (or reads from) a valid kernel memory. Note: Unlike others strncpy_from_user() is written mostly in C and KASAN sees memory accesses in it. However, it makes sense to add explicit check for all @count bytes that *potentially* could be written to the kernel. [aryabinin@virtuozzo.com: move kasan check under the condition] Link: http://lkml.kernel.org/r/1462869209-21096-1-git-send-email-aryabinin@virtuozzo.com Link: http://lkml.kernel.org/r/1462538722-1574-4-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uaccess.h | 5 +++++ arch/x86/include/asm/uaccess_64.h | 7 +++++++ 2 files changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 12f9653bde8d..2982387ba817 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -5,6 +5,7 @@ */ #include #include +#include #include #include #include @@ -721,6 +722,8 @@ copy_from_user(void *to, const void __user *from, unsigned long n) might_fault(); + kasan_check_write(to, n); + /* * While we would like to have the compiler do the checking for us * even in the non-constant size case, any false positives there are @@ -754,6 +757,8 @@ copy_to_user(void __user *to, const void *from, unsigned long n) { int sz = __compiletime_object_size(from); + kasan_check_read(from, n); + might_fault(); /* See the comment in copy_from_user() above. */ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 307698688fa1..2eac2aa3e37f 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -109,6 +110,7 @@ static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) { might_fault(); + kasan_check_write(dst, size); return __copy_from_user_nocheck(dst, src, size); } @@ -175,6 +177,7 @@ static __always_inline __must_check int __copy_to_user(void __user *dst, const void *src, unsigned size) { might_fault(); + kasan_check_read(src, size); return __copy_to_user_nocheck(dst, src, size); } @@ -242,12 +245,14 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) static __must_check __always_inline int __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) { + kasan_check_write(dst, size); return __copy_from_user_nocheck(dst, src, size); } static __must_check __always_inline int __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) { + kasan_check_read(src, size); return __copy_to_user_nocheck(dst, src, size); } @@ -258,6 +263,7 @@ static inline int __copy_from_user_nocache(void *dst, const void __user *src, unsigned size) { might_fault(); + kasan_check_write(dst, size); return __copy_user_nocache(dst, src, size, 1); } @@ -265,6 +271,7 @@ static inline int __copy_from_user_inatomic_nocache(void *dst, const void __user *src, unsigned size) { + kasan_check_write(dst, size); return __copy_user_nocache(dst, src, size, 0); } -- cgit v1.2.3 From 2ec656eb4054ce55e6d453b8614ef9669e84c542 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 20 May 2016 17:00:11 -0700 Subject: mn10300: let exit_fpu accept a task We need to call exit_thread from copy_process in a fail path. Since exit_thread on mn10300 calls exit_thread_runtime_instr, make it accept task_struct as a parameter now. Signed-off-by: Jiri Slaby Cc: "David S. Miller" Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: Aurelien Jacquiot Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chen Liqin Cc: Chris Metcalf Cc: Chris Zankel Cc: David Howells Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: Guan Xuetao Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Heiko Carstens Cc: Helge Deller Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: James Hogan Cc: Jeff Dike Cc: Jesper Nilsson Cc: Jiri Slaby Cc: Jonas Bonn Cc: Koichi Yasutake Cc: Lennox Wu Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Mikael Starvik Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Rich Felker Cc: Richard Henderson Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Steven Miao Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mn10300/include/asm/fpu.h | 6 ++---- arch/mn10300/kernel/process.c | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/mn10300/include/asm/fpu.h b/arch/mn10300/include/asm/fpu.h index 738ff72659d5..a47e995d45f3 100644 --- a/arch/mn10300/include/asm/fpu.h +++ b/arch/mn10300/include/asm/fpu.h @@ -76,11 +76,9 @@ static inline void unlazy_fpu(struct task_struct *tsk) preempt_enable(); } -static inline void exit_fpu(void) +static inline void exit_fpu(struct task_struct *tsk) { #ifdef CONFIG_LAZY_SAVE_FPU - struct task_struct *tsk = current; - preempt_disable(); if (fpu_state_owner == tsk) fpu_state_owner = NULL; @@ -123,7 +121,7 @@ static inline void fpu_init_state(void) {} static inline void fpu_save(struct fpu_state_struct *s) {} static inline void fpu_kill_state(struct task_struct *tsk) {} static inline void unlazy_fpu(struct task_struct *tsk) {} -static inline void exit_fpu(void) {} +static inline void exit_fpu(struct task_struct *tsk) {} static inline void flush_fpu(void) {} static inline int fpu_setup_sigcontext(struct fpucontext *buf) { return 0; } static inline int fpu_restore_sigcontext(struct fpucontext *buf) { return 0; } diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 3707da583d05..74a96ccf7451 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -105,7 +105,7 @@ void show_regs(struct pt_regs *regs) */ void exit_thread(void) { - exit_fpu(); + exit_fpu(current); } void flush_thread(void) -- cgit v1.2.3 From 5f56a5dfdb9bcb3bca03df59980d4d2f012cbb53 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 20 May 2016 17:00:16 -0700 Subject: exit_thread: remove empty bodies Define HAVE_EXIT_THREAD for archs which want to do something in exit_thread. For others, let's define exit_thread as an empty inline. This is a cleanup before we change the prototype of exit_thread to accept a task parameter. [akpm@linux-foundation.org: fix mips] Signed-off-by: Jiri Slaby Cc: "David S. Miller" Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: Aurelien Jacquiot Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chen Liqin Cc: Chris Metcalf Cc: Chris Zankel Cc: David Howells Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: Guan Xuetao Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Heiko Carstens Cc: Helge Deller Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: James Hogan Cc: Jeff Dike Cc: Jesper Nilsson Cc: Jiri Slaby Cc: Jonas Bonn Cc: Koichi Yasutake Cc: Lennox Wu Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Mikael Starvik Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Rich Felker Cc: Richard Henderson Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Steven Miao Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 5 +++++ arch/alpha/kernel/process.c | 8 -------- arch/arc/kernel/process.c | 7 ------- arch/arm/Kconfig | 1 + arch/arm64/kernel/process.c | 7 ------- arch/avr32/Kconfig | 1 + arch/blackfin/include/asm/processor.h | 7 ------- arch/c6x/kernel/process.c | 4 ---- arch/cris/Kconfig | 1 + arch/cris/arch-v10/kernel/process.c | 9 --------- arch/frv/include/asm/processor.h | 7 ------- arch/h8300/include/asm/processor.h | 7 ------- arch/hexagon/kernel/process.c | 7 ------- arch/ia64/Kconfig | 1 + arch/m32r/kernel/process.c | 9 --------- arch/m68k/include/asm/processor.h | 7 ------- arch/metag/Kconfig | 1 + arch/metag/include/asm/processor.h | 2 -- arch/microblaze/include/asm/processor.h | 10 ---------- arch/mips/kernel/process.c | 4 ---- arch/mn10300/Kconfig | 1 + arch/nios2/include/asm/processor.h | 5 ----- arch/openrisc/include/asm/processor.h | 9 --------- arch/parisc/kernel/process.c | 7 ------- arch/powerpc/kernel/process.c | 4 ---- arch/s390/Kconfig | 1 + arch/score/kernel/process.c | 2 -- arch/sh/Kconfig | 1 + arch/sh/kernel/process_32.c | 7 ------- arch/sparc/Kconfig | 1 + arch/tile/Kconfig | 1 + arch/um/kernel/process.c | 4 ---- arch/unicore32/kernel/process.c | 7 ------- arch/x86/Kconfig | 1 + arch/xtensa/Kconfig | 1 + include/linux/sched.h | 7 +++++++ 36 files changed, 24 insertions(+), 140 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 81869a5e7e17..0f298f9123dc 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -517,6 +517,11 @@ config HAVE_ARCH_MMAP_RND_BITS - ARCH_MMAP_RND_BITS_MIN - ARCH_MMAP_RND_BITS_MAX +config HAVE_EXIT_THREAD + bool + help + An architecture implements exit_thread. + config ARCH_MMAP_RND_BITS_MIN int diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 84d13263ce46..b483156698d5 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -210,14 +210,6 @@ start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp) } EXPORT_SYMBOL(start_thread); -/* - * Free current thread data structures etc.. - */ -void -exit_thread(void) -{ -} - void flush_thread(void) { diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index a3f750e76b68..b5db9e7fd649 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -183,13 +183,6 @@ void flush_thread(void) { } -/* - * Free any architecture-specific thread data structures, etc. - */ -void exit_thread(void) -{ -} - int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) { return 0; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b99d25b4133e..956d3575426c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -50,6 +50,7 @@ config ARM select HAVE_DMA_CONTIGUOUS if MMU select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) && !CPU_ENDIAN_BE32 && MMU select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU + select HAVE_EXIT_THREAD select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL) select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL) select HAVE_FUNCTION_TRACER if (!XIP_KERNEL) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 48eea6866c67..6cd2612236dc 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -200,13 +200,6 @@ void show_regs(struct pt_regs * regs) __show_regs(regs); } -/* - * Free current thread data structures etc.. - */ -void exit_thread(void) -{ -} - static void tls_thread_flush(void) { asm ("msr tpidr_el0, xzr"); diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index 18b88779e701..e43519a2ca89 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -4,6 +4,7 @@ config AVR32 # that we usually don't need on AVR32. select EXPERT select HAVE_CLK + select HAVE_EXIT_THREAD select HAVE_OPROFILE select HAVE_KPROBES select VIRT_TO_BUS diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h index 7acd46653df3..0c265aba94ad 100644 --- a/arch/blackfin/include/asm/processor.h +++ b/arch/blackfin/include/asm/processor.h @@ -75,13 +75,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* - * Free current thread data structures etc.. - */ -static inline void exit_thread(void) -{ -} - /* * Return saved PC of a blocked thread. */ diff --git a/arch/c6x/kernel/process.c b/arch/c6x/kernel/process.c index 3ae9f5a166a0..0ee7686a78f3 100644 --- a/arch/c6x/kernel/process.c +++ b/arch/c6x/kernel/process.c @@ -82,10 +82,6 @@ void flush_thread(void) { } -void exit_thread(void) -{ -} - /* * Do necessary setup to start up a newly executed thread. */ diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 99bda1ba3d2f..5c0ca8ae9293 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -59,6 +59,7 @@ config CRIS select GENERIC_IOMAP select MODULES_USE_ELF_RELA select CLONE_BACKWARDS2 + select HAVE_EXIT_THREAD if ETRAX_ARCH_V32 select OLD_SIGSUSPEND select OLD_SIGACTION select GPIOLIB diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 02b783457be0..96e5afef6b47 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -35,15 +35,6 @@ void default_idle(void) local_irq_enable(); } -/* - * Free current thread data structures etc.. - */ - -void exit_thread(void) -{ - /* Nothing needs to be done. */ -} - /* if the watchdog is enabled, we can simply disable interrupts and go * into an eternal loop, and the watchdog will reset the CPU after 0.1s * if on the other hand the watchdog wasn't enabled, we just enable it and wait diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h index ae8d423e79d9..73f0a79ad8e6 100644 --- a/arch/frv/include/asm/processor.h +++ b/arch/frv/include/asm/processor.h @@ -96,13 +96,6 @@ extern asmlinkage void *restore_user_regs(const struct user_context *target, ... #define release_segments(mm) do { } while (0) #define forget_segments() do { } while (0) -/* - * Free current thread data structures etc.. - */ -static inline void exit_thread(void) -{ -} - /* * Return saved PC of a blocked thread. */ diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h index 54e3fd83c336..111df7397ac7 100644 --- a/arch/h8300/include/asm/processor.h +++ b/arch/h8300/include/asm/processor.h @@ -110,13 +110,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* - * Free current thread data structures etc.. - */ -static inline void exit_thread(void) -{ -} - /* * Return saved PC of a blocked thread. */ diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index a9ebd471823a..d9edfd3fc52a 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -136,13 +136,6 @@ void release_thread(struct task_struct *dead_task) { } -/* - * Free any architecture-specific thread data structures, etc. - */ -void exit_thread(void) -{ -} - /* * Some archs flush debug and FPU info here */ diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index b534ebab36ea..f80758cb7157 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -18,6 +18,7 @@ config IA64 select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select HAVE_UNSTABLE_SCHED_CLOCK + select HAVE_EXIT_THREAD select HAVE_IDE select HAVE_OPROFILE select HAVE_KPROBES diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index e69221d581d5..a88b1f01e91f 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -101,15 +101,6 @@ void show_regs(struct pt_regs * regs) #endif } -/* - * Free current thread data structures etc.. - */ -void exit_thread(void) -{ - /* Nothing to do. */ - DPRINTK("pid = %d\n", current->pid); -} - void flush_thread(void) { DPRINTK("pid = %d\n", current->pid); diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h index 20dda1d4b860..a6ce2ec8d693 100644 --- a/arch/m68k/include/asm/processor.h +++ b/arch/m68k/include/asm/processor.h @@ -153,13 +153,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* - * Free current thread data structures etc.. - */ -static inline void exit_thread(void) -{ -} - extern unsigned long thread_saved_pc(struct task_struct *tsk); unsigned long get_wchan(struct task_struct *p); diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig index a0fa88da3e31..e47a08d72819 100644 --- a/arch/metag/Kconfig +++ b/arch/metag/Kconfig @@ -11,6 +11,7 @@ config METAG select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE + select HAVE_EXIT_THREAD select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_TRACER select HAVE_KERNEL_BZIP2 diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h index 0838ca699764..a0333ebcac35 100644 --- a/arch/metag/include/asm/processor.h +++ b/arch/metag/include/asm/processor.h @@ -134,8 +134,6 @@ static inline void release_thread(struct task_struct *dead_task) #define copy_segments(tsk, mm) do { } while (0) #define release_segments(mm) do { } while (0) -extern void exit_thread(void); - /* * Return saved PC of a blocked thread. */ diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h index 497a988d79c2..c38d0dd91134 100644 --- a/arch/microblaze/include/asm/processor.h +++ b/arch/microblaze/include/asm/processor.h @@ -70,11 +70,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* Free all resources held by a thread. */ -static inline void exit_thread(void) -{ -} - extern unsigned long thread_saved_pc(struct task_struct *t); extern unsigned long get_wchan(struct task_struct *p); @@ -127,11 +122,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* Free current thread data structures etc. */ -static inline void exit_thread(void) -{ -} - /* Return saved (kernel) PC of a blocked thread. */ # define thread_saved_pc(tsk) \ ((tsk)->thread.regs ? (tsk)->thread.regs->r15 : 0) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index a6b3dc54260a..411c971e3417 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -73,10 +73,6 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp) regs->regs[29] = sp; } -void exit_thread(void) -{ -} - int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { /* diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 06ddb5501ab1..9627e81a6cbb 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -1,5 +1,6 @@ config MN10300 def_bool y + select HAVE_EXIT_THREAD select HAVE_OPROFILE select HAVE_UID16 select GENERIC_IRQ_SHOW diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h index c2ba45c159c7..1c953f0cadbf 100644 --- a/arch/nios2/include/asm/processor.h +++ b/arch/nios2/include/asm/processor.h @@ -75,11 +75,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* Free current thread data structures etc.. */ -static inline void exit_thread(void) -{ -} - /* Return saved PC of a blocked thread. */ #define thread_saved_pc(tsk) ((tsk)->thread.kregs->ea) diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index 4d235e3d2534..70334c9f7d24 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -84,15 +84,6 @@ void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp); void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); -/* - * Free current thread data structures etc.. - */ - -extern inline void exit_thread(void) -{ - /* Nothing needs to be done. */ -} - /* * Return saved PC of a blocked thread. For now, this is the "user" PC */ diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 809905a811ed..40639439d8b3 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -144,13 +144,6 @@ void machine_power_off(void) void (*pm_power_off)(void) = machine_power_off; EXPORT_SYMBOL(pm_power_off); -/* - * Free current thread data structures etc.. - */ -void exit_thread(void) -{ -} - void flush_thread(void) { /* Only needs to handle fpu stuff or perf monitors. diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ea8a28fd6f31..e2f12cbcade9 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1329,10 +1329,6 @@ void show_regs(struct pt_regs * regs) show_instructions(regs); } -void exit_thread(void) -{ -} - void flush_thread(void) { #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index de0fcc08dff5..e2c9aaaf64b2 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -134,6 +134,7 @@ config S390 select HAVE_DMA_API_DEBUG select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_EXIT_THREAD select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index a1519ad3d49d..aae9480706c2 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -56,8 +56,6 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) regs->regs[0] = sp; } -void exit_thread(void) {} - /* * When a process does an "exec", machine state like FPU and debug * registers need to be reset. This is a hook function for that. diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 7ed20fc3fc81..cb93af8f8017 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -71,6 +71,7 @@ config SUPERH32 config SUPERH64 def_bool ARCH = "sh64" + select HAVE_EXIT_THREAD select KALLSYMS config ARCH_DEFCONFIG diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 2885fc9d9dcd..ee12e9451874 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -76,13 +76,6 @@ void start_thread(struct pt_regs *regs, unsigned long new_pc, } EXPORT_SYMBOL(start_thread); -/* - * Free current thread data structures etc.. - */ -void exit_thread(void) -{ -} - void flush_thread(void) { struct task_struct *tsk = current; diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index db0a26cffa97..27b3a0ad40a0 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -20,6 +20,7 @@ config SPARC select HAVE_OPROFILE select HAVE_ARCH_KGDB if !SMP || SPARC64 select HAVE_ARCH_TRACEHOOK + select HAVE_EXIT_THREAD select SYSCTL_EXCEPTION_TRACE select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select RTC_CLASS diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 81719302b056..174746225577 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -3,6 +3,7 @@ config TILE def_bool y + select HAVE_EXIT_THREAD select HAVE_PERF_EVENTS select USE_PMC if PERF_EVENTS select HAVE_DMA_API_DEBUG diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 48af59aae129..0b04711f1f18 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -103,10 +103,6 @@ void interrupt_end(void) tracehook_notify_resume(regs); } -void exit_thread(void) -{ -} - int get_current_pid(void) { return task_pid_nr(current); diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index b008e9961465..00299c927852 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -201,13 +201,6 @@ void show_regs(struct pt_regs *regs) __backtrace(); } -/* - * Free current thread data structures etc.. - */ -void exit_thread(void) -{ -} - void flush_thread(void) { struct thread_info *thread = current_thread_info(); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ace79d2da2c3..8ff5b3be95d4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -105,6 +105,7 @@ config X86 select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_EFFICIENT_UNALIGNED_ACCESS + select HAVE_EXIT_THREAD select HAVE_FENTRY if X86_64 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_FP_TEST diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 85257afe71c3..64336f666fb6 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -14,6 +14,7 @@ config XTENSA select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK select HAVE_DMA_API_DEBUG + select HAVE_EXIT_THREAD select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if !MMU select HAVE_HW_BREAKPOINT if PERF_EVENTS diff --git a/include/linux/sched.h b/include/linux/sched.h index 6b3213d96da6..167c0d4bf3fa 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2769,7 +2769,14 @@ static inline int copy_thread_tls( } #endif extern void flush_thread(void); + +#ifdef CONFIG_HAVE_EXIT_THREAD extern void exit_thread(void); +#else +static inline void exit_thread(void) +{ +} +#endif extern void exit_files(struct task_struct *); extern void __cleanup_sighand(struct sighand_struct *); -- cgit v1.2.3 From e64646946ed32902fd597fa6e514b1da84642de3 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 20 May 2016 17:00:20 -0700 Subject: exit_thread: accept a task parameter to be exited We need to call exit_thread from copy_process in a fail path. So make it accept task_struct as a parameter. [v2] * s390: exit_thread_runtime_instr doesn't make sense to be called for non-current tasks. * arm: fix the comment in vfp_thread_copy * change 'me' to 'tsk' for task_struct * now we can change only archs that actually have exit_thread [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Jiri Slaby Cc: "David S. Miller" Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: Aurelien Jacquiot Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chen Liqin Cc: Chris Metcalf Cc: Chris Zankel Cc: David Howells Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: Guan Xuetao Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Heiko Carstens Cc: Helge Deller Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: James Hogan Cc: Jeff Dike Cc: Jesper Nilsson Cc: Jiri Slaby Cc: Jonas Bonn Cc: Koichi Yasutake Cc: Lennox Wu Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Mikael Starvik Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Rich Felker Cc: Richard Henderson Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Steven Miao Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/process.c | 4 ++-- arch/arm/vfp/vfpmodule.c | 4 ---- arch/avr32/kernel/process.c | 4 ++-- arch/cris/arch-v32/kernel/process.c | 4 ++-- arch/ia64/kernel/perfmon.c | 4 ++-- arch/ia64/kernel/process.c | 14 +++++++------- arch/metag/kernel/process.c | 6 +++--- arch/mn10300/kernel/process.c | 4 ++-- arch/s390/kernel/process.c | 5 +++-- arch/sh/kernel/process_64.c | 5 ++--- arch/sparc/kernel/process_32.c | 12 ++++++------ arch/sparc/kernel/process_64.c | 4 ++-- arch/tile/kernel/process.c | 4 ++-- arch/x86/kernel/process.c | 5 ++--- arch/xtensa/kernel/process.c | 4 ++-- include/linux/sched.h | 4 ++-- kernel/exit.c | 2 +- 17 files changed, 42 insertions(+), 47 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 4adfb46e3ee9..a647d6642f3e 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -193,9 +193,9 @@ EXPORT_SYMBOL_GPL(thread_notify_head); /* * Free current thread data structures etc.. */ -void exit_thread(void) +void exit_thread(struct task_struct *tsk) { - thread_notify(THREAD_NOTIFY_EXIT, current_thread_info()); + thread_notify(THREAD_NOTIFY_EXIT, task_thread_info(tsk)); } void flush_thread(void) diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 2a61e4b04600..73085d3482ed 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -156,10 +156,6 @@ static void vfp_thread_copy(struct thread_info *thread) * - we could be preempted if tree preempt rcu is enabled, so * it is unsafe to use thread->cpu. * THREAD_NOTIFY_EXIT - * - the thread (v) will be running on the local CPU, so - * v === current_thread_info() - * - thread->cpu is the local CPU number at the time it is accessed, - * but may change at any time. * - we could be preempted if tree preempt rcu is enabled, so * it is unsafe to use thread->cpu. */ diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index 42a53e740a7e..68e5b9dac059 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -62,9 +62,9 @@ void machine_restart(char *cmd) /* * Free current thread data structures etc */ -void exit_thread(void) +void exit_thread(struct task_struct *tsk) { - ocd_disable(current); + ocd_disable(tsk); } void flush_thread(void) diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index c7ce784a393c..4d1afa9f9fd3 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -33,9 +33,9 @@ void default_idle(void) */ extern void deconfigure_bp(long pid); -void exit_thread(void) +void exit_thread(struct task_struct *tsk) { - deconfigure_bp(current->pid); + deconfigure_bp(tsk->pid); } /* diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 9cd607b06964..2436ad5f92c1 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -4542,8 +4542,8 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg /* - * called only from exit_thread(): task == current - * we come here only if current has a context attached (loaded or masked) + * called only from exit_thread() + * we come here only if the task has a context attached (loaded or masked) */ void pfm_exit_thread(struct task_struct *task) diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index b51514957620..aae6c4dc7ae7 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -570,22 +570,22 @@ flush_thread (void) } /* - * Clean up state associated with current thread. This is called when + * Clean up state associated with a thread. This is called when * the thread calls exit(). */ void -exit_thread (void) +exit_thread (struct task_struct *tsk) { - ia64_drop_fpu(current); + ia64_drop_fpu(tsk); #ifdef CONFIG_PERFMON /* if needed, stop monitoring and flush state to perfmon context */ - if (current->thread.pfm_context) - pfm_exit_thread(current); + if (tsk->thread.pfm_context) + pfm_exit_thread(tsk); /* free debug register resources */ - if (current->thread.flags & IA64_THREAD_DBG_VALID) - pfm_release_debug_registers(current); + if (tsk->thread.flags & IA64_THREAD_DBG_VALID) + pfm_release_debug_registers(tsk); #endif } diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c index 7f546183a0f0..35062796edf2 100644 --- a/arch/metag/kernel/process.c +++ b/arch/metag/kernel/process.c @@ -345,10 +345,10 @@ void flush_thread(void) /* * Free current thread data structures etc. */ -void exit_thread(void) +void exit_thread(struct task_struct *tsk) { - clear_fpu(¤t->thread); - clear_dsp(¤t->thread); + clear_fpu(&tsk->thread); + clear_dsp(&tsk->thread); } /* TODO: figure out how to unwind the kernel stack here to figure out diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 74a96ccf7451..cbede4e88dee 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -103,9 +103,9 @@ void show_regs(struct pt_regs *regs) /* * free current thread data structures etc.. */ -void exit_thread(void) +void exit_thread(struct task_struct *tsk) { - exit_fpu(current); + exit_fpu(tsk); } void flush_thread(void) diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 481d7a83efc6..bba4fa74b321 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -68,9 +68,10 @@ extern void kernel_thread_starter(void); /* * Free current thread data structures etc.. */ -void exit_thread(void) +void exit_thread(struct task_struct *tsk) { - exit_thread_runtime_instr(); + if (tsk == current) + exit_thread_runtime_instr(); } void flush_thread(void) diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index e2062e643341..9d3e9916555d 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -288,7 +288,7 @@ void show_regs(struct pt_regs *regs) /* * Free current thread data structures etc.. */ -void exit_thread(void) +void exit_thread(struct task_struct *tsk) { /* * See arch/sparc/kernel/process.c for the precedent for doing @@ -307,9 +307,8 @@ void exit_thread(void) * which it would get safely nulled. */ #ifdef CONFIG_SH_FPU - if (last_task_used_math == current) { + if (last_task_used_math == tsk) last_task_used_math = NULL; - } #endif } diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index c5113c7ce2fd..b7780a5bef11 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -184,21 +184,21 @@ unsigned long thread_saved_pc(struct task_struct *tsk) /* * Free current thread data structures etc.. */ -void exit_thread(void) +void exit_thread(struct task_struct *tsk) { #ifndef CONFIG_SMP - if(last_task_used_math == current) { + if (last_task_used_math == tsk) { #else - if (test_thread_flag(TIF_USEDFPU)) { + if (test_ti_thread_flag(task_thread_info(tsk), TIF_USEDFPU)) { #endif /* Keep process from leaving FPU in a bogon state. */ put_psr(get_psr() | PSR_EF); - fpsave(¤t->thread.float_regs[0], ¤t->thread.fsr, - ¤t->thread.fpqueue[0], ¤t->thread.fpqdepth); + fpsave(&tsk->thread.float_regs[0], &tsk->thread.fsr, + &tsk->thread.fpqueue[0], &tsk->thread.fpqdepth); #ifndef CONFIG_SMP last_task_used_math = NULL; #else - clear_thread_flag(TIF_USEDFPU); + clear_ti_thread_flag(task_thread_info(tsk), TIF_USEDFPU); #endif } } diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index c16ef1af1843..fa14402b33f9 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -417,9 +417,9 @@ unsigned long thread_saved_pc(struct task_struct *tsk) } /* Free current thread data structures etc.. */ -void exit_thread(void) +void exit_thread(struct task_struct *tsk) { - struct thread_info *t = current_thread_info(); + struct thread_info *t = task_thread_info(tsk); if (t->utraps) { if (t->utraps[0] < 2) diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index b5f30d376ce1..6b705ccc9cc1 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -541,7 +541,7 @@ void flush_thread(void) /* * Free current thread data structures etc.. */ -void exit_thread(void) +void exit_thread(struct task_struct *tsk) { #ifdef CONFIG_HARDWALL /* @@ -550,7 +550,7 @@ void exit_thread(void) * the last reference to a hardwall fd, it would already have * been released and deactivated at this point.) */ - hardwall_deactivate_all(current); + hardwall_deactivate_all(tsk); #endif } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2915d54e9dd5..96becbbb52e0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -97,10 +97,9 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) /* * Free current thread data structures etc.. */ -void exit_thread(void) +void exit_thread(struct task_struct *tsk) { - struct task_struct *me = current; - struct thread_struct *t = &me->thread; + struct thread_struct *t = &tsk->thread; unsigned long *bp = t->io_bitmap_ptr; struct fpu *fpu = &t->fpu; diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 5bbfed81c97b..e0ded48561db 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -115,10 +115,10 @@ void arch_cpu_idle(void) /* * This is called when the thread calls exit(). */ -void exit_thread(void) +void exit_thread(struct task_struct *tsk) { #if XTENSA_HAVE_COPROCESSORS - coprocessor_release_all(current_thread_info()); + coprocessor_release_all(task_thread_info(tsk)); #endif } diff --git a/include/linux/sched.h b/include/linux/sched.h index 167c0d4bf3fa..02bdab4d6db7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2771,9 +2771,9 @@ static inline int copy_thread_tls( extern void flush_thread(void); #ifdef CONFIG_HAVE_EXIT_THREAD -extern void exit_thread(void); +extern void exit_thread(struct task_struct *tsk); #else -static inline void exit_thread(void) +static inline void exit_thread(struct task_struct *tsk) { } #endif diff --git a/kernel/exit.c b/kernel/exit.c index fd90195667e1..75b34fe835b2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -746,7 +746,7 @@ void do_exit(long code) disassociate_ctty(1); exit_task_namespaces(tsk); exit_task_work(tsk); - exit_thread(); + exit_thread(tsk); /* * Flush inherited counters to the parent - before the parent -- cgit v1.2.3 From 42a0bb3f71383b457a7db362f1c69e7afb96732b Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 20 May 2016 17:00:33 -0700 Subject: printk/nmi: generic solution for safe printk in NMI printk() takes some locks and could not be used a safe way in NMI context. The chance of a deadlock is real especially when printing stacks from all CPUs. This particular problem has been addressed on x86 by the commit a9edc8809328 ("x86/nmi: Perform a safe NMI stack trace on all CPUs"). The patchset brings two big advantages. First, it makes the NMI backtraces safe on all architectures for free. Second, it makes all NMI messages almost safe on all architectures (the temporary buffer is limited. We still should keep the number of messages in NMI context at minimum). Note that there already are several messages printed in NMI context: WARN_ON(in_nmi()), BUG_ON(in_nmi()), anything being printed out from MCE handlers. These are not easy to avoid. This patch reuses most of the code and makes it generic. It is useful for all messages and architectures that support NMI. The alternative printk_func is set when entering and is reseted when leaving NMI context. It queues IRQ work to copy the messages into the main ring buffer in a safe context. __printk_nmi_flush() copies all available messages and reset the buffer. Then we could use a simple cmpxchg operations to get synchronized with writers. There is also used a spinlock to get synchronized with other flushers. We do not longer use seq_buf because it depends on external lock. It would be hard to make all supported operations safe for a lockless use. It would be confusing and error prone to make only some operations safe. The code is put into separate printk/nmi.c as suggested by Steven Rostedt. It needs a per-CPU buffer and is compiled only on architectures that call nmi_enter(). This is achieved by the new HAVE_NMI Kconfig flag. The are MN10300 and Xtensa architectures. We need to clean up NMI handling there first. Let's do it separately. The patch is heavily based on the draft from Peter Zijlstra, see https://lkml.org/lkml/2015/6/10/327 [arnd@arndb.de: printk-nmi: use %zu format string for size_t] [akpm@linux-foundation.org: min_t->min - all types are size_t here] Signed-off-by: Petr Mladek Suggested-by: Peter Zijlstra Suggested-by: Steven Rostedt Cc: Jan Kara Acked-by: Russell King [arm part] Cc: Daniel Thompson Cc: Jiri Kosina Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: David Miller Cc: Daniel Thompson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 4 + arch/arm/Kconfig | 1 + arch/arm/kernel/smp.c | 2 + arch/avr32/Kconfig | 1 + arch/blackfin/Kconfig | 1 + arch/cris/Kconfig | 1 + arch/mips/Kconfig | 1 + arch/powerpc/Kconfig | 1 + arch/s390/Kconfig | 1 + arch/sh/Kconfig | 1 + arch/sparc/Kconfig | 1 + arch/tile/Kconfig | 1 + arch/x86/Kconfig | 1 + arch/x86/kernel/apic/hw_nmi.c | 1 - include/linux/hardirq.h | 2 + include/linux/percpu.h | 3 - include/linux/printk.h | 12 ++- init/Kconfig | 5 + init/main.c | 1 + kernel/printk/Makefile | 1 + kernel/printk/internal.h | 44 +++++++++ kernel/printk/nmi.c | 219 ++++++++++++++++++++++++++++++++++++++++++ kernel/printk/printk.c | 19 +--- lib/nmi_backtrace.c | 89 +---------------- 24 files changed, 306 insertions(+), 107 deletions(-) create mode 100644 kernel/printk/internal.h create mode 100644 kernel/printk/nmi.c (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 0f298f9123dc..8f84fd268dee 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -187,7 +187,11 @@ config HAVE_OPTPROBES config HAVE_KPROBES_ON_FTRACE bool +config HAVE_NMI + bool + config HAVE_NMI_WATCHDOG + depends on HAVE_NMI bool # # An arch should select this if it provides all these things: diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 956d3575426c..90542db1220d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -67,6 +67,7 @@ config ARM select HAVE_KRETPROBES if (HAVE_KPROBES) select HAVE_MEMBLOCK select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI select HAVE_OPROFILE if (HAVE_PERF_EVENTS) select HAVE_OPTPROBES if !THUMB2_KERNEL select HAVE_PERF_EVENTS diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index baee70267f29..df90bc59bfce 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -644,9 +644,11 @@ void handle_IPI(int ipinr, struct pt_regs *regs) break; case IPI_CPU_BACKTRACE: + printk_nmi_enter(); irq_enter(); nmi_cpu_backtrace(regs); irq_exit(); + printk_nmi_exit(); break; default: diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index e43519a2ca89..7e75d45e20cd 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -18,6 +18,7 @@ config AVR32 select GENERIC_CLOCKEVENTS select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA + select HAVE_NMI help AVR32 is a high-performance 32-bit RISC microprocessor core, designed for cost-sensitive embedded applications, with particular diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index a63c12259e77..28c63fea786d 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -40,6 +40,7 @@ config BLACKFIN select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA select HAVE_DEBUG_STACKOVERFLOW + select HAVE_NMI config GENERIC_CSUM def_bool y diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 5c0ca8ae9293..deba2662b9f3 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -70,6 +70,7 @@ config CRIS select GENERIC_CLOCKEVENTS if ETRAX_ARCH_V32 select GENERIC_SCHED_CLOCK if ETRAX_ARCH_V32 select HAVE_DEBUG_BUGVERBOSE if ETRAX_ARCH_V32 + select HAVE_NMI config HZ int diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 5663f411c225..8040fb1845b4 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -48,6 +48,7 @@ config MIPS select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC select GENERIC_CMOS_UPDATE select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI select VIRT_TO_BUS select MODULES_USE_ELF_REL if MODULES select MODULES_USE_ELF_RELA if MODULES && 64BIT diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index f0403b58ae8b..01f7464d9fea 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -155,6 +155,7 @@ config PPC select NO_BOOTMEM select HAVE_GENERIC_RCU_GUP select HAVE_PERF_EVENTS_NMI if PPC64 + select HAVE_NMI if PERF_EVENTS select EDAC_SUPPORT select EDAC_ATOMIC_SCRUB select ARCH_HAS_DMA_SET_COHERENT_MASK diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e2c9aaaf64b2..1c3c43d9d1b5 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -166,6 +166,7 @@ config S390 select TTY select VIRT_CPU_ACCOUNTING select VIRT_TO_BUS + select HAVE_NMI config SCHED_OMIT_FRAME_POINTER diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index cb93af8f8017..f6254341c065 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -44,6 +44,7 @@ config SUPERH select OLD_SIGSUSPEND select OLD_SIGACTION select HAVE_ARCH_AUDITSYSCALL + select HAVE_NMI help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 27b3a0ad40a0..1012f7ffcdf5 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -79,6 +79,7 @@ config SPARC64 select NO_BOOTMEM select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW + select HAVE_NMI config ARCH_DEFCONFIG string diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 174746225577..76989b878f3c 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -30,6 +30,7 @@ config TILE select HAVE_DEBUG_STACKOVERFLOW select ARCH_WANT_FRAME_POINTERS select HAVE_CONTEXT_TRACKING + select HAVE_NMI if USE_PMC select EDAC_SUPPORT select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8ff5b3be95d4..0a7b885964ba 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -131,6 +131,7 @@ config X86 select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_MIXED_BREAKPOINTS_REGS + select HAVE_NMI select HAVE_OPROFILE select HAVE_OPTPROBES select HAVE_PCSPKR_PLATFORM diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 045e424fb368..7788ce643bf4 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -18,7 +18,6 @@ #include #include #include -#include #ifdef CONFIG_HARDLOCKUP_DETECTOR u64 hw_nmi_get_sample_period(int watchdog_thresh) diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index dfd59d6bc6f0..c683996110b1 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -61,6 +61,7 @@ extern void irq_exit(void); #define nmi_enter() \ do { \ + printk_nmi_enter(); \ lockdep_off(); \ ftrace_nmi_enter(); \ BUG_ON(in_nmi()); \ @@ -77,6 +78,7 @@ extern void irq_exit(void); preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ ftrace_nmi_exit(); \ lockdep_on(); \ + printk_nmi_exit(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 4bc6dafb703e..56939d3f6e53 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -129,7 +129,4 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr); (typeof(type) __percpu *)__alloc_percpu(sizeof(type), \ __alignof__(type)) -/* To avoid include hell, as printk can not declare this, we declare it here */ -DECLARE_PER_CPU(printk_func_t, printk_func); - #endif /* __LINUX_PERCPU_H */ diff --git a/include/linux/printk.h b/include/linux/printk.h index 9ccbdf2c1453..51dd6b824fe2 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -122,7 +122,17 @@ static inline __printf(1, 2) __cold void early_printk(const char *s, ...) { } #endif -typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args); +#ifdef CONFIG_PRINTK_NMI +extern void printk_nmi_init(void); +extern void printk_nmi_enter(void); +extern void printk_nmi_exit(void); +extern void printk_nmi_flush(void); +#else +static inline void printk_nmi_init(void) { } +static inline void printk_nmi_enter(void) { } +static inline void printk_nmi_exit(void) { } +static inline void printk_nmi_flush(void) { } +#endif /* PRINTK_NMI */ #ifdef CONFIG_PRINTK asmlinkage __printf(5, 0) diff --git a/init/Kconfig b/init/Kconfig index 79a91a2c0444..bccc1d607be5 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1454,6 +1454,11 @@ config PRINTK very difficult to diagnose system problems, saying N here is strongly discouraged. +config PRINTK_NMI + def_bool y + depends on PRINTK + depends on HAVE_NMI + config BUG bool "BUG() support" if EXPERT default y diff --git a/init/main.c b/init/main.c index 2075fafaad59..fa9b2bdde183 100644 --- a/init/main.c +++ b/init/main.c @@ -569,6 +569,7 @@ asmlinkage __visible void __init start_kernel(void) timekeeping_init(); time_init(); sched_clock_postinit(); + printk_nmi_init(); perf_event_init(); profile_init(); call_function_init(); diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index 85405bdcf2b3..abb0042a427b 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -1,2 +1,3 @@ obj-y = printk.o +obj-$(CONFIG_PRINTK_NMI) += nmi.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h new file mode 100644 index 000000000000..2de99faedfc1 --- /dev/null +++ b/kernel/printk/internal.h @@ -0,0 +1,44 @@ +/* + * internal.h - printk internal definitions + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ +#include + +typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args); + +int __printf(1, 0) vprintk_default(const char *fmt, va_list args); + +#ifdef CONFIG_PRINTK_NMI + +/* + * printk() could not take logbuf_lock in NMI context. Instead, + * it temporary stores the strings into a per-CPU buffer. + * The alternative implementation is chosen transparently + * via per-CPU variable. + */ +DECLARE_PER_CPU(printk_func_t, printk_func); +static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) +{ + return this_cpu_read(printk_func)(fmt, args); +} + +#else /* CONFIG_PRINTK_NMI */ + +static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) +{ + return vprintk_default(fmt, args); +} + +#endif /* CONFIG_PRINTK_NMI */ diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c new file mode 100644 index 000000000000..303cf0d15e57 --- /dev/null +++ b/kernel/printk/nmi.c @@ -0,0 +1,219 @@ +/* + * nmi.c - Safe printk in NMI context + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +/* + * printk() could not take logbuf_lock in NMI context. Instead, + * it uses an alternative implementation that temporary stores + * the strings into a per-CPU buffer. The content of the buffer + * is later flushed into the main ring buffer via IRQ work. + * + * The alternative implementation is chosen transparently + * via @printk_func per-CPU variable. + * + * The implementation allows to flush the strings also from another CPU. + * There are situations when we want to make sure that all buffers + * were handled or when IRQs are blocked. + */ +DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; +static int printk_nmi_irq_ready; + +#define NMI_LOG_BUF_LEN (4096 - sizeof(atomic_t) - sizeof(struct irq_work)) + +struct nmi_seq_buf { + atomic_t len; /* length of written data */ + struct irq_work work; /* IRQ work that flushes the buffer */ + unsigned char buffer[NMI_LOG_BUF_LEN]; +}; +static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); + +/* + * Safe printk() for NMI context. It uses a per-CPU buffer to + * store the message. NMIs are not nested, so there is always only + * one writer running. But the buffer might get flushed from another + * CPU, so we need to be careful. + */ +static int vprintk_nmi(const char *fmt, va_list args) +{ + struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); + int add = 0; + size_t len; + +again: + len = atomic_read(&s->len); + + if (len >= sizeof(s->buffer)) + return 0; + + /* + * Make sure that all old data have been read before the buffer was + * reseted. This is not needed when we just append data. + */ + if (!len) + smp_rmb(); + + add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args); + + /* + * Do it once again if the buffer has been flushed in the meantime. + * Note that atomic_cmpxchg() is an implicit memory barrier that + * makes sure that the data were written before updating s->len. + */ + if (atomic_cmpxchg(&s->len, len, len + add) != len) + goto again; + + /* Get flushed in a more safe context. */ + if (add && printk_nmi_irq_ready) { + /* Make sure that IRQ work is really initialized. */ + smp_rmb(); + irq_work_queue(&s->work); + } + + return add; +} + +/* + * printk one line from the temporary buffer from @start index until + * and including the @end index. + */ +static void print_nmi_seq_line(struct nmi_seq_buf *s, int start, int end) +{ + const char *buf = s->buffer + start; + + printk("%.*s", (end - start) + 1, buf); +} + +/* + * Flush data from the associated per_CPU buffer. The function + * can be called either via IRQ work or independently. + */ +static void __printk_nmi_flush(struct irq_work *work) +{ + static raw_spinlock_t read_lock = + __RAW_SPIN_LOCK_INITIALIZER(read_lock); + struct nmi_seq_buf *s = container_of(work, struct nmi_seq_buf, work); + unsigned long flags; + size_t len, size; + int i, last_i; + + /* + * The lock has two functions. First, one reader has to flush all + * available message to make the lockless synchronization with + * writers easier. Second, we do not want to mix messages from + * different CPUs. This is especially important when printing + * a backtrace. + */ + raw_spin_lock_irqsave(&read_lock, flags); + + i = 0; +more: + len = atomic_read(&s->len); + + /* + * This is just a paranoid check that nobody has manipulated + * the buffer an unexpected way. If we printed something then + * @len must only increase. + */ + if (i && i >= len) + pr_err("printk_nmi_flush: internal error: i=%d >= len=%zu\n", + i, len); + + if (!len) + goto out; /* Someone else has already flushed the buffer. */ + + /* Make sure that data has been written up to the @len */ + smp_rmb(); + + size = min(len, sizeof(s->buffer)); + last_i = i; + + /* Print line by line. */ + for (; i < size; i++) { + if (s->buffer[i] == '\n') { + print_nmi_seq_line(s, last_i, i); + last_i = i + 1; + } + } + /* Check if there was a partial line. */ + if (last_i < size) { + print_nmi_seq_line(s, last_i, size - 1); + pr_cont("\n"); + } + + /* + * Check that nothing has got added in the meantime and truncate + * the buffer. Note that atomic_cmpxchg() is an implicit memory + * barrier that makes sure that the data were copied before + * updating s->len. + */ + if (atomic_cmpxchg(&s->len, len, 0) != len) + goto more; + +out: + raw_spin_unlock_irqrestore(&read_lock, flags); +} + +/** + * printk_nmi_flush - flush all per-cpu nmi buffers. + * + * The buffers are flushed automatically via IRQ work. This function + * is useful only when someone wants to be sure that all buffers have + * been flushed at some point. + */ +void printk_nmi_flush(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + __printk_nmi_flush(&per_cpu(nmi_print_seq, cpu).work); +} + +void __init printk_nmi_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct nmi_seq_buf *s = &per_cpu(nmi_print_seq, cpu); + + init_irq_work(&s->work, __printk_nmi_flush); + } + + /* Make sure that IRQ works are initialized before enabling. */ + smp_wmb(); + printk_nmi_irq_ready = 1; + + /* Flush pending messages that did not have scheduled IRQ works. */ + printk_nmi_flush(); +} + +void printk_nmi_enter(void) +{ + this_cpu_write(printk_func, vprintk_nmi); +} + +void printk_nmi_exit(void) +{ + this_cpu_write(printk_func, vprintk_default); +} diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index bfbf284e4218..71eba0607034 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -55,6 +55,7 @@ #include "console_cmdline.h" #include "braille.h" +#include "internal.h" int console_printk[4] = { CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ @@ -1807,14 +1808,6 @@ int vprintk_default(const char *fmt, va_list args) } EXPORT_SYMBOL_GPL(vprintk_default); -/* - * This allows printk to be diverted to another function per cpu. - * This is useful for calling printk functions from within NMI - * without worrying about race conditions that can lock up the - * box. - */ -DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; - /** * printk - print a kernel message * @fmt: format string @@ -1838,21 +1831,11 @@ DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; */ asmlinkage __visible int printk(const char *fmt, ...) { - printk_func_t vprintk_func; va_list args; int r; va_start(args, fmt); - - /* - * If a caller overrides the per_cpu printk_func, then it needs - * to disable preemption when calling printk(). Otherwise - * the printk_func should be set to the default. No need to - * disable preemption here. - */ - vprintk_func = this_cpu_read(printk_func); r = vprintk_func(fmt, args); - va_end(args); return r; diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 6019c53c669e..26caf51cc238 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -16,33 +16,14 @@ #include #include #include -#include #ifdef arch_trigger_all_cpu_backtrace /* For reliability, we're prepared to waste bits here. */ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; -static cpumask_t printtrace_mask; - -#define NMI_BUF_SIZE 4096 - -struct nmi_seq_buf { - unsigned char buffer[NMI_BUF_SIZE]; - struct seq_buf seq; -}; - -/* Safe printing in NMI context */ -static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); /* "in progress" flag of arch_trigger_all_cpu_backtrace */ static unsigned long backtrace_flag; -static void print_seq_line(struct nmi_seq_buf *s, int start, int end) -{ - const char *buf = s->buffer + start; - - printk("%.*s", (end - start) + 1, buf); -} - /* * When raise() is called it will be is passed a pointer to the * backtrace_mask. Architectures that call nmi_cpu_backtrace() @@ -52,8 +33,7 @@ static void print_seq_line(struct nmi_seq_buf *s, int start, int end) void nmi_trigger_all_cpu_backtrace(bool include_self, void (*raise)(cpumask_t *mask)) { - struct nmi_seq_buf *s; - int i, cpu, this_cpu = get_cpu(); + int i, this_cpu = get_cpu(); if (test_and_set_bit(0, &backtrace_flag)) { /* @@ -68,17 +48,6 @@ void nmi_trigger_all_cpu_backtrace(bool include_self, if (!include_self) cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask)); - cpumask_copy(&printtrace_mask, to_cpumask(backtrace_mask)); - - /* - * Set up per_cpu seq_buf buffers that the NMIs running on the other - * CPUs will write to. - */ - for_each_cpu(cpu, to_cpumask(backtrace_mask)) { - s = &per_cpu(nmi_print_seq, cpu); - seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE); - } - if (!cpumask_empty(to_cpumask(backtrace_mask))) { pr_info("Sending NMI to %s CPUs:\n", (include_self ? "all" : "other")); @@ -94,73 +63,25 @@ void nmi_trigger_all_cpu_backtrace(bool include_self, } /* - * Now that all the NMIs have triggered, we can dump out their - * back traces safely to the console. + * Force flush any remote buffers that might be stuck in IRQ context + * and therefore could not run their irq_work. */ - for_each_cpu(cpu, &printtrace_mask) { - int len, last_i = 0; + printk_nmi_flush(); - s = &per_cpu(nmi_print_seq, cpu); - len = seq_buf_used(&s->seq); - if (!len) - continue; - - /* Print line by line. */ - for (i = 0; i < len; i++) { - if (s->buffer[i] == '\n') { - print_seq_line(s, last_i, i); - last_i = i + 1; - } - } - /* Check if there was a partial line. */ - if (last_i < len) { - print_seq_line(s, last_i, len - 1); - pr_cont("\n"); - } - } - - clear_bit(0, &backtrace_flag); - smp_mb__after_atomic(); + clear_bit_unlock(0, &backtrace_flag); put_cpu(); } -/* - * It is not safe to call printk() directly from NMI handlers. - * It may be fine if the NMI detected a lock up and we have no choice - * but to do so, but doing a NMI on all other CPUs to get a back trace - * can be done with a sysrq-l. We don't want that to lock up, which - * can happen if the NMI interrupts a printk in progress. - * - * Instead, we redirect the vprintk() to this nmi_vprintk() that writes - * the content into a per cpu seq_buf buffer. Then when the NMIs are - * all done, we can safely dump the contents of the seq_buf to a printk() - * from a non NMI context. - */ -static int nmi_vprintk(const char *fmt, va_list args) -{ - struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); - unsigned int len = seq_buf_used(&s->seq); - - seq_buf_vprintf(&s->seq, fmt, args); - return seq_buf_used(&s->seq) - len; -} - bool nmi_cpu_backtrace(struct pt_regs *regs) { int cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - printk_func_t printk_func_save = this_cpu_read(printk_func); - - /* Replace printk to write into the NMI seq */ - this_cpu_write(printk_func, nmi_vprintk); pr_warn("NMI backtrace for cpu %d\n", cpu); if (regs) show_regs(regs); else dump_stack(); - this_cpu_write(printk_func, printk_func_save); - cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); return true; } -- cgit v1.2.3 From fff7fb0b2d908dec779783d8eaf3d7725230f75e Mon Sep 17 00:00:00 2001 From: Zhaoxiu Zeng Date: Fri, 20 May 2016 17:03:57 -0700 Subject: lib/GCD.c: use binary GCD algorithm instead of Euclidean The binary GCD algorithm is based on the following facts: 1. If a and b are all evens, then gcd(a,b) = 2 * gcd(a/2, b/2) 2. If a is even and b is odd, then gcd(a,b) = gcd(a/2, b) 3. If a and b are all odds, then gcd(a,b) = gcd((a-b)/2, b) = gcd((a+b)/2, b) Even on x86 machines with reasonable division hardware, the binary algorithm runs about 25% faster (80% the execution time) than the division-based Euclidian algorithm. On platforms like Alpha and ARMv6 where division is a function call to emulation code, it's even more significant. There are two variants of the code here, depending on whether a fast __ffs (find least significant set bit) instruction is available. This allows the unpredictable branches in the bit-at-a-time shifting loop to be eliminated. If fast __ffs is not available, the "even/odd" GCD variant is used. I use the following code to benchmark: #include #include #include #include #include #include #define swap(a, b) \ do { \ a ^= b; \ b ^= a; \ a ^= b; \ } while (0) unsigned long gcd0(unsigned long a, unsigned long b) { unsigned long r; if (a < b) { swap(a, b); } if (b == 0) return a; while ((r = a % b) != 0) { a = b; b = r; } return b; } unsigned long gcd1(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; b >>= __builtin_ctzl(b); for (;;) { a >>= __builtin_ctzl(a); if (a == b) return a << __builtin_ctzl(r); if (a < b) swap(a, b); a -= b; } } unsigned long gcd2(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; r &= -r; while (!(b & r)) b >>= 1; for (;;) { while (!(a & r)) a >>= 1; if (a == b) return a; if (a < b) swap(a, b); a -= b; a >>= 1; if (a & r) a += b; a >>= 1; } } unsigned long gcd3(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; b >>= __builtin_ctzl(b); if (b == 1) return r & -r; for (;;) { a >>= __builtin_ctzl(a); if (a == 1) return r & -r; if (a == b) return a << __builtin_ctzl(r); if (a < b) swap(a, b); a -= b; } } unsigned long gcd4(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; r &= -r; while (!(b & r)) b >>= 1; if (b == r) return r; for (;;) { while (!(a & r)) a >>= 1; if (a == r) return r; if (a == b) return a; if (a < b) swap(a, b); a -= b; a >>= 1; if (a & r) a += b; a >>= 1; } } static unsigned long (*gcd_func[])(unsigned long a, unsigned long b) = { gcd0, gcd1, gcd2, gcd3, gcd4, }; #define TEST_ENTRIES (sizeof(gcd_func) / sizeof(gcd_func[0])) #if defined(__x86_64__) #define rdtscll(val) do { \ unsigned long __a,__d; \ __asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \ (val) = ((unsigned long long)__a) | (((unsigned long long)__d)<<32); \ } while(0) static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long), unsigned long a, unsigned long b, unsigned long *res) { unsigned long long start, end; unsigned long long ret; unsigned long gcd_res; rdtscll(start); gcd_res = gcd(a, b); rdtscll(end); if (end >= start) ret = end - start; else ret = ~0ULL - start + 1 + end; *res = gcd_res; return ret; } #else static inline struct timespec read_time(void) { struct timespec time; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time); return time; } static inline unsigned long long diff_time(struct timespec start, struct timespec end) { struct timespec temp; if ((end.tv_nsec - start.tv_nsec) < 0) { temp.tv_sec = end.tv_sec - start.tv_sec - 1; temp.tv_nsec = 1000000000ULL + end.tv_nsec - start.tv_nsec; } else { temp.tv_sec = end.tv_sec - start.tv_sec; temp.tv_nsec = end.tv_nsec - start.tv_nsec; } return temp.tv_sec * 1000000000ULL + temp.tv_nsec; } static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long), unsigned long a, unsigned long b, unsigned long *res) { struct timespec start, end; unsigned long gcd_res; start = read_time(); gcd_res = gcd(a, b); end = read_time(); *res = gcd_res; return diff_time(start, end); } #endif static inline unsigned long get_rand() { if (sizeof(long) == 8) return (unsigned long)rand() << 32 | rand(); else return rand(); } int main(int argc, char **argv) { unsigned int seed = time(0); int loops = 100; int repeats = 1000; unsigned long (*res)[TEST_ENTRIES]; unsigned long long elapsed[TEST_ENTRIES]; int i, j, k; for (;;) { int opt = getopt(argc, argv, "n:r:s:"); /* End condition always first */ if (opt == -1) break; switch (opt) { case 'n': loops = atoi(optarg); break; case 'r': repeats = atoi(optarg); break; case 's': seed = strtoul(optarg, NULL, 10); break; default: /* You won't actually get here. */ break; } } res = malloc(sizeof(unsigned long) * TEST_ENTRIES * loops); memset(elapsed, 0, sizeof(elapsed)); srand(seed); for (j = 0; j < loops; j++) { unsigned long a = get_rand(); /* Do we have args? */ unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand(); unsigned long long min_elapsed[TEST_ENTRIES]; for (k = 0; k < repeats; k++) { for (i = 0; i < TEST_ENTRIES; i++) { unsigned long long tmp = benchmark_gcd_func(gcd_func[i], a, b, &res[j][i]); if (k == 0 || min_elapsed[i] > tmp) min_elapsed[i] = tmp; } } for (i = 0; i < TEST_ENTRIES; i++) elapsed[i] += min_elapsed[i]; } for (i = 0; i < TEST_ENTRIES; i++) printf("gcd%d: elapsed %llu\n", i, elapsed[i]); k = 0; srand(seed); for (j = 0; j < loops; j++) { unsigned long a = get_rand(); unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand(); for (i = 1; i < TEST_ENTRIES; i++) { if (res[j][i] != res[j][0]) break; } if (i < TEST_ENTRIES) { if (k == 0) { k = 1; fprintf(stderr, "Error:\n"); } fprintf(stderr, "gcd(%lu, %lu): ", a, b); for (i = 0; i < TEST_ENTRIES; i++) fprintf(stderr, "%ld%s", res[j][i], i < TEST_ENTRIES - 1 ? ", " : "\n"); } } if (k == 0) fprintf(stderr, "PASS\n"); free(res); return 0; } Compiled with "-O2", on "VirtualBox 4.4.0-22-generic #38-Ubuntu x86_64" got: zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 10174 gcd1: elapsed 2120 gcd2: elapsed 2902 gcd3: elapsed 2039 gcd4: elapsed 2812 PASS zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 9309 gcd1: elapsed 2280 gcd2: elapsed 2822 gcd3: elapsed 2217 gcd4: elapsed 2710 PASS zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 9589 gcd1: elapsed 2098 gcd2: elapsed 2815 gcd3: elapsed 2030 gcd4: elapsed 2718 PASS zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 9914 gcd1: elapsed 2309 gcd2: elapsed 2779 gcd3: elapsed 2228 gcd4: elapsed 2709 PASS [akpm@linux-foundation.org: avoid #defining a CONFIG_ variable] Signed-off-by: Zhaoxiu Zeng Signed-off-by: George Spelvin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 3 ++ arch/alpha/Kconfig | 1 + arch/arc/Kconfig | 1 + arch/arm/mm/Kconfig | 3 ++ arch/h8300/Kconfig | 1 + arch/m32r/Kconfig | 1 + arch/m68k/Kconfig.cpu | 11 ++++++ arch/metag/Kconfig | 1 + arch/microblaze/Kconfig | 1 + arch/mips/include/asm/cpu-features.h | 10 +++++ arch/nios2/Kconfig | 1 + arch/openrisc/Kconfig | 1 + arch/parisc/Kconfig | 1 + arch/s390/Kconfig | 1 + arch/score/Kconfig | 1 + arch/sh/Kconfig | 1 + arch/sparc/Kconfig | 1 + lib/gcd.c | 77 +++++++++++++++++++++++++++++++----- 18 files changed, 107 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 8f84fd268dee..b16e74e4b5af 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -647,4 +647,7 @@ config COMPAT_OLD_SIGACTION config ARCH_NO_COHERENT_DMA_MMAP bool +config CPU_NO_EFFICIENT_FFS + def_bool n + source "kernel/gcov/Kconfig" diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index fe99f894e57d..7f312d80b43b 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -26,6 +26,7 @@ config ALPHA select MODULES_USE_ELF_RELA select ODD_RT_SIGACTION select OLD_SIGSUSPEND + select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67 help The Alpha is a 64-bit general-purpose processor designed and marketed by the Digital Equipment Corporation of blessed memory, diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 8894f7e7e3de..0dcbacfdea4b 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -107,6 +107,7 @@ choice config ISA_ARCOMPACT bool "ARCompact ISA" + select CPU_NO_EFFICIENT_FFS help The original ARC ISA of ARC600/700 cores diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 55347662e5ed..cb569b65a54d 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -421,18 +421,21 @@ config CPU_32v3 select CPU_USE_DOMAINS if MMU select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU + select CPU_NO_EFFICIENT_FFS config CPU_32v4 bool select CPU_USE_DOMAINS if MMU select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU + select CPU_NO_EFFICIENT_FFS config CPU_32v4T bool select CPU_USE_DOMAINS if MMU select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU + select CPU_NO_EFFICIENT_FFS config CPU_32v5 bool diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 986ea84caaed..aa232de2d4bc 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -20,6 +20,7 @@ config H8300 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO select HAVE_ARCH_KGDB + select CPU_NO_EFFICIENT_FFS config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index c82b29253991..3cc8498fe0fe 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -17,6 +17,7 @@ config M32R select ARCH_USES_GETTIMEOFFSET select MODULES_USE_ELF_RELA select HAVE_DEBUG_STACKOVERFLOW + select CPU_NO_EFFICIENT_FFS config SBUS bool diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index c1beb5ae181f..8ace920ca24a 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -40,6 +40,7 @@ config M68000 select CPU_HAS_NO_MULDIV64 select CPU_HAS_NO_UNALIGNED select GENERIC_CSUM + select CPU_NO_EFFICIENT_FFS help The Freescale (was Motorola) 68000 CPU is the first generation of the well known M68K family of processors. The CPU core as well as @@ -51,6 +52,7 @@ config MCPU32 bool select CPU_HAS_NO_BITFIELDS select CPU_HAS_NO_UNALIGNED + select CPU_NO_EFFICIENT_FFS help The Freescale (was then Motorola) CPU32 is a CPU core that is based on the 68020 processor. For the most part it is used in @@ -130,6 +132,7 @@ config M5206 depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5206 processor support. @@ -138,6 +141,7 @@ config M5206e depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5206e processor support. @@ -163,6 +167,7 @@ config M5249 depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5249 processor support. @@ -171,6 +176,7 @@ config M525x depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Freescale (Motorola) Coldfire 5251/5253 processor support. @@ -189,6 +195,7 @@ config M5272 depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5272 processor support. @@ -217,6 +224,7 @@ config M5307 select COLDFIRE_SW_A7 select HAVE_CACHE_CB select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5307 processor support. @@ -242,6 +250,7 @@ config M5407 select COLDFIRE_SW_A7 select HAVE_CACHE_CB select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5407 processor support. @@ -251,6 +260,7 @@ config M547x select MMU_COLDFIRE if MMU select HAVE_CACHE_CB select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Freescale ColdFire 5470/5471/5472/5473/5474/5475 processor support. @@ -260,6 +270,7 @@ config M548x select M54xx select HAVE_CACHE_CB select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Freescale ColdFire 5480/5481/5482/5483/5484/5485 processor support. diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig index e47a08d72819..5b7a45d99cfb 100644 --- a/arch/metag/Kconfig +++ b/arch/metag/Kconfig @@ -30,6 +30,7 @@ config METAG select OF select OF_EARLY_FLATTREE select SPARSE_IRQ + select CPU_NO_EFFICIENT_FFS config STACKTRACE_SUPPORT def_bool y diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 3d793b55f60c..f17c3a4fb697 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -32,6 +32,7 @@ config MICROBLAZE select OF_EARLY_FLATTREE select TRACING_SUPPORT select VIRT_TO_BUS + select CPU_NO_EFFICIENT_FFS config SWAP def_bool n diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index e6f19fc61bf2..e961c8a7ea66 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -204,6 +204,16 @@ #endif #endif +/* __builtin_constant_p(cpu_has_mips_r) && cpu_has_mips_r */ +#if !((defined(cpu_has_mips32r1) && cpu_has_mips32r1) || \ + (defined(cpu_has_mips32r2) && cpu_has_mips32r2) || \ + (defined(cpu_has_mips32r6) && cpu_has_mips32r6) || \ + (defined(cpu_has_mips64r1) && cpu_has_mips64r1) || \ + (defined(cpu_has_mips64r2) && cpu_has_mips64r2) || \ + (defined(cpu_has_mips64r6) && cpu_has_mips64r6)) +#define CPU_NO_EFFICIENT_FFS 1 +#endif + #ifndef cpu_has_mips_1 # define cpu_has_mips_1 (!cpu_has_mips_r6) #endif diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index 87ca653eb5f3..51a56c8b04b4 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -15,6 +15,7 @@ config NIOS2 select SOC_BUS select SPARSE_IRQ select USB_ARCH_HAS_HCD if USB_SUPPORT + select CPU_NO_EFFICIENT_FFS config GENERIC_CSUM def_bool y diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index e118c02cc79a..142cb057c41b 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -25,6 +25,7 @@ config OPENRISC select MODULES_USE_ELF_RELA select HAVE_DEBUG_STACKOVERFLOW select OR1K_PIC + select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1 config MMU def_bool y diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 88cfaa8af78e..3d498a676551 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -32,6 +32,7 @@ config PARISC select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_SECCOMP_FILTER select ARCH_NO_COHERENT_DMA_MMAP + select CPU_NO_EFFICIENT_FFS help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 1c3c43d9d1b5..a8c259059adf 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -123,6 +123,7 @@ config S390 select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_EARLY_PFN_TO_NID select HAVE_ARCH_JUMP_LABEL + select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY select HAVE_ARCH_TRACEHOOK diff --git a/arch/score/Kconfig b/arch/score/Kconfig index 366e1b599a7b..507d63181389 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -14,6 +14,7 @@ config SCORE select VIRT_TO_BUS select MODULES_USE_ELF_REL select CLONE_BACKWARDS + select CPU_NO_EFFICIENT_FFS choice prompt "System type" diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index f6254341c065..e803a836cb7c 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -20,6 +20,7 @@ config SUPERH select PERF_USE_VMALLOC select HAVE_DEBUG_KMEMLEAK select HAVE_KERNEL_GZIP + select CPU_NO_EFFICIENT_FFS select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA select HAVE_KERNEL_XZ diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 1012f7ffcdf5..546293d9e6c5 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -42,6 +42,7 @@ config SPARC select ODD_RT_SIGACTION select OLD_SIGSUSPEND select ARCH_HAS_SG_CHAIN + select CPU_NO_EFFICIENT_FFS config SPARC32 def_bool !64BIT diff --git a/lib/gcd.c b/lib/gcd.c index 3657f129d7b8..135ee6407a5e 100644 --- a/lib/gcd.c +++ b/lib/gcd.c @@ -2,20 +2,77 @@ #include #include -/* Greatest common divisor */ +/* + * This implements the binary GCD algorithm. (Often attributed to Stein, + * but as Knuth has noted, appears in a first-century Chinese math text.) + * + * This is faster than the division-based algorithm even on x86, which + * has decent hardware division. + */ + +#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) && !defined(CPU_NO_EFFICIENT_FFS) + +/* If __ffs is available, the even/odd algorithm benchmarks slower. */ unsigned long gcd(unsigned long a, unsigned long b) { - unsigned long r; + unsigned long r = a | b; + + if (!a || !b) + return r; - if (a < b) - swap(a, b); + b >>= __ffs(b); + if (b == 1) + return r & -r; - if (!b) - return a; - while ((r = a % b) != 0) { - a = b; - b = r; + for (;;) { + a >>= __ffs(a); + if (a == 1) + return r & -r; + if (a == b) + return a << __ffs(r); + + if (a < b) + swap(a, b); + a -= b; } - return b; } + +#else + +/* If normalization is done by loops, the even/odd algorithm is a win. */ +unsigned long gcd(unsigned long a, unsigned long b) +{ + unsigned long r = a | b; + + if (!a || !b) + return r; + + /* Isolate lsbit of r */ + r &= -r; + + while (!(b & r)) + b >>= 1; + if (b == r) + return r; + + for (;;) { + while (!(a & r)) + a >>= 1; + if (a == r) + return r; + if (a == b) + return a; + + if (a < b) + swap(a, b); + a -= b; + a >>= 1; + if (a & r) + a += b; + a >>= 1; + } +} + +#endif + EXPORT_SYMBOL_GPL(gcd); -- cgit v1.2.3