From 53d91c5ce0cb8945b55e8bb54e551cabc51eb28d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 16 Sep 2014 17:36:01 +0100 Subject: Provide a binary to hex conversion function Provide a function to convert a buffer of binary data into an unterminated ascii hex string representation of that data. Signed-off-by: David Howells Acked-by: Vivek Goyal --- include/linux/kernel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/kernel.h') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4c52907a6d8b..89a0b8e5a952 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -500,6 +500,7 @@ static inline char * __deprecated pack_hex_byte(char *buf, u8 byte) extern int hex_to_bin(char ch); extern int __must_check hex2bin(u8 *dst, const char *src, size_t count); +extern char *bin2hex(char *dst, const void *src, size_t count); int mac_pton(const char *s, u8 *mac); -- cgit v1.2.3 From 6ccc72b87b83ece31c2a75bbe07f440b0378f7a9 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Thu, 14 Aug 2014 17:15:27 +0800 Subject: lib: Add a generic cmdline parse function parse_option_str There should be a generic function to parse params like a=b,c Adding parse_option_str in lib/cmdline.c which will return true if there's specified option set in the params. Also updated efi=old_map parsing code to use the new function Signed-off-by: Dave Young Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 22 ++-------------------- include/linux/kernel.h | 1 + lib/cmdline.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 20 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index c90d3cd2728c..c73a7df5d37f 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -932,26 +932,8 @@ u64 efi_mem_attributes(unsigned long phys_addr) static int __init parse_efi_cmdline(char *str) { - if (*str == '=') - str++; - - while (*str) { - if (!strncmp(str, "old_map", 7)) { - set_bit(EFI_OLD_MEMMAP, &efi.flags); - str += strlen("old_map"); - } - - /* - * Skip any options we don't understand. Presumably - * they apply to the EFI boot stub. - */ - while (*str && *str != ',') - str++; - - /* If we hit a delimiter, skip it */ - if (*str == ',') - str++; - } + if (parse_option_str(str, "old_map")) + set_bit(EFI_OLD_MEMMAP, &efi.flags); return 0; } diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 95624bed87ef..f66427ef0628 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -407,6 +407,7 @@ int vsscanf(const char *, const char *, va_list); extern int get_option(char **str, int *pint); extern char *get_options(const char *str, int nints, int *ints); extern unsigned long long memparse(const char *ptr, char **retptr); +extern bool parse_option_str(const char *str, const char *option); extern int core_kernel_text(unsigned long addr); extern int core_kernel_data(unsigned long addr); diff --git a/lib/cmdline.c b/lib/cmdline.c index 76a712e6e20e..8f13cf73c2ec 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -160,3 +160,32 @@ unsigned long long memparse(const char *ptr, char **retptr) return ret; } EXPORT_SYMBOL(memparse); + +/** + * parse_option_str - Parse a string and check an option is set or not + * @str: String to be parsed + * @option: option name + * + * This function parses a string containing a comma-separated list of + * strings like a=b,c. + * + * Return true if there's such option in the string, or return false. + */ +bool parse_option_str(const char *str, const char *option) +{ + while (*str) { + if (!strncmp(str, option, strlen(option))) { + str += strlen(option); + if (!*str || *str == ',') + return true; + } + + while (*str && *str != ',') + str++; + + if (*str == ',') + str++; + } + + return false; +} -- cgit v1.2.3 From 2e1d06e1c05af9dbe8a3bfddeefbf041ca637fff Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Thu, 9 Oct 2014 15:30:13 -0700 Subject: include/linux/kernel.h: rewrite min3, max3 and clamp using min and max It appears that gcc is better at optimising a double call to min and max rather than open coded min3 and max3. This can be observed here: $ cat min-max.c #define min(x, y) ({ \ typeof(x) _min1 = (x); \ typeof(y) _min2 = (y); \ (void) (&_min1 == &_min2); \ _min1 < _min2 ? _min1 : _min2; }) #define min3(x, y, z) ({ \ typeof(x) _min1 = (x); \ typeof(y) _min2 = (y); \ typeof(z) _min3 = (z); \ (void) (&_min1 == &_min2); \ (void) (&_min1 == &_min3); \ _min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \ (_min2 < _min3 ? _min2 : _min3); }) int fmin3(int x, int y, int z) { return min3(x, y, z); } int fmin2(int x, int y, int z) { return min(min(x, y), z); } $ gcc -O2 -o min-max.s -S min-max.c; cat min-max.s .file "min-max.c" .text .p2align 4,,15 .globl fmin3 .type fmin3, @function fmin3: .LFB0: .cfi_startproc cmpl %esi, %edi jl .L5 cmpl %esi, %edx movl %esi, %eax cmovle %edx, %eax ret .p2align 4,,10 .p2align 3 .L5: cmpl %edi, %edx movl %edi, %eax cmovle %edx, %eax ret .cfi_endproc .LFE0: .size fmin3, .-fmin3 .p2align 4,,15 .globl fmin2 .type fmin2, @function fmin2: .LFB1: .cfi_startproc cmpl %edi, %esi movl %edx, %eax cmovle %esi, %edi cmpl %edx, %edi cmovle %edi, %eax ret .cfi_endproc .LFE1: .size fmin2, .-fmin2 .ident "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3" .section .note.GNU-stack,"",@progbits fmin3 function, which uses open-coded min3 macro, is compiled into total of ten instructions including a conditional branch, whereas fmin2 function, which uses two calls to min2 macro, is compiled into six instructions with no branches. Similarly, open-coded clamp produces the same code as clamp using min and max macros, but the latter is much shorter: $ cat clamp.c #define clamp(val, min, max) ({ \ typeof(val) __val = (val); \ typeof(min) __min = (min); \ typeof(max) __max = (max); \ (void) (&__val == &__min); \ (void) (&__val == &__max); \ __val = __val < __min ? __min: __val; \ __val > __max ? __max: __val; }) #define min(x, y) ({ \ typeof(x) _min1 = (x); \ typeof(y) _min2 = (y); \ (void) (&_min1 == &_min2); \ _min1 < _min2 ? _min1 : _min2; }) #define max(x, y) ({ \ typeof(x) _max1 = (x); \ typeof(y) _max2 = (y); \ (void) (&_max1 == &_max2); \ _max1 > _max2 ? _max1 : _max2; }) int fclamp(int v, int min, int max) { return clamp(v, min, max); } int fclampmm(int v, int min, int max) { return min(max(v, min), max); } $ gcc -O2 -o clamp.s -S clamp.c; cat clamp.s .file "clamp.c" .text .p2align 4,,15 .globl fclamp .type fclamp, @function fclamp: .LFB0: .cfi_startproc cmpl %edi, %esi movl %edx, %eax cmovge %esi, %edi cmpl %edx, %edi cmovle %edi, %eax ret .cfi_endproc .LFE0: .size fclamp, .-fclamp .p2align 4,,15 .globl fclampmm .type fclampmm, @function fclampmm: .LFB1: .cfi_startproc cmpl %edi, %esi cmovge %esi, %edi cmpl %edi, %edx movl %edi, %eax cmovle %edx, %eax ret .cfi_endproc .LFE1: .size fclampmm, .-fclampmm .ident "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3" .section .note.GNU-stack,"",@progbits Linux mpn-glaptop 3.13.0-29-generic #53~precise1-Ubuntu SMP Wed Jun 4 22:06:25 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3 Copyright (C) 2011 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -rwx------ 1 mpn eng 51224656 Jun 17 14:15 vmlinux.before -rwx------ 1 mpn eng 51224608 Jun 17 13:57 vmlinux.after 48 bytes reduction. The do_fault_around was a few instruction shorter and as far as I can tell saved 12 bytes on the stack, i.e.: $ grep -e rsp -e pop -e push do_fault_around.* do_fault_around.before.s:push %rbp do_fault_around.before.s:mov %rsp,%rbp do_fault_around.before.s:push %r13 do_fault_around.before.s:push %r12 do_fault_around.before.s:push %rbx do_fault_around.before.s:sub $0x38,%rsp do_fault_around.before.s:add $0x38,%rsp do_fault_around.before.s:pop %rbx do_fault_around.before.s:pop %r12 do_fault_around.before.s:pop %r13 do_fault_around.before.s:pop %rbp do_fault_around.after.s:push %rbp do_fault_around.after.s:mov %rsp,%rbp do_fault_around.after.s:push %r12 do_fault_around.after.s:push %rbx do_fault_around.after.s:sub $0x30,%rsp do_fault_around.after.s:add $0x30,%rsp do_fault_around.after.s:pop %rbx do_fault_around.after.s:pop %r12 do_fault_around.after.s:pop %rbp or here side-by-side: Before After push %rbp push %rbp mov %rsp,%rbp mov %rsp,%rbp push %r13 push %r12 push %r12 push %rbx push %rbx sub $0x38,%rsp sub $0x30,%rsp add $0x38,%rsp add $0x30,%rsp pop %rbx pop %rbx pop %r12 pop %r12 pop %r13 pop %rbp pop %rbp There are also fewer branches: $ grep ^j do_fault_around.* do_fault_around.before.s:jae ffffffff812079b7 do_fault_around.before.s:jmp ffffffff812079c5 do_fault_around.before.s:jmp ffffffff81207a14 do_fault_around.before.s:ja ffffffff812079f9 do_fault_around.before.s:jb ffffffff81207a10 do_fault_around.before.s:jmp ffffffff81207a63 do_fault_around.before.s:jne ffffffff812079df do_fault_around.after.s:jmp ffffffff812079fd do_fault_around.after.s:ja ffffffff812079e2 do_fault_around.after.s:jb ffffffff812079f9 do_fault_around.after.s:jmp ffffffff81207a4c do_fault_around.after.s:jne ffffffff812079c8 And here's with allyesconfig on a different machine: $ uname -a; gcc --version; ls -l vmlinux.* Linux erwin 3.14.7-mn #54 SMP Sun Jun 15 11:25:08 CEST 2014 x86_64 AMD Phenom(tm) II X3 710 Processor AuthenticAMD GNU/Linux gcc (GCC) 4.8.3 Copyright (C) 2013 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -rwx------ 1 mpn eng 437027411 Jun 20 16:04 vmlinux.before -rwx------ 1 mpn eng 437026881 Jun 20 15:30 vmlinux.after 530 bytes reduction. Signed-off-by: Michal Nazarewicz Signed-off-by: Hagen Paul Pfeifer Acked-by: Steven Rostedt Cc: Hagen Paul Pfeifer Cc: David Rientjes Cc: "Rustad, Mark D" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 32 +++++--------------------------- 1 file changed, 5 insertions(+), 27 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 95624bed87ef..aa2a0cb57f50 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -715,23 +715,8 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } (void) (&_max1 == &_max2); \ _max1 > _max2 ? _max1 : _max2; }) -#define min3(x, y, z) ({ \ - typeof(x) _min1 = (x); \ - typeof(y) _min2 = (y); \ - typeof(z) _min3 = (z); \ - (void) (&_min1 == &_min2); \ - (void) (&_min1 == &_min3); \ - _min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \ - (_min2 < _min3 ? _min2 : _min3); }) - -#define max3(x, y, z) ({ \ - typeof(x) _max1 = (x); \ - typeof(y) _max2 = (y); \ - typeof(z) _max3 = (z); \ - (void) (&_max1 == &_max2); \ - (void) (&_max1 == &_max3); \ - _max1 > _max2 ? (_max1 > _max3 ? _max1 : _max3) : \ - (_max2 > _max3 ? _max2 : _max3); }) +#define min3(x, y, z) min((typeof(x))min(x, y), z) +#define max3(x, y, z) max((typeof(x))max(x, y), z) /** * min_not_zero - return the minimum that is _not_ zero, unless both are zero @@ -746,20 +731,13 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } /** * clamp - return a value clamped to a given range with strict typechecking * @val: current value - * @min: minimum allowable value - * @max: maximum allowable value + * @lo: lowest allowable value + * @hi: highest allowable value * * This macro does strict typechecking of min/max to make sure they are of the * same type as val. See the unnecessary pointer comparisons. */ -#define clamp(val, min, max) ({ \ - typeof(val) __val = (val); \ - typeof(min) __min = (min); \ - typeof(max) __max = (max); \ - (void) (&__val == &__min); \ - (void) (&__val == &__max); \ - __val = __val < __min ? __min: __val; \ - __val > __max ? __max: __val; }) +#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) /* * ..and if you can't take the strict -- cgit v1.2.3 From c185b07fc9f24d52a864376ed22a6d84384b0c53 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Thu, 9 Oct 2014 15:30:15 -0700 Subject: include/linux/kernel.h: deduplicate code implementing clamp* macros Instead of open-coding clamp_t macro min_t and max_t the way clamp macro does and instead of open-coding clamp_val simply use clamp_t. Furthermore, normalise argument naming in the macros to be lo and hi. Signed-off-by: Michal Nazarewicz Cc: Mark Rustad Cc: "Kirsher, Jeffrey T" Cc: Hagen Paul Pfeifer Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index aa2a0cb57f50..e9e420b6d931 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -734,7 +734,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * @lo: lowest allowable value * @hi: highest allowable value * - * This macro does strict typechecking of min/max to make sure they are of the + * This macro does strict typechecking of lo/hi to make sure they are of the * same type as val. See the unnecessary pointer comparisons. */ #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) @@ -759,36 +759,26 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * clamp_t - return a value clamped to a given range using a given type * @type: the type of variable to use * @val: current value - * @min: minimum allowable value - * @max: maximum allowable value + * @lo: minimum allowable value + * @hi: maximum allowable value * * This macro does no typechecking and uses temporary variables of type * 'type' to make all the comparisons. */ -#define clamp_t(type, val, min, max) ({ \ - type __val = (val); \ - type __min = (min); \ - type __max = (max); \ - __val = __val < __min ? __min: __val; \ - __val > __max ? __max: __val; }) +#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi) /** * clamp_val - return a value clamped to a given range using val's type * @val: current value - * @min: minimum allowable value - * @max: maximum allowable value + * @lo: minimum allowable value + * @hi: maximum allowable value * * This macro does no typechecking and uses temporary variables of whatever * type the input argument 'val' is. This is useful when val is an unsigned * type and min and max are literals that will otherwise be assigned a signed * integer type. */ -#define clamp_val(val, min, max) ({ \ - typeof(val) __val = (val); \ - typeof(val) __min = (min); \ - typeof(val) __max = (max); \ - __val = __val < __min ? __min: __val; \ - __val > __max ? __max: __val; }) +#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) /* -- cgit v1.2.3 From 3db2e9cdc085144e243495137273e2318c53a82f Mon Sep 17 00:00:00 2001 From: Daniel Walter Date: Mon, 13 Oct 2014 15:55:09 -0700 Subject: include/linux: remove strict_strto* definitions Remove obsolete and unused strict_strto* functions Signed-off-by: Daniel Walter Acked-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 35c8ffb0136f..40728cf1c452 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -376,10 +376,6 @@ extern unsigned long simple_strtoul(const char *,char **,unsigned int); extern long simple_strtol(const char *,char **,unsigned int); extern unsigned long long simple_strtoull(const char *,char **,unsigned int); extern long long simple_strtoll(const char *,char **,unsigned int); -#define strict_strtoul kstrtoul -#define strict_strtol kstrtol -#define strict_strtoull kstrtoull -#define strict_strtoll kstrtoll extern int num_to_str(char *buf, int size, unsigned long long num); -- cgit v1.2.3 From 1029a2b52c09e479fd7b07275812ad97868c0fb0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Sep 2014 10:18:49 +0200 Subject: sched, exit: Deal with nested sleeps do_wait() is a big wait loop, but we set TASK_RUNNING too late; we end up calling potential sleeps before we reset it. Not strictly a bug since we're guaranteed to exit the loop and not call schedule(); put in annotations to quiet might_sleep(). WARNING: CPU: 0 PID: 1 at ../kernel/sched/core.c:7123 __might_sleep+0x7e/0x90() do not call blocking ops when !TASK_RUNNING; state=1 set at [] do_wait+0x88/0x270 Call Trace: [] dump_stack+0x4e/0x7a [] warn_slowpath_common+0x8c/0xc0 [] warn_slowpath_fmt+0x4c/0x50 [] __might_sleep+0x7e/0x90 [] might_fault+0x55/0xb0 [] wait_consider_task+0x90b/0xc10 [] do_wait+0x104/0x270 [] SyS_wait4+0x77/0x100 [] system_call_fastpath+0x16/0x1b Signed-off-by: Peter Zijlstra (Intel) Cc: tglx@linutronix.de Cc: umgwanakikbuti@gmail.com Cc: ilya.dryomov@inktank.com Cc: Alex Elder Cc: Andrew Morton Cc: Axel Lin Cc: Daniel Borkmann Cc: Dave Jones Cc: Guillaume Morin Cc: Ionut Alexa Cc: Jason Baron Cc: Linus Torvalds Cc: Michal Hocko Cc: Michal Schmidt Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Rik van Riel Cc: Rusty Russell Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20140924082242.186408915@infradead.org Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 2 ++ kernel/exit.c | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'include/linux/kernel.h') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3d770f5564b8..5068a0d9fecd 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -175,10 +175,12 @@ extern int _cond_resched(void); */ # define might_sleep() \ do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) +# define sched_annotate_sleep() __set_current_state(TASK_RUNNING) #else static inline void __might_sleep(const char *file, int line, int preempt_offset) { } # define might_sleep() do { might_resched(); } while (0) +# define sched_annotate_sleep() do { } while (0) #endif #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0) diff --git a/kernel/exit.c b/kernel/exit.c index 5d30019ff953..232c4bc8bcc9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -997,6 +997,8 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) get_task_struct(p); read_unlock(&tasklist_lock); + sched_annotate_sleep(); + if ((exit_code & 0x7f) == 0) { why = CLD_EXITED; status = exit_code >> 8; @@ -1079,6 +1081,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) * thread can reap it because we its state == DEAD/TRACE. */ read_unlock(&tasklist_lock); + sched_annotate_sleep(); retval = wo->wo_rusage ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; @@ -1210,6 +1213,7 @@ unlock_sig: pid = task_pid_vnr(p); why = ptrace ? CLD_TRAPPED : CLD_STOPPED; read_unlock(&tasklist_lock); + sched_annotate_sleep(); if (unlikely(wo->wo_flags & WNOWAIT)) return wait_noreap_copyout(wo, p, pid, uid, why, exit_code); @@ -1272,6 +1276,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) pid = task_pid_vnr(p); get_task_struct(p); read_unlock(&tasklist_lock); + sched_annotate_sleep(); if (!wo->wo_info) { retval = wo->wo_rusage -- cgit v1.2.3 From 3427445afd26bd2395f29241319283a93f362cd0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Sep 2014 10:18:56 +0200 Subject: sched: Exclude cond_resched() from nested sleep test cond_resched() is a preemption point, not strictly a blocking primitive, so exclude it from the ->state test. In particular, preemption preserves task_struct::state. Signed-off-by: Peter Zijlstra (Intel) Cc: tglx@linutronix.de Cc: ilya.dryomov@inktank.com Cc: umgwanakikbuti@gmail.com Cc: oleg@redhat.com Cc: Alex Elder Cc: Andrew Morton Cc: Axel Lin Cc: Daniel Borkmann Cc: Dave Jones Cc: Jason Baron Cc: Linus Torvalds Cc: Rusty Russell Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20140924082242.656559952@infradead.org Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 3 +++ include/linux/sched.h | 6 +++--- kernel/sched/core.c | 12 +++++++++--- 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5068a0d9fecd..446d76a87ba1 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -162,6 +162,7 @@ extern int _cond_resched(void); #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP + void ___might_sleep(const char *file, int line, int preempt_offset); void __might_sleep(const char *file, int line, int preempt_offset); /** * might_sleep - annotation for functions that can sleep @@ -177,6 +178,8 @@ extern int _cond_resched(void); do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) # define sched_annotate_sleep() __set_current_state(TASK_RUNNING) #else + static inline void ___might_sleep(const char *file, int line, + int preempt_offset) { } static inline void __might_sleep(const char *file, int line, int preempt_offset) { } # define might_sleep() do { might_resched(); } while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 4648e07f7d6f..4400ddc2fe73 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2806,7 +2806,7 @@ static inline int signal_pending_state(long state, struct task_struct *p) extern int _cond_resched(void); #define cond_resched() ({ \ - __might_sleep(__FILE__, __LINE__, 0); \ + ___might_sleep(__FILE__, __LINE__, 0); \ _cond_resched(); \ }) @@ -2819,14 +2819,14 @@ extern int __cond_resched_lock(spinlock_t *lock); #endif #define cond_resched_lock(lock) ({ \ - __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ + ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\ __cond_resched_lock(lock); \ }) extern int __cond_resched_softirq(void); #define cond_resched_softirq() ({ \ - __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ + ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ __cond_resched_softirq(); \ }) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5b4b96b27cd7..b9f78f12ac22 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7296,8 +7296,6 @@ static inline int preempt_count_equals(int preempt_offset) void __might_sleep(const char *file, int line, int preempt_offset) { - static unsigned long prev_jiffy; /* ratelimiting */ - /* * Blocking primitives will set (and therefore destroy) current->state, * since we will exit with TASK_RUNNING make sure we enter with it, @@ -7311,6 +7309,14 @@ void __might_sleep(const char *file, int line, int preempt_offset) (void *)current->task_state_change)) __set_current_state(TASK_RUNNING); + ___might_sleep(file, line, preempt_offset); +} +EXPORT_SYMBOL(__might_sleep); + +void ___might_sleep(const char *file, int line, int preempt_offset) +{ + static unsigned long prev_jiffy; /* ratelimiting */ + rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && !is_idle_task(current)) || @@ -7340,7 +7346,7 @@ void __might_sleep(const char *file, int line, int preempt_offset) #endif dump_stack(); } -EXPORT_SYMBOL(__might_sleep); +EXPORT_SYMBOL(___might_sleep); #endif #ifdef CONFIG_MAGIC_SYSRQ -- cgit v1.2.3 From e1c2296c3485158304bfad5a80e89078463d70c8 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Thu, 16 Oct 2014 14:59:48 -0400 Subject: tty: Move session_of_pgrp() and make static tiocspgrp() is the lone caller of session_of_pgrp(); relocate and limit to file scope. Signed-off-by: Peter Hurley Reviewed-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 21 +++++++++++++++++++++ include/linux/kernel.h | 3 --- kernel/exit.c | 21 --------------------- 3 files changed, 21 insertions(+), 24 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 114854c5554b..ae8f53c7972d 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2515,6 +2515,27 @@ struct pid *tty_get_pgrp(struct tty_struct *tty) } EXPORT_SYMBOL_GPL(tty_get_pgrp); +/* + * This checks not only the pgrp, but falls back on the pid if no + * satisfactory pgrp is found. I dunno - gdb doesn't work correctly + * without this... + * + * The caller must hold rcu lock or the tasklist lock. + */ +static struct pid *session_of_pgrp(struct pid *pgrp) +{ + struct task_struct *p; + struct pid *sid = NULL; + + p = pid_task(pgrp, PIDTYPE_PGID); + if (p == NULL) + p = pid_task(pgrp, PIDTYPE_PID); + if (p != NULL) + sid = task_session(p); + + return sid; +} + /** * tiocgpgrp - get process group * @tty: tty passed by user diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3d770f5564b8..01bc530fbfcb 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -411,9 +411,6 @@ extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); extern int func_ptr_is_kernel_text(void *ptr); -struct pid; -extern struct pid *session_of_pgrp(struct pid *pgrp); - unsigned long int_sqrt(unsigned long); extern void bust_spinlocks(int yes); diff --git a/kernel/exit.c b/kernel/exit.c index 5d30019ff953..6a3e2e5004ba 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -214,27 +214,6 @@ repeat: goto repeat; } -/* - * This checks not only the pgrp, but falls back on the pid if no - * satisfactory pgrp is found. I dunno - gdb doesn't work correctly - * without this... - * - * The caller must hold rcu lock or the tasklist lock. - */ -struct pid *session_of_pgrp(struct pid *pgrp) -{ - struct task_struct *p; - struct pid *sid = NULL; - - p = pid_task(pgrp, PIDTYPE_PGID); - if (p == NULL) - p = pid_task(pgrp, PIDTYPE_PID); - if (p != NULL) - sid = task_session(p); - - return sid; -} - /* * Determine if a process group is "orphaned", according to the POSIX * definition in 2.2.2.52. Orphaned process groups are not to be affected -- cgit v1.2.3 From 9e3961a0979817c612b10b2da4f3045ec9faa779 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Wed, 10 Dec 2014 15:45:50 -0800 Subject: kernel: add panic_on_warn There have been several times where I have had to rebuild a kernel to cause a panic when hitting a WARN() in the code in order to get a crash dump from a system. Sometimes this is easy to do, other times (such as in the case of a remote admin) it is not trivial to send new images to the user. A much easier method would be a switch to change the WARN() over to a panic. This makes debugging easier in that I can now test the actual image the WARN() was seen on and I do not have to engage in remote debugging. This patch adds a panic_on_warn kernel parameter and /proc/sys/kernel/panic_on_warn calls panic() in the warn_slowpath_common() path. The function will still print out the location of the warning. An example of the panic_on_warn output: The first line below is from the WARN_ON() to output the WARN_ON()'s location. After that the panic() output is displayed. WARNING: CPU: 30 PID: 11698 at /home/prarit/dummy_module/dummy-module.c:25 init_dummy+0x1f/0x30 [dummy_module]() Kernel panic - not syncing: panic_on_warn set ... CPU: 30 PID: 11698 Comm: insmod Tainted: G W OE 3.17.0+ #57 Hardware name: Intel Corporation S2600CP/S2600CP, BIOS RMLSDP.86I.00.29.D696.1311111329 11/11/2013 0000000000000000 000000008e3f87df ffff88080f093c38 ffffffff81665190 0000000000000000 ffffffff818aea3d ffff88080f093cb8 ffffffff8165e2ec ffffffff00000008 ffff88080f093cc8 ffff88080f093c68 000000008e3f87df Call Trace: [] dump_stack+0x46/0x58 [] panic+0xd0/0x204 [] ? init_dummy+0x1f/0x30 [dummy_module] [] warn_slowpath_common+0xd0/0xd0 [] ? dummy_greetings+0x40/0x40 [dummy_module] [] warn_slowpath_null+0x1a/0x20 [] init_dummy+0x1f/0x30 [dummy_module] [] do_one_initcall+0xd4/0x210 [] ? __vunmap+0xc2/0x110 [] load_module+0x16a9/0x1b30 [] ? store_uevent+0x70/0x70 [] ? copy_module_from_fd.isra.44+0x129/0x180 [] SyS_finit_module+0xa6/0xd0 [] system_call_fastpath+0x12/0x17 Successfully tested by me. hpa said: There is another very valid use for this: many operators would rather a machine shuts down than being potentially compromised either functionally or security-wise. Signed-off-by: Prarit Bhargava Cc: Jonathan Corbet Cc: Rusty Russell Cc: "H. Peter Anvin" Cc: Andi Kleen Cc: Masami Hiramatsu Acked-by: Yasuaki Ishimatsu Cc: Fabian Frederick Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kdump/kdump.txt | 7 +++++++ Documentation/kernel-parameters.txt | 3 +++ Documentation/sysctl/kernel.txt | 40 ++++++++++++++++++++++++------------- include/linux/kernel.h | 1 + include/uapi/linux/sysctl.h | 1 + kernel/panic.c | 13 ++++++++++++ kernel/sysctl.c | 9 +++++++++ kernel/sysctl_binary.c | 1 + 8 files changed, 61 insertions(+), 14 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt index 6c0b9f27e465..bc4bd5a44b88 100644 --- a/Documentation/kdump/kdump.txt +++ b/Documentation/kdump/kdump.txt @@ -471,6 +471,13 @@ format. Crash is available on Dave Anderson's site at the following URL: http://people.redhat.com/~anderson/ +Trigger Kdump on WARN() +======================= + +The kernel parameter, panic_on_warn, calls panic() in all WARN() paths. This +will cause a kdump to occur at the panic() call. In cases where a user wants +to specify this during runtime, /proc/sys/kernel/panic_on_warn can be set to 1 +to achieve the same behaviour. Contact ======= diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 838f3776c924..d6eb3636fe5a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2509,6 +2509,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. timeout < 0: reboot immediately Format: + panic_on_warn panic() instead of WARN(). Useful to cause kdump + on a WARN(). + crash_kexec_post_notifiers Run kdump after running panic-notifiers and dumping kmsg. This only for the users who doubt kdump always diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 57baff5bdb80..b5d0c8501a18 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -54,8 +54,9 @@ show up in /proc/sys/kernel: - overflowuid - panic - panic_on_oops -- panic_on_unrecovered_nmi - panic_on_stackoverflow +- panic_on_unrecovered_nmi +- panic_on_warn - pid_max - powersave-nap [ PPC only ] - printk @@ -527,19 +528,6 @@ the recommended setting is 60. ============================================================== -panic_on_unrecovered_nmi: - -The default Linux behaviour on an NMI of either memory or unknown is -to continue operation. For many environments such as scientific -computing it is preferable that the box is taken out and the error -dealt with than an uncorrected parity/ECC error get propagated. - -A small number of systems do generate NMI's for bizarre random reasons -such as power management so the default is off. That sysctl works like -the existing panic controls already in that directory. - -============================================================== - panic_on_oops: Controls the kernel's behaviour when an oops or BUG is encountered. @@ -563,6 +551,30 @@ This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled. ============================================================== +panic_on_unrecovered_nmi: + +The default Linux behaviour on an NMI of either memory or unknown is +to continue operation. For many environments such as scientific +computing it is preferable that the box is taken out and the error +dealt with than an uncorrected parity/ECC error get propagated. + +A small number of systems do generate NMI's for bizarre random reasons +such as power management so the default is off. That sysctl works like +the existing panic controls already in that directory. + +============================================================== + +panic_on_warn: + +Calls panic() in the WARN() path when set to 1. This is useful to avoid +a kernel rebuild when attempting to kdump at the location of a WARN(). + +0: only WARN(), default behaviour. + +1: call panic() after printing out WARN() location. + +============================================================== + perf_cpu_time_max_percent: Hints to the kernel how much CPU time it should be allowed to diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 446d76a87ba1..233ea8107038 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -427,6 +427,7 @@ extern int panic_timeout; extern int panic_on_oops; extern int panic_on_unrecovered_nmi; extern int panic_on_io_nmi; +extern int panic_on_warn; extern int sysctl_panic_on_stackoverflow; /* * Only to be used by arch init code. If the user over-wrote the default diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 43aaba1cc037..0956373b56db 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -153,6 +153,7 @@ enum KERN_MAX_LOCK_DEPTH=74, /* int: rtmutex's maximum lock depth */ KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */ KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */ + KERN_PANIC_ON_WARN=77, /* int: call panic() in WARN() functions */ }; diff --git a/kernel/panic.c b/kernel/panic.c index cf80672b7924..4d8d6f906dec 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -33,6 +33,7 @@ static int pause_on_oops; static int pause_on_oops_flag; static DEFINE_SPINLOCK(pause_on_oops_lock); static bool crash_kexec_post_notifiers; +int panic_on_warn __read_mostly; int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout); @@ -428,6 +429,17 @@ static void warn_slowpath_common(const char *file, int line, void *caller, if (args) vprintk(args->fmt, args->args); + if (panic_on_warn) { + /* + * This thread may hit another WARN() in the panic path. + * Resetting this prevents additional WARN() from panicking the + * system on this thread. Other threads are blocked by the + * panic_mutex in panic(). + */ + panic_on_warn = 0; + panic("panic_on_warn set ...\n"); + } + print_modules(); dump_stack(); print_oops_end_marker(); @@ -485,6 +497,7 @@ EXPORT_SYMBOL(__stack_chk_fail); core_param(panic, panic_timeout, int, 0644); core_param(pause_on_oops, pause_on_oops, int, 0644); +core_param(panic_on_warn, panic_on_warn, int, 0644); static int __init setup_crash_kexec_post_notifiers(char *s) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 15f2511a1b7c..7c54ff79afd7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1104,6 +1104,15 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif + { + .procname = "panic_on_warn", + .data = &panic_on_warn, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, { } }; diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 9a4f750a2963..7e7746a42a62 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -137,6 +137,7 @@ static const struct bin_table bin_kern_table[] = { { CTL_INT, KERN_COMPAT_LOG, "compat-log" }, { CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" }, { CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" }, + { CTL_INT, KERN_PANIC_ON_WARN, "panic_on_warn" }, {} }; -- cgit v1.2.3