From ad71d889b88055e61e3970a6744a271a51a94f42 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 30 Apr 2013 15:46:14 -0400 Subject: tracing: Add function probe to trigger a ftrace dump to console Add the "dump" command to have the ftrace buffer dumped to console if a function is hit. This is useful when debugging a tripple fault, where you have an idea of a function that is called just before the tripple fault occurs, and can tell ftrace to dump its content out to the console before it continues. Format is: :dump echo 'bad_address:dump' > /debug/tracing/set_ftrace_filter To remove this: echo '!bad_address:dump' > /debug/tracing/set_ftrace_filter Requested-by: Luis Claudio R. Goncalves Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace.txt | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'Documentation') diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index bfe8c29b1f1d..cc9ec57e157c 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -2430,6 +2430,13 @@ The following commands are supported: echo '!schedule:disable_event:sched:sched_switch' > \ set_ftrace_filter +- dump + When the function is hit, it will dump the contents of the ftrace + ring buffer to the console. This is useful if you need to debug + something, and want to dump the trace when a certain function + is hit. Perhaps its a function that is called before a tripple + fault happens and does not allow you to get a regular dump. + trace_pipe ---------- -- cgit v1.2.3 From 90e3c03c3a09a7b176b3fe59d78f5d9755ac8e37 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 30 Apr 2013 19:00:46 -0400 Subject: tracing: Add function probe to trigger a ftrace dump of current CPU trace Add the "cpudump" command to have the current CPU ftrace buffer dumped to console if a function is hit. This is useful when debugging a tripple fault, where you have an idea of a function that is called just before the tripple fault occurs, and can tell ftrace to dump its content out to the console before it continues. This differs from the "dump" command as it only dumps the content of the ring buffer for the currently executing CPU, and does not show the contents of the other CPUs. Format is: :cpudump echo 'bad_address:cpudump' > /debug/tracing/set_ftrace_filter To remove this: echo '!bad_address:cpudump' > /debug/tracing/set_ftrace_filter Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace.txt | 6 ++++++ kernel/trace/trace_functions.c | 44 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) (limited to 'Documentation') diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index cc9ec57e157c..b937c6e2163c 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -2437,6 +2437,12 @@ The following commands are supported: is hit. Perhaps its a function that is called before a tripple fault happens and does not allow you to get a regular dump. +- cpudump + When the function is hit, it will dump the contents of the ftrace + ring buffer for the current CPU to the console. Unlike the "dump" + command, it only prints out the contents of the ring buffer for the + CPU that executed the function that triggered the dump. + trace_pipe ---------- diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index d7c8719734b8..b863f93b30f3 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -297,6 +297,14 @@ ftrace_dump_probe(unsigned long ip, unsigned long parent_ip, void **data) ftrace_dump(DUMP_ALL); } +/* Only dump the current CPU buffer. */ +static void +ftrace_cpudump_probe(unsigned long ip, unsigned long parent_ip, void **data) +{ + if (update_count(data)) + ftrace_dump(DUMP_ORIG); +} + static int ftrace_probe_print(const char *name, struct seq_file *m, unsigned long ip, void *data) @@ -341,6 +349,13 @@ ftrace_dump_print(struct seq_file *m, unsigned long ip, return ftrace_probe_print("dump", m, ip, data); } +static int +ftrace_cpudump_print(struct seq_file *m, unsigned long ip, + struct ftrace_probe_ops *ops, void *data) +{ + return ftrace_probe_print("cpudump", m, ip, data); +} + static struct ftrace_probe_ops traceon_count_probe_ops = { .func = ftrace_traceon_count, .print = ftrace_traceon_print, @@ -361,6 +376,11 @@ static struct ftrace_probe_ops dump_probe_ops = { .print = ftrace_dump_print, }; +static struct ftrace_probe_ops cpudump_probe_ops = { + .func = ftrace_cpudump_probe, + .print = ftrace_cpudump_print, +}; + static struct ftrace_probe_ops traceon_probe_ops = { .func = ftrace_traceon, .print = ftrace_traceon_print, @@ -457,6 +477,19 @@ ftrace_dump_callback(struct ftrace_hash *hash, "1", enable); } +static int +ftrace_cpudump_callback(struct ftrace_hash *hash, + char *glob, char *cmd, char *param, int enable) +{ + struct ftrace_probe_ops *ops; + + ops = &cpudump_probe_ops; + + /* Only dump once. */ + return ftrace_trace_probe_callback(ops, hash, glob, cmd, + "1", enable); +} + static struct ftrace_func_command ftrace_traceon_cmd = { .name = "traceon", .func = ftrace_trace_onoff_callback, @@ -477,6 +510,11 @@ static struct ftrace_func_command ftrace_dump_cmd = { .func = ftrace_dump_callback, }; +static struct ftrace_func_command ftrace_cpudump_cmd = { + .name = "cpudump", + .func = ftrace_cpudump_callback, +}; + static int __init init_func_cmd_traceon(void) { int ret; @@ -497,8 +535,14 @@ static int __init init_func_cmd_traceon(void) if (ret) goto out_free_stacktrace; + ret = register_ftrace_command(&ftrace_cpudump_cmd); + if (ret) + goto out_free_dump; + return 0; + out_free_dump: + unregister_ftrace_command(&ftrace_dump_cmd); out_free_stacktrace: unregister_ftrace_command(&ftrace_stacktrace_cmd); out_free_traceon: -- cgit v1.2.3 From 1a891cf19cdfb645827969cc6aeaeebdefeb87b2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Jun 2013 13:16:25 -0400 Subject: tracing: Add binary '&' filter for events There are some cases when filtering on a set flag of a field of a tracepoint is useful. But currently the only filtering commands for numbered fields is ==, !=, <, <=, >, >=. This does not help when you just want to trace if a specific flag is set. For example: > # sudo trace-cmd record -e brcmfmac:brcmf_dbg -f 'level & 0x40000' > disable all > enable brcmfmac:brcmf_dbg > path = /sys/kernel/debug/tracing/events/brcmfmac/brcmf_dbg/enable > (level & 0x40000) > ^ > parse_error: Invalid operator > When trying to trace brcmf_dbg when level has its 1 << 18 bit set, the filter fails to perform. By allowing a binary '&' operation, this gives the user the ability to test a bit. Note, a binary '|' is not added, as it doesn't make sense as fields must be compared to constants (for now), and ORing a constant will always return true. Link: http://lkml.kernel.org/r/1371057385.9844.261.camel@gandalf.local.home Suggested-by: Arend van Spriel Tested-by: Arend van Spriel Signed-off-by: Steven Rostedt --- Documentation/trace/events.txt | 2 +- kernel/trace/trace_events_filter.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt index bb24c2a0e870..41911240c65c 100644 --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt @@ -183,7 +183,7 @@ The relational-operators depend on the type of the field being tested: The operators available for numeric fields are: -==, !=, <, <=, >, >= +==, !=, <, <=, >, >=, & And for string fields they are: diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e1b653f7e1ca..0d883dc057d6 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -44,6 +44,7 @@ enum filter_op_ids OP_LE, OP_GT, OP_GE, + OP_BAND, OP_NONE, OP_OPEN_PAREN, }; @@ -54,6 +55,7 @@ struct filter_op { int precedence; }; +/* Order must be the same as enum filter_op_ids above */ static struct filter_op filter_ops[] = { { OP_OR, "||", 1 }, { OP_AND, "&&", 2 }, @@ -64,6 +66,7 @@ static struct filter_op filter_ops[] = { { OP_LE, "<=", 5 }, { OP_GT, ">", 5 }, { OP_GE, ">=", 5 }, + { OP_BAND, "&", 6 }, { OP_NONE, "OP_NONE", 0 }, { OP_OPEN_PAREN, "(", 0 }, }; @@ -156,6 +159,9 @@ static int filter_pred_##type(struct filter_pred *pred, void *event) \ case OP_GE: \ match = (*addr >= val); \ break; \ + case OP_BAND: \ + match = (*addr & val); \ + break; \ default: \ break; \ } \ -- cgit v1.2.3 From c3e13c7c0605677a2c94957b39157f4501cea9a8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 17 Jun 2013 10:59:17 -0400 Subject: tracing: Update documentation on tracepoint glob matching b0f1a59a "tracing/filters: Use a different op for glob match" added glob matching to tracepoint filter strings. It uses the ftrace function tracing glob matching facility that allows for the wild card character (*) to be used at the start and/or end of the matching string. But the documentation still states that the filtering only allows for exact string matches. Cc: Li Zefan Signed-off-by: Steven Rostedt --- Documentation/trace/events.txt | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt index 41911240c65c..37732a220d33 100644 --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt @@ -187,9 +187,18 @@ The operators available for numeric fields are: And for string fields they are: -==, != +==, !=, ~ -Currently, only exact string matches are supported. +The glob (~) only accepts a wild card character (*) at the start and or +end of the string. For example: + + prev_comm ~ "*sh" + prev_comm ~ "sh*" + prev_comm ~ "*sh*" + +But does not allow for it to be within the string: + + prev_comm ~ "ba*sh" <-- is invalid 5.2 Setting filters ------------------- -- cgit v1.2.3 From de7edd31457b626e54a0b2a7e8ff4d65492f01ad Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 14 Jun 2013 16:21:43 -0400 Subject: tracing: Disable tracing on warning Add a traceoff_on_warning option in both the kernel command line as well as a sysctl option. When set, any WARN*() function that is hit will cause the tracing_on variable to be cleared, which disables writing to the ring buffer. This is useful especially when tracing a bug with function tracing. When a warning is hit, the print caused by the warning can flood the trace with the functions that producing the output for the warning. This can make the resulting trace useless by either hiding where the bug happened, or worse, by overflowing the buffer and losing the trace of the bug totally. Acked-by: Peter Zijlstra Signed-off-by: Steven Rostedt --- Documentation/kernel-parameters.txt | 13 +++++++++++++ include/linux/ftrace.h | 5 +++++ kernel/panic.c | 3 +++ kernel/sysctl.c | 7 +++++++ kernel/trace/trace.c | 17 +++++++++++++++++ 5 files changed, 45 insertions(+) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6e3b18a8afc6..729d0b9803b7 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3069,6 +3069,19 @@ bytes respectively. Such letter suffixes can also be entirely omitted. See also Documentation/trace/ftrace.txt "trace options" section. + traceoff_on_warning + [FTRACE] enable this option to disable tracing when a + warning is hit. This turns off "tracing_on". Tracing can + be enabled again by echoing '1' into the "tracing_on" + file located in /sys/kernel/debug/tracing/ + + This option is useful, as it disables the trace before + the WARNING dump is called, which prevents the trace to + be filled with content caused by the warning output. + + This option can also be set at run time via the sysctl + option: kernel/traceoff_on_warning + transparent_hugepage= [KNL] Format: [always|madvise|never] diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e48ed1d7876d..9f15c0064c50 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -824,10 +824,15 @@ enum ftrace_dump_mode; extern enum ftrace_dump_mode ftrace_dump_on_oops; +extern void disable_trace_on_warning(void); +extern int __disable_trace_on_warning; + #ifdef CONFIG_PREEMPT #define INIT_TRACE_RECURSION .trace_recursion = 0, #endif +#else /* CONFIG_TRACING */ +static inline void disable_trace_on_warning(void) { } #endif /* CONFIG_TRACING */ #ifndef INIT_TRACE_RECURSION diff --git a/kernel/panic.c b/kernel/panic.c index 167ec097ce8b..4cea6cc628ab 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -399,6 +400,8 @@ struct slowpath_args { static void warn_slowpath_common(const char *file, int line, void *caller, unsigned taint, struct slowpath_args *args) { + disable_trace_on_warning(); + printk(KERN_WARNING "------------[ cut here ]------------\n"); printk(KERN_WARNING "WARNING: at %s:%d %pS()\n", file, line, caller); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9edcf456e0fc..5b0f18c12800 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -600,6 +600,13 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "traceoff_on_warning", + .data = &__disable_trace_on_warning, + .maxlen = sizeof(__disable_trace_on_warning), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #endif #ifdef CONFIG_MODULES { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5f4a09c12e0b..c4c9296b1916 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -115,6 +115,9 @@ cpumask_var_t __read_mostly tracing_buffer_mask; enum ftrace_dump_mode ftrace_dump_on_oops; +/* When set, tracing will stop when a WARN*() is hit */ +int __disable_trace_on_warning; + static int tracing_set_tracer(const char *buf); #define MAX_TRACER_SIZE 100 @@ -149,6 +152,13 @@ static int __init set_ftrace_dump_on_oops(char *str) } __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); +static int __init stop_trace_on_warning(char *str) +{ + __disable_trace_on_warning = 1; + return 1; +} +__setup("traceoff_on_warning=", stop_trace_on_warning); + static int __init boot_alloc_snapshot(char *str) { allocate_snapshot = true; @@ -170,6 +180,7 @@ static int __init set_trace_boot_options(char *str) } __setup("trace_options=", set_trace_boot_options); + unsigned long long ns2usecs(cycle_t nsec) { nsec += 500; @@ -562,6 +573,12 @@ void tracing_off(void) } EXPORT_SYMBOL_GPL(tracing_off); +void disable_trace_on_warning(void) +{ + if (__disable_trace_on_warning) + tracing_off(); +} + /** * tracing_is_on - show state of ring buffers enabled */ -- cgit v1.2.3