From ad909e21bbe69f1d39055d346540abd827190eca Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Wed, 6 Mar 2013 21:45:37 -0500
Subject: tracing: Add internal tracing_snapshot() functions

The new snapshot feature is quite handy. It's a way for the user to take
advantage of the spare buffer that, until then, only the latency tracers
used to "snapshot" the buffer when it hit a max latency. Now users can
trigger a "snapshot" manually when some condition is hit in a program.
But a snapshot currently cannot be triggered by a condition inside the
kernel.

With the addition of tracing_snapshot() and tracing_snapshot_alloc(),
snapshots can now be taken when a condition is hit and the developer
wants to snapshot the case without stopping the trace.

Note, any snapshot will overwrite the old one, so take care in how this
is done.

These new functions are to be used like tracing_on(), tracing_off() and
trace_printk() are. That is, they should never be called in the mainline
Linux kernel. They are solely for the purpose of debugging.

The tracing_snapshot() will not allocate a buffer, and it is safe to be
called from any context (except NMIs). But if a snapshot buffer isn't
allocated when it is called, it will write to the live buffer, complain
about the lack of a snapshot buffer, and then stop tracing (giving you
the "permanent snapshot").

tracing_snapshot_alloc() will allocate the snapshot buffer if it was not
already allocated and then take the snapshot. This routine *may sleep*,
and must be called from a context that can sleep. The allocation is done
with GFP_KERNEL and is not atomic. If you need a snapshot in an atomic
context, say in early boot, then it is best to call
tracing_snapshot_alloc() before then, where it will allocate the buffer,
and then you can use tracing_snapshot() anywhere you want and still get
snapshots.

Cc: Hiraku Toyooka
Cc: Thomas Gleixner
Cc: Peter Zijlstra
Signed-off-by: Steven Rostedt
---
 include/linux/kernel.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux/kernel.h')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index c566927efcbd..bc5392a326ab 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -483,6 +483,8 @@ enum ftrace_dump_mode {
 void tracing_on(void);
 void tracing_off(void);
 int tracing_is_on(void);
+void tracing_snapshot(void);
+void tracing_snapshot_alloc(void);
 
 extern void tracing_start(void);
 extern void tracing_stop(void);
@@ -570,6 +572,8 @@ static inline void trace_dump_stack(void) { }
 static inline void tracing_on(void) { }
 static inline void tracing_off(void) { }
 static inline int tracing_is_on(void) { return 0; }
+static inline void tracing_snapshot(void) { }
+static inline void tracing_snapshot_alloc(void) { }
 
 static inline __printf(1, 2)
 int trace_printk(const char *fmt, ...)
--
cgit v1.2.3


From 09ae72348eccb60e304cf8ce94653f4a78fcd407 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Fri, 8 Mar 2013 21:02:34 -0500
Subject: tracing: Add trace_puts() for even faster trace_printk() tracing

The trace_printk() is extremely fast and is very handy as it can be used
in any context (including NMIs!). But it still requires scanning the fmt
string for parsing the args. Even the trace_bprintk() requires a scan to
know what args will be saved, although it doesn't copy the format string
itself. Several times trace_printk() has no args, and wastes cpu cycles
scanning the fmt string.
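For illustration only (this snippet is not part of the patch, and
my_fast_path() is just a hypothetical debugging hook): the wasteful case
described above is a constant format string with no arguments, where the
printf-style scan is pure overhead.

  #include <linux/kernel.h>

  /* Hypothetical hot-path hook, added only while debugging. */
  static void my_fast_path(void)
  {
          /*
           * No arguments follow the format string, yet trace_printk()
           * still scans "entered fast path\n" for '%' conversions.
           */
          trace_printk("entered fast path\n");
  }
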
Adding trace_puts() allows the developer to use an even faster tracing
method that only saves the pointer to the string in the ring buffer
without doing any format parsing at all. This will help remove even more
of the "Heisenbug" effect when debugging.

Also fixed up the F_printk()s for the ftrace internal bprint and print
events.

Cc: Thomas Gleixner
Cc: Peter Zijlstra
Cc: Frederic Weisbecker
Signed-off-by: Steven Rostedt
---
 include/linux/kernel.h       | 41 +++++++++++++++++++++++-
 kernel/trace/trace.c         | 76 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace.h         |  2 ++
 kernel/trace/trace_entries.h | 23 +++++++++++---
 kernel/trace/trace_output.c  | 75 +++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_output.h  |  2 ++
 6 files changed, 214 insertions(+), 5 deletions(-)

(limited to 'include/linux/kernel.h')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index bc5392a326ab..a3a5574a61fc 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -514,7 +514,8 @@ do { \
  *
  * This is intended as a debugging tool for the developer only.
  * Please refrain from leaving trace_printks scattered around in
- * your code.
+ * your code. (Extra memory is used for special buffers that are
+ * allocated when trace_printk() is used)
  */
 
 #define trace_printk(fmt, args...)				\
@@ -537,6 +538,44 @@ int __trace_bprintk(unsigned long ip, const char *fmt, ...);
 extern __printf(2, 3)
 int __trace_printk(unsigned long ip, const char *fmt, ...);
 
+/**
+ * trace_puts - write a string into the ftrace buffer
+ * @str: the string to record
+ *
+ * Note: __trace_bputs is an internal function for trace_puts and
+ *       the @ip is passed in via the trace_puts macro.
+ *
+ * This is similar to trace_printk() but is made for those really fast
+ * paths that a developer wants the least amount of "Heisenbug" effects,
+ * where the processing of the print format is still too much.
+ *
+ * This function allows a kernel developer to debug fast path sections
+ * that printk is not appropriate for. By scattering in various
+ * printk like tracing in the code, a developer can quickly see
+ * where problems are occurring.
+ *
+ * This is intended as a debugging tool for the developer only.
+ * Please refrain from leaving trace_puts scattered around in
+ * your code. (Extra memory is used for special buffers that are
+ * allocated when trace_puts() is used)
+ *
+ * Returns: 0 if nothing was written, positive # if string was.
+ *          (1 when __trace_bputs is used, strlen(str) when __trace_puts is used)
+ */
+
+extern int __trace_bputs(unsigned long ip, const char *str);
+extern int __trace_puts(unsigned long ip, const char *str, int size);
+#define trace_puts(str) ({					\
+	static const char *trace_printk_fmt			\
+		__attribute__((section("__trace_printk_fmt"))) =	\
+		__builtin_constant_p(str) ? str : NULL;		\
+								\
+	if (__builtin_constant_p(str))				\
+		__trace_bputs(_THIS_IP_, trace_printk_fmt);	\
+	else							\
+		__trace_puts(_THIS_IP_, str, strlen(str));	\
+})
+
 extern void trace_dump_stack(void);
 
 /*
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4021a5e66412..5043a0c4dde0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -350,6 +350,77 @@ void tracing_on(void)
 }
 EXPORT_SYMBOL_GPL(tracing_on);
 
+/**
+ * __trace_puts - write a constant string into the trace buffer.
+ * @ip:   The address of the caller
+ * @str:  The constant string to write
+ * @size: The size of the string.
+ */
+int __trace_puts(unsigned long ip, const char *str, int size)
+{
+	struct ring_buffer_event *event;
+	struct ring_buffer *buffer;
+	struct print_entry *entry;
+	unsigned long irq_flags;
+	int alloc;
+
+	alloc = sizeof(*entry) + size + 2; /* possible \n added */
+
+	local_save_flags(irq_flags);
+	buffer = global_trace.trace_buffer.buffer;
+	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
+					  irq_flags, preempt_count());
+	if (!event)
+		return 0;
+
+	entry = ring_buffer_event_data(event);
+	entry->ip = ip;
+
+	memcpy(&entry->buf, str, size);
+
+	/* Add a newline if necessary */
+	if (entry->buf[size - 1] != '\n') {
+		entry->buf[size] = '\n';
+		entry->buf[size + 1] = '\0';
+	} else
+		entry->buf[size] = '\0';
+
+	__buffer_unlock_commit(buffer, event);
+
+	return size;
+}
+EXPORT_SYMBOL_GPL(__trace_puts);
+
+/**
+ * __trace_bputs - write the pointer to a constant string into trace buffer
+ * @ip:  The address of the caller
+ * @str: The constant string to write to the buffer to
+ */
+int __trace_bputs(unsigned long ip, const char *str)
+{
+	struct ring_buffer_event *event;
+	struct ring_buffer *buffer;
+	struct bputs_entry *entry;
+	unsigned long irq_flags;
+	int size = sizeof(struct bputs_entry);
+
+	local_save_flags(irq_flags);
+	buffer = global_trace.trace_buffer.buffer;
+	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
+					  irq_flags, preempt_count());
+	if (!event)
+		return 0;
+
+	entry = ring_buffer_event_data(event);
+	entry->ip  = ip;
+	entry->str = str;
+
+	__buffer_unlock_commit(buffer, event);
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(__trace_bputs);
+
 #ifdef CONFIG_TRACER_SNAPSHOT
 /**
  * trace_snapshot - take a snapshot of the current buffer.
@@ -2475,6 +2546,11 @@ enum print_line_t print_trace_line(struct trace_iterator *iter)
 		return ret;
 	}
 
+	if (iter->ent->type == TRACE_BPUTS &&
+			trace_flags & TRACE_ITER_PRINTK &&
+			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
+		return trace_print_bputs_msg_only(iter);
+
 	if (iter->ent->type == TRACE_BPRINT &&
 			trace_flags & TRACE_ITER_PRINTK &&
 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 26bc71834041..d5764a8532e2 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -34,6 +34,7 @@ enum trace_type {
 	TRACE_GRAPH_ENT,
 	TRACE_USER_STACK,
 	TRACE_BLK,
+	TRACE_BPUTS,
 
 	__TRACE_LAST_TYPE,
 };
@@ -277,6 +278,7 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
 		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
 		IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT);	\
+		IF_ASSIGN(var, ent, struct bputs_entry, TRACE_BPUTS);	\
 		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\
 			  TRACE_MMIO_RW);				\
 		IF_ASSIGN(var, ent, struct trace_mmiotrace_map,		\
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 4108e1250ca2..e2d027ac66a2 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -223,8 +223,8 @@ FTRACE_ENTRY(bprint, bprint_entry,
 		__dynamic_array(	u32,	buf	)
 	),
 
-	F_printk("%08lx fmt:%p",
-		 __entry->ip, __entry->fmt),
+	F_printk("%pf: %s",
+		 (void *)__entry->ip, __entry->fmt),
 
 	FILTER_OTHER
 );
@@ -238,8 +238,23 @@ FTRACE_ENTRY(print, print_entry,
 		__dynamic_array(	char,	buf	)
 	),
 
-	F_printk("%08lx %s",
-		 __entry->ip, __entry->buf),
+	F_printk("%pf: %s",
+		 (void *)__entry->ip, __entry->buf),
+
+	FILTER_OTHER
+);
+
+FTRACE_ENTRY(bputs, bputs_entry,
+
+	TRACE_BPUTS,
+
+	F_STRUCT(
+		__field(	unsigned long,	ip	)
+		__field(	const char *,	str	)
+	),
+
+	F_printk("%pf: %s",
+		 (void *)__entry->ip, __entry->str),
 
 	FILTER_OTHER
 );
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 2edc7220d017..19f48e7edc39 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -37,6 +37,22 @@ int trace_print_seq(struct seq_file *m, struct trace_seq *s)
 	return ret;
 }
 
+enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter)
+{
+	struct trace_seq *s = &iter->seq;
+	struct trace_entry *entry = iter->ent;
+	struct bputs_entry *field;
+	int ret;
+
+	trace_assign_type(field, entry);
+
+	ret = trace_seq_puts(s, field->str);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return TRACE_TYPE_HANDLED;
+}
+
 enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
@@ -1244,6 +1260,64 @@ static struct trace_event trace_user_stack_event = {
 	.funcs		= &trace_user_stack_funcs,
 };
 
+/* TRACE_BPUTS */
+static enum print_line_t
+trace_bputs_print(struct trace_iterator *iter, int flags,
+		  struct trace_event *event)
+{
+	struct trace_entry *entry = iter->ent;
+	struct trace_seq *s = &iter->seq;
+	struct bputs_entry *field;
+
+	trace_assign_type(field, entry);
+
+	if (!seq_print_ip_sym(s, field->ip, flags))
+		goto partial;
+
+	if (!trace_seq_puts(s, ": "))
+		goto partial;
+
+	if (!trace_seq_puts(s, field->str))
+		goto partial;
+
+	return TRACE_TYPE_HANDLED;
+
+ partial:
+	return TRACE_TYPE_PARTIAL_LINE;
+}
+
+
+static enum print_line_t
+trace_bputs_raw(struct trace_iterator *iter, int flags,
+		struct trace_event *event)
+{
+	struct bputs_entry *field;
+	struct trace_seq *s = &iter->seq;
+
+	trace_assign_type(field, iter->ent);
+
+	if (!trace_seq_printf(s, ": %lx : ", field->ip))
+		goto partial;
+
+	if (!trace_seq_puts(s, field->str))
+		goto partial;
+
+	return TRACE_TYPE_HANDLED;
+
+ partial:
+	return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static struct trace_event_functions trace_bputs_funcs = {
+	.trace		= trace_bputs_print,
+	.raw		= trace_bputs_raw,
+};
+
+static struct trace_event trace_bputs_event = {
+	.type		= TRACE_BPUTS,
+	.funcs		= &trace_bputs_funcs,
+};
+
 /* TRACE_BPRINT */
 static enum print_line_t
 trace_bprint_print(struct trace_iterator *iter, int flags,
@@ -1356,6 +1430,7 @@ static struct trace_event *events[] __initdata = {
 	&trace_wake_event,
 	&trace_stack_event,
 	&trace_user_stack_event,
+	&trace_bputs_event,
 	&trace_bprint_event,
 	&trace_print_event,
 	NULL
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index c038eba0492b..af77870de278 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -4,6 +4,8 @@
 #include <linux/trace_seq.h>
 #include "trace.h"
 
+extern enum print_line_t
+trace_print_bputs_msg_only(struct trace_iterator *iter);
 extern enum print_line_t
 trace_print_bprintk_msg_only(struct trace_iterator *iter);
 extern enum print_line_t
--
cgit v1.2.3


From 9d3c752c062e3266f1051ba0825276ea1e2777da Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Fri, 8 Mar 2013 22:11:57 -0500
Subject: tracing: Optimize trace_printk() with one arg to use trace_puts()

Although trace_printk() is extremely fast, especially when it uses
trace_bprintk() (which writes the args straight to the buffer instead of
inserting them into a string), it still has the overhead of calling one
of the printf sprintf() functions, which need to scan the fmt string to
determine what args, if any, it has. This is a waste of precious CPU
cycles if the printk format is a single constant string with no args.
It is better to use trace_puts(), which does not have the overhead of
the fmt scanning.
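As a minimal sketch of the manual choice this patch automates (the helper
names note_entry() and note_len() are hypothetical, for illustration
only): with a constant string and no args a developer can already call
trace_puts() directly, and only formats that take args need
trace_printk().

  #include <linux/kernel.h>

  /* Constant string, no args: trace_puts() skips the fmt scan entirely. */
  static void note_entry(void)
  {
          trace_puts("hit the slow path\n");
  }

  /* A format with args still needs trace_printk() to record them. */
  static void note_len(int len)
  {
          trace_printk("buffer length %d\n", len);
  }
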
But wouldn't it be nice if the developer didn't have to think about such
things, and the compiler would just do it for them?

  trace_printk("this string has no args\n");
  [...]
  trace_printk("this string does %p %d\n", foo, bar);

As tracing is critical to having the least amount of overhead,
especially when dealing with race conditions, and you want to eliminate
any "Heisenbugs", you want the trace_printk() to use the fastest
possible means of tracing.

Currently the macro magic determines whether it can use trace_bprintk();
if the fmt is a dynamic string (a variable), it falls back to the slow
trace_printk() method that does a full snprintf() before copying the
result into the buffer, whereas trace_bprintk() only copies the pointer
to the fmt and the args into the buffer.

Well, now there's a way to spend some more Hogwarts cash and come up
with new fancy macro magic.

  #define trace_printk(fmt, ...)				\
  do {							\
	char _______STR[] = __stringify((__VA_ARGS__));	\
	if (sizeof(_______STR) > 3)			\
		do_trace_printk(fmt, ##__VA_ARGS__);	\
	else						\
		trace_puts(fmt);			\
  } while (0)

The above needs a bit of explaining (both here and in the comments).

By stringifying the __VA_ARGS__, we can, at compile time, determine the
number of args that are being passed to trace_printk(). The extra
parentheses are required, otherwise the compiler complains about too
many parameters for __stringify if there is more than one arg.

When there are no args, __stringify((__VA_ARGS__)) converts into "()\0",
a string of 3 characters. Anything else will be a string containing more
than 3 characters.

Now we assign that string to a dynamic char array, and then take the
sizeof() of that array. If it is greater than 3 characters, we know
trace_printk() has args and we need to do the full "do_trace_printk()"
on them; otherwise only the format string was passed and we can optimize
to use trace_puts().

Cc: Thomas Gleixner
Cc: Peter Zijlstra
Cc: Frederic Weisbecker
Signed-off-by: Steven "The King of Nasty Macros!" Rostedt
---
 include/linux/kernel.h | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'include/linux/kernel.h')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a3a5574a61fc..d0a16fe03fef 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -516,9 +516,30 @@ do { \
  * Please refrain from leaving trace_printks scattered around in
  * your code. (Extra memory is used for special buffers that are
  * allocated when trace_printk() is used)
+ *
+ * A little optimization trick is done here. If there's only one
+ * argument, there's no need to scan the string for printf formats.
+ * The trace_puts() will suffice. But how can we take advantage of
+ * using trace_puts() when trace_printk() has only one argument?
+ * By stringifying the args and checking the size we can tell
+ * whether or not there are args. __stringify((__VA_ARGS__)) will
+ * turn into "()\0" with a size of 3 when there are no args, anything
+ * else will be bigger. All we need to do is define a string to this,
+ * and then take its size and compare to 3. If it's bigger, use
+ * do_trace_printk(); otherwise, optimize it to trace_puts(). Then just
+ * let gcc optimize the rest.
  */
 
-#define trace_printk(fmt, args...)			\
+#define trace_printk(fmt, ...)				\
+do {							\
+	char _______STR[] = __stringify((__VA_ARGS__));	\
+	if (sizeof(_______STR) > 3)			\
+		do_trace_printk(fmt, ##__VA_ARGS__);	\
+	else						\
+		trace_puts(fmt);			\
+} while (0)
+
+#define do_trace_printk(fmt, args...)			\
 do {							\
	static const char *trace_printk_fmt		\
		__attribute__((section("__trace_printk_fmt"))) =	\
--
cgit v1.2.3


From c142be8ebe0b7bf73c8a0063925623f3e4b980c0 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Wed, 13 Mar 2013 09:55:57 -0400
Subject: tracing: Add skip argument to trace_dump_stack()

Although trace_dump_stack() already skips three functions in the call to
the stack trace, which gets the stack trace to start at the caller of
the function, the caller may want to skip some more too (as it may have
helper functions).

Add a skip argument to trace_dump_stack() that lets the caller skip
functions in the back trace that it doesn't care about.

Signed-off-by: Steven Rostedt
---
 include/linux/kernel.h |  2 +-
 kernel/trace/trace.c   | 13 +++++++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux/kernel.h')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d0a16fe03fef..239dbb9627ca 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -597,7 +597,7 @@ extern int __trace_puts(unsigned long ip, const char *str, int size);
 		__trace_puts(_THIS_IP_, str, strlen(str));	\
 })
 
-extern void trace_dump_stack(void);
+extern void trace_dump_stack(int skip);
 
 /*
  * The double __builtin_constant_p is because gcc will give us an error
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c5b844621562..8aa53213201f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1657,8 +1657,9 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
 
 /**
  * trace_dump_stack - record a stack back trace in the trace buffer
+ * @skip: Number of functions to skip (helper handlers)
  */
-void trace_dump_stack(void)
+void trace_dump_stack(int skip)
 {
 	unsigned long flags;
 
@@ -1667,9 +1668,13 @@ void trace_dump_stack(void)
 
 	local_save_flags(flags);
 
-	/* skipping 3 traces, seems to get us at the caller of this function */
-	__ftrace_trace_stack(global_trace.trace_buffer.buffer, flags, 3,
-			     preempt_count(), NULL);
+	/*
+	 * Skip 3 more, seems to get us at the caller of
+	 * this function.
+	 */
+	skip += 3;
+	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
+			     flags, skip, preempt_count(), NULL);
 }
 
 static DEFINE_PER_CPU(int, user_stack_count);
--
cgit v1.2.3
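A minimal usage sketch of the new argument (check_and_dump() is a
hypothetical debugging helper, not part of the patch): a helper that
dumps the stack on behalf of its caller passes the number of its own
frames as skip, so the recorded back trace starts at the code being
debugged rather than at the helper itself.

  #include <linux/kernel.h>

  /* Hypothetical debug helper: record the caller's stack, not our own frame. */
  static void check_and_dump(int cond)
  {
          if (cond)
                  trace_dump_stack(1);    /* skip check_and_dump() itself */
  }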