From cecbca96da387428e220e307a9c945e37e2f4d9e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 18 Apr 2010 19:08:41 +0200 Subject: tracing: Dump either the oops's cpu source or all cpus buffers The ftrace_dump_on_oops kernel parameter, sysctl and sysrq let one dump every cpu buffers when an oops or panic happens. It's nice when you have few cpus but it may take ages if have many, plus you miss the real origin of the problem in all the cpu traces. Sometimes, all you need is to dump the cpu buffer that triggered the opps, most of the time it is our main interest. This patch modifies ftrace_dump_on_oops to handle this choice. The ftrace_dump_on_oops kernel parameter, when it comes alone, has the same behaviour than before. But ftrace_dump_on_oops=orig_cpu will only dump the buffer of the cpu that oops'ed. Similarly, sysctl kernel.ftrace_dump_on_oops=1 and echo 1 > /proc/sys/kernel/ftrace_dump_on_oops keep their previous behaviour. But setting 2 jumps into cpu origin dump mode. v2: Fix double setup v3: Fix spelling issues reported by Randy Dunlap v4: Also update __ftrace_dump in the selftests Signed-off-by: Frederic Weisbecker Acked-by: David S. Miller Acked-by: Steven Rostedt Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Li Zefan Cc: Lai Jiangshan --- include/linux/ftrace.h | 4 +++- include/linux/kernel.h | 11 +++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 01e6adea07ec..ea5b1aae0e8b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -492,7 +492,9 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk) return tsk->trace & TSK_TRACE_FL_GRAPH; } -extern int ftrace_dump_on_oops; +enum ftrace_dump_mode; + +extern enum ftrace_dump_mode ftrace_dump_on_oops; #ifdef CONFIG_PREEMPT #define INIT_TRACE_RECURSION .trace_recursion = 0, diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 9365227dbaf6..9fb1c1299032 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -490,6 +490,13 @@ static inline void tracing_off(void) { } static inline void tracing_off_permanent(void) { } static inline int tracing_is_on(void) { return 0; } #endif + +enum ftrace_dump_mode { + DUMP_NONE, + DUMP_ALL, + DUMP_ORIG, +}; + #ifdef CONFIG_TRACING extern void tracing_start(void); extern void tracing_stop(void); @@ -571,7 +578,7 @@ __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap); extern int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); -extern void ftrace_dump(void); +extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); #else static inline void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } @@ -592,7 +599,7 @@ ftrace_vprintk(const char *fmt, va_list ap) { return 0; } -static inline void ftrace_dump(void) { } +static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #endif /* CONFIG_TRACING */ /* -- cgit v1.2.3 From 62b915f1060996a8e1f69be50e3b8e9e43b710cb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 2 Apr 2010 19:01:22 +0200 Subject: tracing: Add graph output support for irqsoff tracer Add function graph output to irqsoff tracer. The graph output is enabled by setting new 'display-graph' trace option. Signed-off-by: Jiri Olsa LKML-Reference: <1270227683-14631-4-git-send-email-jolsa@redhat.com> Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 15 +- kernel/trace/trace.c | 35 +++-- kernel/trace/trace.h | 21 +++ kernel/trace/trace_functions_graph.c | 15 +- kernel/trace/trace_irqsoff.c | 271 +++++++++++++++++++++++++++++++++-- 5 files changed, 324 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ea5b1aae0e8b..8415a522f430 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -352,6 +352,10 @@ struct ftrace_graph_ret { int depth; }; +/* Type of the callback handlers for tracing function graph*/ +typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ +typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ + #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* for init task */ @@ -400,10 +404,6 @@ extern char __irqentry_text_end[]; #define FTRACE_RETFUNC_DEPTH 50 #define FTRACE_RETSTACK_ALLOC_SIZE 32 -/* Type of the callback handlers for tracing function graph*/ -typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ -typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ - extern int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc); @@ -441,6 +441,13 @@ static inline void unpause_graph_tracing(void) static inline void ftrace_graph_init_task(struct task_struct *t) { } static inline void ftrace_graph_exit_task(struct task_struct *t) { } +static inline int register_ftrace_graph(trace_func_graph_ret_t retfunc, + trace_func_graph_ent_t entryfunc) +{ + return -1; +} +static inline void unregister_ftrace_graph(void) { } + static inline int task_curr_ret_stack(struct task_struct *tsk) { return -1; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7b516c7ef9a0..8b9ba41ec146 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1808,7 +1808,7 @@ static void print_func_help_header(struct seq_file *m) } -static void +void print_trace_header(struct seq_file *m, struct trace_iterator *iter) { unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); @@ -2017,7 +2017,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED; } -static int trace_empty(struct trace_iterator *iter) +int trace_empty(struct trace_iterator *iter) { int cpu; @@ -2084,6 +2084,23 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter) return print_trace_fmt(iter); } +void trace_default_header(struct seq_file *m) +{ + struct trace_iterator *iter = m->private; + + if (iter->iter_flags & TRACE_FILE_LAT_FMT) { + /* print nothing if the buffers are empty */ + if (trace_empty(iter)) + return; + print_trace_header(m, iter); + if (!(trace_flags & TRACE_ITER_VERBOSE)) + print_lat_help_header(m); + } else { + if (!(trace_flags & TRACE_ITER_VERBOSE)) + print_func_help_header(m); + } +} + static int s_show(struct seq_file *m, void *v) { struct trace_iterator *iter = v; @@ -2096,17 +2113,9 @@ static int s_show(struct seq_file *m, void *v) } if (iter->trace && iter->trace->print_header) iter->trace->print_header(m); - else if (iter->iter_flags & TRACE_FILE_LAT_FMT) { - /* print nothing if the buffers are empty */ - if (trace_empty(iter)) - return 0; - print_trace_header(m, iter); - if (!(trace_flags & TRACE_ITER_VERBOSE)) - print_lat_help_header(m); - } else { - if (!(trace_flags & TRACE_ITER_VERBOSE)) - print_func_help_header(m); - } + else + trace_default_header(m); + } else if (iter->leftover) { /* * If we filled the seq_file buffer earlier, we diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 970004c5fa79..911e9864e94a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -378,6 +378,9 @@ void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); +void trace_default_header(struct seq_file *m); +void print_trace_header(struct seq_file *m, struct trace_iterator *iter); +int trace_empty(struct trace_iterator *iter); void trace_graph_return(struct ftrace_graph_ret *trace); int trace_graph_entry(struct ftrace_graph_ent *trace); @@ -491,11 +494,29 @@ extern int trace_clock_id; /* Standard output formatting function used for function return traces */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER + +/* Flag options */ +#define TRACE_GRAPH_PRINT_OVERRUN 0x1 +#define TRACE_GRAPH_PRINT_CPU 0x2 +#define TRACE_GRAPH_PRINT_OVERHEAD 0x4 +#define TRACE_GRAPH_PRINT_PROC 0x8 +#define TRACE_GRAPH_PRINT_DURATION 0x10 +#define TRACE_GRAPH_PRINT_ABS_TIME 0x20 + extern enum print_line_t print_graph_function_flags(struct trace_iterator *iter, u32 flags); extern void print_graph_headers_flags(struct seq_file *s, u32 flags); extern enum print_line_t trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); +extern void graph_trace_open(struct trace_iterator *iter); +extern void graph_trace_close(struct trace_iterator *iter); +extern int __trace_graph_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, + unsigned long flags, int pc); +extern void __trace_graph_return(struct trace_array *tr, + struct ftrace_graph_ret *trace, + unsigned long flags, int pc); + #ifdef CONFIG_DYNAMIC_FTRACE /* TODO: make this variable */ diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index de5f6518aba0..dd11c830eb84 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -179,7 +179,7 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) return ret; } -static int __trace_graph_entry(struct trace_array *tr, +int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, unsigned long flags, int pc) @@ -246,7 +246,7 @@ int trace_graph_thresh_entry(struct ftrace_graph_ent *trace) return trace_graph_entry(trace); } -static void __trace_graph_return(struct trace_array *tr, +void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, unsigned long flags, int pc) @@ -1093,6 +1093,11 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) trace_assign_type(field, entry); return print_graph_return(&field->ret, s, entry, iter, flags); } + case TRACE_STACK: + case TRACE_FN: + /* dont trace stack and functions as comments */ + return TRACE_TYPE_UNHANDLED; + default: return print_graph_comment(s, entry, iter, flags); } @@ -1170,12 +1175,12 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags) seq_printf(s, " | | | |\n"); } -static void print_graph_headers(struct seq_file *s) +void print_graph_headers(struct seq_file *s) { print_graph_headers_flags(s, tracer_flags.val); } -static void graph_trace_open(struct trace_iterator *iter) +void graph_trace_open(struct trace_iterator *iter) { /* pid and depth on the last trace processed */ struct fgraph_data *data; @@ -1210,7 +1215,7 @@ static void graph_trace_open(struct trace_iterator *iter) pr_warning("function graph tracer: not enough memory\n"); } -static void graph_trace_close(struct trace_iterator *iter) +void graph_trace_close(struct trace_iterator *iter) { struct fgraph_data *data = iter->private; diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 2974bc7538c7..6fd486e0cef4 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -34,6 +34,9 @@ static int trace_type __read_mostly; static int save_lat_flag; +static void stop_irqsoff_tracer(struct trace_array *tr, int graph); +static int start_irqsoff_tracer(struct trace_array *tr, int graph); + #ifdef CONFIG_PREEMPT_TRACER static inline int preempt_trace(void) @@ -55,6 +58,23 @@ irq_trace(void) # define irq_trace() (0) #endif +#define TRACE_DISPLAY_GRAPH 1 + +static struct tracer_opt trace_opts[] = { +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* display latency trace as call graph */ + { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) }, +#endif + { } /* Empty entry */ +}; + +static struct tracer_flags tracer_flags = { + .val = 0, + .opts = trace_opts, +}; + +#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH) + /* * Sequence count - we record it when starting a measurement and * skip the latency if the sequence has changed - some other section @@ -108,6 +128,202 @@ static struct ftrace_ops trace_ops __read_mostly = }; #endif /* CONFIG_FUNCTION_TRACER */ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) +{ + int cpu; + + if (!(bit & TRACE_DISPLAY_GRAPH)) + return -EINVAL; + + if (!(is_graph() ^ set)) + return 0; + + stop_irqsoff_tracer(irqsoff_trace, !set); + + for_each_possible_cpu(cpu) + per_cpu(tracing_cpu, cpu) = 0; + + tracing_max_latency = 0; + tracing_reset_online_cpus(irqsoff_trace); + + return start_irqsoff_tracer(irqsoff_trace, set); +} + +static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) +{ + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int ret; + int cpu; + int pc; + + cpu = raw_smp_processor_id(); + if (likely(!per_cpu(tracing_cpu, cpu))) + return 0; + + local_save_flags(flags); + /* slight chance to get a false positive on tracing_cpu */ + if (!irqs_disabled_flags(flags)) + return 0; + + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) { + pc = preempt_count(); + ret = __trace_graph_entry(tr, trace, flags, pc); + } else + ret = 0; + + atomic_dec(&data->disabled); + return ret; +} + +static void irqsoff_graph_return(struct ftrace_graph_ret *trace) +{ + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + + cpu = raw_smp_processor_id(); + if (likely(!per_cpu(tracing_cpu, cpu))) + return; + + local_save_flags(flags); + /* slight chance to get a false positive on tracing_cpu */ + if (!irqs_disabled_flags(flags)) + return; + + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) { + pc = preempt_count(); + __trace_graph_return(tr, trace, flags, pc); + } + + atomic_dec(&data->disabled); +} + +static void irqsoff_trace_open(struct trace_iterator *iter) +{ + if (is_graph()) + graph_trace_open(iter); + +} + +static void irqsoff_trace_close(struct trace_iterator *iter) +{ + if (iter->private) + graph_trace_close(iter); +} + +#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \ + TRACE_GRAPH_PRINT_PROC) + +static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) +{ + u32 flags = GRAPH_TRACER_FLAGS; + + if (trace_flags & TRACE_ITER_LATENCY_FMT) + flags |= TRACE_GRAPH_PRINT_DURATION; + else + flags |= TRACE_GRAPH_PRINT_ABS_TIME; + + /* + * In graph mode call the graph tracer output function, + * otherwise go with the TRACE_FN event handler + */ + if (is_graph()) + return print_graph_function_flags(iter, flags); + + return TRACE_TYPE_UNHANDLED; +} + +static void irqsoff_print_header(struct seq_file *s) +{ + if (is_graph()) { + struct trace_iterator *iter = s->private; + u32 flags = GRAPH_TRACER_FLAGS; + + if (trace_flags & TRACE_ITER_LATENCY_FMT) { + /* print nothing if the buffers are empty */ + if (trace_empty(iter)) + return; + + print_trace_header(s, iter); + flags |= TRACE_GRAPH_PRINT_DURATION; + } else + flags |= TRACE_GRAPH_PRINT_ABS_TIME; + + print_graph_headers_flags(s, flags); + } else + trace_default_header(s); +} + +static void +trace_graph_function(struct trace_array *tr, + unsigned long ip, unsigned long flags, int pc) +{ + u64 time = trace_clock_local(); + struct ftrace_graph_ent ent = { + .func = ip, + .depth = 0, + }; + struct ftrace_graph_ret ret = { + .func = ip, + .depth = 0, + .calltime = time, + .rettime = time, + }; + + __trace_graph_entry(tr, &ent, flags, pc); + __trace_graph_return(tr, &ret, flags, pc); +} + +static void +__trace_function(struct trace_array *tr, + unsigned long ip, unsigned long parent_ip, + unsigned long flags, int pc) +{ + if (!is_graph()) + trace_function(tr, ip, parent_ip, flags, pc); + else { + trace_graph_function(tr, parent_ip, flags, pc); + trace_graph_function(tr, ip, flags, pc); + } +} + +#else +#define __trace_function trace_function + +static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) +{ + return -EINVAL; +} + +static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) +{ + return -1; +} + +static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) +{ + return TRACE_TYPE_UNHANDLED; +} + +static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { } +static void irqsoff_print_header(struct seq_file *s) { } +static void irqsoff_trace_open(struct trace_iterator *iter) { } +static void irqsoff_trace_close(struct trace_iterator *iter) { } +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + /* * Should this new latency be reported/recorded? */ @@ -150,7 +366,7 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(delta)) goto out_unlock; - trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); /* Skip 5 functions to get to the irq/preempt enable function */ __trace_stack(tr, flags, 5, pc); @@ -172,7 +388,7 @@ out_unlock: out: data->critical_sequence = max_sequence; data->preempt_timestamp = ftrace_now(cpu); - trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); } static inline void @@ -204,7 +420,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) local_save_flags(flags); - trace_function(tr, ip, parent_ip, flags, preempt_count()); + __trace_function(tr, ip, parent_ip, flags, preempt_count()); per_cpu(tracing_cpu, cpu) = 1; @@ -238,7 +454,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) atomic_inc(&data->disabled); local_save_flags(flags); - trace_function(tr, ip, parent_ip, flags, preempt_count()); + __trace_function(tr, ip, parent_ip, flags, preempt_count()); check_critical_timing(tr, data, parent_ip ? : ip, cpu); data->critical_start = 0; atomic_dec(&data->disabled); @@ -347,19 +563,32 @@ void trace_preempt_off(unsigned long a0, unsigned long a1) } #endif /* CONFIG_PREEMPT_TRACER */ -static void start_irqsoff_tracer(struct trace_array *tr) +static int start_irqsoff_tracer(struct trace_array *tr, int graph) { - register_ftrace_function(&trace_ops); - if (tracing_is_enabled()) + int ret = 0; + + if (!graph) + ret = register_ftrace_function(&trace_ops); + else + ret = register_ftrace_graph(&irqsoff_graph_return, + &irqsoff_graph_entry); + + if (!ret && tracing_is_enabled()) tracer_enabled = 1; else tracer_enabled = 0; + + return ret; } -static void stop_irqsoff_tracer(struct trace_array *tr) +static void stop_irqsoff_tracer(struct trace_array *tr, int graph) { tracer_enabled = 0; - unregister_ftrace_function(&trace_ops); + + if (!graph) + unregister_ftrace_function(&trace_ops); + else + unregister_ftrace_graph(); } static void __irqsoff_tracer_init(struct trace_array *tr) @@ -372,12 +601,14 @@ static void __irqsoff_tracer_init(struct trace_array *tr) /* make sure that the tracer is visible */ smp_wmb(); tracing_reset_online_cpus(tr); - start_irqsoff_tracer(tr); + + if (start_irqsoff_tracer(tr, is_graph())) + printk(KERN_ERR "failed to start irqsoff tracer\n"); } static void irqsoff_tracer_reset(struct trace_array *tr) { - stop_irqsoff_tracer(tr); + stop_irqsoff_tracer(tr, is_graph()); if (!save_lat_flag) trace_flags &= ~TRACE_ITER_LATENCY_FMT; @@ -409,9 +640,15 @@ static struct tracer irqsoff_tracer __read_mostly = .start = irqsoff_tracer_start, .stop = irqsoff_tracer_stop, .print_max = 1, + .print_header = irqsoff_print_header, + .print_line = irqsoff_print_line, + .flags = &tracer_flags, + .set_flag = irqsoff_set_flag, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_irqsoff, #endif + .open = irqsoff_trace_open, + .close = irqsoff_trace_close, }; # define register_irqsoff(trace) register_tracer(&trace) #else @@ -435,9 +672,15 @@ static struct tracer preemptoff_tracer __read_mostly = .start = irqsoff_tracer_start, .stop = irqsoff_tracer_stop, .print_max = 1, + .print_header = irqsoff_print_header, + .print_line = irqsoff_print_line, + .flags = &tracer_flags, + .set_flag = irqsoff_set_flag, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptoff, #endif + .open = irqsoff_trace_open, + .close = irqsoff_trace_close, }; # define register_preemptoff(trace) register_tracer(&trace) #else @@ -463,9 +706,15 @@ static struct tracer preemptirqsoff_tracer __read_mostly = .start = irqsoff_tracer_start, .stop = irqsoff_tracer_stop, .print_max = 1, + .print_header = irqsoff_print_header, + .print_line = irqsoff_print_line, + .flags = &tracer_flags, + .set_flag = irqsoff_set_flag, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptirqsoff, #endif + .open = irqsoff_trace_open, + .close = irqsoff_trace_close, }; # define register_preemptirqsoff(trace) register_tracer(&trace) -- cgit v1.2.3 From 72c9ddfd4c5bf54ef03cfdf57026416cb678eeba Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 20 Apr 2010 15:47:11 -0700 Subject: ring-buffer: Make non-consuming read less expensive with lots of cpus. When performing a non-consuming read, a synchronize_sched() is performed once for every cpu which is actively tracing. This is very expensive, and can make it take several seconds to open up the 'trace' file with lots of cpus. Only one synchronize_sched() call is actually necessary. What is desired is for all cpus to see the disabling state change. So we transform the existing sequence: for_each_cpu() { ring_buffer_read_start(); } where each ring_buffer_start() call performs a synchronize_sched(), into the following: for_each_cpu() { ring_buffer_read_prepare(); } ring_buffer_read_prepare_sync(); for_each_cpu() { ring_buffer_read_start(); } wherein only the single ring_buffer_read_prepare_sync() call needs to do the synchronize_sched(). The first phase, via ring_buffer_read_prepare(), allocates the 'iter' memory and increments ->record_disabled. In the second phase, ring_buffer_read_prepare_sync() makes sure this ->record_disabled state is visible fully to all cpus. And in the final third phase, the ring_buffer_read_start() calls reset the 'iter' objects allocated in the first phase since we now know that none of the cpus are adding trace entries any more. This makes openning the 'trace' file nearly instantaneous on a sparc64 Niagara2 box with 128 cpus tracing. Signed-off-by: David S. Miller LKML-Reference: <20100420.154711.11246950.davem@davemloft.net> Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 4 ++- kernel/trace/ring_buffer.c | 64 ++++++++++++++++++++++++++++++++++++++------- kernel/trace/trace.c | 11 +++++--- 3 files changed, 65 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index c8297761e414..25b4f686d918 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -127,7 +127,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); struct ring_buffer_iter * -ring_buffer_read_start(struct ring_buffer *buffer, int cpu); +ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu); +void ring_buffer_read_prepare_sync(void); +void ring_buffer_read_start(struct ring_buffer_iter *iter); void ring_buffer_read_finish(struct ring_buffer_iter *iter); struct ring_buffer_event * diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 5885cdfc41f3..2a090448ef6b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3332,23 +3332,30 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, EXPORT_SYMBOL_GPL(ring_buffer_consume); /** - * ring_buffer_read_start - start a non consuming read of the buffer + * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer * @buffer: The ring buffer to read from * @cpu: The cpu buffer to iterate over * - * This starts up an iteration through the buffer. It also disables - * the recording to the buffer until the reading is finished. - * This prevents the reading from being corrupted. This is not - * a consuming read, so a producer is not expected. + * This performs the initial preparations necessary to iterate + * through the buffer. Memory is allocated, buffer recording + * is disabled, and the iterator pointer is returned to the caller. * - * Must be paired with ring_buffer_finish. + * Disabling buffer recordng prevents the reading from being + * corrupted. This is not a consuming read, so a producer is not + * expected. + * + * After a sequence of ring_buffer_read_prepare calls, the user is + * expected to make at least one call to ring_buffer_prepare_sync. + * Afterwards, ring_buffer_read_start is invoked to get things going + * for real. + * + * This overall must be paired with ring_buffer_finish. */ struct ring_buffer_iter * -ring_buffer_read_start(struct ring_buffer *buffer, int cpu) +ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_iter *iter; - unsigned long flags; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return NULL; @@ -3362,15 +3369,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu) iter->cpu_buffer = cpu_buffer; atomic_inc(&cpu_buffer->record_disabled); + + return iter; +} +EXPORT_SYMBOL_GPL(ring_buffer_read_prepare); + +/** + * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls + * + * All previously invoked ring_buffer_read_prepare calls to prepare + * iterators will be synchronized. Afterwards, read_buffer_read_start + * calls on those iterators are allowed. + */ +void +ring_buffer_read_prepare_sync(void) +{ synchronize_sched(); +} +EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); + +/** + * ring_buffer_read_start - start a non consuming read of the buffer + * @iter: The iterator returned by ring_buffer_read_prepare + * + * This finalizes the startup of an iteration through the buffer. + * The iterator comes from a call to ring_buffer_read_prepare and + * an intervening ring_buffer_read_prepare_sync must have been + * performed. + * + * Must be paired with ring_buffer_finish. + */ +void +ring_buffer_read_start(struct ring_buffer_iter *iter) +{ + struct ring_buffer_per_cpu *cpu_buffer; + unsigned long flags; + + if (!iter) + return; + + cpu_buffer = iter->cpu_buffer; spin_lock_irqsave(&cpu_buffer->reader_lock, flags); arch_spin_lock(&cpu_buffer->lock); rb_iter_reset(iter); arch_spin_unlock(&cpu_buffer->lock); spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - - return iter; } EXPORT_SYMBOL_GPL(ring_buffer_read_start); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8b9ba41ec146..756d7283318b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2201,15 +2201,20 @@ __tracing_open(struct inode *inode, struct file *file) if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { for_each_tracing_cpu(cpu) { - iter->buffer_iter[cpu] = - ring_buffer_read_start(iter->tr->buffer, cpu); + ring_buffer_read_prepare(iter->tr->buffer, cpu); + } + ring_buffer_read_prepare_sync(); + for_each_tracing_cpu(cpu) { + ring_buffer_read_start(iter->buffer_iter[cpu]); tracing_iter_reset(iter, cpu); } } else { cpu = iter->cpu_file; iter->buffer_iter[cpu] = - ring_buffer_read_start(iter->tr->buffer, cpu); + ring_buffer_read_prepare(iter->tr->buffer, cpu); + ring_buffer_read_prepare_sync(); + ring_buffer_read_start(iter->buffer_iter[cpu]); tracing_iter_reset(iter, cpu); } -- cgit v1.2.3 From 4dbf6bc239c169b032777616806ecc648058f6b2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 4 May 2010 11:24:01 -0400 Subject: tracing: Convert nop macros to static inlines The ftrace.h file contains several functions as macros when the functions are disabled due to config options. This patch converts most of them to static inlines. There are two exceptions: register_ftrace_function() and unregister_ftrace_function() This is because their parameter "ops" must not be evaluated since code using the function is allowed to #ifdef out the creation of the parameter. This also fixes an error caused by recent changes: kernel/trace/trace_irqsoff.c: In function 'start_irqsoff_tracer': kernel/trace/trace_irqsoff.c:571: error: expected expression before 'do' Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8415a522f430..e0ae83bbd9cc 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -82,9 +82,13 @@ void clear_ftrace_function(void); extern void ftrace_stub(unsigned long a0, unsigned long a1); #else /* !CONFIG_FUNCTION_TRACER */ -# define register_ftrace_function(ops) do { } while (0) -# define unregister_ftrace_function(ops) do { } while (0) -# define clear_ftrace_function(ops) do { } while (0) +/* + * (un)register_ftrace_function must be a macro since the ops parameter + * must not be evaluated. + */ +#define register_ftrace_function(ops) ({ 0; }) +#define unregister_ftrace_function(ops) ({ 0; }) +static inline void clear_ftrace_function(void) { } static inline void ftrace_kill(void) { } static inline void ftrace_stop(void) { } static inline void ftrace_start(void) { } @@ -237,11 +241,13 @@ extern int skip_trace(unsigned long ip); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); #else -# define skip_trace(ip) ({ 0; }) -# define ftrace_force_update() ({ 0; }) -# define ftrace_set_filter(buf, len, reset) do { } while (0) -# define ftrace_disable_daemon() do { } while (0) -# define ftrace_enable_daemon() do { } while (0) +static inline int skip_trace(unsigned long ip) { return 0; } +static inline int ftrace_force_update(void) { return 0; } +static inline void ftrace_set_filter(unsigned char *buf, int len, int reset) +{ +} +static inline void ftrace_disable_daemon(void) { } +static inline void ftrace_enable_daemon(void) { } static inline void ftrace_release_mod(struct module *mod) {} static inline int register_ftrace_command(struct ftrace_func_command *cmd) { @@ -314,16 +320,16 @@ static inline void __ftrace_enabled_restore(int enabled) extern void time_hardirqs_on(unsigned long a0, unsigned long a1); extern void time_hardirqs_off(unsigned long a0, unsigned long a1); #else -# define time_hardirqs_on(a0, a1) do { } while (0) -# define time_hardirqs_off(a0, a1) do { } while (0) + static inline void time_hardirqs_on(unsigned long a0, unsigned long a1) { } + static inline void time_hardirqs_off(unsigned long a0, unsigned long a1) { } #endif #ifdef CONFIG_PREEMPT_TRACER extern void trace_preempt_on(unsigned long a0, unsigned long a1); extern void trace_preempt_off(unsigned long a0, unsigned long a1); #else -# define trace_preempt_on(a0, a1) do { } while (0) -# define trace_preempt_off(a0, a1) do { } while (0) + static inline void trace_preempt_on(unsigned long a0, unsigned long a1) { } + static inline void trace_preempt_off(unsigned long a0, unsigned long a1) { } #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD -- cgit v1.2.3 From 2e26ca7150a4f2ab3e69471dfc65f131e7dd7a05 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 5 May 2010 10:52:31 -0400 Subject: tracing: Fix tracepoint.h DECLARE_TRACE() to allow more than one header When more than one header is included under CREATE_TRACE_POINTS the DECLARE_TRACE() macro is not defined back to its original meaning and the second include will fail to initialize the TRACE_EVENT() and DECLARE_TRACE() correctly. To fix this the tracepoint.h file moves the define of DECLARE_TRACE() out of the #ifdef _LINUX_TRACEPOINT_H protection (just like the define of the TRACE_EVENT()). This way the define_trace.h will undef the DECLARE_TRACE() at the end and allow new headers to start from scratch. This patch also requires fixing the include/events/napi.h It currently uses DECLARE_TRACE() and should be converted to a TRACE_EVENT() format. But I'll leave that change to the authors of that file. But since the napi.h file depends on using the CREATE_TRACE_POINTS and does not define its own DEFINE_TRACE() it must use the define_trace.h method instead. Cc: Neil Horman Cc: David S. Miller Cc: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 114 ++++++++++++++++++++++--------------------- include/trace/define_trace.h | 5 ++ include/trace/events/napi.h | 10 +++- 3 files changed, 72 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 78b4bd3be496..1d85f9a6a199 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -33,6 +33,65 @@ struct tracepoint { * Keep in sync with vmlinux.lds.h. */ +/* + * Connect a probe to a tracepoint. + * Internal API, should not be used directly. + */ +extern int tracepoint_probe_register(const char *name, void *probe); + +/* + * Disconnect a probe from a tracepoint. + * Internal API, should not be used directly. + */ +extern int tracepoint_probe_unregister(const char *name, void *probe); + +extern int tracepoint_probe_register_noupdate(const char *name, void *probe); +extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe); +extern void tracepoint_probe_update_all(void); + +struct tracepoint_iter { + struct module *module; + struct tracepoint *tracepoint; +}; + +extern void tracepoint_iter_start(struct tracepoint_iter *iter); +extern void tracepoint_iter_next(struct tracepoint_iter *iter); +extern void tracepoint_iter_stop(struct tracepoint_iter *iter); +extern void tracepoint_iter_reset(struct tracepoint_iter *iter); +extern int tracepoint_get_iter_range(struct tracepoint **tracepoint, + struct tracepoint *begin, struct tracepoint *end); + +/* + * tracepoint_synchronize_unregister must be called between the last tracepoint + * probe unregistration and the end of module exit to make sure there is no + * caller executing a probe when it is freed. + */ +static inline void tracepoint_synchronize_unregister(void) +{ + synchronize_sched(); +} + +#define PARAMS(args...) args + +#ifdef CONFIG_TRACEPOINTS +extern void tracepoint_update_probe_range(struct tracepoint *begin, + struct tracepoint *end); +#else +static inline void tracepoint_update_probe_range(struct tracepoint *begin, + struct tracepoint *end) +{ } +#endif /* CONFIG_TRACEPOINTS */ + +#endif /* _LINUX_TRACEPOINT_H */ + +/* + * Note: we keep the TRACE_EVENT and DECLARE_TRACE outside the include + * file ifdef protection. + * This is due to the way trace events work. If a file includes two + * trace event headers under one "CREATE_TRACE_POINTS" the first include + * will override the TRACE_EVENT and break the second include. + */ + #ifndef DECLARE_TRACE #define TP_PROTO(args...) args @@ -96,9 +155,6 @@ struct tracepoint { #define EXPORT_TRACEPOINT_SYMBOL(name) \ EXPORT_SYMBOL(__tracepoint_##name) -extern void tracepoint_update_probe_range(struct tracepoint *begin, - struct tracepoint *end); - #else /* !CONFIG_TRACEPOINTS */ #define DECLARE_TRACE(name, proto, args) \ static inline void _do_trace_##name(struct tracepoint *tp, proto) \ @@ -119,61 +175,9 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) -static inline void tracepoint_update_probe_range(struct tracepoint *begin, - struct tracepoint *end) -{ } #endif /* CONFIG_TRACEPOINTS */ #endif /* DECLARE_TRACE */ -/* - * Connect a probe to a tracepoint. - * Internal API, should not be used directly. - */ -extern int tracepoint_probe_register(const char *name, void *probe); - -/* - * Disconnect a probe from a tracepoint. - * Internal API, should not be used directly. - */ -extern int tracepoint_probe_unregister(const char *name, void *probe); - -extern int tracepoint_probe_register_noupdate(const char *name, void *probe); -extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe); -extern void tracepoint_probe_update_all(void); - -struct tracepoint_iter { - struct module *module; - struct tracepoint *tracepoint; -}; - -extern void tracepoint_iter_start(struct tracepoint_iter *iter); -extern void tracepoint_iter_next(struct tracepoint_iter *iter); -extern void tracepoint_iter_stop(struct tracepoint_iter *iter); -extern void tracepoint_iter_reset(struct tracepoint_iter *iter); -extern int tracepoint_get_iter_range(struct tracepoint **tracepoint, - struct tracepoint *begin, struct tracepoint *end); - -/* - * tracepoint_synchronize_unregister must be called between the last tracepoint - * probe unregistration and the end of module exit to make sure there is no - * caller executing a probe when it is freed. - */ -static inline void tracepoint_synchronize_unregister(void) -{ - synchronize_sched(); -} - -#define PARAMS(args...) args - -#endif /* _LINUX_TRACEPOINT_H */ - -/* - * Note: we keep the TRACE_EVENT outside the include file ifdef protection. - * This is due to the way trace events work. If a file includes two - * trace event headers under one "CREATE_TRACE_POINTS" the first include - * will override the TRACE_EVENT and break the second include. - */ - #ifndef TRACE_EVENT /* * For use with the TRACE_EVENT macro: diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 5acfb1eb4df9..1dfab5401511 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -65,6 +65,10 @@ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +/* Make all open coded DECLARE_TRACE nops */ +#undef DECLARE_TRACE +#define DECLARE_TRACE(name, proto, args) + #ifdef CONFIG_EVENT_TRACING #include #endif @@ -75,6 +79,7 @@ #undef DEFINE_EVENT #undef DEFINE_EVENT_PRINT #undef TRACE_HEADER_MULTI_READ +#undef DECLARE_TRACE /* Only undef what we defined in this file */ #ifdef UNDEF_TRACE_INCLUDE_FILE diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index a8989c4547e7..188deca2f3c7 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -1,4 +1,7 @@ -#ifndef _TRACE_NAPI_H_ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM napi + +#if !defined(_TRACE_NAPI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_NAPI_H_ #include @@ -8,4 +11,7 @@ DECLARE_TRACE(napi_poll, TP_PROTO(struct napi_struct *napi), TP_ARGS(napi)); -#endif +#endif /* _TRACE_NAPI_H_ */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3