From 884bfe89a462fcc85c8abd96171519cf2fe70929 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Fri, 15 Jul 2011 14:23:58 -0700 Subject: ring-buffer: Add a 'dropped events' counter The existing 'overrun' counter is incremented when the ring buffer wraps around, with overflow on (the default). We wanted a way to count requests lost from the buffer filling up with overflow off, too. I decided to add a new counter instead of retro-fitting the existing one because it seems like a different statistic to count conceptually, and also because of how the code was structured. Link: http://lkml.kernel.org/r/1310765038-26399-1-git-send-email-slavapestov@google.com Signed-off-by: Slava Pestov Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 6c8835f74f79..2007375cfe77 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -166,6 +166,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); +unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, -- cgit v1.2.3 From 60efc15ae96c7aace8060411b0d5add20e1ab21e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 25 Oct 2012 15:41:51 +0200 Subject: linux/kernel.h: Remove duplicate trace_printk declaration !CONFIG_TRACING both declares and defines (empty) trace_printk. The first one is not redundant so it can be removed. Link: http://lkml.kernel.org/r/1351172511-18125-1-git-send-email-mhocko@suse.cz Signed-off-by: Michal Hocko Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index a123b13b70fd..7785d5df6d8b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -527,9 +527,6 @@ __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); #else -static inline __printf(1, 2) -int trace_printk(const char *fmt, ...); - static inline void tracing_start(void) { } static inline void tracing_stop(void) { } static inline void ftrace_off_permanent(void) { } @@ -539,8 +536,8 @@ static inline void tracing_on(void) { } static inline void tracing_off(void) { } static inline int tracing_is_on(void) { return 0; } -static inline int -trace_printk(const char *fmt, ...) +static inline __printf(1, 2) +int trace_printk(const char *fmt, ...) { return 0; } -- cgit v1.2.3 From 50ecf2c3afead23a05227ab004e4212eca08c207 Mon Sep 17 00:00:00 2001 From: Yoshihiro YUNOMAE Date: Thu, 11 Oct 2012 16:27:54 -0700 Subject: ring-buffer: Change unsigned long type of ring_buffer_oldest_event_ts() to u64 ring_buffer_oldest_event_ts() should return a value of u64 type, because ring_buffer_per_cpu->buffer_page->buffer_data_page->time_stamp is u64 type. Link: http://lkml.kernel.org/r/1349998076-15495-5-git-send-email-dhsharp@google.com Cc: Frederic Weisbecker Cc: Vaibhav Nagarnaik Signed-off-by: Yoshihiro YUNOMAE Signed-off-by: David Sharp Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 2 +- kernel/trace/ring_buffer.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 2007375cfe77..519777e3fa01 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -159,7 +159,7 @@ int ring_buffer_record_is_on(struct ring_buffer *buffer); void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu); void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu); -unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu); +u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_entries(struct ring_buffer *buffer); unsigned long ring_buffer_overruns(struct ring_buffer *buffer); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 23a384b92512..3c7834c24e54 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2932,12 +2932,12 @@ rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ -unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu) +u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu) { unsigned long flags; struct ring_buffer_per_cpu *cpu_buffer; struct buffer_page *bpage; - unsigned long ret; + u64 ret; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; -- cgit v1.2.3 From 0d5c6e1c19bab82fad4837108c2902f557d62a04 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 1 Nov 2012 20:54:21 -0400 Subject: tracing: Use irq_work for wake ups and remove *_nowake_*() functions Have the ring buffer commit function use the irq_work infrastructure to wake up any waiters waiting on the ring buffer for new data. The irq_work was created for such a purpose, where doing the actual wake up at the time of adding data is too dangerous, as an event or function trace may be in the midst of the work queue locks and cause deadlocks. The irq_work will either delay the action to the next timer interrupt, or trigger an IPI to itself forcing an interrupt to do the work (in a safe location). With irq_work, all ring buffer commits can safely do wakeups, removing the need for the ring buffer commit "nowake" variants, which were used by events and function tracing. All commits can now safely use the normal commit, and the "nowake" variants can be removed. Cc: Peter Zijlstra Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 14 ++--- include/trace/ftrace.h | 3 +- kernel/trace/Kconfig | 1 + kernel/trace/trace.c | 121 +++++++++++++++++++++----------------- kernel/trace/trace.h | 5 -- kernel/trace/trace_events.c | 2 +- kernel/trace/trace_kprobe.c | 8 +-- kernel/trace/trace_sched_switch.c | 2 +- kernel/trace/trace_selftest.c | 1 + 9 files changed, 84 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 642928cf57b4..b80c8ddfbbdc 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -127,13 +127,13 @@ trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc); -void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc); -void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc, - struct pt_regs *regs); +void trace_buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event, + unsigned long flags, int pc); +void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer, + struct ring_buffer_event *event, + unsigned long flags, int pc, + struct pt_regs *regs); void trace_current_buffer_discard_commit(struct ring_buffer *buffer, struct ring_buffer_event *event); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a763888a36f9..698f2a890322 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -545,8 +545,7 @@ ftrace_raw_event_##call(void *__data, proto) \ { assign; } \ \ if (!filter_current_check_discard(buffer, event_call, entry, event)) \ - trace_nowake_buffer_unlock_commit(buffer, \ - event, irq_flags, pc); \ + trace_buffer_unlock_commit(buffer, event, irq_flags, pc); \ } /* * The ftrace_test_probe is compiled out, it is only here as a build time check diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 4cea4f41c1d9..5d89335a485f 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -119,6 +119,7 @@ config TRACING select BINARY_PRINTF select EVENT_TRACING select TRACE_CLOCK + select IRQ_WORK config GENERIC_TRACER bool diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d5cbc0d3f209..37d1c703e3ec 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -84,6 +85,14 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set) */ static DEFINE_PER_CPU(bool, trace_cmdline_save); +/* + * When a reader is waiting for data, then this variable is + * set to true. + */ +static bool trace_wakeup_needed; + +static struct irq_work trace_work_wakeup; + /* * Kill all tracing for good (never come back). * It is initialized to 1 but will turn to zero if the initialization @@ -329,12 +338,18 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | static int trace_stop_count; static DEFINE_RAW_SPINLOCK(tracing_start_lock); -static void wakeup_work_handler(struct work_struct *work) +/** + * trace_wake_up - wake up tasks waiting for trace input + * + * Schedules a delayed work to wake up any task that is blocked on the + * trace_wait queue. These is used with trace_poll for tasks polling the + * trace. + */ +static void trace_wake_up(struct irq_work *work) { - wake_up(&trace_wait); -} + wake_up_all(&trace_wait); -static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler); +} /** * tracing_on - enable tracing buffers @@ -389,22 +404,6 @@ int tracing_is_on(void) } EXPORT_SYMBOL_GPL(tracing_is_on); -/** - * trace_wake_up - wake up tasks waiting for trace input - * - * Schedules a delayed work to wake up any task that is blocked on the - * trace_wait queue. These is used with trace_poll for tasks polling the - * trace. - */ -void trace_wake_up(void) -{ - const unsigned long delay = msecs_to_jiffies(2); - - if (trace_flags & TRACE_ITER_BLOCK) - return; - schedule_delayed_work(&wakeup_work, delay); -} - static int __init set_buf_size(char *str) { unsigned long buf_size; @@ -753,6 +752,40 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) } #endif /* CONFIG_TRACER_MAX_TRACE */ +static void default_wait_pipe(struct trace_iterator *iter) +{ + DEFINE_WAIT(wait); + + prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE); + + /* + * The events can happen in critical sections where + * checking a work queue can cause deadlocks. + * After adding a task to the queue, this flag is set + * only to notify events to try to wake up the queue + * using irq_work. + * + * We don't clear it even if the buffer is no longer + * empty. The flag only causes the next event to run + * irq_work to do the work queue wake up. The worse + * that can happen if we race with !trace_empty() is that + * an event will cause an irq_work to try to wake up + * an empty queue. + * + * There's no reason to protect this flag either, as + * the work queue and irq_work logic will do the necessary + * synchronization for the wake ups. The only thing + * that is necessary is that the wake up happens after + * a task has been queued. It's OK for spurious wake ups. + */ + trace_wakeup_needed = true; + + if (trace_empty(iter)) + schedule(); + + finish_wait(&trace_wait, &wait); +} + /** * register_tracer - register a tracer with the ftrace system. * @type - the plugin for the tracer @@ -1156,30 +1189,32 @@ void __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) { __this_cpu_write(trace_cmdline_save, true); + if (trace_wakeup_needed) { + trace_wakeup_needed = false; + /* irq_work_queue() supplies it's own memory barriers */ + irq_work_queue(&trace_work_wakeup); + } ring_buffer_unlock_commit(buffer, event); } static inline void __trace_buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event, - unsigned long flags, int pc, - int wake) + unsigned long flags, int pc) { __buffer_unlock_commit(buffer, event); ftrace_trace_stack(buffer, flags, 6, pc); ftrace_trace_userstack(buffer, flags, pc); - - if (wake) - trace_wake_up(); } void trace_buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc) { - __trace_buffer_unlock_commit(buffer, event, flags, pc, 1); + __trace_buffer_unlock_commit(buffer, event, flags, pc); } +EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit); struct ring_buffer_event * trace_current_buffer_lock_reserve(struct ring_buffer **current_rb, @@ -1196,29 +1231,21 @@ void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc) { - __trace_buffer_unlock_commit(buffer, event, flags, pc, 1); + __trace_buffer_unlock_commit(buffer, event, flags, pc); } EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); -void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc) -{ - __trace_buffer_unlock_commit(buffer, event, flags, pc, 0); -} -EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); - -void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc, - struct pt_regs *regs) +void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer, + struct ring_buffer_event *event, + unsigned long flags, int pc, + struct pt_regs *regs) { __buffer_unlock_commit(buffer, event); ftrace_trace_stack_regs(buffer, flags, 0, pc, regs); ftrace_trace_userstack(buffer, flags, pc); } -EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs); +EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs); void trace_current_buffer_discard_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) @@ -3354,19 +3381,6 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table) } } - -void default_wait_pipe(struct trace_iterator *iter) -{ - DEFINE_WAIT(wait); - - prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE); - - if (trace_empty(iter)) - schedule(); - - finish_wait(&trace_wait, &wait); -} - /* * This is a make-shift waitqueue. * A tracer might use this callback on some rare cases: @@ -5107,6 +5121,7 @@ __init static int tracer_alloc_buffers(void) #endif trace_init_cmdlines(); + init_irq_work(&trace_work_wakeup, trace_wake_up); register_tracer(&nop_trace); current_trace = &nop_trace; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3e8a176f64e6..55010ed175f0 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -327,7 +327,6 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu) int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); -void trace_wake_up(void); void tracing_reset(struct trace_array *tr, int cpu); void tracing_reset_online_cpus(struct trace_array *tr); void tracing_reset_current(int cpu); @@ -349,9 +348,6 @@ trace_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long len, unsigned long flags, int pc); -void trace_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); @@ -370,7 +366,6 @@ void trace_init_global_iter(struct trace_iterator *iter); void tracing_iter_reset(struct trace_iterator *iter, int cpu); -void default_wait_pipe(struct trace_iterator *iter); void poll_wait_pipe(struct trace_iterator *iter); void ftrace(struct trace_array *tr, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index cb2df3b70f7f..880073d0b946 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1760,7 +1760,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, entry->ip = ip; entry->parent_ip = parent_ip; - trace_nowake_buffer_unlock_commit(buffer, event, flags, pc); + trace_buffer_unlock_commit(buffer, event, flags, pc); out: atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5a3c533ef060..1865d5f76538 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -751,8 +751,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); if (!filter_current_check_discard(buffer, call, entry, event)) - trace_nowake_buffer_unlock_commit_regs(buffer, event, - irq_flags, pc, regs); + trace_buffer_unlock_commit_regs(buffer, event, + irq_flags, pc, regs); } /* Kretprobe handler */ @@ -784,8 +784,8 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); if (!filter_current_check_discard(buffer, call, entry, event)) - trace_nowake_buffer_unlock_commit_regs(buffer, event, - irq_flags, pc, regs); + trace_buffer_unlock_commit_regs(buffer, event, + irq_flags, pc, regs); } /* Event entry printers */ diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index b0a136ac382a..3374c792ccd8 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -102,7 +102,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_cpu = task_cpu(wakee); if (!filter_check_discard(call, entry, buffer, event)) - trace_nowake_buffer_unlock_commit(buffer, event, flags, pc); + trace_buffer_unlock_commit(buffer, event, flags, pc); } static void diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 091b815f7b0a..47623169a815 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -1094,6 +1094,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) tracing_stop(); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); + printk("ret = %d\n", ret); if (!ret) ret = trace_test_buffer(&max_tr, &count); -- cgit v1.2.3 From 19f5ee2716373519fda2129e9333f4c3847aa742 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 28 Oct 2012 18:14:14 +0100 Subject: uprobes: Kill arch_uprobe_enable/disable_step() hooks Kill arch_uprobe_enable/disable_step() hooks, they do nothing and nobody needs them. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 2 -- kernel/events/uprobes.c | 10 ---------- 2 files changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 24594571c5a3..2615c4d7788d 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -101,8 +101,6 @@ extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); extern void uprobe_copy_process(struct task_struct *t); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); -extern void __weak arch_uprobe_enable_step(struct arch_uprobe *arch); -extern void __weak arch_uprobe_disable_step(struct arch_uprobe *arch); extern int uprobe_post_sstep_notifier(struct pt_regs *regs); extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index abbfd8440a6d..39c75cc51efc 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1430,14 +1430,6 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) return uprobe; } -void __weak arch_uprobe_enable_step(struct arch_uprobe *arch) -{ -} - -void __weak arch_uprobe_disable_step(struct arch_uprobe *arch) -{ -} - /* * Run handler and ask thread to singlestep. * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. @@ -1491,7 +1483,6 @@ static void handle_swbp(struct pt_regs *regs) goto out; if (!pre_ssout(uprobe, regs, bp_vaddr)) { - arch_uprobe_enable_step(&uprobe->arch); utask->active_uprobe = uprobe; utask->state = UTASK_SSTEP; return; @@ -1523,7 +1514,6 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) else WARN_ON_ONCE(1); - arch_uprobe_disable_step(&uprobe->arch); put_uprobe(uprobe); utask->active_uprobe = NULL; utask->state = UTASK_RUNNING; -- cgit v1.2.3 From 8cbd9cc6254065c97c4bac42daa55ba1abe73a8e Mon Sep 17 00:00:00 2001 From: David Sharp Date: Tue, 13 Nov 2012 12:18:21 -0800 Subject: tracing,x86: Add a TSC trace_clock In order to promote interoperability between userspace tracers and ftrace, add a trace_clock that reports raw TSC values which will then be recorded in the ring buffer. Userspace tracers that also record TSCs are then on exactly the same time base as the kernel and events can be unambiguously interlaced. Tested: Enabled a tracepoint and the "tsc" trace_clock and saw very large timestamp values. v2: Move arch-specific bits out of generic code. v3: Rename "x86-tsc", cleanups v7: Generic arch bits in Kbuild. Google-Bug-Id: 6980623 Link: http://lkml.kernel.org/r/1352837903-32191-1-git-send-email-dhsharp@google.com Acked-by: Ingo Molnar Cc: Masami Hiramatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: David Sharp Signed-off-by: Steven Rostedt --- arch/alpha/include/asm/Kbuild | 1 + arch/arm/include/asm/Kbuild | 1 + arch/arm64/include/asm/Kbuild | 1 + arch/avr32/include/asm/Kbuild | 1 + arch/blackfin/include/asm/Kbuild | 1 + arch/c6x/include/asm/Kbuild | 1 + arch/cris/include/asm/Kbuild | 1 + arch/frv/include/asm/Kbuild | 1 + arch/h8300/include/asm/Kbuild | 1 + arch/hexagon/include/asm/Kbuild | 1 + arch/ia64/include/asm/Kbuild | 1 + arch/m32r/include/asm/Kbuild | 1 + arch/m68k/include/asm/Kbuild | 1 + arch/microblaze/include/asm/Kbuild | 1 + arch/mips/include/asm/Kbuild | 1 + arch/mn10300/include/asm/Kbuild | 1 + arch/openrisc/include/asm/Kbuild | 1 + arch/parisc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/Kbuild | 1 + arch/s390/include/asm/Kbuild | 1 + arch/score/include/asm/Kbuild | 1 + arch/sh/include/asm/Kbuild | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/tile/include/asm/Kbuild | 1 + arch/um/include/asm/Kbuild | 1 + arch/unicore32/include/asm/Kbuild | 1 + arch/x86/include/asm/trace_clock.h | 20 ++++++++++++++++++++ arch/x86/kernel/Makefile | 1 + arch/x86/kernel/trace_clock.c | 21 +++++++++++++++++++++ arch/xtensa/include/asm/Kbuild | 1 + include/asm-generic/trace_clock.h | 16 ++++++++++++++++ include/linux/trace_clock.h | 2 ++ kernel/trace/trace.c | 1 + 33 files changed, 88 insertions(+) create mode 100644 arch/x86/include/asm/trace_clock.h create mode 100644 arch/x86/kernel/trace_clock.c create mode 100644 include/asm-generic/trace_clock.h (limited to 'include/linux') diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index 64ffc9e9e548..dcfabb9f05a0 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -11,3 +11,4 @@ header-y += reg.h header-y += regdef.h header-y += sysinfo.h generic-y += exec.h +generic-y += trace_clock.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index f70ae175a3d6..514e398f1a07 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -31,5 +31,6 @@ generic-y += sockios.h generic-y += termbits.h generic-y += termios.h generic-y += timex.h +generic-y += trace_clock.h generic-y += types.h generic-y += unaligned.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index a581a2205938..6e9ca462127f 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -43,6 +43,7 @@ generic-y += swab.h generic-y += termbits.h generic-y += termios.h generic-y += topology.h +generic-y += trace_clock.h generic-y += types.h generic-y += unaligned.h generic-y += user.h diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 4807ded352c5..4dd4f78d3dcc 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -1,3 +1,4 @@ generic-y += clkdev.h generic-y += exec.h +generic-y += trace_clock.h diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 5a0625aad6a0..27d70759474c 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -38,6 +38,7 @@ generic-y += statfs.h generic-y += termbits.h generic-y += termios.h generic-y += topology.h +generic-y += trace_clock.h generic-y += types.h generic-y += ucontext.h generic-y += unaligned.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 112a496d8355..eae7b5963e86 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -49,6 +49,7 @@ generic-y += termbits.h generic-y += termios.h generic-y += tlbflush.h generic-y += topology.h +generic-y += trace_clock.h generic-y += types.h generic-y += ucontext.h generic-y += user.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 6d43a951b5ec..15a122c3767c 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -11,3 +11,4 @@ header-y += sync_serial.h generic-y += clkdev.h generic-y += exec.h generic-y += module.h +generic-y += trace_clock.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index 4a159da23633..c5d767028306 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -1,3 +1,4 @@ generic-y += clkdev.h generic-y += exec.h +generic-y += trace_clock.h diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 50bbf387b2f8..4bc8ae73e08a 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -3,3 +3,4 @@ include include/asm-generic/Kbuild.asm generic-y += clkdev.h generic-y += exec.h generic-y += module.h +generic-y += trace_clock.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 3bfa9b30f448..bdb54ceb53bc 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -48,6 +48,7 @@ generic-y += stat.h generic-y += termbits.h generic-y += termios.h generic-y += topology.h +generic-y += trace_clock.h generic-y += types.h generic-y += ucontext.h generic-y += unaligned.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index dd02f09b6eda..05b03ecd7933 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -2,3 +2,4 @@ generic-y += clkdev.h generic-y += exec.h generic-y += kvm_para.h +generic-y += trace_clock.h diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index 50bbf387b2f8..4bc8ae73e08a 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -3,3 +3,4 @@ include include/asm-generic/Kbuild.asm generic-y += clkdev.h generic-y += exec.h generic-y += module.h +generic-y += trace_clock.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 88fa3ac86fae..7f1949c0e089 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -24,6 +24,7 @@ generic-y += sections.h generic-y += siginfo.h generic-y += statfs.h generic-y += topology.h +generic-y += trace_clock.h generic-y += types.h generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 8653072d7e9f..2957fcc71764 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -3,3 +3,4 @@ include include/asm-generic/Kbuild.asm header-y += elf.h generic-y += clkdev.h generic-y += exec.h +generic-y += trace_clock.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 533053d12ced..9b54b7a403d4 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -1 +1,2 @@ # MIPS headers +generic-y += trace_clock.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index 4a159da23633..c5d767028306 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -1,3 +1,4 @@ generic-y += clkdev.h generic-y += exec.h +generic-y += trace_clock.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 78de6805268d..8971026e1c63 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -60,6 +60,7 @@ generic-y += swab.h generic-y += termbits.h generic-y += termios.h generic-y += topology.h +generic-y += trace_clock.h generic-y += types.h generic-y += ucontext.h generic-y += user.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index bac8debecffb..ff4c9faed546 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -3,3 +3,4 @@ generic-y += word-at-a-time.h auxvec.h user.h cputime.h emergency-restart.h \ segment.h topology.h vga.h device.h percpu.h hw_irq.h mutex.h \ div64.h irq_regs.h kdebug.h kvm_para.h local64.h local.h param.h \ poll.h xor.h clkdev.h exec.h +generic-y += trace_clock.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index a4fe15e33c6f..2d62b484b3fc 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -2,3 +2,4 @@ generic-y += clkdev.h generic-y += rwsem.h +generic-y += trace_clock.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 0633dc6d254d..f313f9cbcf44 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -1,3 +1,4 @@ generic-y += clkdev.h +generic-y += trace_clock.h diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index ec697aeefd05..16e41fe1a419 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -3,3 +3,4 @@ include include/asm-generic/Kbuild.asm header-y += generic-y += clkdev.h +generic-y += trace_clock.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 29f83beeef7a..280bea9e5e2b 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -31,5 +31,6 @@ generic-y += socket.h generic-y += statfs.h generic-y += termbits.h generic-y += termios.h +generic-y += trace_clock.h generic-y += ucontext.h generic-y += xor.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 645a58da0e86..e26d430ce2fd 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -8,4 +8,5 @@ generic-y += local64.h generic-y += irq_regs.h generic-y += local.h generic-y += module.h +generic-y += trace_clock.h generic-y += word-at-a-time.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 6948015e08a2..b17b9b8e53cd 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -34,5 +34,6 @@ generic-y += sockios.h generic-y += statfs.h generic-y += termbits.h generic-y += termios.h +generic-y += trace_clock.h generic-y += types.h generic-y += xor.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 0f6e7b328265..b30f34a79882 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -2,3 +2,4 @@ generic-y += bug.h cputime.h device.h emergency-restart.h futex.h hardirq.h generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h exec.h generic-y += switch_to.h clkdev.h +generic-y += trace_clock.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index c910c9857e11..7be503e45695 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -54,6 +54,7 @@ generic-y += syscalls.h generic-y += termbits.h generic-y += termios.h generic-y += topology.h +generic-y += trace_clock.h generic-y += types.h generic-y += ucontext.h generic-y += unaligned.h diff --git a/arch/x86/include/asm/trace_clock.h b/arch/x86/include/asm/trace_clock.h new file mode 100644 index 000000000000..5c1652728b6d --- /dev/null +++ b/arch/x86/include/asm/trace_clock.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_TRACE_CLOCK_H +#define _ASM_X86_TRACE_CLOCK_H + +#include +#include + +#ifdef CONFIG_X86_TSC + +extern u64 notrace trace_clock_x86_tsc(void); + +# define ARCH_TRACE_CLOCKS \ + { trace_clock_x86_tsc, "x86-tsc" }, + +#else /* !CONFIG_X86_TSC */ + +#define ARCH_TRACE_CLOCKS + +#endif + +#endif /* _ASM_X86_TRACE_CLOCK_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9fd5eed3f8f5..34e923a53762 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_X86_TSC) += trace_clock.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c new file mode 100644 index 000000000000..25b993729f9b --- /dev/null +++ b/arch/x86/kernel/trace_clock.c @@ -0,0 +1,21 @@ +/* + * X86 trace clocks + */ +#include +#include +#include + +/* + * trace_clock_x86_tsc(): A clock that is just the cycle counter. + * + * Unlike the other clocks, this is not in nanoseconds. + */ +u64 notrace trace_clock_x86_tsc(void) +{ + u64 ret; + + rdtsc_barrier(); + rdtscll(ret); + + return ret; +} diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 6d1302789995..095f0a2244f7 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -25,4 +25,5 @@ generic-y += siginfo.h generic-y += statfs.h generic-y += termios.h generic-y += topology.h +generic-y += trace_clock.h generic-y += xor.h diff --git a/include/asm-generic/trace_clock.h b/include/asm-generic/trace_clock.h new file mode 100644 index 000000000000..6726f1bafb5e --- /dev/null +++ b/include/asm-generic/trace_clock.h @@ -0,0 +1,16 @@ +#ifndef _ASM_GENERIC_TRACE_CLOCK_H +#define _ASM_GENERIC_TRACE_CLOCK_H +/* + * Arch-specific trace clocks. + */ + +/* + * Additional trace clocks added to the trace_clocks + * array in kernel/trace/trace.c + * None if the architecture has not defined it. + */ +#ifndef ARCH_TRACE_CLOCKS +# define ARCH_TRACE_CLOCKS +#endif + +#endif /* _ASM_GENERIC_TRACE_CLOCK_H */ diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h index 4eb490237d4c..d563f37e1a1d 100644 --- a/include/linux/trace_clock.h +++ b/include/linux/trace_clock.h @@ -12,6 +12,8 @@ #include #include +#include + extern u64 notrace trace_clock_local(void); extern u64 notrace trace_clock(void); extern u64 notrace trace_clock_global(void); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c1434b5ce4d1..0d20620c0d27 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -488,6 +488,7 @@ static struct { { trace_clock_local, "local" }, { trace_clock_global, "global" }, { trace_clock_counter, "counter" }, + ARCH_TRACE_CLOCKS }; int trace_clock_id; -- cgit v1.2.3 From 8be0709f10e3dd5d7d07933ad61a9f18c4b93ca5 Mon Sep 17 00:00:00 2001 From: David Sharp Date: Tue, 13 Nov 2012 12:18:22 -0800 Subject: tracing: Format non-nanosec times from tsc clock without a decimal point. With the addition of the "tsc" clock, formatting timestamps to look like fractional seconds is misleading. Mark clocks as either in nanoseconds or not, and format non-nanosecond timestamps as decimal integers. Tested: $ cd /sys/kernel/debug/tracing/ $ cat trace_clock [local] global tsc $ echo sched_switch > set_event $ echo 1 > tracing_on ; sleep 0.0005 ; echo 0 > tracing_on $ cat trace -0 [000] 6330.555552: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 next_prio=120 sleep-29964 [000] 6330.555628: sched_switch: prev_comm=bash prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120 ... $ echo 1 > options/latency-format $ cat trace -0 0 4104553247us+: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 next_prio=120 sleep-29964 0 4104553322us+: sched_switch: prev_comm=bash prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120 ... $ echo tsc > trace_clock $ cat trace $ echo 1 > tracing_on ; sleep 0.0005 ; echo 0 > tracing_on $ echo 0 > options/latency-format $ cat trace -0 [000] 16490053398357: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 next_prio=120 sleep-31128 [000] 16490053588518: sched_switch: prev_comm=bash prev_pid=31128 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120 ... echo 1 > options/latency-format $ cat trace -0 0 91557653238+: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 next_prio=120 sleep-31128 0 91557843399+: sched_switch: prev_comm=bash prev_pid=31128 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120 ... v2: Move arch-specific bits out of generic code. v4: Fix x86_32 build due to 64-bit division. Google-Bug-Id: 6980623 Link: http://lkml.kernel.org/r/1352837903-32191-2-git-send-email-dhsharp@google.com Cc: Masami Hiramatsu Signed-off-by: David Sharp Signed-off-by: Steven Rostedt --- arch/x86/include/asm/trace_clock.h | 2 +- include/linux/ftrace_event.h | 6 +++ kernel/trace/trace.c | 15 ++++++-- kernel/trace/trace.h | 4 -- kernel/trace/trace_output.c | 78 ++++++++++++++++++++++++++------------ 5 files changed, 72 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/trace_clock.h b/arch/x86/include/asm/trace_clock.h index 5c1652728b6d..beab86cc282d 100644 --- a/arch/x86/include/asm/trace_clock.h +++ b/arch/x86/include/asm/trace_clock.h @@ -9,7 +9,7 @@ extern u64 notrace trace_clock_x86_tsc(void); # define ARCH_TRACE_CLOCKS \ - { trace_clock_x86_tsc, "x86-tsc" }, + { trace_clock_x86_tsc, "x86-tsc", .in_ns = 0 }, #else /* !CONFIG_X86_TSC */ diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index b80c8ddfbbdc..a3d489531d83 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -86,6 +86,12 @@ struct trace_iterator { cpumask_var_t started; }; +enum trace_iter_flags { + TRACE_FILE_LAT_FMT = 1, + TRACE_FILE_ANNOTATE = 2, + TRACE_FILE_TIME_IN_NS = 4, +}; + struct trace_event; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0d20620c0d27..d943e69569cd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -484,10 +484,11 @@ static const char *trace_options[] = { static struct { u64 (*func)(void); const char *name; + int in_ns; /* is this clock in nanoseconds? */ } trace_clocks[] = { - { trace_clock_local, "local" }, - { trace_clock_global, "global" }, - { trace_clock_counter, "counter" }, + { trace_clock_local, "local", 1 }, + { trace_clock_global, "global", 1 }, + { trace_clock_counter, "counter", 0 }, ARCH_TRACE_CLOCKS }; @@ -2478,6 +2479,10 @@ __tracing_open(struct inode *inode, struct file *file) if (ring_buffer_overruns(iter->tr->buffer)) iter->iter_flags |= TRACE_FILE_ANNOTATE; + /* Output in nanoseconds only if we are using a clock in nanoseconds. */ + if (trace_clocks[trace_clock_id].in_ns) + iter->iter_flags |= TRACE_FILE_TIME_IN_NS; + /* stop the trace while dumping */ tracing_stop(); @@ -3339,6 +3344,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) if (trace_flags & TRACE_ITER_LATENCY_FMT) iter->iter_flags |= TRACE_FILE_LAT_FMT; + /* Output in nanoseconds only if we are using a clock in nanoseconds. */ + if (trace_clocks[trace_clock_id].in_ns) + iter->iter_flags |= TRACE_FILE_TIME_IN_NS; + iter->cpu_file = cpu_file; iter->tr = &global_trace; mutex_init(&iter->mutex); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 55010ed175f0..c75d7988902c 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -406,10 +406,6 @@ void tracing_stop_sched_switch_record(void); void tracing_start_sched_switch_record(void); int register_tracer(struct tracer *type); int is_tracing_stopped(void); -enum trace_file_type { - TRACE_FILE_LAT_FMT = 1, - TRACE_FILE_ANNOTATE = 2, -}; extern cpumask_var_t __read_mostly tracing_buffer_mask; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 123b189c732c..194d79602dc7 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -610,24 +610,54 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) return trace_print_lat_fmt(s, entry); } -static unsigned long preempt_mark_thresh = 100; +static unsigned long preempt_mark_thresh_us = 100; static int -lat_print_timestamp(struct trace_seq *s, u64 abs_usecs, - unsigned long rel_usecs) +lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) { - return trace_seq_printf(s, " %4lldus%c: ", abs_usecs, - rel_usecs > preempt_mark_thresh ? '!' : - rel_usecs > 1 ? '+' : ' '); + unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE; + unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS; + unsigned long long abs_ts = iter->ts - iter->tr->time_start; + unsigned long long rel_ts = next_ts - iter->ts; + struct trace_seq *s = &iter->seq; + + if (in_ns) { + abs_ts = ns2usecs(abs_ts); + rel_ts = ns2usecs(rel_ts); + } + + if (verbose && in_ns) { + unsigned long abs_usec = do_div(abs_ts, USEC_PER_MSEC); + unsigned long abs_msec = (unsigned long)abs_ts; + unsigned long rel_usec = do_div(rel_ts, USEC_PER_MSEC); + unsigned long rel_msec = (unsigned long)rel_ts; + + return trace_seq_printf( + s, "[%08llx] %ld.%03ldms (+%ld.%03ldms): ", + ns2usecs(iter->ts), + abs_msec, abs_usec, + rel_msec, rel_usec); + } else if (verbose && !in_ns) { + return trace_seq_printf( + s, "[%016llx] %lld (+%lld): ", + iter->ts, abs_ts, rel_ts); + } else if (!verbose && in_ns) { + return trace_seq_printf( + s, " %4lldus%c: ", + abs_ts, + rel_ts > preempt_mark_thresh_us ? '!' : + rel_ts > 1 ? '+' : ' '); + } else { /* !verbose && !in_ns */ + return trace_seq_printf(s, " %4lld: ", abs_ts); + } } int trace_print_context(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct trace_entry *entry = iter->ent; - unsigned long long t = ns2usecs(iter->ts); - unsigned long usec_rem = do_div(t, USEC_PER_SEC); - unsigned long secs = (unsigned long)t; + unsigned long long t; + unsigned long secs, usec_rem; char comm[TASK_COMM_LEN]; int ret; @@ -644,8 +674,13 @@ int trace_print_context(struct trace_iterator *iter) return 0; } - return trace_seq_printf(s, " %5lu.%06lu: ", - secs, usec_rem); + if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) { + t = ns2usecs(iter->ts); + usec_rem = do_div(t, USEC_PER_SEC); + secs = (unsigned long)t; + return trace_seq_printf(s, " %5lu.%06lu: ", secs, usec_rem); + } else + return trace_seq_printf(s, " %12llu: ", iter->ts); } int trace_print_lat_context(struct trace_iterator *iter) @@ -659,36 +694,29 @@ int trace_print_lat_context(struct trace_iterator *iter) *next_entry = trace_find_next_entry(iter, NULL, &next_ts); unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); - unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start); - unsigned long rel_usecs; /* Restore the original ent_size */ iter->ent_size = ent_size; if (!next_entry) next_ts = iter->ts; - rel_usecs = ns2usecs(next_ts - iter->ts); if (verbose) { char comm[TASK_COMM_LEN]; trace_find_cmdline(entry->pid, comm); - ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]" - " %ld.%03ldms (+%ld.%03ldms): ", comm, - entry->pid, iter->cpu, entry->flags, - entry->preempt_count, iter->idx, - ns2usecs(iter->ts), - abs_usecs / USEC_PER_MSEC, - abs_usecs % USEC_PER_MSEC, - rel_usecs / USEC_PER_MSEC, - rel_usecs % USEC_PER_MSEC); + ret = trace_seq_printf( + s, "%16s %5d %3d %d %08x %08lx ", + comm, entry->pid, iter->cpu, entry->flags, + entry->preempt_count, iter->idx); } else { ret = lat_print_generic(s, entry, iter->cpu); - if (ret) - ret = lat_print_timestamp(s, abs_usecs, rel_usecs); } + if (ret) + ret = lat_print_timestamp(iter, next_ts); + return ret; } -- cgit v1.2.3 From 32cdba1e05418909708a17e52505e8b2ba4381d1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 14 Nov 2012 19:03:42 +0100 Subject: uprobes: Use percpu_rw_semaphore to fix register/unregister vs dup_mmap() race This was always racy, but 268720903f87e0b84b161626c4447b81671b5d18 "uprobes: Rework register_for_each_vma() to make it O(n)" should be blamed anyway, it made everything worse and I didn't notice. register/unregister call build_map_info() and then do install/remove breakpoint for every mm which mmaps inode/offset. This can obviously race with fork()->dup_mmap() in between and we can miss the child. uprobe_register() could be easily fixed but unregister is much worse, the new mm inherits "int3" from parent and there is no way to detect this if uprobe goes away. So this patch simply adds percpu_down_read/up_read around dup_mmap(), and percpu_down_write/up_write into register_for_each_vma(). This adds 2 new hooks into dup_mmap() but we can kill uprobe_dup_mmap() and fold it into uprobe_end_dup_mmap(). Reported-by: Srikar Dronamraju Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- include/linux/uprobes.h | 8 ++++++++ kernel/events/uprobes.c | 26 +++++++++++++++++++++++--- kernel/fork.c | 2 ++ 3 files changed, 33 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 2615c4d7788d..4f628a6fc5b4 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -97,6 +97,8 @@ extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_con extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); +extern void uprobe_start_dup_mmap(void); +extern void uprobe_end_dup_mmap(void); extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); extern void uprobe_copy_process(struct task_struct *t); @@ -127,6 +129,12 @@ static inline void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { } +static inline void uprobe_start_dup_mmap(void) +{ +} +static inline void uprobe_end_dup_mmap(void) +{ +} static inline void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) { diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 5ce99cfd2e6e..dea7acfbb071 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -33,6 +33,7 @@ #include /* user_enable_single_step */ #include /* notifier mechanism */ #include "../../mm/internal.h" /* munlock_vma_page */ +#include #include @@ -71,6 +72,8 @@ static struct mutex uprobes_mutex[UPROBES_HASH_SZ]; static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; #define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) +static struct percpu_rw_semaphore dup_mmap_sem; + /* * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe * events active at this time. Probably a fine grained per inode count is @@ -766,10 +769,13 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) struct map_info *info; int err = 0; + percpu_down_write(&dup_mmap_sem); info = build_map_info(uprobe->inode->i_mapping, uprobe->offset, is_register); - if (IS_ERR(info)) - return PTR_ERR(info); + if (IS_ERR(info)) { + err = PTR_ERR(info); + goto out; + } while (info) { struct mm_struct *mm = info->mm; @@ -799,7 +805,8 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) mmput(mm); info = free_map_info(info); } - + out: + percpu_up_write(&dup_mmap_sem); return err; } @@ -1131,6 +1138,16 @@ void uprobe_clear_state(struct mm_struct *mm) kfree(area); } +void uprobe_start_dup_mmap(void) +{ + percpu_down_read(&dup_mmap_sem); +} + +void uprobe_end_dup_mmap(void) +{ + percpu_up_read(&dup_mmap_sem); +} + void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) { newmm->uprobes_state.xol_area = NULL; @@ -1597,6 +1614,9 @@ static int __init init_uprobes(void) mutex_init(&uprobes_mmap_mutex[i]); } + if (percpu_init_rwsem(&dup_mmap_sem)) + return -ENOMEM; + return register_die_notifier(&uprobe_exception_nb); } module_init(init_uprobes); diff --git a/kernel/fork.c b/kernel/fork.c index 8b20ab7d3aa2..c497e57aa654 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -352,6 +352,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) unsigned long charge; struct mempolicy *pol; + uprobe_start_dup_mmap(); down_write(&oldmm->mmap_sem); flush_cache_dup_mm(oldmm); uprobe_dup_mmap(oldmm, mm); @@ -469,6 +470,7 @@ out: up_write(&mm->mmap_sem); flush_tlb_mm(oldmm); up_write(&oldmm->mmap_sem); + uprobe_end_dup_mmap(); return retval; fail_nomem_anon_vma_fork: mpol_put(pol); -- cgit v1.2.3