From c791cc4b1feb881b3644c059dba5464b076bf592 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)"
Date: Tue, 16 Jun 2020 14:53:55 -0400
Subject: tracing: Only allow trace_array_printk() to be used by instances

To prevent default trace_printk() calls from spamming the top-level
tracing ring buffer, only allow trace instances to use
trace_array_printk() (which can be used without the trace_printk()
start-up warning).

Signed-off-by: Steven Rostedt (VMware)
---
 kernel/trace/trace.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bb62269724d5..8241d1448d70 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3346,12 +3346,16 @@ int trace_array_printk(struct trace_array *tr,
 	int ret;
 	va_list ap;
 
-	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
-		return 0;
-
 	if (!tr)
 		return -ENOENT;
 
+	/* This is only allowed for created instances */
+	if (tr == &global_trace)
+		return 0;
+
+	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
+		return 0;
+
 	va_start(ap, fmt);
 	ret = trace_array_vprintk(tr, ip, fmt, ap);
 	va_end(ap);
--
cgit v1.2.3
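The effect on callers, roughly: a NULL trace_array still returns -ENOENT, the top-level array is now rejected with a return of 0 (the message is silently dropped), and only arrays created for an instance reach a ring buffer. A hypothetical caller sketch (the instance name and message are illustrative, not from the patch):

```c
#include <linux/kernel.h>
#include <linux/trace.h>

static struct trace_array *my_tr;

static void my_debug_event(int val)
{
	/*
	 * my_tr was created earlier with trace_array_get_by_name("mydrv");
	 * writes land in instances/mydrv/trace, never the top-level buffer.
	 */
	if (my_tr)
		trace_array_printk(my_tr, _THIS_IP_, "event: %d\n", val);
}
```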
From 7ef282e05132d56b6f6b71e3873f317664bea78b Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)"
Date: Mon, 29 Jun 2020 23:45:56 -0400
Subject: tracing: Move pipe reference to trace array instead of current_tracer

If a process has the trace_pipe open on a trace_array, the current
tracer for that trace array should not be changed. This was originally
enforced by a global lock, but when instances were introduced, it was
moved to the current_trace. But this structure is shared by all
instances, and a trace_pipe is for a single instance. There's no
reason that a process that has trace_pipe open on one instance should
prevent another instance from changing its current tracer. Move the
reference counter to the trace_array instead.

This is marked as "Fixes" but is more of a clean up than a true fix.
Backport if you want, but it's not critical.

Fixes: cf6ab6d9143b1 ("tracing: Add ref count to tracer for when they are being read by pipe")
Signed-off-by: Steven Rostedt (VMware)
---
 kernel/trace/trace.c | 12 ++++++------
 kernel/trace/trace.h |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8241d1448d70..64c5b8146cca 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5891,7 +5891,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
 	}
 
 	/* If trace pipe files are being read, we can't change the tracer */
-	if (tr->current_trace->ref) {
+	if (tr->trace_ref) {
 		ret = -EBUSY;
 		goto out;
 	}
@@ -6107,7 +6107,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 
 	nonseekable_open(inode, filp);
 
-	tr->current_trace->ref++;
+	tr->trace_ref++;
 out:
 	mutex_unlock(&trace_types_lock);
 	return ret;
@@ -6126,7 +6126,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
 
 	mutex_lock(&trace_types_lock);
 
-	tr->current_trace->ref--;
+	tr->trace_ref--;
 
 	if (iter->trace->pipe_close)
 		iter->trace->pipe_close(iter);
@@ -7428,7 +7428,7 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
 
 	filp->private_data = info;
 
-	tr->current_trace->ref++;
+	tr->trace_ref++;
 
 	mutex_unlock(&trace_types_lock);
 
@@ -7529,7 +7529,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
 
 	mutex_lock(&trace_types_lock);
 
-	iter->tr->current_trace->ref--;
+	iter->tr->trace_ref--;
 
 	__trace_array_put(iter->tr);
 
@@ -8737,7 +8737,7 @@ static int __remove_instance(struct trace_array *tr)
 	int i;
 
 	/* Reference counter for a newly created trace array = 1. */
-	if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
+	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
 		return -EBUSY;
 
 	list_del(&tr->list);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 13db4000af3f..f21607f87189 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -356,6 +356,7 @@ struct trace_array {
 	struct trace_event_file *trace_marker_file;
 	cpumask_var_t tracing_cpumask; /* only trace on set CPUs */
 	int ref;
+	int trace_ref;
 #ifdef CONFIG_FUNCTION_TRACER
 	struct ftrace_ops *ops;
 	struct trace_pid_list __rcu *function_pids;
@@ -547,7 +548,6 @@ struct tracer {
 	struct tracer *next;
 	struct tracer_flags *flags;
 	int enabled;
-	int ref;
 	bool print_max;
 	bool allow_instances;
#ifdef CONFIG_TRACER_MAX_TRACE
--
cgit v1.2.3
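The underlying rule: a busy count must live on the object it actually protects. A reduced sketch with hypothetical types (not the kernel's own definitions) of the check this patch changes:

```c
#include <errno.h>

struct tracer { const char *name; };	/* one shared object per tracer type */

struct trace_array {			/* one per tracing instance */
	struct tracer *current_trace;
	int trace_ref;	/* open trace_pipe readers of THIS instance only */
};

static int set_tracer(struct trace_array *tr, struct tracer *t)
{
	/*
	 * Only readers of this instance block the switch. With the old
	 * shared tracer->ref, a trace_pipe reader on any instance using
	 * the same tracer type would have forced -EBUSY here.
	 */
	if (tr->trace_ref)
		return -EBUSY;

	tr->current_trace = t;
	return 0;
}
```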
From b23d7a5f4a07af02343cdd28fe1f7488bac3afda Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Thu, 25 Jun 2020 15:34:03 +1000
Subject: ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU

On a 144 thread system, `perf ftrace` takes about 20 seconds to start
up, due to calling synchronize_rcu() for each CPU.

  cat /proc/108560/stack
    0xc0003e7eb336f470
    __switch_to+0x2e0/0x480
    __wait_rcu_gp+0x20c/0x220
    synchronize_rcu+0x9c/0xc0
    ring_buffer_reset_cpu+0x88/0x2e0
    tracing_reset_online_cpus+0x84/0xe0
    tracing_open+0x1d4/0x1f0

On a system with 10x more threads, it starts to become an annoyance.

Batch these up so we disable all the per-cpu buffers first, then
synchronize_rcu() once, then reset each of the buffers. This brings
the time down to about 0.5s.

Link: https://lkml.kernel.org/r/20200625053403.2386972-1-npiggin@gmail.com

Tested-by: Anton Blanchard
Acked-by: Paul E. McKenney
Signed-off-by: Nicholas Piggin
Signed-off-by: Steven Rostedt (VMware)
---
 include/linux/ring_buffer.h |  1 +
 kernel/trace/ring_buffer.c  | 85 +++++++++++++++++++++++++++++++++++++--------
 kernel/trace/trace.c        |  4 +--
 3 files changed, 73 insertions(+), 17 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index c76b2f3b3ac4..136ea0997e6d 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -143,6 +143,7 @@ bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter);
 unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu);
 
 void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu);
+void ring_buffer_reset_online_cpus(struct trace_buffer *buffer);
 void ring_buffer_reset(struct trace_buffer *buffer);
 
 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 802bb38d9c81..ed1941304f69 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -270,6 +270,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 #define for_each_buffer_cpu(buffer, cpu)	\
 	for_each_cpu(cpu, buffer->cpumask)
 
+#define for_each_online_buffer_cpu(buffer, cpu)	\
+	for_each_cpu_and(cpu, buffer->cpumask, cpu_online_mask)
+
 #define TS_SHIFT	27
 #define TS_MASK		((1ULL << TS_SHIFT) - 1)
 #define TS_DELTA_TEST	(~TS_MASK)
@@ -4790,6 +4793,26 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	rb_head_page_activate(cpu_buffer);
 }
 
+/* Must have disabled the cpu buffer then done a synchronize_rcu */
+static void reset_disabled_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+	if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
+		goto out;
+
+	arch_spin_lock(&cpu_buffer->lock);
+
+	rb_reset_cpu(cpu_buffer);
+
+	arch_spin_unlock(&cpu_buffer->lock);
+
+ out:
+	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+}
+
 /**
  * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
  * @buffer: The ring buffer to reset a per cpu buffer of
@@ -4798,7 +4821,6 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
-	unsigned long flags;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
@@ -4809,24 +4831,42 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
 	/* Make sure all commits have finished */
 	synchronize_rcu();
 
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	reset_disabled_cpu_buffer(cpu_buffer);
 
-	if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
-		goto out;
+	atomic_dec(&cpu_buffer->record_disabled);
+	atomic_dec(&cpu_buffer->resize_disabled);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
 
-	arch_spin_lock(&cpu_buffer->lock);
+/**
+ * ring_buffer_reset_online_cpus - reset a ring buffer's online per-CPU buffers
+ * @buffer: The ring buffer to reset
+ */
+void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
 
-	rb_reset_cpu(cpu_buffer);
+	for_each_online_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
 
-	arch_spin_unlock(&cpu_buffer->lock);
+		atomic_inc(&cpu_buffer->resize_disabled);
+		atomic_inc(&cpu_buffer->record_disabled);
+	}
 
- out:
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	/* Make sure all commits have finished */
+	synchronize_rcu();
 
-	atomic_dec(&cpu_buffer->record_disabled);
-	atomic_dec(&cpu_buffer->resize_disabled);
+	for_each_online_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+
+		reset_disabled_cpu_buffer(cpu_buffer);
+
+		atomic_dec(&cpu_buffer->record_disabled);
+		atomic_dec(&cpu_buffer->resize_disabled);
+	}
 }
-EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
 
 /**
  * ring_buffer_reset - reset a ring buffer
@@ -4834,10 +4874,27 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
  */
 void ring_buffer_reset(struct trace_buffer *buffer)
 {
+	struct ring_buffer_per_cpu *cpu_buffer;
 	int cpu;
 
-	for_each_buffer_cpu(buffer, cpu)
-		ring_buffer_reset_cpu(buffer, cpu);
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+
+		atomic_inc(&cpu_buffer->resize_disabled);
+		atomic_inc(&cpu_buffer->record_disabled);
+	}
+
+	/* Make sure all commits have finished */
+	synchronize_rcu();
+
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+
+		reset_disabled_cpu_buffer(cpu_buffer);
+
+		atomic_dec(&cpu_buffer->record_disabled);
+		atomic_dec(&cpu_buffer->resize_disabled);
+	}
 }
 EXPORT_SYMBOL_GPL(ring_buffer_reset);
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 64c5b8146cca..4aab712f9567 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2003,7 +2003,6 @@ static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
 void tracing_reset_online_cpus(struct array_buffer *buf)
 {
 	struct trace_buffer *buffer = buf->buffer;
-	int cpu;
 
 	if (!buffer)
 		return;
@@ -2015,8 +2014,7 @@ void tracing_reset_online_cpus(struct array_buffer *buf)
 
 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
 
-	for_each_online_cpu(cpu)
-		ring_buffer_reset_cpu(buffer, cpu);
+	ring_buffer_reset_online_cpus(buffer);
 
 	ring_buffer_record_enable(buffer);
 }
--
cgit v1.2.3
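The optimization is a general RCU batching pattern: when N objects each need a grace period before they can be torn down or reset, publish all the "stop" states first and wait once. A schematic contrast of the two shapes, with hypothetical buf_* helpers standing in for the ring buffer's atomic_inc/rb_reset_cpu steps:

```c
#include <linux/cpumask.h>
#include <linux/rcupdate.h>

/* Hypothetical per-CPU helpers; stand-ins for record_disabled/rb_reset_cpu() */
void buf_disable(int cpu);
void buf_reset(int cpu);
void buf_enable(int cpu);

/* Before: one grace period per CPU -- O(ncpus) synchronize_rcu() calls */
static void reset_slow(const struct cpumask *mask)
{
	int cpu;

	for_each_cpu(cpu, mask) {
		buf_disable(cpu);
		synchronize_rcu();	/* waits a full grace period every pass */
		buf_reset(cpu);
		buf_enable(cpu);
	}
}

/* After: disable everything, wait once, then reset everything */
static void reset_fast(const struct cpumask *mask)
{
	int cpu;

	for_each_cpu(cpu, mask)
		buf_disable(cpu);

	synchronize_rcu();		/* a single grace period covers all CPUs */

	for_each_cpu(cpu, mask) {
		buf_reset(cpu);
		buf_enable(cpu);
	}
}
```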
From ee896ee8051a618ea4fe3d91c58d3e6ef9faf705 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch
Date: Mon, 27 Jul 2020 11:28:40 +0200
Subject: tracing: Remove outdated comment in stack handling

This comment describes the behaviour before commit 2a820bf74918
("tracing: Use percpu stack trace buffer more intelligently"). Since
that commit, interrupts and NMIs do use the per-cpu stacks so the
comment is no longer correct. Remove it.

(Note that the FTRACE_STACK_SIZE mentioned in the comment has never
existed, it probably should have said FTRACE_STACK_ENTRIES.)

Link: https://lkml.kernel.org/r/20200727092840.18659-1-vincent.whitchurch@axis.com

Signed-off-by: Vincent Whitchurch
Signed-off-by: Steven Rostedt (VMware)
---
 kernel/trace/trace.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4aab712f9567..dbcacdd56b02 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2930,12 +2930,6 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer,
 		skip++;
 #endif
 
-	/*
-	 * Since events can happen in NMIs there's no safe way to
-	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
-	 * or NMI comes in, it will just have to use the default
-	 * FTRACE_STACK_SIZE.
-	 */
 	preempt_disable_notrace();
 
 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
--
cgit v1.2.3
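For reference, the behaviour the removed comment failed to describe: since 2a820bf74918, each CPU carries a small array of stack buffers, and __ftrace_trace_stack() claims one slot per nesting level (task, softirq, irq, NMI) via a per-CPU counter. A simplified sketch of that scheme; the entry count is illustrative, and the real kernel derives it from PAGE_SIZE:

```c
#include <linux/percpu.h>
#include <linux/preempt.h>

#define FTRACE_KSTACK_NESTING	4	/* task, softirq, irq, NMI */
#define FTRACE_KSTACK_ENTRIES	256	/* illustrative size only */

struct ftrace_stack {
	unsigned long calls[FTRACE_KSTACK_ENTRIES];
};

struct ftrace_stacks {
	struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
};

static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
static DEFINE_PER_CPU(int, ftrace_stack_reserve);

static struct ftrace_stack *claim_stack(void)
{
	int stackidx;

	preempt_disable_notrace();
	/* An interrupt or NMI arriving after this simply claims the next slot */
	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
	if (stackidx >= FTRACE_KSTACK_NESTING)
		return NULL;	/* nested too deep; caller still must release */

	return this_cpu_ptr(&ftrace_stacks.stacks[stackidx]);
}
```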
From 0f69dae4d1069963fdcc16e63655927d83281006 Mon Sep 17 00:00:00 2001
From: Zhaoyang Huang
Date: Fri, 31 Jul 2020 08:27:45 +0800
Subject: tracing: Have tracing buffer info use kvzalloc instead of kzalloc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

High-order memory allocations within tracing can trigger the OOM
killer; use kvzalloc() instead. Below is the call stack we ran across
on an Android system. The scenario happens when traced_probes is woken
up to fetch a large quantity of trace data, even though free memory is
higher than watermark_low.

 traced_probes invoked oom-killer: gfp_mask=0x140c0c0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null), order=2, oom_score_adj=-1
 traced_probes cpuset=system-background mems_allowed=0
 CPU: 3 PID: 588 Comm: traced_probes Tainted: G W O 4.14.181 #1
 Hardware name: Generic DT based system
 (unwind_backtrace) from [] (show_stack+0x20/0x24)
 (show_stack) from [] (dump_stack+0xa8/0xec)
 (dump_stack) from [] (dump_header+0x9c/0x220)
 (dump_header) from [] (oom_kill_process+0xc0/0x5c4)
 (oom_kill_process) from [] (out_of_memory+0x220/0x310)
 (out_of_memory) from [] (__alloc_pages_nodemask+0xff8/0x13a4)
 (__alloc_pages_nodemask) from [] (kmalloc_order+0x30/0x48)
 (kmalloc_order) from [] (kmalloc_order_trace+0x30/0x118)
 (kmalloc_order_trace) from [] (tracing_buffers_open+0x50/0xfc)
 (tracing_buffers_open) from [] (do_dentry_open+0x278/0x34c)
 (do_dentry_open) from [] (vfs_open+0x50/0x70)
 (vfs_open) from [] (path_openat+0x5fc/0x169c)
 (path_openat) from [] (do_filp_open+0x94/0xf8)
 (do_filp_open) from [] (do_sys_open+0x168/0x26c)
 (do_sys_open) from [] (SyS_openat+0x34/0x38)
 (SyS_openat) from [] (ret_fast_syscall+0x0/0x28)

Link: https://lkml.kernel.org/r/1596155265-32365-1-git-send-email-zhaoyang.huang@unisoc.com

Signed-off-by: Zhaoyang Huang
Signed-off-by: Steven Rostedt (VMware)
---
 kernel/trace/trace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index dbcacdd56b02..06c0feae5ff9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7402,7 +7402,7 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
 	if (ret)
 		return ret;
 
-	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	info = kvzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info) {
 		trace_array_put(tr);
 		return -ENOMEM;
@@ -7528,7 +7528,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
 	if (info->spare)
 		ring_buffer_free_read_page(iter->array_buffer->buffer,
 					   info->spare_cpu, info->spare);
-	kfree(info);
+	kvfree(info);
 
 	mutex_unlock(&trace_types_lock);
--
cgit v1.2.3
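The mechanics behind the fix: kvzalloc() first tries kmalloc(), and for sizes that would require a costly physically-contiguous high-order block (the order=2 request in the log above) it can fall back to vmalloc(), which only needs order-0 pages. The one rule is that the allocation must then be released with kvfree(), never kfree(). A minimal sketch of the pairing, with a hypothetical struct:

```c
#include <linux/mm.h>
#include <linux/slab.h>

struct big_info {
	char scratch[64 * 1024];	/* high-order with kmalloc(); may fail or OOM */
};

static struct big_info *info_alloc(void)
{
	/* Physically contiguous if cheap, vmalloc-backed otherwise */
	return kvzalloc(sizeof(struct big_info), GFP_KERNEL);
}

static void info_free(struct big_info *info)
{
	kvfree(info);	/* handles both kmalloc and vmalloc backings */
}
```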
From 38ce2a9e33db61a3041840310077072d6210ead4 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)"
Date: Thu, 6 Aug 2020 12:46:49 -0400
Subject: tracing: Add trace_array_init_printk() to initialize instance trace_printk() buffers

As trace_array_printk() is used with non-global instances, it will not
add noise to the main buffer, so it is OK to have in the kernel (unlike
trace_printk()). This requires the subsystem to create its own tracing
instance, and trace_array_printk() only writes into those instances.

Add trace_array_init_printk() to initialize the trace_printk() buffers
without printing out the WARNING message.

Reported-by: Sean Paul
Reviewed-by: Sean Paul
Signed-off-by: Steven Rostedt (VMware)
---
 include/linux/trace.h |  1 +
 kernel/trace/trace.c  | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

(limited to 'kernel/trace/trace.c')

diff --git a/include/linux/trace.h b/include/linux/trace.h
index 7fd86d3c691f..36d255d66f88 100644
--- a/include/linux/trace.h
+++ b/include/linux/trace.h
@@ -29,6 +29,7 @@ struct trace_array;
 void trace_printk_init_buffers(void);
 int trace_array_printk(struct trace_array *tr, unsigned long ip,
 		       const char *fmt, ...);
+int trace_array_init_printk(struct trace_array *tr);
 void trace_array_put(struct trace_array *tr);
 struct trace_array *trace_array_get_by_name(const char *name);
 int trace_array_destroy(struct trace_array *tr);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 06c0feae5ff9..c5f822736261 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3129,6 +3129,9 @@ static int alloc_percpu_trace_buffer(void)
 {
 	struct trace_buffer_struct *buffers;
 
+	if (trace_percpu_buffer)
+		return 0;
+
 	buffers = alloc_percpu(struct trace_buffer_struct);
 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
 		return -ENOMEM;
@@ -3331,6 +3334,26 @@ int trace_array_vprintk(struct trace_array *tr,
 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
 }
 
+/**
+ * trace_array_printk - Print a message to a specific instance
+ * @tr: The instance trace_array descriptor
+ * @ip: The instruction pointer that this is called from.
+ * @fmt: The format to print (printf format)
+ *
+ * If a subsystem sets up its own instance, they have the right to
+ * printk strings into their tracing instance buffer using this
+ * function. Note, this function will not write into the top level
+ * buffer (use trace_printk() for that), as writing into the top level
+ * buffer should only have events that can be individually disabled.
+ * trace_printk() is only used for debugging a kernel, and should not
+ * ever be incorporated in normal use.
+ *
+ * trace_array_printk() can be used, as it will not add noise to the
+ * top level tracing buffer.
+ *
+ * Note, trace_array_init_printk() must be called on @tr before this
+ * can be used.
+ */
 __printf(3, 0)
 int trace_array_printk(struct trace_array *tr,
 		       unsigned long ip, const char *fmt, ...)
@@ -3355,6 +3378,27 @@ int trace_array_printk(struct trace_array *tr,
 }
 EXPORT_SYMBOL_GPL(trace_array_printk);
 
+/**
+ * trace_array_init_printk - Initialize buffers for trace_array_printk()
+ * @tr: The trace array to initialize the buffers for
+ *
+ * As trace_array_printk() only writes into instances, they are OK to
+ * have in the kernel (unlike trace_printk()). This needs to be called
+ * before trace_array_printk() can be used on a trace_array.
+ */
+int trace_array_init_printk(struct trace_array *tr)
+{
+	if (!tr)
+		return -ENOENT;
+
+	/* This is only allowed for created instances */
+	if (tr == &global_trace)
+		return -EINVAL;
+
+	return alloc_percpu_trace_buffer();
+}
+EXPORT_SYMBOL_GPL(trace_array_init_printk);
+
 __printf(3, 4)
 int trace_array_printk_buf(struct trace_buffer *buffer,
 			   unsigned long ip, const char *fmt, ...)
--
cgit v1.2.3
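Putting the series together, a subsystem would now use the exported entry points roughly as follows. A sketch of a module doing so; the instance name and messages are illustrative:

```c
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/trace.h>

static struct trace_array *tr;

static int __init example_init(void)
{
	int ret;

	/* Creates (or finds) the tracefs instance "example" */
	tr = trace_array_get_by_name("example");
	if (!tr)
		return -ENOMEM;

	/* Allocate the percpu trace_printk buffers -- no WARNING banner */
	ret = trace_array_init_printk(tr);
	if (ret) {
		trace_array_put(tr);
		return ret;
	}

	/* Visible under instances/example/trace, not the top-level buffer */
	trace_array_printk(tr, _THIS_IP_, "example loaded\n");
	return 0;
}

static void __exit example_exit(void)
{
	trace_array_put(tr);
	trace_array_destroy(tr);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
```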