diff options
Diffstat (limited to 'kernel/trace')
37 files changed, 3608 insertions, 931 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 25a0fcfa7a5d..91e885194dbc 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -141,6 +141,15 @@ menuconfig FTRACE if FTRACE +config BOOTTIME_TRACING + bool "Boot-time Tracing support" + depends on BOOT_CONFIG && TRACING + default y + help + Enable developer to setup ftrace subsystem via supplemental + kernel cmdline at boot time for debugging (tracing) driver + initialization and boot process. + config FUNCTION_TRACER bool "Kernel Function Tracer" depends on HAVE_FUNCTION_TRACER @@ -172,6 +181,77 @@ config FUNCTION_GRAPH_TRACER the return value. This is done by setting the current return address on the current task structure into a stack of calls. +config DYNAMIC_FTRACE + bool "enable/disable function tracing dynamically" + depends on FUNCTION_TRACER + depends on HAVE_DYNAMIC_FTRACE + default y + help + This option will modify all the calls to function tracing + dynamically (will patch them out of the binary image and + replace them with a No-Op instruction) on boot up. During + compile time, a table is made of all the locations that ftrace + can function trace, and this table is linked into the kernel + image. When this is enabled, functions can be individually + enabled, and the functions not enabled will not affect + performance of the system. + + See the files in /sys/kernel/debug/tracing: + available_filter_functions + set_ftrace_filter + set_ftrace_notrace + + This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but + otherwise has native performance as long as no tracing is active. + +config DYNAMIC_FTRACE_WITH_REGS + def_bool y + depends on DYNAMIC_FTRACE + depends on HAVE_DYNAMIC_FTRACE_WITH_REGS + +config DYNAMIC_FTRACE_WITH_DIRECT_CALLS + def_bool y + depends on DYNAMIC_FTRACE + depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + +config FUNCTION_PROFILER + bool "Kernel function profiler" + depends on FUNCTION_TRACER + default n + help + This option enables the kernel function profiler. A file is created + in debugfs called function_profile_enabled which defaults to zero. + When a 1 is echoed into this file profiling begins, and when a + zero is entered, profiling stops. A "functions" file is created in + the trace_stat directory; this file shows the list of functions that + have been hit and their counters. + + If in doubt, say N. + +config STACK_TRACER + bool "Trace max stack" + depends on HAVE_FUNCTION_TRACER + select FUNCTION_TRACER + select STACKTRACE + select KALLSYMS + help + This special tracer records the maximum stack footprint of the + kernel and displays it in /sys/kernel/debug/tracing/stack_trace. + + This tracer works by hooking into every function call that the + kernel executes, and keeping a maximum stack depth value and + stack-trace saved. If this is configured with DYNAMIC_FTRACE + then it will not have any overhead while the stack tracer + is disabled. + + To enable the stack tracer on bootup, pass in 'stacktrace' + on the kernel command line. + + The stack tracer can also be enabled or disabled via the + sysctl kernel.stack_tracer_enabled + + Say N if unsure. + config TRACE_PREEMPT_TOGGLE bool help @@ -282,6 +362,19 @@ config HWLAT_TRACER file. Every time a latency is greater than tracing_thresh, it will be recorded into the ring buffer. +config MMIOTRACE + bool "Memory mapped IO tracing" + depends on HAVE_MMIOTRACE_SUPPORT && PCI + select GENERIC_TRACER + help + Mmiotrace traces Memory Mapped I/O access and is meant for + debugging and reverse engineering. It is called from the ioremap + implementation and works via page faults. Tracing is disabled by + default and can be enabled at run-time. + + See Documentation/trace/mmiotrace.rst. + If you are not helping to develop drivers, say N. + config ENABLE_DEFAULT_TRACERS bool "Trace process context switches and events" depends on !GENERIC_TRACER @@ -410,30 +503,6 @@ config BRANCH_TRACER Say N if unsure. -config STACK_TRACER - bool "Trace max stack" - depends on HAVE_FUNCTION_TRACER - select FUNCTION_TRACER - select STACKTRACE - select KALLSYMS - help - This special tracer records the maximum stack footprint of the - kernel and displays it in /sys/kernel/debug/tracing/stack_trace. - - This tracer works by hooking into every function call that the - kernel executes, and keeping a maximum stack depth value and - stack-trace saved. If this is configured with DYNAMIC_FTRACE - then it will not have any overhead while the stack tracer - is disabled. - - To enable the stack tracer on bootup, pass in 'stacktrace' - on the kernel command line. - - The stack tracer can also be enabled or disabled via the - sysctl kernel.stack_tracer_enabled - - Say N if unsure. - config BLK_DEV_IO_TRACE bool "Support for tracing block IO actions" depends on SYSFS @@ -531,53 +600,6 @@ config DYNAMIC_EVENTS config PROBE_EVENTS def_bool n -config DYNAMIC_FTRACE - bool "enable/disable function tracing dynamically" - depends on FUNCTION_TRACER - depends on HAVE_DYNAMIC_FTRACE - default y - help - This option will modify all the calls to function tracing - dynamically (will patch them out of the binary image and - replace them with a No-Op instruction) on boot up. During - compile time, a table is made of all the locations that ftrace - can function trace, and this table is linked into the kernel - image. When this is enabled, functions can be individually - enabled, and the functions not enabled will not affect - performance of the system. - - See the files in /sys/kernel/debug/tracing: - available_filter_functions - set_ftrace_filter - set_ftrace_notrace - - This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but - otherwise has native performance as long as no tracing is active. - -config DYNAMIC_FTRACE_WITH_REGS - def_bool y - depends on DYNAMIC_FTRACE - depends on HAVE_DYNAMIC_FTRACE_WITH_REGS - -config DYNAMIC_FTRACE_WITH_DIRECT_CALLS - def_bool y - depends on DYNAMIC_FTRACE - depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS - -config FUNCTION_PROFILER - bool "Kernel function profiler" - depends on FUNCTION_TRACER - default n - help - This option enables the kernel function profiler. A file is created - in debugfs called function_profile_enabled which defaults to zero. - When a 1 is echoed into this file profiling begins, and when a - zero is entered, profiling stops. A "functions" file is created in - the trace_stat directory; this file shows the list of functions that - have been hit and their counters. - - If in doubt, say N. - config BPF_KPROBE_OVERRIDE bool "Enable BPF programs to override a kprobed function" depends on BPF_EVENTS @@ -592,54 +614,6 @@ config FTRACE_MCOUNT_RECORD depends on DYNAMIC_FTRACE depends on HAVE_FTRACE_MCOUNT_RECORD -config FTRACE_SELFTEST - bool - -config FTRACE_STARTUP_TEST - bool "Perform a startup test on ftrace" - depends on GENERIC_TRACER - select FTRACE_SELFTEST - help - This option performs a series of startup tests on ftrace. On bootup - a series of tests are made to verify that the tracer is - functioning properly. It will do tests on all the configured - tracers of ftrace. - -config EVENT_TRACE_STARTUP_TEST - bool "Run selftest on trace events" - depends on FTRACE_STARTUP_TEST - default y - help - This option performs a test on all trace events in the system. - It basically just enables each event and runs some code that - will trigger events (not necessarily the event it enables) - This may take some time run as there are a lot of events. - -config EVENT_TRACE_TEST_SYSCALLS - bool "Run selftest on syscall events" - depends on EVENT_TRACE_STARTUP_TEST - help - This option will also enable testing every syscall event. - It only enables the event and disables it and runs various loads - with the event enabled. This adds a bit more time for kernel boot - up since it runs this on every system call defined. - - TBD - enable a way to actually call the syscalls as we test their - events - -config MMIOTRACE - bool "Memory mapped IO tracing" - depends on HAVE_MMIOTRACE_SUPPORT && PCI - select GENERIC_TRACER - help - Mmiotrace traces Memory Mapped I/O access and is meant for - debugging and reverse engineering. It is called from the ioremap - implementation and works via page faults. Tracing is disabled by - default and can be enabled at run-time. - - See Documentation/trace/mmiotrace.rst. - If you are not helping to develop drivers, say N. - config TRACING_MAP bool depends on ARCH_HAVE_NMI_SAFE_CMPXCHG @@ -680,16 +654,6 @@ config TRACE_EVENT_INJECT If unsure, say N. -config MMIOTRACE_TEST - tristate "Test module for mmiotrace" - depends on MMIOTRACE && m - help - This is a dumb module for testing mmiotrace. It is very dangerous - as it will write garbage to IO memory starting at a given address. - However, it should be safe to use on e.g. unused portion of VRAM. - - Say N, unless you absolutely know what you are doing. - config TRACEPOINT_BENCHMARK bool "Add tracepoint that benchmarks tracepoints" help @@ -736,6 +700,81 @@ config RING_BUFFER_BENCHMARK If unsure, say N. +config TRACE_EVAL_MAP_FILE + bool "Show eval mappings for trace events" + depends on TRACING + help + The "print fmt" of the trace events will show the enum/sizeof names + instead of their values. This can cause problems for user space tools + that use this string to parse the raw data as user space does not know + how to convert the string to its value. + + To fix this, there's a special macro in the kernel that can be used + to convert an enum/sizeof into its value. If this macro is used, then + the print fmt strings will be converted to their values. + + If something does not get converted properly, this option can be + used to show what enums/sizeof the kernel tried to convert. + + This option is for debugging the conversions. A file is created + in the tracing directory called "eval_map" that will show the + names matched with their values and what trace event system they + belong too. + + Normally, the mapping of the strings to values will be freed after + boot up or module load. With this option, they will not be freed, as + they are needed for the "eval_map" file. Enabling this option will + increase the memory footprint of the running kernel. + + If unsure, say N. + +config GCOV_PROFILE_FTRACE + bool "Enable GCOV profiling on ftrace subsystem" + depends on GCOV_KERNEL + help + Enable GCOV profiling on ftrace subsystem for checking + which functions/lines are tested. + + If unsure, say N. + + Note that on a kernel compiled with this config, ftrace will + run significantly slower. + +config FTRACE_SELFTEST + bool + +config FTRACE_STARTUP_TEST + bool "Perform a startup test on ftrace" + depends on GENERIC_TRACER + select FTRACE_SELFTEST + help + This option performs a series of startup tests on ftrace. On bootup + a series of tests are made to verify that the tracer is + functioning properly. It will do tests on all the configured + tracers of ftrace. + +config EVENT_TRACE_STARTUP_TEST + bool "Run selftest on trace events" + depends on FTRACE_STARTUP_TEST + default y + help + This option performs a test on all trace events in the system. + It basically just enables each event and runs some code that + will trigger events (not necessarily the event it enables) + This may take some time run as there are a lot of events. + +config EVENT_TRACE_TEST_SYSCALLS + bool "Run selftest on syscall events" + depends on EVENT_TRACE_STARTUP_TEST + help + This option will also enable testing every syscall event. + It only enables the event and disables it and runs various loads + with the event enabled. This adds a bit more time for kernel boot + up since it runs this on every system call defined. + + TBD - enable a way to actually call the syscalls as we test their + events + config RING_BUFFER_STARTUP_TEST bool "Ring buffer startup self test" depends on RING_BUFFER @@ -759,8 +798,18 @@ config RING_BUFFER_STARTUP_TEST If unsure, say N +config MMIOTRACE_TEST + tristate "Test module for mmiotrace" + depends on MMIOTRACE && m + help + This is a dumb module for testing mmiotrace. It is very dangerous + as it will write garbage to IO memory starting at a given address. + However, it should be safe to use on e.g. unused portion of VRAM. + + Say N, unless you absolutely know what you are doing. + config PREEMPTIRQ_DELAY_TEST - tristate "Preempt / IRQ disable delay thread to test latency tracers" + tristate "Test module to create a preempt / IRQ disable delay thread to test latency tracers" depends on m help Select this option to build a test module that can help test latency @@ -774,45 +823,30 @@ config PREEMPTIRQ_DELAY_TEST If unsure, say N -config TRACE_EVAL_MAP_FILE - bool "Show eval mappings for trace events" - depends on TRACING - help - The "print fmt" of the trace events will show the enum/sizeof names - instead of their values. This can cause problems for user space tools - that use this string to parse the raw data as user space does not know - how to convert the string to its value. - - To fix this, there's a special macro in the kernel that can be used - to convert an enum/sizeof into its value. If this macro is used, then - the print fmt strings will be converted to their values. - - If something does not get converted properly, this option can be - used to show what enums/sizeof the kernel tried to convert. - - This option is for debugging the conversions. A file is created - in the tracing directory called "eval_map" that will show the - names matched with their values and what trace event system they - belong too. +config SYNTH_EVENT_GEN_TEST + tristate "Test module for in-kernel synthetic event generation" + depends on HIST_TRIGGERS + help + This option creates a test module to check the base + functionality of in-kernel synthetic event definition and + generation. - Normally, the mapping of the strings to values will be freed after - boot up or module load. With this option, they will not be freed, as - they are needed for the "eval_map" file. Enabling this option will - increase the memory footprint of the running kernel. + To test, insert the module, and then check the trace buffer + for the generated sample events. - If unsure, say N. + If unsure, say N. -config GCOV_PROFILE_FTRACE - bool "Enable GCOV profiling on ftrace subsystem" - depends on GCOV_KERNEL +config KPROBE_EVENT_GEN_TEST + tristate "Test module for in-kernel kprobe event generation" + depends on KPROBE_EVENTS help - Enable GCOV profiling on ftrace subsystem for checking - which functions/lines are tested. + This option creates a test module to check the base + functionality of in-kernel kprobe event definition. - If unsure, say N. + To test, insert the module, and then check the trace buffer + for the generated kprobe events. - Note that on a kernel compiled with this config, ftrace will - run significantly slower. + If unsure, say N. endif # FTRACE diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 0e63db62225f..f9dcd19165fa 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -44,6 +44,8 @@ obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_TRACING) += trace_printk.o obj-$(CONFIG_TRACING_MAP) += tracing_map.o obj-$(CONFIG_PREEMPTIRQ_DELAY_TEST) += preemptirq_delay_test.o +obj-$(CONFIG_SYNTH_EVENT_GEN_TEST) += synth_event_gen_test.o +obj-$(CONFIG_KPROBE_EVENT_GEN_TEST) += kprobe_event_gen_test.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o obj-$(CONFIG_PREEMPTIRQ_TRACEPOINTS) += trace_preemptirq.o @@ -83,6 +85,7 @@ endif obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o +obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 475e29498bca..0735ae8545d8 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -68,14 +68,14 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, { struct blk_io_trace *t; struct ring_buffer_event *event = NULL; - struct ring_buffer *buffer = NULL; + struct trace_buffer *buffer = NULL; int pc = 0; int cpu = smp_processor_id(); bool blk_tracer = blk_tracer_enabled; ssize_t cgid_len = cgid ? sizeof(cgid) : 0; if (blk_tracer) { - buffer = blk_tr->trace_buffer.buffer; + buffer = blk_tr->array_buffer.buffer; pc = preempt_count(); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + len + cgid_len, @@ -215,7 +215,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, { struct task_struct *tsk = current; struct ring_buffer_event *event = NULL; - struct ring_buffer *buffer = NULL; + struct trace_buffer *buffer = NULL; struct blk_io_trace *t; unsigned long flags = 0; unsigned long *sequence; @@ -248,7 +248,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, if (blk_tracer) { tracing_record_cmdline(current); - buffer = blk_tr->trace_buffer.buffer; + buffer = blk_tr->array_buffer.buffer; pc = preempt_count(); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + pdu_len + cgid_len, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e5ef4ae9edb5..19e793aa441a 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -703,6 +703,7 @@ struct send_signal_irq_work { struct irq_work irq_work; struct task_struct *task; u32 sig; + enum pid_type type; }; static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work); @@ -712,10 +713,10 @@ static void do_bpf_send_signal(struct irq_work *entry) struct send_signal_irq_work *work; work = container_of(entry, struct send_signal_irq_work, irq_work); - group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, PIDTYPE_TGID); + group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type); } -BPF_CALL_1(bpf_send_signal, u32, sig) +static int bpf_send_signal_common(u32 sig, enum pid_type type) { struct send_signal_irq_work *work = NULL; @@ -748,11 +749,17 @@ BPF_CALL_1(bpf_send_signal, u32, sig) */ work->task = current; work->sig = sig; + work->type = type; irq_work_queue(&work->irq_work); return 0; } - return group_send_sig_info(sig, SEND_SIG_PRIV, current, PIDTYPE_TGID); + return group_send_sig_info(sig, SEND_SIG_PRIV, current, type); +} + +BPF_CALL_1(bpf_send_signal, u32, sig) +{ + return bpf_send_signal_common(sig, PIDTYPE_TGID); } static const struct bpf_func_proto bpf_send_signal_proto = { @@ -762,6 +769,18 @@ static const struct bpf_func_proto bpf_send_signal_proto = { .arg1_type = ARG_ANYTHING, }; +BPF_CALL_1(bpf_send_signal_thread, u32, sig) +{ + return bpf_send_signal_common(sig, PIDTYPE_PID); +} + +static const struct bpf_func_proto bpf_send_signal_thread_proto = { + .func = bpf_send_signal_thread, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -822,6 +841,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #endif case BPF_FUNC_send_signal: return &bpf_send_signal_proto; + case BPF_FUNC_send_signal_thread: + return &bpf_send_signal_thread_proto; default: return NULL; } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9bf1f2cd515e..3f7ee102868a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -62,8 +62,6 @@ }) /* hash bits for specific function selection */ -#define FTRACE_HASH_BITS 7 -#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS) #define FTRACE_HASH_DEFAULT_BITS 10 #define FTRACE_HASH_MAX_BITS 12 @@ -146,7 +144,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, { struct trace_array *tr = op->private; - if (tr && this_cpu_read(tr->trace_buffer.data->ftrace_ignore_pid)) + if (tr && this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid)) return; op->saved_func(ip, parent_ip, op, regs); @@ -1103,9 +1101,6 @@ struct ftrace_page { #define ENTRY_SIZE sizeof(struct dyn_ftrace) #define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE) -/* estimate from running different kernels */ -#define NR_TO_INIT 10000 - static struct ftrace_page *ftrace_pages_start; static struct ftrace_page *ftrace_pages; @@ -5464,7 +5459,7 @@ static void __init set_ftrace_early_graph(char *buf, int enable) struct ftrace_hash *hash; hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); - if (WARN_ON(!hash)) + if (MEM_FAIL(!hash, "Failed to allocate hash\n")) return; while (buf) { @@ -5596,8 +5591,8 @@ static const struct file_operations ftrace_notrace_fops = { static DEFINE_MUTEX(graph_lock); -struct ftrace_hash *ftrace_graph_hash = EMPTY_HASH; -struct ftrace_hash *ftrace_graph_notrace_hash = EMPTY_HASH; +struct ftrace_hash __rcu *ftrace_graph_hash = EMPTY_HASH; +struct ftrace_hash __rcu *ftrace_graph_notrace_hash = EMPTY_HASH; enum graph_filter_type { GRAPH_FILTER_NOTRACE = 0, @@ -5872,8 +5867,15 @@ ftrace_graph_release(struct inode *inode, struct file *file) mutex_unlock(&graph_lock); - /* Wait till all users are no longer using the old hash */ - synchronize_rcu(); + /* + * We need to do a hard force of sched synchronization. + * This is because we use preempt_disable() to do RCU, but + * the function tracers can be called where RCU is not watching + * (like before user_exit()). We can not rely on the RCU + * infrastructure to do the synchronization, thus we must do it + * ourselves. + */ + schedule_on_each_cpu(ftrace_sync); free_ftrace_hash(old_hash); } @@ -6596,7 +6598,7 @@ static void add_to_clear_hash_list(struct list_head *clear_list, func = kmalloc(sizeof(*func), GFP_KERNEL); if (!func) { - WARN_ONCE(1, "alloc failure, ftrace filter could be stale\n"); + MEM_FAIL(1, "alloc failure, ftrace filter could be stale\n"); return; } @@ -6922,7 +6924,7 @@ ftrace_filter_pid_sched_switch_probe(void *data, bool preempt, pid_list = rcu_dereference_sched(tr->function_pids); - this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid, + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, trace_ignore_this_task(pid_list, next)); } @@ -6976,7 +6978,7 @@ static void clear_ftrace_pids(struct trace_array *tr) unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); for_each_possible_cpu(cpu) - per_cpu_ptr(tr->trace_buffer.data, cpu)->ftrace_ignore_pid = false; + per_cpu_ptr(tr->array_buffer.data, cpu)->ftrace_ignore_pid = false; rcu_assign_pointer(tr->function_pids, NULL); @@ -7031,9 +7033,10 @@ static void *fpid_next(struct seq_file *m, void *v, loff_t *pos) struct trace_array *tr = m->private; struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_pids); - if (v == FTRACE_NO_PIDS) + if (v == FTRACE_NO_PIDS) { + (*pos)++; return NULL; - + } return trace_pid_next(pid_list, v, pos); } @@ -7100,7 +7103,7 @@ static void ignore_task_cpu(void *data) pid_list = rcu_dereference_protected(tr->function_pids, mutex_is_locked(&ftrace_lock)); - this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid, + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, trace_ignore_this_task(pid_list, current)); } diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c new file mode 100644 index 000000000000..18b0f1cbb947 --- /dev/null +++ b/kernel/trace/kprobe_event_gen_test.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test module for in-kernel kprobe event creation and generation. + * + * Copyright (C) 2019 Tom Zanussi <zanussi@kernel.org> + */ + +#include <linux/module.h> +#include <linux/trace_events.h> + +/* + * This module is a simple test of basic functionality for in-kernel + * kprobe/kretprobe event creation. The first test uses + * kprobe_event_gen_cmd_start(), kprobe_event_add_fields() and + * kprobe_event_gen_cmd_end() to create a kprobe event, which is then + * enabled in order to generate trace output. The second creates a + * kretprobe event using kretprobe_event_gen_cmd_start() and + * kretprobe_event_gen_cmd_end(), and is also then enabled. + * + * To test, select CONFIG_KPROBE_EVENT_GEN_TEST and build the module. + * Then: + * + * # insmod kernel/trace/kprobe_event_gen_test.ko + * # cat /sys/kernel/debug/tracing/trace + * + * You should see many instances of the "gen_kprobe_test" and + * "gen_kretprobe_test" events in the trace buffer. + * + * To remove the events, remove the module: + * + * # rmmod kprobe_event_gen_test + * + */ + +static struct trace_event_file *gen_kprobe_test; +static struct trace_event_file *gen_kretprobe_test; + +/* + * Test to make sure we can create a kprobe event, then add more + * fields. + */ +static int __init test_gen_kprobe_cmd(void) +{ + struct dynevent_cmd cmd; + char *buf; + int ret; + + /* Create a buffer to hold the generated command */ + buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Before generating the command, initialize the cmd object */ + kprobe_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN); + + /* + * Define the gen_kprobe_test event with the first 2 kprobe + * fields. + */ + ret = kprobe_event_gen_cmd_start(&cmd, "gen_kprobe_test", + "do_sys_open", + "dfd=%ax", "filename=%dx"); + if (ret) + goto free; + + /* Use kprobe_event_add_fields to add the rest of the fields */ + + ret = kprobe_event_add_fields(&cmd, "flags=%cx", "mode=+4($stack)"); + if (ret) + goto free; + + /* + * This actually creates the event. + */ + ret = kprobe_event_gen_cmd_end(&cmd); + if (ret) + goto free; + + /* + * Now get the gen_kprobe_test event file. We need to prevent + * the instance and event from disappearing from underneath + * us, which trace_get_event_file() does (though in this case + * we're using the top-level instance which never goes away). + */ + gen_kprobe_test = trace_get_event_file(NULL, "kprobes", + "gen_kprobe_test"); + if (IS_ERR(gen_kprobe_test)) { + ret = PTR_ERR(gen_kprobe_test); + goto delete; + } + + /* Enable the event or you won't see anything */ + ret = trace_array_set_clr_event(gen_kprobe_test->tr, + "kprobes", "gen_kprobe_test", true); + if (ret) { + trace_put_event_file(gen_kprobe_test); + goto delete; + } + out: + return ret; + delete: + /* We got an error after creating the event, delete it */ + ret = kprobe_event_delete("gen_kprobe_test"); + free: + kfree(buf); + + goto out; +} + +/* + * Test to make sure we can create a kretprobe event. + */ +static int __init test_gen_kretprobe_cmd(void) +{ + struct dynevent_cmd cmd; + char *buf; + int ret; + + /* Create a buffer to hold the generated command */ + buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Before generating the command, initialize the cmd object */ + kprobe_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN); + + /* + * Define the kretprobe event. + */ + ret = kretprobe_event_gen_cmd_start(&cmd, "gen_kretprobe_test", + "do_sys_open", + "$retval"); + if (ret) + goto free; + + /* + * This actually creates the event. + */ + ret = kretprobe_event_gen_cmd_end(&cmd); + if (ret) + goto free; + + /* + * Now get the gen_kretprobe_test event file. We need to + * prevent the instance and event from disappearing from + * underneath us, which trace_get_event_file() does (though in + * this case we're using the top-level instance which never + * goes away). + */ + gen_kretprobe_test = trace_get_event_file(NULL, "kprobes", + "gen_kretprobe_test"); + if (IS_ERR(gen_kretprobe_test)) { + ret = PTR_ERR(gen_kretprobe_test); + goto delete; + } + + /* Enable the event or you won't see anything */ + ret = trace_array_set_clr_event(gen_kretprobe_test->tr, + "kprobes", "gen_kretprobe_test", true); + if (ret) { + trace_put_event_file(gen_kretprobe_test); + goto delete; + } + out: + return ret; + delete: + /* We got an error after creating the event, delete it */ + ret = kprobe_event_delete("gen_kretprobe_test"); + free: + kfree(buf); + + goto out; +} + +static int __init kprobe_event_gen_test_init(void) +{ + int ret; + + ret = test_gen_kprobe_cmd(); + if (ret) + return ret; + + ret = test_gen_kretprobe_cmd(); + if (ret) { + WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, + "kprobes", + "gen_kretprobe_test", false)); + trace_put_event_file(gen_kretprobe_test); + WARN_ON(kprobe_event_delete("gen_kretprobe_test")); + } + + return ret; +} + +static void __exit kprobe_event_gen_test_exit(void) +{ + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, + "kprobes", + "gen_kprobe_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(gen_kprobe_test); + + /* Now unregister and free the event */ + WARN_ON(kprobe_event_delete("gen_kprobe_test")); + + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, + "kprobes", + "gen_kretprobe_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(gen_kretprobe_test); + + /* Now unregister and free the event */ + WARN_ON(kprobe_event_delete("gen_kretprobe_test")); +} + +module_init(kprobe_event_gen_test_init) +module_exit(kprobe_event_gen_test_exit) + +MODULE_AUTHOR("Tom Zanussi"); +MODULE_DESCRIPTION("kprobe event generation test"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3f655371eaf6..61f0e92ace99 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -300,8 +300,6 @@ u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event) /* Missed count stored at end */ #define RB_MISSED_STORED (1 << 30) -#define RB_MISSED_FLAGS (RB_MISSED_EVENTS|RB_MISSED_STORED) - struct buffer_data_page { u64 time_stamp; /* page time stamp */ local_t commit; /* write committed index */ @@ -443,7 +441,7 @@ enum { struct ring_buffer_per_cpu { int cpu; atomic_t record_disabled; - struct ring_buffer *buffer; + struct trace_buffer *buffer; raw_spinlock_t reader_lock; /* serialize readers */ arch_spinlock_t lock; struct lock_class_key lock_key; @@ -482,7 +480,7 @@ struct ring_buffer_per_cpu { struct rb_irq_work irq_work; }; -struct ring_buffer { +struct trace_buffer { unsigned flags; int cpus; atomic_t record_disabled; @@ -518,7 +516,7 @@ struct ring_buffer_iter { * * Returns the number of pages used by a per_cpu buffer of the ring buffer. */ -size_t ring_buffer_nr_pages(struct ring_buffer *buffer, int cpu) +size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu) { return buffer->buffers[cpu]->nr_pages; } @@ -530,7 +528,7 @@ size_t ring_buffer_nr_pages(struct ring_buffer *buffer, int cpu) * * Returns the number of pages that have content in the ring buffer. */ -size_t ring_buffer_nr_dirty_pages(struct ring_buffer *buffer, int cpu) +size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu) { size_t read; size_t cnt; @@ -573,7 +571,7 @@ static void rb_wake_up_waiters(struct irq_work *work) * as data is added to any of the @buffer's cpu buffers. Otherwise * it will wait for data to be added to a specific cpu buffer. */ -int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full) +int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) { struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer); DEFINE_WAIT(wait); @@ -684,7 +682,7 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full) * Returns EPOLLIN | EPOLLRDNORM if data exists in the buffers, * zero otherwise. */ -__poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, +__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table) { struct ring_buffer_per_cpu *cpu_buffer; @@ -742,13 +740,13 @@ __poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, /* Up this if you want to test the TIME_EXTENTS and normalization */ #define DEBUG_SHIFT 0 -static inline u64 rb_time_stamp(struct ring_buffer *buffer) +static inline u64 rb_time_stamp(struct trace_buffer *buffer) { /* shift to debug/test normalization and TIME_EXTENTS */ return buffer->clock() << DEBUG_SHIFT; } -u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) +u64 ring_buffer_time_stamp(struct trace_buffer *buffer, int cpu) { u64 time; @@ -760,7 +758,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) } EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); -void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, +void ring_buffer_normalize_time_stamp(struct trace_buffer *buffer, int cpu, u64 *ts) { /* Just stupid testing the normalize function and deltas */ @@ -1283,7 +1281,7 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, } static struct ring_buffer_per_cpu * -rb_allocate_cpu_buffer(struct ring_buffer *buffer, long nr_pages, int cpu) +rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct buffer_page *bpage; @@ -1368,16 +1366,17 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) * __ring_buffer_alloc - allocate a new ring_buffer * @size: the size in bytes per cpu that is needed. * @flags: attributes to set for the ring buffer. + * @key: ring buffer reader_lock_key. * * Currently the only flag that is available is the RB_FL_OVERWRITE * flag. This flag means that the buffer will overwrite old data * when the buffer wraps. If this flag is not set, the buffer will * drop data when the tail hits the head. */ -struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, +struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; long nr_pages; int bsize; int cpu; @@ -1447,7 +1446,7 @@ EXPORT_SYMBOL_GPL(__ring_buffer_alloc); * @buffer: the buffer to free. */ void -ring_buffer_free(struct ring_buffer *buffer) +ring_buffer_free(struct trace_buffer *buffer) { int cpu; @@ -1463,18 +1462,18 @@ ring_buffer_free(struct ring_buffer *buffer) } EXPORT_SYMBOL_GPL(ring_buffer_free); -void ring_buffer_set_clock(struct ring_buffer *buffer, +void ring_buffer_set_clock(struct trace_buffer *buffer, u64 (*clock)(void)) { buffer->clock = clock; } -void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs) +void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs) { buffer->time_stamp_abs = abs; } -bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer) +bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer) { return buffer->time_stamp_abs; } @@ -1712,7 +1711,7 @@ static void update_pages_handler(struct work_struct *work) * * Returns 0 on success and < 0 on failure. */ -int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, +int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, int cpu_id) { struct ring_buffer_per_cpu *cpu_buffer; @@ -1891,7 +1890,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, } EXPORT_SYMBOL_GPL(ring_buffer_resize); -void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val) +void ring_buffer_change_overwrite(struct trace_buffer *buffer, int val) { mutex_lock(&buffer->mutex); if (val) @@ -2206,7 +2205,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, { struct buffer_page *tail_page = info->tail_page; struct buffer_page *commit_page = cpu_buffer->commit_page; - struct ring_buffer *buffer = cpu_buffer->buffer; + struct trace_buffer *buffer = cpu_buffer->buffer; struct buffer_page *next_page; int ret; @@ -2330,11 +2329,11 @@ static inline bool rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, /** * rb_update_event - update event type and data + * @cpu_buffer: The per cpu buffer of the @event * @event: the event to update - * @type: the type of event - * @length: the size of the event field in the ring buffer + * @info: The info to update the @event with (contains length and delta) * - * Update the type and data fields of the event. The length + * Update the type and data fields of the @event. The length * is the actual size that is written to the ring buffer, * and with this, we can determine what to place into the * data field. @@ -2609,7 +2608,7 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, } static __always_inline void -rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) +rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) { size_t nr_pages; size_t dirty; @@ -2733,7 +2732,7 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) * Call this function before calling another ring_buffer_lock_reserve() and * call ring_buffer_nest_end() after the nested ring_buffer_unlock_commit(). */ -void ring_buffer_nest_start(struct ring_buffer *buffer) +void ring_buffer_nest_start(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; int cpu; @@ -2753,7 +2752,7 @@ void ring_buffer_nest_start(struct ring_buffer *buffer) * Must be called after ring_buffer_nest_start() and after the * ring_buffer_unlock_commit(). */ -void ring_buffer_nest_end(struct ring_buffer *buffer) +void ring_buffer_nest_end(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; int cpu; @@ -2775,7 +2774,7 @@ void ring_buffer_nest_end(struct ring_buffer *buffer) * * Must be paired with ring_buffer_lock_reserve. */ -int ring_buffer_unlock_commit(struct ring_buffer *buffer, +int ring_buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { struct ring_buffer_per_cpu *cpu_buffer; @@ -2868,7 +2867,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, } static __always_inline struct ring_buffer_event * -rb_reserve_next_event(struct ring_buffer *buffer, +rb_reserve_next_event(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer, unsigned long length) { @@ -2961,7 +2960,7 @@ rb_reserve_next_event(struct ring_buffer *buffer, * If NULL is returned, then nothing has been allocated or locked. */ struct ring_buffer_event * -ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) +ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; @@ -3062,7 +3061,7 @@ rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer, * If this function is called, do not call ring_buffer_unlock_commit on * the event. */ -void ring_buffer_discard_commit(struct ring_buffer *buffer, +void ring_buffer_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { struct ring_buffer_per_cpu *cpu_buffer; @@ -3113,7 +3112,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); * Note, like ring_buffer_lock_reserve, the length is the length of the data * and not the length of the event which would hold the header. */ -int ring_buffer_write(struct ring_buffer *buffer, +int ring_buffer_write(struct trace_buffer *buffer, unsigned long length, void *data) { @@ -3193,7 +3192,7 @@ static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) * * The caller should call synchronize_rcu() after this. */ -void ring_buffer_record_disable(struct ring_buffer *buffer) +void ring_buffer_record_disable(struct trace_buffer *buffer) { atomic_inc(&buffer->record_disabled); } @@ -3206,7 +3205,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable); * Note, multiple disables will need the same number of enables * to truly enable the writing (much like preempt_disable). */ -void ring_buffer_record_enable(struct ring_buffer *buffer) +void ring_buffer_record_enable(struct trace_buffer *buffer) { atomic_dec(&buffer->record_disabled); } @@ -3223,7 +3222,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable); * it works like an on/off switch, where as the disable() version * must be paired with a enable(). */ -void ring_buffer_record_off(struct ring_buffer *buffer) +void ring_buffer_record_off(struct trace_buffer *buffer) { unsigned int rd; unsigned int new_rd; @@ -3246,7 +3245,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_off); * it works like an on/off switch, where as the enable() version * must be paired with a disable(). */ -void ring_buffer_record_on(struct ring_buffer *buffer) +void ring_buffer_record_on(struct trace_buffer *buffer) { unsigned int rd; unsigned int new_rd; @@ -3264,7 +3263,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_on); * * Returns true if the ring buffer is in a state that it accepts writes. */ -bool ring_buffer_record_is_on(struct ring_buffer *buffer) +bool ring_buffer_record_is_on(struct trace_buffer *buffer) { return !atomic_read(&buffer->record_disabled); } @@ -3280,7 +3279,7 @@ bool ring_buffer_record_is_on(struct ring_buffer *buffer) * ring_buffer_record_disable(), as that is a temporary disabling of * the ring buffer. */ -bool ring_buffer_record_is_set_on(struct ring_buffer *buffer) +bool ring_buffer_record_is_set_on(struct trace_buffer *buffer) { return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF); } @@ -3295,7 +3294,7 @@ bool ring_buffer_record_is_set_on(struct ring_buffer *buffer) * * The caller should call synchronize_rcu() after this. */ -void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu) +void ring_buffer_record_disable_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; @@ -3315,7 +3314,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu); * Note, multiple disables will need the same number of enables * to truly enable the writing (much like preempt_disable). */ -void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) +void ring_buffer_record_enable_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; @@ -3345,7 +3344,7 @@ rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ -u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu) +u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu) { unsigned long flags; struct ring_buffer_per_cpu *cpu_buffer; @@ -3378,7 +3377,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ -unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu) +unsigned long ring_buffer_bytes_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long ret; @@ -3398,7 +3397,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu); * @buffer: The ring buffer * @cpu: The per CPU buffer to get the entries from. */ -unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) +unsigned long ring_buffer_entries_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; @@ -3417,7 +3416,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); * @buffer: The ring buffer * @cpu: The per CPU buffer to get the number of overruns from */ -unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) +unsigned long ring_buffer_overrun_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long ret; @@ -3440,7 +3439,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); * @cpu: The per CPU buffer to get the number of overruns from */ unsigned long -ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu) +ring_buffer_commit_overrun_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long ret; @@ -3462,7 +3461,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu); * @cpu: The per CPU buffer to get the number of overruns from */ unsigned long -ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu) +ring_buffer_dropped_events_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long ret; @@ -3483,7 +3482,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu); * @cpu: The per CPU buffer to get the number of events read */ unsigned long -ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu) +ring_buffer_read_events_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; @@ -3502,7 +3501,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu); * Returns the total number of entries in the ring buffer * (all CPU entries) */ -unsigned long ring_buffer_entries(struct ring_buffer *buffer) +unsigned long ring_buffer_entries(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long entries = 0; @@ -3525,7 +3524,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_entries); * Returns the total number of overruns in the ring buffer * (all CPU entries) */ -unsigned long ring_buffer_overruns(struct ring_buffer *buffer) +unsigned long ring_buffer_overruns(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long overruns = 0; @@ -3949,7 +3948,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_peek); static struct ring_buffer_event * rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; int nr_loops = 0; @@ -4077,7 +4076,7 @@ rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked) * not consume the data. */ struct ring_buffer_event * -ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, +ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; @@ -4141,7 +4140,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) * and eventually empty the ring buffer if the producer is slower. */ struct ring_buffer_event * -ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, +ring_buffer_consume(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events) { struct ring_buffer_per_cpu *cpu_buffer; @@ -4201,7 +4200,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume); * This overall must be paired with ring_buffer_read_finish. */ struct ring_buffer_iter * -ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu, gfp_t flags) +ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_iter *iter; @@ -4331,8 +4330,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_read); /** * ring_buffer_size - return the size of the ring buffer (in bytes) * @buffer: The ring buffer. + * @cpu: The CPU to get ring buffer size from. */ -unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu) +unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu) { /* * Earlier, this method returned @@ -4398,7 +4398,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) * @buffer: The ring buffer to reset a per cpu buffer of * @cpu: The CPU buffer to be reset */ -void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) +void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; unsigned long flags; @@ -4435,7 +4435,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); * ring_buffer_reset - reset a ring buffer * @buffer: The ring buffer to reset all cpu buffers */ -void ring_buffer_reset(struct ring_buffer *buffer) +void ring_buffer_reset(struct trace_buffer *buffer) { int cpu; @@ -4448,7 +4448,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset); * rind_buffer_empty - is the ring buffer empty? * @buffer: The ring buffer to test */ -bool ring_buffer_empty(struct ring_buffer *buffer) +bool ring_buffer_empty(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; @@ -4478,7 +4478,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty); * @buffer: The ring buffer * @cpu: The CPU buffer to test */ -bool ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) +bool ring_buffer_empty_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; @@ -4504,14 +4504,15 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers * @buffer_a: One buffer to swap with * @buffer_b: The other buffer to swap with + * @cpu: the CPU of the buffers to swap * * This function is useful for tracers that want to take a "snapshot" * of a CPU buffer and has another back up buffer lying around. * it is expected that the tracer handles the cpu buffer not being * used at the moment. */ -int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, - struct ring_buffer *buffer_b, int cpu) +int ring_buffer_swap_cpu(struct trace_buffer *buffer_a, + struct trace_buffer *buffer_b, int cpu) { struct ring_buffer_per_cpu *cpu_buffer_a; struct ring_buffer_per_cpu *cpu_buffer_b; @@ -4590,7 +4591,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); * Returns: * The page allocated, or ERR_PTR */ -void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu) +void *ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct buffer_data_page *bpage = NULL; @@ -4637,7 +4638,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page); * * Free a page allocated from ring_buffer_alloc_read_page. */ -void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data) +void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct buffer_data_page *bpage = data; @@ -4697,7 +4698,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); * >=0 if data has been transferred, returns the offset of consumed data. * <0 if no data has been transferred. */ -int ring_buffer_read_page(struct ring_buffer *buffer, +int ring_buffer_read_page(struct trace_buffer *buffer, void **data_page, size_t len, int cpu, int full) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; @@ -4868,12 +4869,12 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_page); */ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; long nr_pages_same; int cpu_i; unsigned long nr_pages; - buffer = container_of(node, struct ring_buffer, node); + buffer = container_of(node, struct trace_buffer, node); if (cpumask_test_cpu(cpu, buffer->cpumask)) return 0; @@ -4923,7 +4924,7 @@ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node) static struct task_struct *rb_threads[NR_CPUS] __initdata; struct rb_test_data { - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long events; unsigned long bytes_written; unsigned long bytes_alloc; @@ -5065,7 +5066,7 @@ static __init int rb_hammer_test(void *arg) static __init int test_ringbuffer(void) { struct task_struct *rb_hammer; - struct ring_buffer *buffer; + struct trace_buffer *buffer; int cpu; int ret = 0; diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 32149e46551c..8df0aa810950 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -29,7 +29,7 @@ static int reader_finish; static DECLARE_COMPLETION(read_start); static DECLARE_COMPLETION(read_done); -static struct ring_buffer *buffer; +static struct trace_buffer *buffer; static struct task_struct *producer; static struct task_struct *consumer; static unsigned long read; diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c new file mode 100644 index 000000000000..4aefe003cb7c --- /dev/null +++ b/kernel/trace/synth_event_gen_test.c @@ -0,0 +1,523 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test module for in-kernel sythetic event creation and generation. + * + * Copyright (C) 2019 Tom Zanussi <zanussi@kernel.org> + */ + +#include <linux/module.h> +#include <linux/trace_events.h> + +/* + * This module is a simple test of basic functionality for in-kernel + * synthetic event creation and generation, the first and second tests + * using synth_event_gen_cmd_start() and synth_event_add_field(), the + * third uses synth_event_create() to do it all at once with a static + * field array. + * + * Following that are a few examples using the created events to test + * various ways of tracing a synthetic event. + * + * To test, select CONFIG_SYNTH_EVENT_GEN_TEST and build the module. + * Then: + * + * # insmod kernel/trace/synth_event_gen_test.ko + * # cat /sys/kernel/debug/tracing/trace + * + * You should see several events in the trace buffer - + * "create_synth_test", "empty_synth_test", and several instances of + * "gen_synth_test". + * + * To remove the events, remove the module: + * + * # rmmod synth_event_gen_test + * + */ + +static struct trace_event_file *create_synth_test; +static struct trace_event_file *empty_synth_test; +static struct trace_event_file *gen_synth_test; + +/* + * Test to make sure we can create a synthetic event, then add more + * fields. + */ +static int __init test_gen_synth_cmd(void) +{ + struct dynevent_cmd cmd; + u64 vals[7]; + char *buf; + int ret; + + /* Create a buffer to hold the generated command */ + buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Before generating the command, initialize the cmd object */ + synth_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN); + + /* + * Create the empty gen_synth_test synthetic event with the + * first 4 fields. + */ + ret = synth_event_gen_cmd_start(&cmd, "gen_synth_test", THIS_MODULE, + "pid_t", "next_pid_field", + "char[16]", "next_comm_field", + "u64", "ts_ns", + "u64", "ts_ms"); + if (ret) + goto free; + + /* Use synth_event_add_field to add the rest of the fields */ + + ret = synth_event_add_field(&cmd, "unsigned int", "cpu"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "char[64]", "my_string_field"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "int", "my_int_field"); + if (ret) + goto free; + + ret = synth_event_gen_cmd_end(&cmd); + if (ret) + goto free; + + /* + * Now get the gen_synth_test event file. We need to prevent + * the instance and event from disappearing from underneath + * us, which trace_get_event_file() does (though in this case + * we're using the top-level instance which never goes away). + */ + gen_synth_test = trace_get_event_file(NULL, "synthetic", + "gen_synth_test"); + if (IS_ERR(gen_synth_test)) { + ret = PTR_ERR(gen_synth_test); + goto delete; + } + + /* Enable the event or you won't see anything */ + ret = trace_array_set_clr_event(gen_synth_test->tr, + "synthetic", "gen_synth_test", true); + if (ret) { + trace_put_event_file(gen_synth_test); + goto delete; + } + + /* Create some bogus values just for testing */ + + vals[0] = 777; /* next_pid_field */ + vals[1] = (u64)"hula hoops"; /* next_comm_field */ + vals[2] = 1000000; /* ts_ns */ + vals[3] = 1000; /* ts_ms */ + vals[4] = smp_processor_id(); /* cpu */ + vals[5] = (u64)"thneed"; /* my_string_field */ + vals[6] = 598; /* my_int_field */ + + /* Now generate a gen_synth_test event */ + ret = synth_event_trace_array(gen_synth_test, vals, ARRAY_SIZE(vals)); + out: + return ret; + delete: + /* We got an error after creating the event, delete it */ + synth_event_delete("gen_synth_test"); + free: + kfree(buf); + + goto out; +} + +/* + * Test to make sure we can create an initially empty synthetic event, + * then add all the fields. + */ +static int __init test_empty_synth_event(void) +{ + struct dynevent_cmd cmd; + u64 vals[7]; + char *buf; + int ret; + + /* Create a buffer to hold the generated command */ + buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Before generating the command, initialize the cmd object */ + synth_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN); + + /* + * Create the empty_synth_test synthetic event with no fields. + */ + ret = synth_event_gen_cmd_start(&cmd, "empty_synth_test", THIS_MODULE); + if (ret) + goto free; + + /* Use synth_event_add_field to add all of the fields */ + + ret = synth_event_add_field(&cmd, "pid_t", "next_pid_field"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "char[16]", "next_comm_field"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "u64", "ts_ns"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "u64", "ts_ms"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "unsigned int", "cpu"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "char[64]", "my_string_field"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "int", "my_int_field"); + if (ret) + goto free; + + /* All fields have been added, close and register the synth event */ + + ret = synth_event_gen_cmd_end(&cmd); + if (ret) + goto free; + + /* + * Now get the empty_synth_test event file. We need to + * prevent the instance and event from disappearing from + * underneath us, which trace_get_event_file() does (though in + * this case we're using the top-level instance which never + * goes away). + */ + empty_synth_test = trace_get_event_file(NULL, "synthetic", + "empty_synth_test"); + if (IS_ERR(empty_synth_test)) { + ret = PTR_ERR(empty_synth_test); + goto delete; + } + + /* Enable the event or you won't see anything */ + ret = trace_array_set_clr_event(empty_synth_test->tr, + "synthetic", "empty_synth_test", true); + if (ret) { + trace_put_event_file(empty_synth_test); + goto delete; + } + + /* Create some bogus values just for testing */ + + vals[0] = 777; /* next_pid_field */ + vals[1] = (u64)"tiddlywinks"; /* next_comm_field */ + vals[2] = 1000000; /* ts_ns */ + vals[3] = 1000; /* ts_ms */ + vals[4] = smp_processor_id(); /* cpu */ + vals[5] = (u64)"thneed_2.0"; /* my_string_field */ + vals[6] = 399; /* my_int_field */ + + /* Now trace an empty_synth_test event */ + ret = synth_event_trace_array(empty_synth_test, vals, ARRAY_SIZE(vals)); + out: + return ret; + delete: + /* We got an error after creating the event, delete it */ + synth_event_delete("empty_synth_test"); + free: + kfree(buf); + + goto out; +} + +static struct synth_field_desc create_synth_test_fields[] = { + { .type = "pid_t", .name = "next_pid_field" }, + { .type = "char[16]", .name = "next_comm_field" }, + { .type = "u64", .name = "ts_ns" }, + { .type = "u64", .name = "ts_ms" }, + { .type = "unsigned int", .name = "cpu" }, + { .type = "char[64]", .name = "my_string_field" }, + { .type = "int", .name = "my_int_field" }, +}; + +/* + * Test synthetic event creation all at once from array of field + * descriptors. + */ +static int __init test_create_synth_event(void) +{ + u64 vals[7]; + int ret; + + /* Create the create_synth_test event with the fields above */ + ret = synth_event_create("create_synth_test", + create_synth_test_fields, + ARRAY_SIZE(create_synth_test_fields), + THIS_MODULE); + if (ret) + goto out; + + /* + * Now get the create_synth_test event file. We need to + * prevent the instance and event from disappearing from + * underneath us, which trace_get_event_file() does (though in + * this case we're using the top-level instance which never + * goes away). + */ + create_synth_test = trace_get_event_file(NULL, "synthetic", + "create_synth_test"); + if (IS_ERR(create_synth_test)) { + ret = PTR_ERR(create_synth_test); + goto delete; + } + + /* Enable the event or you won't see anything */ + ret = trace_array_set_clr_event(create_synth_test->tr, + "synthetic", "create_synth_test", true); + if (ret) { + trace_put_event_file(create_synth_test); + goto delete; + } + + /* Create some bogus values just for testing */ + + vals[0] = 777; /* next_pid_field */ + vals[1] = (u64)"tiddlywinks"; /* next_comm_field */ + vals[2] = 1000000; /* ts_ns */ + vals[3] = 1000; /* ts_ms */ + vals[4] = smp_processor_id(); /* cpu */ + vals[5] = (u64)"thneed"; /* my_string_field */ + vals[6] = 398; /* my_int_field */ + + /* Now generate a create_synth_test event */ + ret = synth_event_trace_array(create_synth_test, vals, ARRAY_SIZE(vals)); + out: + return ret; + delete: + /* We got an error after creating the event, delete it */ + ret = synth_event_delete("create_synth_test"); + + goto out; +} + +/* + * Test tracing a synthetic event by reserving trace buffer space, + * then filling in fields one after another. + */ +static int __init test_add_next_synth_val(void) +{ + struct synth_event_trace_state trace_state; + int ret; + + /* Start by reserving space in the trace buffer */ + ret = synth_event_trace_start(gen_synth_test, &trace_state); + if (ret) + return ret; + + /* Write some bogus values into the trace buffer, one after another */ + + /* next_pid_field */ + ret = synth_event_add_next_val(777, &trace_state); + if (ret) + goto out; + + /* next_comm_field */ + ret = synth_event_add_next_val((u64)"slinky", &trace_state); + if (ret) + goto out; + + /* ts_ns */ + ret = synth_event_add_next_val(1000000, &trace_state); + if (ret) + goto out; + + /* ts_ms */ + ret = synth_event_add_next_val(1000, &trace_state); + if (ret) + goto out; + + /* cpu */ + ret = synth_event_add_next_val(smp_processor_id(), &trace_state); + if (ret) + goto out; + + /* my_string_field */ + ret = synth_event_add_next_val((u64)"thneed_2.01", &trace_state); + if (ret) + goto out; + + /* my_int_field */ + ret = synth_event_add_next_val(395, &trace_state); + out: + /* Finally, commit the event */ + ret = synth_event_trace_end(&trace_state); + + return ret; +} + +/* + * Test tracing a synthetic event by reserving trace buffer space, + * then filling in fields using field names, which can be done in any + * order. + */ +static int __init test_add_synth_val(void) +{ + struct synth_event_trace_state trace_state; + int ret; + + /* Start by reserving space in the trace buffer */ + ret = synth_event_trace_start(gen_synth_test, &trace_state); + if (ret) + return ret; + + /* Write some bogus values into the trace buffer, using field names */ + + ret = synth_event_add_val("ts_ns", 1000000, &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("ts_ms", 1000, &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("cpu", smp_processor_id(), &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("next_pid_field", 777, &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("next_comm_field", (u64)"silly putty", + &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("my_string_field", (u64)"thneed_9", + &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("my_int_field", 3999, &trace_state); + out: + /* Finally, commit the event */ + ret = synth_event_trace_end(&trace_state); + + return ret; +} + +/* + * Test tracing a synthetic event all at once from array of values. + */ +static int __init test_trace_synth_event(void) +{ + int ret; + + /* Trace some bogus values just for testing */ + ret = synth_event_trace(create_synth_test, 7, /* number of values */ + 444, /* next_pid_field */ + (u64)"clackers", /* next_comm_field */ + 1000000, /* ts_ns */ + 1000, /* ts_ms */ + smp_processor_id(), /* cpu */ + (u64)"Thneed", /* my_string_field */ + 999); /* my_int_field */ + return ret; +} + +static int __init synth_event_gen_test_init(void) +{ + int ret; + + ret = test_gen_synth_cmd(); + if (ret) + return ret; + + ret = test_empty_synth_event(); + if (ret) { + WARN_ON(trace_array_set_clr_event(gen_synth_test->tr, + "synthetic", + "gen_synth_test", false)); + trace_put_event_file(gen_synth_test); + WARN_ON(synth_event_delete("gen_synth_test")); + goto out; + } + + ret = test_create_synth_event(); + if (ret) { + WARN_ON(trace_array_set_clr_event(gen_synth_test->tr, + "synthetic", + "gen_synth_test", false)); + trace_put_event_file(gen_synth_test); + WARN_ON(synth_event_delete("gen_synth_test")); + + WARN_ON(trace_array_set_clr_event(empty_synth_test->tr, + "synthetic", + "empty_synth_test", false)); + trace_put_event_file(empty_synth_test); + WARN_ON(synth_event_delete("empty_synth_test")); + goto out; + } + + ret = test_add_next_synth_val(); + WARN_ON(ret); + + ret = test_add_synth_val(); + WARN_ON(ret); + + ret = test_trace_synth_event(); + WARN_ON(ret); + out: + return ret; +} + +static void __exit synth_event_gen_test_exit(void) +{ + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(gen_synth_test->tr, + "synthetic", + "gen_synth_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(gen_synth_test); + + /* Now unregister and free the synthetic event */ + WARN_ON(synth_event_delete("gen_synth_test")); + + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(empty_synth_test->tr, + "synthetic", + "empty_synth_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(empty_synth_test); + + /* Now unregister and free the synthetic event */ + WARN_ON(synth_event_delete("empty_synth_test")); + + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(create_synth_test->tr, + "synthetic", + "create_synth_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(create_synth_test); + + /* Now unregister and free the synthetic event */ + WARN_ON(synth_event_delete("create_synth_test")); +} + +module_init(synth_event_gen_test_init) +module_exit(synth_event_gen_test_exit) + +MODULE_AUTHOR("Tom Zanussi"); +MODULE_DESCRIPTION("synthetic event generation test"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ddb7e7f5fe8d..c797a15a1fc7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -162,8 +162,8 @@ union trace_eval_map_item { static union trace_eval_map_item *trace_eval_maps; #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ -static int tracing_set_tracer(struct trace_array *tr, const char *buf); -static void ftrace_trace_userstack(struct ring_buffer *buffer, +int tracing_set_tracer(struct trace_array *tr, const char *buf); +static void ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc); #define MAX_TRACER_SIZE 100 @@ -338,7 +338,7 @@ int tracing_check_open_get_tr(struct trace_array *tr) } int call_filter_check_discard(struct trace_event_call *call, void *rec, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event) { if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) && @@ -603,7 +603,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, return read; } -static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu) +static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu) { u64 ts; @@ -619,7 +619,7 @@ static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu) u64 ftrace_now(int cpu) { - return buffer_ftrace_now(&global_trace.trace_buffer, cpu); + return buffer_ftrace_now(&global_trace.array_buffer, cpu); } /** @@ -747,22 +747,22 @@ static inline void trace_access_lock_init(void) #endif #ifdef CONFIG_STACKTRACE -static void __ftrace_trace_stack(struct ring_buffer *buffer, +static void __ftrace_trace_stack(struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs); static inline void ftrace_trace_stack(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs); #else -static inline void __ftrace_trace_stack(struct ring_buffer *buffer, +static inline void __ftrace_trace_stack(struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs) { } static inline void ftrace_trace_stack(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs) { @@ -780,7 +780,7 @@ trace_event_setup(struct ring_buffer_event *event, } static __always_inline struct ring_buffer_event * -__trace_buffer_lock_reserve(struct ring_buffer *buffer, +__trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, unsigned long flags, int pc) @@ -796,8 +796,8 @@ __trace_buffer_lock_reserve(struct ring_buffer *buffer, void tracer_tracing_on(struct trace_array *tr) { - if (tr->trace_buffer.buffer) - ring_buffer_record_on(tr->trace_buffer.buffer); + if (tr->array_buffer.buffer) + ring_buffer_record_on(tr->array_buffer.buffer); /* * This flag is looked at when buffers haven't been allocated * yet, or by some tracers (like irqsoff), that just want to @@ -825,7 +825,7 @@ EXPORT_SYMBOL_GPL(tracing_on); static __always_inline void -__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) +__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { __this_cpu_write(trace_taskinfo_save, true); @@ -848,7 +848,7 @@ __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *eve int __trace_puts(unsigned long ip, const char *str, int size) { struct ring_buffer_event *event; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct print_entry *entry; unsigned long irq_flags; int alloc; @@ -865,11 +865,14 @@ int __trace_puts(unsigned long ip, const char *str, int size) alloc = sizeof(*entry) + size + 2; /* possible \n added */ local_save_flags(irq_flags); - buffer = global_trace.trace_buffer.buffer; + buffer = global_trace.array_buffer.buffer; + ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, irq_flags, pc); - if (!event) - return 0; + if (!event) { + size = 0; + goto out; + } entry = ring_buffer_event_data(event); entry->ip = ip; @@ -885,7 +888,8 @@ int __trace_puts(unsigned long ip, const char *str, int size) __buffer_unlock_commit(buffer, event); ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); - + out: + ring_buffer_nest_end(buffer); return size; } EXPORT_SYMBOL_GPL(__trace_puts); @@ -898,10 +902,11 @@ EXPORT_SYMBOL_GPL(__trace_puts); int __trace_bputs(unsigned long ip, const char *str) { struct ring_buffer_event *event; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct bputs_entry *entry; unsigned long irq_flags; int size = sizeof(struct bputs_entry); + int ret = 0; int pc; if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) @@ -913,11 +918,13 @@ int __trace_bputs(unsigned long ip, const char *str) return 0; local_save_flags(irq_flags); - buffer = global_trace.trace_buffer.buffer; + buffer = global_trace.array_buffer.buffer; + + ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, irq_flags, pc); if (!event) - return 0; + goto out; entry = ring_buffer_event_data(event); entry->ip = ip; @@ -926,7 +933,10 @@ int __trace_bputs(unsigned long ip, const char *str) __buffer_unlock_commit(buffer, event); ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); - return 1; + ret = 1; + out: + ring_buffer_nest_end(buffer); + return ret; } EXPORT_SYMBOL_GPL(__trace_bputs); @@ -1036,9 +1046,9 @@ void *tracing_cond_snapshot_data(struct trace_array *tr) } EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data); -static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf, - struct trace_buffer *size_buf, int cpu_id); -static void set_buffer_entries(struct trace_buffer *buf, unsigned long val); +static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, + struct array_buffer *size_buf, int cpu_id); +static void set_buffer_entries(struct array_buffer *buf, unsigned long val); int tracing_alloc_snapshot_instance(struct trace_array *tr) { @@ -1048,7 +1058,7 @@ int tracing_alloc_snapshot_instance(struct trace_array *tr) /* allocate spare buffer */ ret = resize_buffer_duplicate_size(&tr->max_buffer, - &tr->trace_buffer, RING_BUFFER_ALL_CPUS); + &tr->array_buffer, RING_BUFFER_ALL_CPUS); if (ret < 0) return ret; @@ -1251,8 +1261,8 @@ EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); void tracer_tracing_off(struct trace_array *tr) { - if (tr->trace_buffer.buffer) - ring_buffer_record_off(tr->trace_buffer.buffer); + if (tr->array_buffer.buffer) + ring_buffer_record_off(tr->array_buffer.buffer); /* * This flag is looked at when buffers haven't been allocated * yet, or by some tracers (like irqsoff), that just want to @@ -1294,8 +1304,8 @@ void disable_trace_on_warning(void) */ bool tracer_tracing_is_on(struct trace_array *tr) { - if (tr->trace_buffer.buffer) - return ring_buffer_record_is_on(tr->trace_buffer.buffer); + if (tr->array_buffer.buffer) + return ring_buffer_record_is_on(tr->array_buffer.buffer); return !tr->buffer_disabled; } @@ -1590,8 +1600,8 @@ void latency_fsnotify(struct trace_array *tr) static void __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { - struct trace_buffer *trace_buf = &tr->trace_buffer; - struct trace_buffer *max_buf = &tr->max_buffer; + struct array_buffer *trace_buf = &tr->array_buffer; + struct array_buffer *max_buf = &tr->max_buffer; struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu); struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu); @@ -1649,8 +1659,8 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, arch_spin_lock(&tr->max_lock); - /* Inherit the recordable setting from trace_buffer */ - if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer)) + /* Inherit the recordable setting from array_buffer */ + if (ring_buffer_record_is_set_on(tr->array_buffer.buffer)) ring_buffer_record_on(tr->max_buffer.buffer); else ring_buffer_record_off(tr->max_buffer.buffer); @@ -1659,7 +1669,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) goto out_unlock; #endif - swap(tr->trace_buffer.buffer, tr->max_buffer.buffer); + swap(tr->array_buffer.buffer, tr->max_buffer.buffer); __update_max_tr(tr, tsk, cpu); @@ -1692,7 +1702,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) arch_spin_lock(&tr->max_lock); - ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu); + ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu); if (ret == -EBUSY) { /* @@ -1718,7 +1728,7 @@ static int wait_on_pipe(struct trace_iterator *iter, int full) if (trace_buffer_iter(iter, iter->cpu_file)) return 0; - return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file, + return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full); } @@ -1769,7 +1779,7 @@ static int run_tracer_selftest(struct tracer *type) * internal tracing to verify that everything is in order. * If we fail, we do not register this tracer. */ - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); tr->current_trace = type; @@ -1795,7 +1805,7 @@ static int run_tracer_selftest(struct tracer *type) return -1; } /* Only reset on passing, to avoid touching corrupted buffers */ - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE if (type->use_max_tr) { @@ -1962,9 +1972,9 @@ int __init register_tracer(struct tracer *type) return ret; } -static void tracing_reset_cpu(struct trace_buffer *buf, int cpu) +static void tracing_reset_cpu(struct array_buffer *buf, int cpu) { - struct ring_buffer *buffer = buf->buffer; + struct trace_buffer *buffer = buf->buffer; if (!buffer) return; @@ -1978,9 +1988,9 @@ static void tracing_reset_cpu(struct trace_buffer *buf, int cpu) ring_buffer_record_enable(buffer); } -void tracing_reset_online_cpus(struct trace_buffer *buf) +void tracing_reset_online_cpus(struct array_buffer *buf) { - struct ring_buffer *buffer = buf->buffer; + struct trace_buffer *buffer = buf->buffer; int cpu; if (!buffer) @@ -2008,7 +2018,7 @@ void tracing_reset_all_online_cpus(void) if (!tr->clear_trace) continue; tr->clear_trace = false; - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE tracing_reset_online_cpus(&tr->max_buffer); #endif @@ -2098,7 +2108,7 @@ int is_tracing_stopped(void) */ void tracing_start(void) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long flags; if (tracing_disabled) @@ -2117,7 +2127,7 @@ void tracing_start(void) /* Prevent the buffers from switching */ arch_spin_lock(&global_trace.max_lock); - buffer = global_trace.trace_buffer.buffer; + buffer = global_trace.array_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); @@ -2135,7 +2145,7 @@ void tracing_start(void) static void tracing_start_tr(struct trace_array *tr) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long flags; if (tracing_disabled) @@ -2156,7 +2166,7 @@ static void tracing_start_tr(struct trace_array *tr) goto out; } - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); @@ -2172,7 +2182,7 @@ static void tracing_start_tr(struct trace_array *tr) */ void tracing_stop(void) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long flags; raw_spin_lock_irqsave(&global_trace.start_lock, flags); @@ -2182,7 +2192,7 @@ void tracing_stop(void) /* Prevent the buffers from switching */ arch_spin_lock(&global_trace.max_lock); - buffer = global_trace.trace_buffer.buffer; + buffer = global_trace.array_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); @@ -2200,7 +2210,7 @@ void tracing_stop(void) static void tracing_stop_tr(struct trace_array *tr) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long flags; /* If global, we need to also stop the max tracer */ @@ -2211,7 +2221,7 @@ static void tracing_stop_tr(struct trace_array *tr) if (tr->stop_count++) goto out; - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); @@ -2442,7 +2452,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, EXPORT_SYMBOL_GPL(tracing_generic_entry_update); struct ring_buffer_event * -trace_buffer_lock_reserve(struct ring_buffer *buffer, +trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, unsigned long flags, int pc) @@ -2561,10 +2571,10 @@ void trace_buffered_event_disable(void) preempt_enable(); } -static struct ring_buffer *temp_buffer; +static struct trace_buffer *temp_buffer; struct ring_buffer_event * -trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, +trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, struct trace_event_file *trace_file, int type, unsigned long len, unsigned long flags, int pc) @@ -2572,7 +2582,7 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, struct ring_buffer_event *entry; int val; - *current_rb = trace_file->tr->trace_buffer.buffer; + *current_rb = trace_file->tr->array_buffer.buffer; if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && @@ -2610,6 +2620,7 @@ static DEFINE_MUTEX(tracepoint_printk_mutex); static void output_printk(struct trace_event_buffer *fbuffer) { struct trace_event_call *event_call; + struct trace_event_file *file; struct trace_event *event; unsigned long flags; struct trace_iterator *iter = tracepoint_print_iter; @@ -2623,6 +2634,12 @@ static void output_printk(struct trace_event_buffer *fbuffer) !event_call->event.funcs->trace) return; + file = fbuffer->trace_file; + if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || + (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && + !filter_match_preds(file->filter, fbuffer->entry))) + return; + event = &fbuffer->trace_file->event_call->event; spin_lock_irqsave(&tracepoint_iter_lock, flags); @@ -2673,9 +2690,9 @@ void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) if (static_key_false(&tracepoint_printk_key.key)) output_printk(fbuffer); - event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer, + event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer, fbuffer->event, fbuffer->entry, - fbuffer->flags, fbuffer->pc); + fbuffer->flags, fbuffer->pc, fbuffer->regs); } EXPORT_SYMBOL_GPL(trace_event_buffer_commit); @@ -2689,7 +2706,7 @@ EXPORT_SYMBOL_GPL(trace_event_buffer_commit); # define STACK_SKIP 3 void trace_buffer_unlock_commit_regs(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc, struct pt_regs *regs) @@ -2710,7 +2727,7 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, * Similar to trace_buffer_unlock_commit_regs() but do not dump stack. */ void -trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer, +trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, struct ring_buffer_event *event) { __buffer_unlock_commit(buffer, event); @@ -2845,7 +2862,7 @@ trace_function(struct trace_array *tr, int pc) { struct trace_event_call *call = &event_function; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; @@ -2883,7 +2900,7 @@ struct ftrace_stacks { static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); static DEFINE_PER_CPU(int, ftrace_stack_reserve); -static void __ftrace_trace_stack(struct ring_buffer *buffer, +static void __ftrace_trace_stack(struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs) { @@ -2958,7 +2975,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, } static inline void ftrace_trace_stack(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs) { @@ -2971,7 +2988,7 @@ static inline void ftrace_trace_stack(struct trace_array *tr, void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc) { - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; if (rcu_is_watching()) { __ftrace_trace_stack(buffer, flags, skip, pc, NULL); @@ -3009,7 +3026,7 @@ void trace_dump_stack(int skip) /* Skip 1 to skip this function. */ skip++; #endif - __ftrace_trace_stack(global_trace.trace_buffer.buffer, + __ftrace_trace_stack(global_trace.array_buffer.buffer, flags, skip, preempt_count(), NULL); } EXPORT_SYMBOL_GPL(trace_dump_stack); @@ -3018,7 +3035,7 @@ EXPORT_SYMBOL_GPL(trace_dump_stack); static DEFINE_PER_CPU(int, user_stack_count); static void -ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) +ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc) { struct trace_event_call *call = &event_user_stack; struct ring_buffer_event *event; @@ -3063,7 +3080,7 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) preempt_enable(); } #else /* CONFIG_USER_STACKTRACE_SUPPORT */ -static void ftrace_trace_userstack(struct ring_buffer *buffer, +static void ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc) { } @@ -3109,7 +3126,7 @@ static int alloc_percpu_trace_buffer(void) struct trace_buffer_struct *buffers; buffers = alloc_percpu(struct trace_buffer_struct); - if (WARN(!buffers, "Could not allocate percpu trace_printk buffer")) + if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer")) return -ENOMEM; trace_percpu_buffer = buffers; @@ -3154,7 +3171,7 @@ void trace_printk_init_buffers(void) * directly here. If the global_trace.buffer is already * allocated here, then this was called by module code. */ - if (global_trace.trace_buffer.buffer) + if (global_trace.array_buffer.buffer) tracing_start_cmdline_record(); } EXPORT_SYMBOL_GPL(trace_printk_init_buffers); @@ -3188,7 +3205,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) { struct trace_event_call *call = &event_bprint; struct ring_buffer_event *event; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct trace_array *tr = &global_trace; struct bprint_entry *entry; unsigned long flags; @@ -3213,11 +3230,12 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args); if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) - goto out; + goto out_put; local_save_flags(flags); size = sizeof(*entry) + sizeof(u32) * len; - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; + ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, flags, pc); if (!event) @@ -3233,6 +3251,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) } out: + ring_buffer_nest_end(buffer); +out_put: put_trace_buf(); out_nobuffer: @@ -3245,7 +3265,7 @@ EXPORT_SYMBOL_GPL(trace_vbprintk); __printf(3, 0) static int -__trace_array_vprintk(struct ring_buffer *buffer, +__trace_array_vprintk(struct trace_buffer *buffer, unsigned long ip, const char *fmt, va_list args) { struct trace_event_call *call = &event_print; @@ -3275,6 +3295,7 @@ __trace_array_vprintk(struct ring_buffer *buffer, local_save_flags(flags); size = sizeof(*entry) + len + 1; + ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, flags, pc); if (!event) @@ -3289,6 +3310,7 @@ __trace_array_vprintk(struct ring_buffer *buffer, } out: + ring_buffer_nest_end(buffer); put_trace_buf(); out_nobuffer: @@ -3302,7 +3324,7 @@ __printf(3, 0) int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args) { - return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args); + return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args); } __printf(3, 0) @@ -3326,7 +3348,7 @@ int trace_array_printk(struct trace_array *tr, EXPORT_SYMBOL_GPL(trace_array_printk); __printf(3, 4) -int trace_array_printk_buf(struct ring_buffer *buffer, +int trace_array_printk_buf(struct trace_buffer *buffer, unsigned long ip, const char *fmt, ...) { int ret; @@ -3367,7 +3389,7 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, if (buf_iter) event = ring_buffer_iter_peek(buf_iter, ts); else - event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts, + event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts, lost_events); if (event) { @@ -3382,7 +3404,7 @@ static struct trace_entry * __find_next_entry(struct trace_iterator *iter, int *ent_cpu, unsigned long *missing_events, u64 *ent_ts) { - struct ring_buffer *buffer = iter->trace_buffer->buffer; + struct trace_buffer *buffer = iter->array_buffer->buffer; struct trace_entry *ent, *next = NULL; unsigned long lost_events = 0, next_lost = 0; int cpu_file = iter->cpu_file; @@ -3459,7 +3481,7 @@ void *trace_find_next_entry_inc(struct trace_iterator *iter) static void trace_consume(struct trace_iterator *iter) { - ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts, + ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts, &iter->lost_events); } @@ -3497,7 +3519,7 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) unsigned long entries = 0; u64 ts; - per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0; + per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0; buf_iter = trace_buffer_iter(iter, cpu); if (!buf_iter) @@ -3511,13 +3533,13 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) * by the timestamp being before the start of the buffer. */ while ((event = ring_buffer_iter_peek(buf_iter, &ts))) { - if (ts >= iter->trace_buffer->time_start) + if (ts >= iter->array_buffer->time_start) break; entries++; ring_buffer_read(buf_iter, NULL); } - per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries; + per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries; } /* @@ -3602,7 +3624,7 @@ static void s_stop(struct seq_file *m, void *p) } static void -get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total, +get_total_entries_cpu(struct array_buffer *buf, unsigned long *total, unsigned long *entries, int cpu) { unsigned long count; @@ -3624,7 +3646,7 @@ get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total, } static void -get_total_entries(struct trace_buffer *buf, +get_total_entries(struct array_buffer *buf, unsigned long *total, unsigned long *entries) { unsigned long t, e; @@ -3647,7 +3669,7 @@ unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu) if (!tr) tr = &global_trace; - get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu); + get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu); return entries; } @@ -3659,7 +3681,7 @@ unsigned long trace_total_entries(struct trace_array *tr) if (!tr) tr = &global_trace; - get_total_entries(&tr->trace_buffer, &total, &entries); + get_total_entries(&tr->array_buffer, &total, &entries); return entries; } @@ -3676,7 +3698,7 @@ static void print_lat_help_header(struct seq_file *m) "# \\ / ||||| \\ | / \n"); } -static void print_event_info(struct trace_buffer *buf, struct seq_file *m) +static void print_event_info(struct array_buffer *buf, struct seq_file *m) { unsigned long total; unsigned long entries; @@ -3687,7 +3709,7 @@ static void print_event_info(struct trace_buffer *buf, struct seq_file *m) seq_puts(m, "#\n"); } -static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m, +static void print_func_help_header(struct array_buffer *buf, struct seq_file *m, unsigned int flags) { bool tgid = flags & TRACE_ITER_RECORD_TGID; @@ -3698,7 +3720,7 @@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m, seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); } -static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m, +static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m, unsigned int flags) { bool tgid = flags & TRACE_ITER_RECORD_TGID; @@ -3720,7 +3742,7 @@ void print_trace_header(struct seq_file *m, struct trace_iterator *iter) { unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK); - struct trace_buffer *buf = iter->trace_buffer; + struct array_buffer *buf = iter->array_buffer; struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu); struct tracer *type = iter->trace; unsigned long entries; @@ -3795,7 +3817,7 @@ static void test_cpu_buff_start(struct trace_iterator *iter) cpumask_test_cpu(iter->cpu, iter->started)) return; - if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries) + if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries) return; if (cpumask_available(iter->started)) @@ -3929,7 +3951,7 @@ int trace_empty(struct trace_iterator *iter) if (!ring_buffer_iter_empty(buf_iter)) return 0; } else { - if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu)) + if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) return 0; } return 1; @@ -3941,7 +3963,7 @@ int trace_empty(struct trace_iterator *iter) if (!ring_buffer_iter_empty(buf_iter)) return 0; } else { - if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu)) + if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) return 0; } } @@ -4031,10 +4053,10 @@ void trace_default_header(struct seq_file *m) } else { if (!(trace_flags & TRACE_ITER_VERBOSE)) { if (trace_flags & TRACE_ITER_IRQ_INFO) - print_func_help_header_irq(iter->trace_buffer, + print_func_help_header_irq(iter->array_buffer, m, trace_flags); else - print_func_help_header(iter->trace_buffer, m, + print_func_help_header(iter->array_buffer, m, trace_flags); } } @@ -4192,21 +4214,21 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) #ifdef CONFIG_TRACER_MAX_TRACE /* Currently only the top directory has a snapshot */ if (tr->current_trace->print_max || snapshot) - iter->trace_buffer = &tr->max_buffer; + iter->array_buffer = &tr->max_buffer; else #endif - iter->trace_buffer = &tr->trace_buffer; + iter->array_buffer = &tr->array_buffer; iter->snapshot = snapshot; iter->pos = -1; iter->cpu_file = tracing_get_cpu(inode); mutex_init(&iter->mutex); /* Notify the tracer early; before we stop tracing. */ - if (iter->trace && iter->trace->open) + if (iter->trace->open) iter->trace->open(iter); /* Annotate start of buffers if we had overruns */ - if (ring_buffer_overruns(iter->trace_buffer->buffer)) + if (ring_buffer_overruns(iter->array_buffer->buffer)) iter->iter_flags |= TRACE_FILE_ANNOTATE; /* Output in nanoseconds only if we are using a clock in nanoseconds. */ @@ -4220,7 +4242,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { for_each_tracing_cpu(cpu) { iter->buffer_iter[cpu] = - ring_buffer_read_prepare(iter->trace_buffer->buffer, + ring_buffer_read_prepare(iter->array_buffer->buffer, cpu, GFP_KERNEL); } ring_buffer_read_prepare_sync(); @@ -4231,7 +4253,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) } else { cpu = iter->cpu_file; iter->buffer_iter[cpu] = - ring_buffer_read_prepare(iter->trace_buffer->buffer, + ring_buffer_read_prepare(iter->array_buffer->buffer, cpu, GFP_KERNEL); ring_buffer_read_prepare_sync(); ring_buffer_read_start(iter->buffer_iter[cpu]); @@ -4357,7 +4379,7 @@ static int tracing_open(struct inode *inode, struct file *file) /* If this file was open for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { int cpu = tracing_get_cpu(inode); - struct trace_buffer *trace_buf = &tr->trace_buffer; + struct array_buffer *trace_buf = &tr->array_buffer; #ifdef CONFIG_TRACER_MAX_TRACE if (tr->current_trace->print_max) @@ -4554,20 +4576,13 @@ out_err: return count; } -static ssize_t -tracing_cpumask_write(struct file *filp, const char __user *ubuf, - size_t count, loff_t *ppos) +int tracing_set_cpumask(struct trace_array *tr, + cpumask_var_t tracing_cpumask_new) { - struct trace_array *tr = file_inode(filp)->i_private; - cpumask_var_t tracing_cpumask_new; - int err, cpu; - - if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) - return -ENOMEM; + int cpu; - err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); - if (err) - goto err_unlock; + if (!tr) + return -EINVAL; local_irq_disable(); arch_spin_lock(&tr->max_lock); @@ -4578,24 +4593,47 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, */ if (cpumask_test_cpu(cpu, tr->tracing_cpumask) && !cpumask_test_cpu(cpu, tracing_cpumask_new)) { - atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled); - ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu); + atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); + ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); } if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && cpumask_test_cpu(cpu, tracing_cpumask_new)) { - atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled); - ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu); + atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); + ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); } } arch_spin_unlock(&tr->max_lock); local_irq_enable(); cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); + + return 0; +} + +static ssize_t +tracing_cpumask_write(struct file *filp, const char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct trace_array *tr = file_inode(filp)->i_private; + cpumask_var_t tracing_cpumask_new; + int err; + + if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) + return -ENOMEM; + + err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); + if (err) + goto err_free; + + err = tracing_set_cpumask(tr, tracing_cpumask_new); + if (err) + goto err_free; + free_cpumask_var(tracing_cpumask_new); return count; -err_unlock: +err_free: free_cpumask_var(tracing_cpumask_new); return err; @@ -4726,7 +4764,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) ftrace_pid_follow_fork(tr, enabled); if (mask == TRACE_ITER_OVERWRITE) { - ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled); + ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled); #ifdef CONFIG_TRACER_MAX_TRACE ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled); #endif @@ -4740,7 +4778,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) return 0; } -static int trace_set_options(struct trace_array *tr, char *option) +int trace_set_options(struct trace_array *tr, char *option) { char *cmp; int neg = 0; @@ -5361,14 +5399,12 @@ static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos) * Paranoid! If ptr points to end, we don't want to increment past it. * This really should never happen. */ + (*pos)++; ptr = update_eval_map(ptr); if (WARN_ON_ONCE(!ptr)) return NULL; ptr++; - - (*pos)++; - ptr = update_eval_map(ptr); return ptr; @@ -5534,11 +5570,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, int tracer_init(struct tracer *t, struct trace_array *tr) { - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); return t->init(tr); } -static void set_buffer_entries(struct trace_buffer *buf, unsigned long val) +static void set_buffer_entries(struct array_buffer *buf, unsigned long val) { int cpu; @@ -5548,8 +5584,8 @@ static void set_buffer_entries(struct trace_buffer *buf, unsigned long val) #ifdef CONFIG_TRACER_MAX_TRACE /* resize @tr's buffer to the size of @size_tr's entries */ -static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf, - struct trace_buffer *size_buf, int cpu_id) +static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, + struct array_buffer *size_buf, int cpu_id) { int cpu, ret = 0; @@ -5587,10 +5623,10 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, ring_buffer_expanded = true; /* May be called before buffers are initialized */ - if (!tr->trace_buffer.buffer) + if (!tr->array_buffer.buffer) return 0; - ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu); + ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); if (ret < 0) return ret; @@ -5601,8 +5637,8 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); if (ret < 0) { - int r = resize_buffer_duplicate_size(&tr->trace_buffer, - &tr->trace_buffer, cpu); + int r = resize_buffer_duplicate_size(&tr->array_buffer, + &tr->array_buffer, cpu); if (r < 0) { /* * AARGH! We are left with different @@ -5633,15 +5669,15 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, #endif /* CONFIG_TRACER_MAX_TRACE */ if (cpu == RING_BUFFER_ALL_CPUS) - set_buffer_entries(&tr->trace_buffer, size); + set_buffer_entries(&tr->array_buffer, size); else - per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size; + per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size; return ret; } -static ssize_t tracing_resize_ring_buffer(struct trace_array *tr, - unsigned long size, int cpu_id) +ssize_t tracing_resize_ring_buffer(struct trace_array *tr, + unsigned long size, int cpu_id) { int ret = size; @@ -5720,7 +5756,7 @@ static void add_tracer_options(struct trace_array *tr, struct tracer *t) create_trace_option_files(tr, t); } -static int tracing_set_tracer(struct trace_array *tr, const char *buf) +int tracing_set_tracer(struct trace_array *tr, const char *buf) { struct tracer *t; #ifdef CONFIG_TRACER_MAX_TRACE @@ -5979,7 +6015,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; iter->tr = tr; - iter->trace_buffer = &tr->trace_buffer; + iter->array_buffer = &tr->array_buffer; iter->cpu_file = tracing_get_cpu(inode); mutex_init(&iter->mutex); filp->private_data = iter; @@ -6039,7 +6075,7 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl */ return EPOLLIN | EPOLLRDNORM; else - return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file, + return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, filp, poll_table); } @@ -6356,8 +6392,8 @@ tracing_entries_read(struct file *filp, char __user *ubuf, for_each_tracing_cpu(cpu) { /* fill in the size from first enabled cpu */ if (size == 0) - size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries; - if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) { + size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries; + if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) { buf_size_same = 0; break; } @@ -6373,7 +6409,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf, } else r = sprintf(buf, "X\n"); } else - r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10); + r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10); mutex_unlock(&trace_types_lock); @@ -6420,7 +6456,7 @@ tracing_total_entries_read(struct file *filp, char __user *ubuf, mutex_lock(&trace_types_lock); for_each_tracing_cpu(cpu) { - size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10; + size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10; if (!ring_buffer_expanded) expanded_size += trace_buf_size >> 10; } @@ -6470,7 +6506,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, struct trace_array *tr = filp->private_data; struct ring_buffer_event *event; enum event_trigger_type tt = ETT_NONE; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct print_entry *entry; unsigned long irq_flags; ssize_t written; @@ -6499,7 +6535,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (cnt < FAULTED_SIZE) size += FAULTED_SIZE - cnt; - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, irq_flags, preempt_count()); if (unlikely(!event)) @@ -6550,7 +6586,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, { struct trace_array *tr = filp->private_data; struct ring_buffer_event *event; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct raw_data_entry *entry; unsigned long irq_flags; ssize_t written; @@ -6579,7 +6615,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, if (cnt < FAULT_SIZE_ID) size += FAULT_SIZE_ID - cnt; - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, irq_flags, preempt_count()); if (!event) @@ -6634,13 +6670,13 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr) tr->clock_id = i; - ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func); + ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func); /* * New clock may not be consistent with the previous clock. * Reset the buffer so that it doesn't have incomparable timestamps. */ - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE if (tr->max_buffer.buffer) @@ -6703,7 +6739,7 @@ static int tracing_time_stamp_mode_show(struct seq_file *m, void *v) mutex_lock(&trace_types_lock); - if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer)) + if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer)) seq_puts(m, "delta [absolute]\n"); else seq_puts(m, "[delta] absolute\n"); @@ -6748,7 +6784,7 @@ int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs) goto out; } - ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs); + ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs); #ifdef CONFIG_TRACER_MAX_TRACE if (tr->max_buffer.buffer) @@ -6797,7 +6833,7 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file) ret = 0; iter->tr = tr; - iter->trace_buffer = &tr->max_buffer; + iter->array_buffer = &tr->max_buffer; iter->cpu_file = tracing_get_cpu(inode); m->private = iter; file->private_data = m; @@ -6860,7 +6896,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, #endif if (tr->allocated_snapshot) ret = resize_buffer_duplicate_size(&tr->max_buffer, - &tr->trace_buffer, iter->cpu_file); + &tr->array_buffer, iter->cpu_file); else ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) @@ -6935,7 +6971,7 @@ static int snapshot_raw_open(struct inode *inode, struct file *filp) } info->iter.snapshot = true; - info->iter.trace_buffer = &info->iter.tr->max_buffer; + info->iter.array_buffer = &info->iter.tr->max_buffer; return ret; } @@ -7310,7 +7346,7 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) info->iter.tr = tr; info->iter.cpu_file = tracing_get_cpu(inode); info->iter.trace = tr->current_trace; - info->iter.trace_buffer = &tr->trace_buffer; + info->iter.array_buffer = &tr->array_buffer; info->spare = NULL; /* Force reading ring buffer for first read */ info->read = (unsigned int)-1; @@ -7355,7 +7391,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, #endif if (!info->spare) { - info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer, + info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer, iter->cpu_file); if (IS_ERR(info->spare)) { ret = PTR_ERR(info->spare); @@ -7373,7 +7409,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, again: trace_access_lock(iter->cpu_file); - ret = ring_buffer_read_page(iter->trace_buffer->buffer, + ret = ring_buffer_read_page(iter->array_buffer->buffer, &info->spare, count, iter->cpu_file, 0); @@ -7423,7 +7459,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) __trace_array_put(iter->tr); if (info->spare) - ring_buffer_free_read_page(iter->trace_buffer->buffer, + ring_buffer_free_read_page(iter->array_buffer->buffer, info->spare_cpu, info->spare); kfree(info); @@ -7433,7 +7469,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) } struct buffer_ref { - struct ring_buffer *buffer; + struct trace_buffer *buffer; void *page; int cpu; refcount_t refcount; @@ -7528,7 +7564,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, again: trace_access_lock(iter->cpu_file); - entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file); + entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) { struct page *page; @@ -7541,7 +7577,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, } refcount_set(&ref->refcount, 1); - ref->buffer = iter->trace_buffer->buffer; + ref->buffer = iter->array_buffer->buffer; ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); if (IS_ERR(ref->page)) { ret = PTR_ERR(ref->page); @@ -7569,7 +7605,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, spd.nr_pages++; *ppos += PAGE_SIZE; - entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file); + entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); } trace_access_unlock(iter->cpu_file); @@ -7613,7 +7649,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, { struct inode *inode = file_inode(filp); struct trace_array *tr = inode->i_private; - struct trace_buffer *trace_buf = &tr->trace_buffer; + struct array_buffer *trace_buf = &tr->array_buffer; int cpu = tracing_get_cpu(inode); struct trace_seq *s; unsigned long cnt; @@ -7894,7 +7930,7 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer); - WARN_ONCE(!tr->percpu_dir, + MEM_FAIL(!tr->percpu_dir, "Could not create tracefs directory 'per_cpu/%d'\n", cpu); return tr->percpu_dir; @@ -8215,7 +8251,7 @@ create_trace_option_files(struct trace_array *tr, struct tracer *tracer) for (cnt = 0; opts[cnt].name; cnt++) { create_trace_option_file(tr, &topts[cnt], flags, &opts[cnt]); - WARN_ONCE(topts[cnt].entry == NULL, + MEM_FAIL(topts[cnt].entry == NULL, "Failed to create trace option: %s", opts[cnt].name); } @@ -8272,7 +8308,7 @@ rb_simple_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; unsigned long val; int ret; @@ -8362,7 +8398,7 @@ static void init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer); static int -allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size) +allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size) { enum ring_buffer_flags rb_flags; @@ -8382,8 +8418,8 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size } /* Allocate the first page for all buffers */ - set_buffer_entries(&tr->trace_buffer, - ring_buffer_size(tr->trace_buffer.buffer, 0)); + set_buffer_entries(&tr->array_buffer, + ring_buffer_size(tr->array_buffer.buffer, 0)); return 0; } @@ -8392,18 +8428,18 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) { int ret; - ret = allocate_trace_buffer(tr, &tr->trace_buffer, size); + ret = allocate_trace_buffer(tr, &tr->array_buffer, size); if (ret) return ret; #ifdef CONFIG_TRACER_MAX_TRACE ret = allocate_trace_buffer(tr, &tr->max_buffer, allocate_snapshot ? size : 1); - if (WARN_ON(ret)) { - ring_buffer_free(tr->trace_buffer.buffer); - tr->trace_buffer.buffer = NULL; - free_percpu(tr->trace_buffer.data); - tr->trace_buffer.data = NULL; + if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) { + ring_buffer_free(tr->array_buffer.buffer); + tr->array_buffer.buffer = NULL; + free_percpu(tr->array_buffer.data); + tr->array_buffer.data = NULL; return -ENOMEM; } tr->allocated_snapshot = allocate_snapshot; @@ -8417,7 +8453,7 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) return 0; } -static void free_trace_buffer(struct trace_buffer *buf) +static void free_trace_buffer(struct array_buffer *buf) { if (buf->buffer) { ring_buffer_free(buf->buffer); @@ -8432,7 +8468,7 @@ static void free_trace_buffers(struct trace_array *tr) if (!tr) return; - free_trace_buffer(&tr->trace_buffer); + free_trace_buffer(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE free_trace_buffer(&tr->max_buffer); @@ -8463,6 +8499,34 @@ static void update_tracer_options(struct trace_array *tr) mutex_unlock(&trace_types_lock); } +/* Must have trace_types_lock held */ +struct trace_array *trace_array_find(const char *instance) +{ + struct trace_array *tr, *found = NULL; + + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (tr->name && strcmp(tr->name, instance) == 0) { + found = tr; + break; + } + } + + return found; +} + +struct trace_array *trace_array_find_get(const char *instance) +{ + struct trace_array *tr; + + mutex_lock(&trace_types_lock); + tr = trace_array_find(instance); + if (tr) + tr->ref++; + mutex_unlock(&trace_types_lock); + + return tr; +} + static struct trace_array *trace_array_create(const char *name) { struct trace_array *tr; @@ -8504,7 +8568,7 @@ static struct trace_array *trace_array_create(const char *name) ret = event_trace_add_tracer(tr->dir, tr); if (ret) { - tracefs_remove_recursive(tr->dir); + tracefs_remove(tr->dir); goto out_free_tr; } @@ -8539,10 +8603,8 @@ static int instance_mkdir(const char *name) mutex_lock(&trace_types_lock); ret = -EEXIST; - list_for_each_entry(tr, &ftrace_trace_arrays, list) { - if (tr->name && strcmp(tr->name, name) == 0) - goto out_unlock; - } + if (trace_array_find(name)) + goto out_unlock; tr = trace_array_create(name); @@ -8564,6 +8626,10 @@ out_unlock: * NOTE: This function increments the reference counter associated with the * trace array returned. This makes sure it cannot be freed while in use. * Use trace_array_put() once the trace array is no longer needed. + * If the trace_array is to be freed, trace_array_destroy() needs to + * be called after the trace_array_put(), or simply let user space delete + * it from the tracefs instances directory. But until the + * trace_array_put() is called, user space can not delete it. * */ struct trace_array *trace_array_get_by_name(const char *name) @@ -8613,7 +8679,7 @@ static int __remove_instance(struct trace_array *tr) event_trace_del_tracer(tr); ftrace_clear_pids(tr); ftrace_destroy_function_files(tr); - tracefs_remove_recursive(tr->dir); + tracefs_remove(tr->dir); free_trace_buffers(tr); for (i = 0; i < tr->nr_topts; i++) { @@ -8666,12 +8732,9 @@ static int instance_rmdir(const char *name) mutex_lock(&trace_types_lock); ret = -ENODEV; - list_for_each_entry(tr, &ftrace_trace_arrays, list) { - if (tr->name && strcmp(tr->name, name) == 0) { - ret = __remove_instance(tr); - break; - } - } + tr = trace_array_find(name); + if (tr) + ret = __remove_instance(tr); mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); @@ -8684,7 +8747,7 @@ static __init void create_trace_instances(struct dentry *d_tracer) trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer, instance_mkdir, instance_rmdir); - if (WARN_ON(!trace_instance_dir)) + if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n")) return; } @@ -8754,7 +8817,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) #endif if (ftrace_create_function_files(tr, d_tracer)) - WARN(1, "Could not allocate function filter files"); + MEM_FAIL(1, "Could not allocate function filter files"); #ifdef CONFIG_TRACER_SNAPSHOT trace_create_file("snapshot", 0644, d_tracer, @@ -9036,13 +9099,13 @@ void trace_init_global_iter(struct trace_iterator *iter) iter->tr = &global_trace; iter->trace = iter->tr->current_trace; iter->cpu_file = RING_BUFFER_ALL_CPUS; - iter->trace_buffer = &global_trace.trace_buffer; + iter->array_buffer = &global_trace.array_buffer; if (iter->trace && iter->trace->open) iter->trace->open(iter); /* Annotate start of buffers if we had overruns */ - if (ring_buffer_overruns(iter->trace_buffer->buffer)) + if (ring_buffer_overruns(iter->array_buffer->buffer)) iter->iter_flags |= TRACE_FILE_ANNOTATE; /* Output in nanoseconds only if we are using a clock in nanoseconds. */ @@ -9083,7 +9146,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) trace_init_global_iter(&iter); for_each_tracing_cpu(cpu) { - atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); } old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ; @@ -9151,7 +9214,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) tr->trace_flags |= old_userobj; for_each_tracing_cpu(cpu) { - atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); } atomic_dec(&dump_running); printk_nmi_direct_exit(); @@ -9306,8 +9369,7 @@ __init static int tracer_alloc_buffers(void) /* TODO: make the number of buffers hot pluggable with CPUS */ if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { - printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); - WARN_ON(1); + MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n"); goto out_free_savedcmd; } @@ -9380,7 +9442,8 @@ void __init early_trace_init(void) if (tracepoint_printk) { tracepoint_print_iter = kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL); - if (WARN_ON(!tracepoint_print_iter)) + if (MEM_FAIL(!tracepoint_print_iter, + "Failed to allocate trace iterator\n")) tracepoint_printk = 0; else static_key_enable(&tracepoint_printk_key.key); @@ -9420,6 +9483,11 @@ __init static int tracing_set_default_clock(void) { /* sched_clock_stable() is determined in late_initcall */ if (!trace_boot_clock && !sched_clock_stable()) { + if (security_locked_down(LOCKDOWN_TRACEFS)) { + pr_warn("Can not set tracing clock due to lockdown\n"); + return -EPERM; + } + printk(KERN_WARNING "Unstable clock detected, switching default tracing clock to \"global\"\n" "If you want to keep using the local clock, then add:\n" diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 63bf60f79398..99372dd7d168 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -52,6 +52,9 @@ enum trace_type { #undef __field #define __field(type, item) type item; +#undef __field_fn +#define __field_fn(type, item) type item; + #undef __field_struct #define __field_struct(type, item) __field(type, item) @@ -71,29 +74,37 @@ enum trace_type { #define F_STRUCT(args...) args #undef FTRACE_ENTRY -#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ +#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ struct struct_name { \ struct trace_entry ent; \ tstruct \ } #undef FTRACE_ENTRY_DUP -#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk, filter) +#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk) #undef FTRACE_ENTRY_REG -#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \ - filter, regfn) \ - FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ - filter) +#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, regfn) \ + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) #undef FTRACE_ENTRY_PACKED -#define FTRACE_ENTRY_PACKED(name, struct_name, id, tstruct, print, \ - filter) \ - FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ - filter) __packed +#define FTRACE_ENTRY_PACKED(name, struct_name, id, tstruct, print) \ + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) __packed #include "trace_entries.h" +/* Use this for memory failure errors */ +#define MEM_FAIL(condition, fmt, ...) ({ \ + static bool __section(.data.once) __warned; \ + int __ret_warn_once = !!(condition); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + __warned = true; \ + pr_err("ERROR: " fmt, ##__VA_ARGS__); \ + } \ + unlikely(__ret_warn_once); \ +}) + /* * syscalls are special, and need special handling, this is why * they are not included in trace_entries.h @@ -176,9 +187,9 @@ struct trace_array_cpu { struct tracer; struct trace_option_dentry; -struct trace_buffer { +struct array_buffer { struct trace_array *tr; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct trace_array_cpu __percpu *data; u64 time_start; int cpu; @@ -249,7 +260,7 @@ struct cond_snapshot { struct trace_array { struct list_head list; char *name; - struct trace_buffer trace_buffer; + struct array_buffer array_buffer; #ifdef CONFIG_TRACER_MAX_TRACE /* * The max_buffer is used to snapshot the trace when a maximum @@ -257,12 +268,12 @@ struct trace_array { * Some tracers will use this to store a maximum trace while * it continues examining live traces. * - * The buffers for the max_buffer are set up the same as the trace_buffer + * The buffers for the max_buffer are set up the same as the array_buffer * When a snapshot is taken, the buffer of the max_buffer is swapped - * with the buffer of the trace_buffer and the buffers are reset for - * the trace_buffer so the tracing can continue. + * with the buffer of the array_buffer and the buffers are reset for + * the array_buffer so the tracing can continue. */ - struct trace_buffer max_buffer; + struct array_buffer max_buffer; bool allocated_snapshot; #endif #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) @@ -346,6 +357,8 @@ extern struct mutex trace_types_lock; extern int trace_array_get(struct trace_array *tr); extern int tracing_check_open_get_tr(struct trace_array *tr); +extern struct trace_array *trace_array_find(const char *instance); +extern struct trace_array *trace_array_find_get(const char *instance); extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs); extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); @@ -685,7 +698,7 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu) int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); -void tracing_reset_online_cpus(struct trace_buffer *buf); +void tracing_reset_online_cpus(struct array_buffer *buf); void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); int tracing_open_generic(struct inode *inode, struct file *filp); @@ -705,7 +718,7 @@ struct dentry *tracing_init_dentry(void); struct ring_buffer_event; struct ring_buffer_event * -trace_buffer_lock_reserve(struct ring_buffer *buffer, +trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, unsigned long flags, @@ -717,7 +730,7 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts); -void trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer, +void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, struct ring_buffer_event *event); int trace_empty(struct trace_iterator *iter); @@ -873,7 +886,7 @@ trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args); -int trace_array_printk_buf(struct ring_buffer *buffer, +int trace_array_printk_buf(struct trace_buffer *buffer, unsigned long ip, const char *fmt, ...); void trace_printk_seq(struct trace_seq *s); enum print_line_t print_trace_line(struct trace_iterator *iter); @@ -950,22 +963,31 @@ extern void __trace_graph_return(struct trace_array *tr, unsigned long flags, int pc); #ifdef CONFIG_DYNAMIC_FTRACE -extern struct ftrace_hash *ftrace_graph_hash; -extern struct ftrace_hash *ftrace_graph_notrace_hash; +extern struct ftrace_hash __rcu *ftrace_graph_hash; +extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash; static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { unsigned long addr = trace->func; int ret = 0; + struct ftrace_hash *hash; preempt_disable_notrace(); - if (ftrace_hash_empty(ftrace_graph_hash)) { + /* + * Have to open code "rcu_dereference_sched()" because the + * function graph tracer can be called when RCU is not + * "watching". + * Protected with schedule_on_each_cpu(ftrace_sync) + */ + hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible()); + + if (ftrace_hash_empty(hash)) { ret = 1; goto out; } - if (ftrace_lookup_ip(ftrace_graph_hash, addr)) { + if (ftrace_lookup_ip(hash, addr)) { /* * This needs to be cleared on the return functions @@ -1001,10 +1023,20 @@ static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) static inline int ftrace_graph_notrace_addr(unsigned long addr) { int ret = 0; + struct ftrace_hash *notrace_hash; preempt_disable_notrace(); - if (ftrace_lookup_ip(ftrace_graph_notrace_hash, addr)) + /* + * Have to open code "rcu_dereference_sched()" because the + * function graph tracer can be called when RCU is not + * "watching". + * Protected with schedule_on_each_cpu(ftrace_sync) + */ + notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, + !preemptible()); + + if (ftrace_lookup_ip(notrace_hash, addr)) ret = 1; preempt_enable_notrace(); @@ -1057,7 +1089,7 @@ struct ftrace_func_command { extern bool ftrace_filter_param __initdata; static inline int ftrace_trace_task(struct trace_array *tr) { - return !this_cpu_read(tr->trace_buffer.data->ftrace_ignore_pid); + return !this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid); } extern int ftrace_is_dead(void); int ftrace_create_function_files(struct trace_array *tr, @@ -1145,6 +1177,11 @@ int unregister_ftrace_command(struct ftrace_func_command *cmd); void ftrace_create_filter_files(struct ftrace_ops *ops, struct dentry *parent); void ftrace_destroy_filter_files(struct ftrace_ops *ops); + +extern int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset); +extern int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset); #else struct ftrace_func_command; @@ -1367,17 +1404,17 @@ struct trace_subsystem_dir { }; extern int call_filter_check_discard(struct trace_event_call *call, void *rec, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event); void trace_buffer_unlock_commit_regs(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc, struct pt_regs *regs); static inline void trace_buffer_unlock_commit(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc) { @@ -1390,7 +1427,7 @@ void trace_buffered_event_disable(void); void trace_buffered_event_enable(void); static inline void -__trace_event_discard_commit(struct ring_buffer *buffer, +__trace_event_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { if (this_cpu_read(trace_buffered_event) == event) { @@ -1416,7 +1453,7 @@ __trace_event_discard_commit(struct ring_buffer *buffer, */ static inline bool __event_trigger_test_discard(struct trace_event_file *file, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, enum event_trigger_type *tt) @@ -1451,7 +1488,7 @@ __event_trigger_test_discard(struct trace_event_file *file, */ static inline void event_trigger_unlock_commit(struct trace_event_file *file, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc) { @@ -1482,7 +1519,7 @@ event_trigger_unlock_commit(struct trace_event_file *file, */ static inline void event_trigger_unlock_commit_regs(struct trace_event_file *file, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc, struct pt_regs *regs) @@ -1893,6 +1930,15 @@ void trace_printk_start_comm(void); int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); +/* Used from boot time tracer */ +extern int trace_set_options(struct trace_array *tr, char *option); +extern int tracing_set_tracer(struct trace_array *tr, const char *buf); +extern ssize_t tracing_resize_ring_buffer(struct trace_array *tr, + unsigned long size, int cpu_id); +extern int tracing_set_cpumask(struct trace_array *tr, + cpumask_var_t tracing_cpumask_new); + + #define MAX_EVENT_NAME_LEN 64 extern int trace_run_command(const char *buf, int (*createfn)(int, char**)); @@ -1917,17 +1963,15 @@ extern void tracing_log_err(struct trace_array *tr, #define internal_trace_puts(str) __trace_puts(_THIS_IP_, str, strlen(str)) #undef FTRACE_ENTRY -#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ +#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ extern struct trace_event_call \ __aligned(4) event_##call; #undef FTRACE_ENTRY_DUP -#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \ - FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \ - filter) +#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ + FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) #undef FTRACE_ENTRY_PACKED -#define FTRACE_ENTRY_PACKED(call, struct_name, id, tstruct, print, filter) \ - FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \ - filter) +#define FTRACE_ENTRY_PACKED(call, struct_name, id, tstruct, print) \ + FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) #include "trace_entries.h" @@ -1952,6 +1996,9 @@ static inline const char *get_syscall_name(int syscall) #ifdef CONFIG_EVENT_TRACING void trace_event_init(void); void trace_event_eval_update(struct trace_eval_map **map, int len); +/* Used from boot time tracer */ +extern int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); +extern int trigger_process_regex(struct trace_event_file *file, char *buff); #else static inline void __init trace_event_init(void) { } static inline void trace_event_eval_update(struct trace_eval_map **map, int len) { } diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c new file mode 100644 index 000000000000..06d7feb5255f --- /dev/null +++ b/kernel/trace/trace_boot.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * trace_boot.c + * Tracing kernel boot-time + */ + +#define pr_fmt(fmt) "trace_boot: " fmt + +#include <linux/bootconfig.h> +#include <linux/cpumask.h> +#include <linux/ftrace.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/mutex.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/trace.h> +#include <linux/trace_events.h> + +#include "trace.h" + +#define MAX_BUF_LEN 256 + +static void __init +trace_boot_set_instance_options(struct trace_array *tr, struct xbc_node *node) +{ + struct xbc_node *anode; + const char *p; + char buf[MAX_BUF_LEN]; + unsigned long v = 0; + + /* Common ftrace options */ + xbc_node_for_each_array_value(node, "options", anode, p) { + if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) { + pr_err("String is too long: %s\n", p); + continue; + } + + if (trace_set_options(tr, buf) < 0) + pr_err("Failed to set option: %s\n", buf); + } + + p = xbc_node_find_value(node, "trace_clock", NULL); + if (p && *p != '\0') { + if (tracing_set_clock(tr, p) < 0) + pr_err("Failed to set trace clock: %s\n", p); + } + + p = xbc_node_find_value(node, "buffer_size", NULL); + if (p && *p != '\0') { + v = memparse(p, NULL); + if (v < PAGE_SIZE) + pr_err("Buffer size is too small: %s\n", p); + if (tracing_resize_ring_buffer(tr, v, RING_BUFFER_ALL_CPUS) < 0) + pr_err("Failed to resize trace buffer to %s\n", p); + } + + p = xbc_node_find_value(node, "cpumask", NULL); + if (p && *p != '\0') { + cpumask_var_t new_mask; + + if (alloc_cpumask_var(&new_mask, GFP_KERNEL)) { + if (cpumask_parse(p, new_mask) < 0 || + tracing_set_cpumask(tr, new_mask) < 0) + pr_err("Failed to set new CPU mask %s\n", p); + free_cpumask_var(new_mask); + } + } +} + +#ifdef CONFIG_EVENT_TRACING +static void __init +trace_boot_enable_events(struct trace_array *tr, struct xbc_node *node) +{ + struct xbc_node *anode; + char buf[MAX_BUF_LEN]; + const char *p; + + xbc_node_for_each_array_value(node, "events", anode, p) { + if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) { + pr_err("String is too long: %s\n", p); + continue; + } + + if (ftrace_set_clr_event(tr, buf, 1) < 0) + pr_err("Failed to enable event: %s\n", p); + } +} + +#ifdef CONFIG_KPROBE_EVENTS +static int __init +trace_boot_add_kprobe_event(struct xbc_node *node, const char *event) +{ + struct dynevent_cmd cmd; + struct xbc_node *anode; + char buf[MAX_BUF_LEN]; + const char *val; + int ret; + + kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN); + + ret = kprobe_event_gen_cmd_start(&cmd, event, NULL); + if (ret) + return ret; + + xbc_node_for_each_array_value(node, "probes", anode, val) { + ret = kprobe_event_add_field(&cmd, val); + if (ret) + return ret; + } + + ret = kprobe_event_gen_cmd_end(&cmd); + if (ret) + pr_err("Failed to add probe: %s\n", buf); + + return ret; +} +#else +static inline int __init +trace_boot_add_kprobe_event(struct xbc_node *node, const char *event) +{ + pr_err("Kprobe event is not supported.\n"); + return -ENOTSUPP; +} +#endif + +#ifdef CONFIG_HIST_TRIGGERS +static int __init +trace_boot_add_synth_event(struct xbc_node *node, const char *event) +{ + struct dynevent_cmd cmd; + struct xbc_node *anode; + char buf[MAX_BUF_LEN]; + const char *p; + int ret; + + synth_event_cmd_init(&cmd, buf, MAX_BUF_LEN); + + ret = synth_event_gen_cmd_start(&cmd, event, NULL); + if (ret) + return ret; + + xbc_node_for_each_array_value(node, "fields", anode, p) { + ret = synth_event_add_field_str(&cmd, p); + if (ret) + return ret; + } + + ret = synth_event_gen_cmd_end(&cmd); + if (ret < 0) + pr_err("Failed to add synthetic event: %s\n", buf); + + return ret; +} +#else +static inline int __init +trace_boot_add_synth_event(struct xbc_node *node, const char *event) +{ + pr_err("Synthetic event is not supported.\n"); + return -ENOTSUPP; +} +#endif + +static void __init +trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode, + struct xbc_node *enode) +{ + struct trace_event_file *file; + struct xbc_node *anode; + char buf[MAX_BUF_LEN]; + const char *p, *group, *event; + + group = xbc_node_get_data(gnode); + event = xbc_node_get_data(enode); + + if (!strcmp(group, "kprobes")) + if (trace_boot_add_kprobe_event(enode, event) < 0) + return; + if (!strcmp(group, "synthetic")) + if (trace_boot_add_synth_event(enode, event) < 0) + return; + + mutex_lock(&event_mutex); + file = find_event_file(tr, group, event); + if (!file) { + pr_err("Failed to find event: %s:%s\n", group, event); + goto out; + } + + p = xbc_node_find_value(enode, "filter", NULL); + if (p && *p != '\0') { + if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) + pr_err("filter string is too long: %s\n", p); + else if (apply_event_filter(file, buf) < 0) + pr_err("Failed to apply filter: %s\n", buf); + } + + xbc_node_for_each_array_value(enode, "actions", anode, p) { + if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) + pr_err("action string is too long: %s\n", p); + else if (trigger_process_regex(file, buf) < 0) + pr_err("Failed to apply an action: %s\n", buf); + } + + if (xbc_node_find_value(enode, "enable", NULL)) { + if (trace_event_enable_disable(file, 1, 0) < 0) + pr_err("Failed to enable event node: %s:%s\n", + group, event); + } +out: + mutex_unlock(&event_mutex); +} + +static void __init +trace_boot_init_events(struct trace_array *tr, struct xbc_node *node) +{ + struct xbc_node *gnode, *enode; + + node = xbc_node_find_child(node, "event"); + if (!node) + return; + /* per-event key starts with "event.GROUP.EVENT" */ + xbc_node_for_each_child(node, gnode) + xbc_node_for_each_child(gnode, enode) + trace_boot_init_one_event(tr, gnode, enode); +} +#else +#define trace_boot_enable_events(tr, node) do {} while (0) +#define trace_boot_init_events(tr, node) do {} while (0) +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE +static void __init +trace_boot_set_ftrace_filter(struct trace_array *tr, struct xbc_node *node) +{ + struct xbc_node *anode; + const char *p; + char *q; + + xbc_node_for_each_array_value(node, "ftrace.filters", anode, p) { + q = kstrdup(p, GFP_KERNEL); + if (!q) + return; + if (ftrace_set_filter(tr->ops, q, strlen(q), 0) < 0) + pr_err("Failed to add %s to ftrace filter\n", p); + else + ftrace_filter_param = true; + kfree(q); + } + xbc_node_for_each_array_value(node, "ftrace.notraces", anode, p) { + q = kstrdup(p, GFP_KERNEL); + if (!q) + return; + if (ftrace_set_notrace(tr->ops, q, strlen(q), 0) < 0) + pr_err("Failed to add %s to ftrace filter\n", p); + else + ftrace_filter_param = true; + kfree(q); + } +} +#else +#define trace_boot_set_ftrace_filter(tr, node) do {} while (0) +#endif + +static void __init +trace_boot_enable_tracer(struct trace_array *tr, struct xbc_node *node) +{ + const char *p; + + trace_boot_set_ftrace_filter(tr, node); + + p = xbc_node_find_value(node, "tracer", NULL); + if (p && *p != '\0') { + if (tracing_set_tracer(tr, p) < 0) + pr_err("Failed to set given tracer: %s\n", p); + } +} + +static void __init +trace_boot_init_one_instance(struct trace_array *tr, struct xbc_node *node) +{ + trace_boot_set_instance_options(tr, node); + trace_boot_init_events(tr, node); + trace_boot_enable_events(tr, node); + trace_boot_enable_tracer(tr, node); +} + +static void __init +trace_boot_init_instances(struct xbc_node *node) +{ + struct xbc_node *inode; + struct trace_array *tr; + const char *p; + + node = xbc_node_find_child(node, "instance"); + if (!node) + return; + + xbc_node_for_each_child(node, inode) { + p = xbc_node_get_data(inode); + if (!p || *p == '\0') + continue; + + tr = trace_array_get_by_name(p); + if (!tr) { + pr_err("Failed to get trace instance %s\n", p); + continue; + } + trace_boot_init_one_instance(tr, inode); + trace_array_put(tr); + } +} + +static int __init trace_boot_init(void) +{ + struct xbc_node *trace_node; + struct trace_array *tr; + + trace_node = xbc_find_node("ftrace"); + if (!trace_node) + return 0; + + tr = top_trace_array(); + if (!tr) + return 0; + + /* Global trace array is also one instance */ + trace_boot_init_one_instance(tr, trace_node); + trace_boot_init_instances(trace_node); + + return 0; +} + +fs_initcall(trace_boot_init); diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 88e158d27965..eff099123aa2 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -32,10 +32,10 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) { struct trace_event_call *call = &event_branch; struct trace_array *tr = branch_tracer; + struct trace_buffer *buffer; struct trace_array_cpu *data; struct ring_buffer_event *event; struct trace_branch *entry; - struct ring_buffer *buffer; unsigned long flags; int pc; const char *p; @@ -55,12 +55,12 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) raw_local_irq_save(flags); current->trace_recursion |= TRACE_BRANCH_BIT; - data = this_cpu_ptr(tr->trace_buffer.data); + data = this_cpu_ptr(tr->array_buffer.data); if (atomic_read(&data->disabled)) goto out; pc = preempt_count(); - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH, sizeof(*entry), flags, pc); if (!event) diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 89779eb84a07..9f2e8520b748 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -223,3 +223,215 @@ static __init int init_dynamic_event(void) return 0; } fs_initcall(init_dynamic_event); + +/** + * dynevent_arg_add - Add an arg to a dynevent_cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event cmd + * @arg: The argument to append to the current cmd + * @check_arg: An (optional) pointer to a function checking arg sanity + * + * Append an argument to a dynevent_cmd. The argument string will be + * appended to the current cmd string, followed by a separator, if + * applicable. Before the argument is added, the @check_arg function, + * if present, will be used to check the sanity of the current arg + * string. + * + * The cmd string and separator should be set using the + * dynevent_arg_init() before any arguments are added using this + * function. + * + * Return: 0 if successful, error otherwise. + */ +int dynevent_arg_add(struct dynevent_cmd *cmd, + struct dynevent_arg *arg, + dynevent_check_arg_fn_t check_arg) +{ + int ret = 0; + + if (check_arg) { + ret = check_arg(arg); + if (ret) + return ret; + } + + ret = seq_buf_printf(&cmd->seq, " %s%c", arg->str, arg->separator); + if (ret) { + pr_err("String is too long: %s%c\n", arg->str, arg->separator); + return -E2BIG; + } + + return ret; +} + +/** + * dynevent_arg_pair_add - Add an arg pair to a dynevent_cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event cmd + * @arg_pair: The argument pair to append to the current cmd + * @check_arg: An (optional) pointer to a function checking arg sanity + * + * Append an argument pair to a dynevent_cmd. An argument pair + * consists of a left-hand-side argument and a right-hand-side + * argument separated by an operator, which can be whitespace, all + * followed by a separator, if applicable. This can be used to add + * arguments of the form 'type variable_name;' or 'x+y'. + * + * The lhs argument string will be appended to the current cmd string, + * followed by an operator, if applicable, followd by the rhs string, + * followed finally by a separator, if applicable. Before the + * argument is added, the @check_arg function, if present, will be + * used to check the sanity of the current arg strings. + * + * The cmd strings, operator, and separator should be set using the + * dynevent_arg_pair_init() before any arguments are added using this + * function. + * + * Return: 0 if successful, error otherwise. + */ +int dynevent_arg_pair_add(struct dynevent_cmd *cmd, + struct dynevent_arg_pair *arg_pair, + dynevent_check_arg_fn_t check_arg) +{ + int ret = 0; + + if (check_arg) { + ret = check_arg(arg_pair); + if (ret) + return ret; + } + + ret = seq_buf_printf(&cmd->seq, " %s%c%s%c", arg_pair->lhs, + arg_pair->operator, arg_pair->rhs, + arg_pair->separator); + if (ret) { + pr_err("field string is too long: %s%c%s%c\n", arg_pair->lhs, + arg_pair->operator, arg_pair->rhs, + arg_pair->separator); + return -E2BIG; + } + + return ret; +} + +/** + * dynevent_str_add - Add a string to a dynevent_cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event cmd + * @str: The string to append to the current cmd + * + * Append a string to a dynevent_cmd. The string will be appended to + * the current cmd string as-is, with nothing prepended or appended. + * + * Return: 0 if successful, error otherwise. + */ +int dynevent_str_add(struct dynevent_cmd *cmd, const char *str) +{ + int ret = 0; + + ret = seq_buf_puts(&cmd->seq, str); + if (ret) { + pr_err("String is too long: %s\n", str); + return -E2BIG; + } + + return ret; +} + +/** + * dynevent_cmd_init - Initialize a dynevent_cmd object + * @cmd: A pointer to the dynevent_cmd struct representing the cmd + * @buf: A pointer to the buffer to generate the command into + * @maxlen: The length of the buffer the command will be generated into + * @type: The type of the cmd, checked against further operations + * @run_command: The type-specific function that will actually run the command + * + * Initialize a dynevent_cmd. A dynevent_cmd is used to build up and + * run dynamic event creation commands, such as commands for creating + * synthetic and kprobe events. Before calling any of the functions + * used to build the command, a dynevent_cmd object should be + * instantiated and initialized using this function. + * + * The initialization sets things up by saving a pointer to the + * user-supplied buffer and its length via the @buf and @maxlen + * params, and by saving the cmd-specific @type and @run_command + * params which are used to check subsequent dynevent_cmd operations + * and actually run the command when complete. + */ +void dynevent_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen, + enum dynevent_type type, + dynevent_create_fn_t run_command) +{ + memset(cmd, '\0', sizeof(*cmd)); + + seq_buf_init(&cmd->seq, buf, maxlen); + cmd->type = type; + cmd->run_command = run_command; +} + +/** + * dynevent_arg_init - Initialize a dynevent_arg object + * @arg: A pointer to the dynevent_arg struct representing the arg + * @separator: An (optional) separator, appended after adding the arg + * + * Initialize a dynevent_arg object. A dynevent_arg represents an + * object used to append single arguments to the current command + * string. After the arg string is successfully appended to the + * command string, the optional @separator is appended. If no + * separator was specified when initializing the arg, a space will be + * appended. + */ +void dynevent_arg_init(struct dynevent_arg *arg, + char separator) +{ + memset(arg, '\0', sizeof(*arg)); + + if (!separator) + separator = ' '; + arg->separator = separator; +} + +/** + * dynevent_arg_pair_init - Initialize a dynevent_arg_pair object + * @arg_pair: A pointer to the dynevent_arg_pair struct representing the arg + * @operator: An (optional) operator, appended after adding the first arg + * @separator: An (optional) separator, appended after adding the second arg + * + * Initialize a dynevent_arg_pair object. A dynevent_arg_pair + * represents an object used to append argument pairs such as 'type + * variable_name;' or 'x+y' to the current command string. An + * argument pair consists of a left-hand-side argument and a + * right-hand-side argument separated by an operator, which can be + * whitespace, all followed by a separator, if applicable. After the + * first arg string is successfully appended to the command string, + * the optional @operator is appended, followed by the second arg and + * and optional @separator. If no separator was specified when + * initializing the arg, a space will be appended. + */ +void dynevent_arg_pair_init(struct dynevent_arg_pair *arg_pair, + char operator, char separator) +{ + memset(arg_pair, '\0', sizeof(*arg_pair)); + + if (!operator) + operator = ' '; + arg_pair->operator = operator; + + if (!separator) + separator = ' '; + arg_pair->separator = separator; +} + +/** + * dynevent_create - Create the dynamic event contained in dynevent_cmd + * @cmd: The dynevent_cmd object containing the dynamic event creation command + * + * Once a dynevent_cmd object has been successfully built up via the + * dynevent_cmd_init(), dynevent_arg_add() and dynevent_arg_pair_add() + * functions, this function runs the final command to actually create + * the event. + * + * Return: 0 if the event was successfully created, error otherwise. + */ +int dynevent_create(struct dynevent_cmd *cmd) +{ + return cmd->run_command(cmd); +} +EXPORT_SYMBOL_GPL(dynevent_create); diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h index 46898138d2df..d6857a254ede 100644 --- a/kernel/trace/trace_dynevent.h +++ b/kernel/trace/trace_dynevent.h @@ -117,4 +117,36 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type); #define for_each_dyn_event_safe(pos, n) \ list_for_each_entry_safe(pos, n, &dyn_event_list, list) +extern void dynevent_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen, + enum dynevent_type type, + dynevent_create_fn_t run_command); + +typedef int (*dynevent_check_arg_fn_t)(void *data); + +struct dynevent_arg { + const char *str; + char separator; /* e.g. ';', ',', or nothing */ +}; + +extern void dynevent_arg_init(struct dynevent_arg *arg, + char separator); +extern int dynevent_arg_add(struct dynevent_cmd *cmd, + struct dynevent_arg *arg, + dynevent_check_arg_fn_t check_arg); + +struct dynevent_arg_pair { + const char *lhs; + const char *rhs; + char operator; /* e.g. '=' or nothing */ + char separator; /* e.g. ';', ',', or nothing */ +}; + +extern void dynevent_arg_pair_init(struct dynevent_arg_pair *arg_pair, + char operator, char separator); + +extern int dynevent_arg_pair_add(struct dynevent_cmd *cmd, + struct dynevent_arg_pair *arg_pair, + dynevent_check_arg_fn_t check_arg); +extern int dynevent_str_add(struct dynevent_cmd *cmd, const char *str); + #endif diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index fc8e97328e54..f22746f3c132 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -61,15 +61,13 @@ FTRACE_ENTRY_REG(function, ftrace_entry, TRACE_FN, F_STRUCT( - __field( unsigned long, ip ) - __field( unsigned long, parent_ip ) + __field_fn( unsigned long, ip ) + __field_fn( unsigned long, parent_ip ) ), F_printk(" %ps <-- %ps", (void *)__entry->ip, (void *)__entry->parent_ip), - FILTER_TRACE_FN, - perf_ftrace_event_register ); @@ -84,9 +82,7 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry, __field_desc( int, graph_ent, depth ) ), - F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth), - - FILTER_OTHER + F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth) ); /* Function return entry */ @@ -97,18 +93,16 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry, F_STRUCT( __field_struct( struct ftrace_graph_ret, ret ) __field_desc( unsigned long, ret, func ) + __field_desc( unsigned long, ret, overrun ) __field_desc( unsigned long long, ret, calltime) __field_desc( unsigned long long, ret, rettime ) - __field_desc( unsigned long, ret, overrun ) __field_desc( int, ret, depth ) ), F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d", (void *)__entry->func, __entry->depth, __entry->calltime, __entry->rettime, - __entry->depth), - - FILTER_OTHER + __entry->depth) ); /* @@ -137,9 +131,7 @@ FTRACE_ENTRY(context_switch, ctx_switch_entry, F_printk("%u:%u:%u ==> %u:%u:%u [%03u]", __entry->prev_pid, __entry->prev_prio, __entry->prev_state, __entry->next_pid, __entry->next_prio, __entry->next_state, - __entry->next_cpu), - - FILTER_OTHER + __entry->next_cpu) ); /* @@ -157,9 +149,7 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry, F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]", __entry->prev_pid, __entry->prev_prio, __entry->prev_state, __entry->next_pid, __entry->next_prio, __entry->next_state, - __entry->next_cpu), - - FILTER_OTHER + __entry->next_cpu) ); /* @@ -174,7 +164,7 @@ FTRACE_ENTRY(kernel_stack, stack_entry, F_STRUCT( __field( int, size ) - __dynamic_array(unsigned long, caller ) + __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) ), F_printk("\t=> %ps\n\t=> %ps\n\t=> %ps\n" @@ -183,9 +173,7 @@ FTRACE_ENTRY(kernel_stack, stack_entry, (void *)__entry->caller[0], (void *)__entry->caller[1], (void *)__entry->caller[2], (void *)__entry->caller[3], (void *)__entry->caller[4], (void *)__entry->caller[5], - (void *)__entry->caller[6], (void *)__entry->caller[7]), - - FILTER_OTHER + (void *)__entry->caller[6], (void *)__entry->caller[7]) ); FTRACE_ENTRY(user_stack, userstack_entry, @@ -203,9 +191,7 @@ FTRACE_ENTRY(user_stack, userstack_entry, (void *)__entry->caller[0], (void *)__entry->caller[1], (void *)__entry->caller[2], (void *)__entry->caller[3], (void *)__entry->caller[4], (void *)__entry->caller[5], - (void *)__entry->caller[6], (void *)__entry->caller[7]), - - FILTER_OTHER + (void *)__entry->caller[6], (void *)__entry->caller[7]) ); /* @@ -222,9 +208,7 @@ FTRACE_ENTRY(bprint, bprint_entry, ), F_printk("%ps: %s", - (void *)__entry->ip, __entry->fmt), - - FILTER_OTHER + (void *)__entry->ip, __entry->fmt) ); FTRACE_ENTRY_REG(print, print_entry, @@ -239,8 +223,6 @@ FTRACE_ENTRY_REG(print, print_entry, F_printk("%ps: %s", (void *)__entry->ip, __entry->buf), - FILTER_OTHER, - ftrace_event_register ); @@ -254,9 +236,7 @@ FTRACE_ENTRY(raw_data, raw_data_entry, ), F_printk("id:%04x %08x", - __entry->id, (int)__entry->buf[0]), - - FILTER_OTHER + __entry->id, (int)__entry->buf[0]) ); FTRACE_ENTRY(bputs, bputs_entry, @@ -269,9 +249,7 @@ FTRACE_ENTRY(bputs, bputs_entry, ), F_printk("%ps: %s", - (void *)__entry->ip, __entry->str), - - FILTER_OTHER + (void *)__entry->ip, __entry->str) ); FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, @@ -283,16 +261,14 @@ FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, __field_desc( resource_size_t, rw, phys ) __field_desc( unsigned long, rw, value ) __field_desc( unsigned long, rw, pc ) - __field_desc( int, rw, map_id ) + __field_desc( int, rw, map_id ) __field_desc( unsigned char, rw, opcode ) __field_desc( unsigned char, rw, width ) ), F_printk("%lx %lx %lx %d %x %x", (unsigned long)__entry->phys, __entry->value, __entry->pc, - __entry->map_id, __entry->opcode, __entry->width), - - FILTER_OTHER + __entry->map_id, __entry->opcode, __entry->width) ); FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, @@ -304,15 +280,13 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, __field_desc( resource_size_t, map, phys ) __field_desc( unsigned long, map, virt ) __field_desc( unsigned long, map, len ) - __field_desc( int, map, map_id ) + __field_desc( int, map, map_id ) __field_desc( unsigned char, map, opcode ) ), F_printk("%lx %lx %lx %d %x", (unsigned long)__entry->phys, __entry->virt, __entry->len, - __entry->map_id, __entry->opcode), - - FILTER_OTHER + __entry->map_id, __entry->opcode) ); @@ -334,9 +308,7 @@ FTRACE_ENTRY(branch, trace_branch, F_printk("%u:%s:%s (%u)%s", __entry->line, __entry->func, __entry->file, __entry->correct, - __entry->constant ? " CONSTANT" : ""), - - FILTER_OTHER + __entry->constant ? " CONSTANT" : "") ); @@ -362,7 +334,5 @@ FTRACE_ENTRY(hwlat, hwlat_entry, __entry->duration, __entry->outer_duration, __entry->nmi_total_ts, - __entry->nmi_count), - - FILTER_OTHER + __entry->nmi_count) ); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a5b614cc3887..f38234ecea18 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -24,6 +24,7 @@ #include <linux/delay.h> #include <trace/events/sched.h> +#include <trace/syscall.h> #include <asm/setup.h> @@ -237,7 +238,7 @@ bool trace_event_ignore_this_pid(struct trace_event_file *trace_file) if (!pid_list) return false; - data = this_cpu_ptr(tr->trace_buffer.data); + data = this_cpu_ptr(tr->array_buffer.data); return data->ignore_pid; } @@ -272,6 +273,7 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, if (!fbuffer->event) return NULL; + fbuffer->regs = NULL; fbuffer->entry = ring_buffer_event_data(fbuffer->event); return fbuffer->entry; } @@ -546,7 +548,7 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt, pid_list = rcu_dereference_sched(tr->filtered_pids); - this_cpu_write(tr->trace_buffer.data->ignore_pid, + this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, prev) && trace_ignore_this_task(pid_list, next)); } @@ -560,7 +562,7 @@ event_filter_pid_sched_switch_probe_post(void *data, bool preempt, pid_list = rcu_dereference_sched(tr->filtered_pids); - this_cpu_write(tr->trace_buffer.data->ignore_pid, + this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, next)); } @@ -571,12 +573,12 @@ event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task) struct trace_pid_list *pid_list; /* Nothing to do if we are already tracing */ - if (!this_cpu_read(tr->trace_buffer.data->ignore_pid)) + if (!this_cpu_read(tr->array_buffer.data->ignore_pid)) return; pid_list = rcu_dereference_sched(tr->filtered_pids); - this_cpu_write(tr->trace_buffer.data->ignore_pid, + this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, task)); } @@ -587,13 +589,13 @@ event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task) struct trace_pid_list *pid_list; /* Nothing to do if we are not tracing */ - if (this_cpu_read(tr->trace_buffer.data->ignore_pid)) + if (this_cpu_read(tr->array_buffer.data->ignore_pid)) return; pid_list = rcu_dereference_sched(tr->filtered_pids); /* Set tracing if current is enabled */ - this_cpu_write(tr->trace_buffer.data->ignore_pid, + this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, current)); } @@ -625,7 +627,7 @@ static void __ftrace_clear_event_pids(struct trace_array *tr) } for_each_possible_cpu(cpu) - per_cpu_ptr(tr->trace_buffer.data, cpu)->ignore_pid = false; + per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false; rcu_assign_pointer(tr->filtered_pids, NULL); @@ -697,7 +699,7 @@ static void remove_subsystem(struct trace_subsystem_dir *dir) return; if (!--dir->nr_events) { - tracefs_remove_recursive(dir->entry); + tracefs_remove(dir->entry); list_del(&dir->list); __put_system_dir(dir); } @@ -716,7 +718,7 @@ static void remove_event_file_dir(struct trace_event_file *file) } spin_unlock(&dir->d_lock); - tracefs_remove_recursive(dir); + tracefs_remove(dir); } list_del(&file->list); @@ -1594,7 +1596,7 @@ static void ignore_task_cpu(void *data) pid_list = rcu_dereference_protected(tr->filtered_pids, mutex_is_locked(&event_mutex)); - this_cpu_write(tr->trace_buffer.data->ignore_pid, + this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, current)); } @@ -2017,7 +2019,24 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file) */ head = trace_get_fields(call); if (list_empty(head)) { - ret = call->class->define_fields(call); + struct trace_event_fields *field = call->class->fields_array; + unsigned int offset = sizeof(struct trace_entry); + + for (; field->type; field++) { + if (field->type == TRACE_FUNCTION_TYPE) { + ret = field->define_fields(call); + break; + } + + offset = ALIGN(offset, field->align); + ret = trace_define_field(call, field->type, field->name, + offset, field->size, + field->is_signed, field->filter_type); + if (ret) + break; + + offset += field->size; + } if (ret < 0) { pr_warn("Could not initialize trace point events/%s\n", name); @@ -2535,6 +2554,91 @@ find_event_file(struct trace_array *tr, const char *system, const char *event) return file; } +/** + * trace_get_event_file - Find and return a trace event file + * @instance: The name of the trace instance containing the event + * @system: The name of the system containing the event + * @event: The name of the event + * + * Return a trace event file given the trace instance name, trace + * system, and trace event name. If the instance name is NULL, it + * refers to the top-level trace array. + * + * This function will look it up and return it if found, after calling + * trace_array_get() to prevent the instance from going away, and + * increment the event's module refcount to prevent it from being + * removed. + * + * To release the file, call trace_put_event_file(), which will call + * trace_array_put() and decrement the event's module refcount. + * + * Return: The trace event on success, ERR_PTR otherwise. + */ +struct trace_event_file *trace_get_event_file(const char *instance, + const char *system, + const char *event) +{ + struct trace_array *tr = top_trace_array(); + struct trace_event_file *file = NULL; + int ret = -EINVAL; + + if (instance) { + tr = trace_array_find_get(instance); + if (!tr) + return ERR_PTR(-ENOENT); + } else { + ret = trace_array_get(tr); + if (ret) + return ERR_PTR(ret); + } + + mutex_lock(&event_mutex); + + file = find_event_file(tr, system, event); + if (!file) { + trace_array_put(tr); + ret = -EINVAL; + goto out; + } + + /* Don't let event modules unload while in use */ + ret = try_module_get(file->event_call->mod); + if (!ret) { + trace_array_put(tr); + ret = -EBUSY; + goto out; + } + + ret = 0; + out: + mutex_unlock(&event_mutex); + + if (ret) + file = ERR_PTR(ret); + + return file; +} +EXPORT_SYMBOL_GPL(trace_get_event_file); + +/** + * trace_put_event_file - Release a file from trace_get_event_file() + * @file: The trace event file + * + * If a file was retrieved using trace_get_event_file(), this should + * be called when it's no longer needed. It will cancel the previous + * trace_array_get() called by that function, and decrement the + * event's module refcount. + */ +void trace_put_event_file(struct trace_event_file *file) +{ + mutex_lock(&event_mutex); + module_put(file->event_call->mod); + mutex_unlock(&event_mutex); + + trace_array_put(file->tr); +} +EXPORT_SYMBOL_GPL(trace_put_event_file); + #ifdef CONFIG_DYNAMIC_FTRACE /* Avoid typos */ @@ -3064,7 +3168,7 @@ int event_trace_del_tracer(struct trace_array *tr) down_write(&trace_event_sem); __trace_remove_event_dirs(tr); - tracefs_remove_recursive(tr->event_dir); + tracefs_remove(tr->event_dir); up_write(&trace_event_sem); tr->event_dir = NULL; @@ -3391,8 +3495,8 @@ static void __init function_test_events_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) { + struct trace_buffer *buffer; struct ring_buffer_event *event; - struct ring_buffer *buffer; struct ftrace_entry *entry; unsigned long flags; long disabled; diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index f62de5f43e79..e7ce7cdac62f 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -66,7 +66,12 @@ C(INVALID_SUBSYS_EVENT, "Invalid subsystem or event name"), \ C(INVALID_REF_KEY, "Using variable references in keys not supported"), \ C(VAR_NOT_FOUND, "Couldn't find variable"), \ - C(FIELD_NOT_FOUND, "Couldn't find field"), + C(FIELD_NOT_FOUND, "Couldn't find field"), \ + C(EMPTY_ASSIGNMENT, "Empty assignment"), \ + C(INVALID_SORT_MODIFIER,"Invalid sort modifier"), \ + C(EMPTY_SORT_FIELD, "Empty sort field"), \ + C(TOO_MANY_SORT_FIELDS, "Too many sort fields (Max = 2)"), \ + C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), #undef C #define C(a, b) HIST_ERR_##a @@ -116,6 +121,7 @@ struct hist_field { struct ftrace_event_field *field; unsigned long flags; hist_field_fn_t fn; + unsigned int ref; unsigned int size; unsigned int offset; unsigned int is_signed; @@ -374,7 +380,7 @@ struct hist_trigger_data { unsigned int n_save_var_str; }; -static int synth_event_create(int argc, const char **argv); +static int create_synth_event(int argc, const char **argv); static int synth_event_show(struct seq_file *m, struct dyn_event *ev); static int synth_event_release(struct dyn_event *ev); static bool synth_event_is_busy(struct dyn_event *ev); @@ -382,7 +388,7 @@ static bool synth_event_match(const char *system, const char *event, int argc, const char **argv, struct dyn_event *ev); static struct dyn_event_operations synth_event_ops = { - .create = synth_event_create, + .create = create_synth_event, .show = synth_event_show, .is_busy = synth_event_is_busy, .free = synth_event_release, @@ -393,6 +399,7 @@ struct synth_field { char *type; char *name; size_t size; + unsigned int offset; bool is_signed; bool is_string; }; @@ -407,6 +414,7 @@ struct synth_event { struct trace_event_class class; struct trace_event_call call; struct tracepoint *tp; + struct module *mod; }; static bool is_synth_event(struct dyn_event *ev) @@ -469,11 +477,12 @@ struct action_data { * When a histogram trigger is hit, the values of any * references to variables, including variables being passed * as parameters to synthetic events, are collected into a - * var_ref_vals array. This var_ref_idx is the index of the - * first param in the array to be passed to the synthetic - * event invocation. + * var_ref_vals array. This var_ref_idx array is an array of + * indices into the var_ref_vals array, one for each synthetic + * event param, and is passed to the synthetic event + * invocation. */ - unsigned int var_ref_idx; + unsigned int var_ref_idx[TRACING_MAP_VARS_MAX]; struct synth_event *synth_event; bool use_trace_keyword; char *synth_event_name; @@ -607,7 +616,8 @@ static void last_cmd_set(struct trace_event_file *file, char *str) if (!str) return; - strncpy(last_cmd, str, MAX_FILTER_STR_VAL - 1); + strcpy(last_cmd, "hist:"); + strncat(last_cmd, str, MAX_FILTER_STR_VAL - 1 - sizeof("hist:")); if (file) { call = file->event_call; @@ -661,6 +671,8 @@ static int synth_event_define_fields(struct trace_event_call *call) if (ret) break; + event->fields[i]->offset = n_u64; + if (event->fields[i]->is_string) { offset += STR_VAR_LEN_MAX; n_u64 += STR_VAR_LEN_MAX / sizeof(u64); @@ -833,7 +845,7 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, fmt = synth_field_fmt(se->fields[i]->type); /* parameter types */ - if (tr->trace_flags & TRACE_ITER_VERBOSE) + if (tr && tr->trace_flags & TRACE_ITER_VERBOSE) trace_seq_printf(s, "%s ", fmt); snprintf(print_fmt, sizeof(print_fmt), "%%s=%s%%s", fmt); @@ -874,14 +886,14 @@ static struct trace_event_functions synth_event_funcs = { static notrace void trace_event_raw_event_synth(void *__data, u64 *var_ref_vals, - unsigned int var_ref_idx) + unsigned int *var_ref_idx) { struct trace_event_file *trace_file = __data; struct synth_trace_event *entry; struct trace_event_buffer fbuffer; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct synth_event *event; - unsigned int i, n_u64; + unsigned int i, n_u64, val_idx; int fields_size = 0; event = trace_file->event_call->data; @@ -895,7 +907,7 @@ static notrace void trace_event_raw_event_synth(void *__data, * Avoid ring buffer recursion detection, as this event * is being performed within another event. */ - buffer = trace_file->tr->trace_buffer.buffer; + buffer = trace_file->tr->array_buffer.buffer; ring_buffer_nest_start(buffer); entry = trace_event_buffer_reserve(&fbuffer, trace_file, @@ -904,15 +916,16 @@ static notrace void trace_event_raw_event_synth(void *__data, goto out; for (i = 0, n_u64 = 0; i < event->n_fields; i++) { + val_idx = var_ref_idx[i]; if (event->fields[i]->is_string) { - char *str_val = (char *)(long)var_ref_vals[var_ref_idx + i]; + char *str_val = (char *)(long)var_ref_vals[val_idx]; char *str_field = (char *)&entry->fields[n_u64]; strscpy(str_field, str_val, STR_VAR_LEN_MAX); n_u64 += STR_VAR_LEN_MAX / sizeof(u64); } else { struct synth_field *field = event->fields[i]; - u64 val = var_ref_vals[var_ref_idx + i]; + u64 val = var_ref_vals[val_idx]; switch (field->size) { case 1: @@ -1112,10 +1125,10 @@ static struct tracepoint *alloc_synth_tracepoint(char *name) } typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals, - unsigned int var_ref_idx); + unsigned int *var_ref_idx); static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals, - unsigned int var_ref_idx) + unsigned int *var_ref_idx) { struct tracepoint *tp = event->tp; @@ -1154,6 +1167,12 @@ static struct synth_event *find_synth_event(const char *name) return NULL; } +static struct trace_event_fields synth_event_fields_array[] = { + { .type = TRACE_FUNCTION_TYPE, + .define_fields = synth_event_define_fields }, + {} +}; + static int register_synth_event(struct synth_event *event) { struct trace_event_call *call = &event->call; @@ -1175,7 +1194,7 @@ static int register_synth_event(struct synth_event *event) INIT_LIST_HEAD(&call->class->fields); call->event.funcs = &synth_event_funcs; - call->class->define_fields = synth_event_define_fields; + call->class->fields_array = synth_event_fields_array; ret = register_trace_event(&call->event); if (!ret) { @@ -1286,6 +1305,273 @@ struct hist_var_data { struct hist_trigger_data *hist_data; }; +static int synth_event_check_arg_fn(void *data) +{ + struct dynevent_arg_pair *arg_pair = data; + int size; + + size = synth_field_size((char *)arg_pair->lhs); + + return size ? 0 : -EINVAL; +} + +/** + * synth_event_add_field - Add a new field to a synthetic event cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @type: The type of the new field to add + * @name: The name of the new field to add + * + * Add a new field to a synthetic event cmd object. Field ordering is in + * the same order the fields are added. + * + * See synth_field_size() for available types. If field_name contains + * [n] the field is considered to be an array. + * + * Return: 0 if successful, error otherwise. + */ +int synth_event_add_field(struct dynevent_cmd *cmd, const char *type, + const char *name) +{ + struct dynevent_arg_pair arg_pair; + int ret; + + if (cmd->type != DYNEVENT_TYPE_SYNTH) + return -EINVAL; + + if (!type || !name) + return -EINVAL; + + dynevent_arg_pair_init(&arg_pair, 0, ';'); + + arg_pair.lhs = type; + arg_pair.rhs = name; + + ret = dynevent_arg_pair_add(cmd, &arg_pair, synth_event_check_arg_fn); + if (ret) + return ret; + + if (++cmd->n_fields > SYNTH_FIELDS_MAX) + ret = -EINVAL; + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_add_field); + +/** + * synth_event_add_field_str - Add a new field to a synthetic event cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @type_name: The type and name of the new field to add, as a single string + * + * Add a new field to a synthetic event cmd object, as a single + * string. The @type_name string is expected to be of the form 'type + * name', which will be appended by ';'. No sanity checking is done - + * what's passed in is assumed to already be well-formed. Field + * ordering is in the same order the fields are added. + * + * See synth_field_size() for available types. If field_name contains + * [n] the field is considered to be an array. + * + * Return: 0 if successful, error otherwise. + */ +int synth_event_add_field_str(struct dynevent_cmd *cmd, const char *type_name) +{ + struct dynevent_arg arg; + int ret; + + if (cmd->type != DYNEVENT_TYPE_SYNTH) + return -EINVAL; + + if (!type_name) + return -EINVAL; + + dynevent_arg_init(&arg, ';'); + + arg.str = type_name; + + ret = dynevent_arg_add(cmd, &arg, NULL); + if (ret) + return ret; + + if (++cmd->n_fields > SYNTH_FIELDS_MAX) + ret = -EINVAL; + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_add_field_str); + +/** + * synth_event_add_fields - Add multiple fields to a synthetic event cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @fields: An array of type/name field descriptions + * @n_fields: The number of field descriptions contained in the fields array + * + * Add a new set of fields to a synthetic event cmd object. The event + * fields that will be defined for the event should be passed in as an + * array of struct synth_field_desc, and the number of elements in the + * array passed in as n_fields. Field ordering will retain the + * ordering given in the fields array. + * + * See synth_field_size() for available types. If field_name contains + * [n] the field is considered to be an array. + * + * Return: 0 if successful, error otherwise. + */ +int synth_event_add_fields(struct dynevent_cmd *cmd, + struct synth_field_desc *fields, + unsigned int n_fields) +{ + unsigned int i; + int ret = 0; + + for (i = 0; i < n_fields; i++) { + if (fields[i].type == NULL || fields[i].name == NULL) { + ret = -EINVAL; + break; + } + + ret = synth_event_add_field(cmd, fields[i].type, fields[i].name); + if (ret) + break; + } + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_add_fields); + +/** + * __synth_event_gen_cmd_start - Start a synthetic event command from arg list + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @name: The name of the synthetic event + * @mod: The module creating the event, NULL if not created from a module + * @args: Variable number of arg (pairs), one pair for each field + * + * NOTE: Users normally won't want to call this function directly, but + * rather use the synth_event_gen_cmd_start() wrapper, which + * automatically adds a NULL to the end of the arg list. If this + * function is used directly, make sure the last arg in the variable + * arg list is NULL. + * + * Generate a synthetic event command to be executed by + * synth_event_gen_cmd_end(). This function can be used to generate + * the complete command or only the first part of it; in the latter + * case, synth_event_add_field(), synth_event_add_field_str(), or + * synth_event_add_fields() can be used to add more fields following + * this. + * + * There should be an even number variable args, each pair consisting + * of a type followed by a field name. + * + * See synth_field_size() for available types. If field_name contains + * [n] the field is considered to be an array. + * + * Return: 0 if successful, error otherwise. + */ +int __synth_event_gen_cmd_start(struct dynevent_cmd *cmd, const char *name, + struct module *mod, ...) +{ + struct dynevent_arg arg; + va_list args; + int ret; + + cmd->event_name = name; + cmd->private_data = mod; + + if (cmd->type != DYNEVENT_TYPE_SYNTH) + return -EINVAL; + + dynevent_arg_init(&arg, 0); + arg.str = name; + ret = dynevent_arg_add(cmd, &arg, NULL); + if (ret) + return ret; + + va_start(args, mod); + for (;;) { + const char *type, *name; + + type = va_arg(args, const char *); + if (!type) + break; + name = va_arg(args, const char *); + if (!name) + break; + + if (++cmd->n_fields > SYNTH_FIELDS_MAX) { + ret = -EINVAL; + break; + } + + ret = synth_event_add_field(cmd, type, name); + if (ret) + break; + } + va_end(args); + + return ret; +} +EXPORT_SYMBOL_GPL(__synth_event_gen_cmd_start); + +/** + * synth_event_gen_cmd_array_start - Start synthetic event command from an array + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @name: The name of the synthetic event + * @fields: An array of type/name field descriptions + * @n_fields: The number of field descriptions contained in the fields array + * + * Generate a synthetic event command to be executed by + * synth_event_gen_cmd_end(). This function can be used to generate + * the complete command or only the first part of it; in the latter + * case, synth_event_add_field(), synth_event_add_field_str(), or + * synth_event_add_fields() can be used to add more fields following + * this. + * + * The event fields that will be defined for the event should be + * passed in as an array of struct synth_field_desc, and the number of + * elements in the array passed in as n_fields. Field ordering will + * retain the ordering given in the fields array. + * + * See synth_field_size() for available types. If field_name contains + * [n] the field is considered to be an array. + * + * Return: 0 if successful, error otherwise. + */ +int synth_event_gen_cmd_array_start(struct dynevent_cmd *cmd, const char *name, + struct module *mod, + struct synth_field_desc *fields, + unsigned int n_fields) +{ + struct dynevent_arg arg; + unsigned int i; + int ret = 0; + + cmd->event_name = name; + cmd->private_data = mod; + + if (cmd->type != DYNEVENT_TYPE_SYNTH) + return -EINVAL; + + if (n_fields > SYNTH_FIELDS_MAX) + return -EINVAL; + + dynevent_arg_init(&arg, 0); + arg.str = name; + ret = dynevent_arg_add(cmd, &arg, NULL); + if (ret) + return ret; + + for (i = 0; i < n_fields; i++) { + if (fields[i].type == NULL || fields[i].name == NULL) + return -EINVAL; + + ret = synth_event_add_field(cmd, fields[i].type, fields[i].name); + if (ret) + break; + } + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_gen_cmd_array_start); + static int __create_synth_event(int argc, const char *name, const char **argv) { struct synth_field *field, *fields[SYNTH_FIELDS_MAX]; @@ -1354,29 +1640,123 @@ static int __create_synth_event(int argc, const char *name, const char **argv) goto out; } +/** + * synth_event_create - Create a new synthetic event + * @name: The name of the new sythetic event + * @fields: An array of type/name field descriptions + * @n_fields: The number of field descriptions contained in the fields array + * @mod: The module creating the event, NULL if not created from a module + * + * Create a new synthetic event with the given name under the + * trace/events/synthetic/ directory. The event fields that will be + * defined for the event should be passed in as an array of struct + * synth_field_desc, and the number elements in the array passed in as + * n_fields. Field ordering will retain the ordering given in the + * fields array. + * + * If the new synthetic event is being created from a module, the mod + * param must be non-NULL. This will ensure that the trace buffer + * won't contain unreadable events. + * + * The new synth event should be deleted using synth_event_delete() + * function. The new synthetic event can be generated from modules or + * other kernel code using trace_synth_event() and related functions. + * + * Return: 0 if successful, error otherwise. + */ +int synth_event_create(const char *name, struct synth_field_desc *fields, + unsigned int n_fields, struct module *mod) +{ + struct dynevent_cmd cmd; + char *buf; + int ret; + + buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + synth_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN); + + ret = synth_event_gen_cmd_array_start(&cmd, name, mod, + fields, n_fields); + if (ret) + goto out; + + ret = synth_event_gen_cmd_end(&cmd); + out: + kfree(buf); + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_create); + +static int destroy_synth_event(struct synth_event *se) +{ + int ret; + + if (se->ref) + ret = -EBUSY; + else { + ret = unregister_synth_event(se); + if (!ret) { + dyn_event_remove(&se->devent); + free_synth_event(se); + } + } + + return ret; +} + +/** + * synth_event_delete - Delete a synthetic event + * @event_name: The name of the new sythetic event + * + * Delete a synthetic event that was created with synth_event_create(). + * + * Return: 0 if successful, error otherwise. + */ +int synth_event_delete(const char *event_name) +{ + struct synth_event *se = NULL; + struct module *mod = NULL; + int ret = -ENOENT; + + mutex_lock(&event_mutex); + se = find_synth_event(event_name); + if (se) { + mod = se->mod; + ret = destroy_synth_event(se); + } + mutex_unlock(&event_mutex); + + if (mod) { + mutex_lock(&trace_types_lock); + /* + * It is safest to reset the ring buffer if the module + * being unloaded registered any events that were + * used. The only worry is if a new module gets + * loaded, and takes on the same id as the events of + * this module. When printing out the buffer, traced + * events left over from this module may be passed to + * the new module events and unexpected results may + * occur. + */ + tracing_reset_all_online_cpus(); + mutex_unlock(&trace_types_lock); + } + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_delete); + static int create_or_delete_synth_event(int argc, char **argv) { const char *name = argv[0]; - struct synth_event *event = NULL; int ret; /* trace_run_command() ensures argc != 0 */ if (name[0] == '!') { - mutex_lock(&event_mutex); - event = find_synth_event(name + 1); - if (event) { - if (event->ref) - ret = -EBUSY; - else { - ret = unregister_synth_event(event); - if (!ret) { - dyn_event_remove(&event->devent); - free_synth_event(event); - } - } - } else - ret = -ENOENT; - mutex_unlock(&event_mutex); + ret = synth_event_delete(name + 1); return ret; } @@ -1384,7 +1764,474 @@ static int create_or_delete_synth_event(int argc, char **argv) return ret == -ECANCELED ? -EINVAL : ret; } -static int synth_event_create(int argc, const char **argv) +static int synth_event_run_command(struct dynevent_cmd *cmd) +{ + struct synth_event *se; + int ret; + + ret = trace_run_command(cmd->seq.buffer, create_or_delete_synth_event); + if (ret) + return ret; + + se = find_synth_event(cmd->event_name); + if (WARN_ON(!se)) + return -ENOENT; + + se->mod = cmd->private_data; + + return ret; +} + +/** + * synth_event_cmd_init - Initialize a synthetic event command object + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @buf: A pointer to the buffer used to build the command + * @maxlen: The length of the buffer passed in @buf + * + * Initialize a synthetic event command object. Use this before + * calling any of the other dyenvent_cmd functions. + */ +void synth_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen) +{ + dynevent_cmd_init(cmd, buf, maxlen, DYNEVENT_TYPE_SYNTH, + synth_event_run_command); +} +EXPORT_SYMBOL_GPL(synth_event_cmd_init); + +/** + * synth_event_trace - Trace a synthetic event + * @file: The trace_event_file representing the synthetic event + * @n_vals: The number of values in vals + * @args: Variable number of args containing the event values + * + * Trace a synthetic event using the values passed in the variable + * argument list. + * + * The argument list should be a list 'n_vals' u64 values. The number + * of vals must match the number of field in the synthetic event, and + * must be in the same order as the synthetic event fields. + * + * All vals should be cast to u64, and string vals are just pointers + * to strings, cast to u64. Strings will be copied into space + * reserved in the event for the string, using these pointers. + * + * Return: 0 on success, err otherwise. + */ +int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...) +{ + struct trace_event_buffer fbuffer; + struct synth_trace_event *entry; + struct trace_buffer *buffer; + struct synth_event *event; + unsigned int i, n_u64; + int fields_size = 0; + va_list args; + int ret = 0; + + /* + * Normal event generation doesn't get called at all unless + * the ENABLED bit is set (which attaches the probe thus + * allowing this code to be called, etc). Because this is + * called directly by the user, we don't have that but we + * still need to honor not logging when disabled. + */ + if (!(file->flags & EVENT_FILE_FL_ENABLED)) + return 0; + + event = file->event_call->data; + + if (n_vals != event->n_fields) + return -EINVAL; + + if (trace_trigger_soft_disabled(file)) + return -EINVAL; + + fields_size = event->n_u64 * sizeof(u64); + + /* + * Avoid ring buffer recursion detection, as this event + * is being performed within another event. + */ + buffer = file->tr->array_buffer.buffer; + ring_buffer_nest_start(buffer); + + entry = trace_event_buffer_reserve(&fbuffer, file, + sizeof(*entry) + fields_size); + if (!entry) { + ret = -EINVAL; + goto out; + } + + va_start(args, n_vals); + for (i = 0, n_u64 = 0; i < event->n_fields; i++) { + u64 val; + + val = va_arg(args, u64); + + if (event->fields[i]->is_string) { + char *str_val = (char *)(long)val; + char *str_field = (char *)&entry->fields[n_u64]; + + strscpy(str_field, str_val, STR_VAR_LEN_MAX); + n_u64 += STR_VAR_LEN_MAX / sizeof(u64); + } else { + entry->fields[n_u64] = val; + n_u64++; + } + } + va_end(args); + + trace_event_buffer_commit(&fbuffer); +out: + ring_buffer_nest_end(buffer); + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_trace); + +/** + * synth_event_trace_array - Trace a synthetic event from an array + * @file: The trace_event_file representing the synthetic event + * @vals: Array of values + * @n_vals: The number of values in vals + * + * Trace a synthetic event using the values passed in as 'vals'. + * + * The 'vals' array is just an array of 'n_vals' u64. The number of + * vals must match the number of field in the synthetic event, and + * must be in the same order as the synthetic event fields. + * + * All vals should be cast to u64, and string vals are just pointers + * to strings, cast to u64. Strings will be copied into space + * reserved in the event for the string, using these pointers. + * + * Return: 0 on success, err otherwise. + */ +int synth_event_trace_array(struct trace_event_file *file, u64 *vals, + unsigned int n_vals) +{ + struct trace_event_buffer fbuffer; + struct synth_trace_event *entry; + struct trace_buffer *buffer; + struct synth_event *event; + unsigned int i, n_u64; + int fields_size = 0; + int ret = 0; + + /* + * Normal event generation doesn't get called at all unless + * the ENABLED bit is set (which attaches the probe thus + * allowing this code to be called, etc). Because this is + * called directly by the user, we don't have that but we + * still need to honor not logging when disabled. + */ + if (!(file->flags & EVENT_FILE_FL_ENABLED)) + return 0; + + event = file->event_call->data; + + if (n_vals != event->n_fields) + return -EINVAL; + + if (trace_trigger_soft_disabled(file)) + return -EINVAL; + + fields_size = event->n_u64 * sizeof(u64); + + /* + * Avoid ring buffer recursion detection, as this event + * is being performed within another event. + */ + buffer = file->tr->array_buffer.buffer; + ring_buffer_nest_start(buffer); + + entry = trace_event_buffer_reserve(&fbuffer, file, + sizeof(*entry) + fields_size); + if (!entry) { + ret = -EINVAL; + goto out; + } + + for (i = 0, n_u64 = 0; i < event->n_fields; i++) { + if (event->fields[i]->is_string) { + char *str_val = (char *)(long)vals[i]; + char *str_field = (char *)&entry->fields[n_u64]; + + strscpy(str_field, str_val, STR_VAR_LEN_MAX); + n_u64 += STR_VAR_LEN_MAX / sizeof(u64); + } else { + entry->fields[n_u64] = vals[i]; + n_u64++; + } + } + + trace_event_buffer_commit(&fbuffer); +out: + ring_buffer_nest_end(buffer); + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_trace_array); + +/** + * synth_event_trace_start - Start piecewise synthetic event trace + * @file: The trace_event_file representing the synthetic event + * @trace_state: A pointer to object tracking the piecewise trace state + * + * Start the trace of a synthetic event field-by-field rather than all + * at once. + * + * This function 'opens' an event trace, which means space is reserved + * for the event in the trace buffer, after which the event's + * individual field values can be set through either + * synth_event_add_next_val() or synth_event_add_val(). + * + * A pointer to a trace_state object is passed in, which will keep + * track of the current event trace state until the event trace is + * closed (and the event finally traced) using + * synth_event_trace_end(). + * + * Note that synth_event_trace_end() must be called after all values + * have been added for each event trace, regardless of whether adding + * all field values succeeded or not. + * + * Note also that for a given event trace, all fields must be added + * using either synth_event_add_next_val() or synth_event_add_val() + * but not both together or interleaved. + * + * Return: 0 on success, err otherwise. + */ +int synth_event_trace_start(struct trace_event_file *file, + struct synth_event_trace_state *trace_state) +{ + struct synth_trace_event *entry; + int fields_size = 0; + int ret = 0; + + if (!trace_state) { + ret = -EINVAL; + goto out; + } + + memset(trace_state, '\0', sizeof(*trace_state)); + + /* + * Normal event tracing doesn't get called at all unless the + * ENABLED bit is set (which attaches the probe thus allowing + * this code to be called, etc). Because this is called + * directly by the user, we don't have that but we still need + * to honor not logging when disabled. For the the iterated + * trace case, we save the enabed state upon start and just + * ignore the following data calls. + */ + if (!(file->flags & EVENT_FILE_FL_ENABLED)) { + trace_state->enabled = false; + goto out; + } + + trace_state->enabled = true; + + trace_state->event = file->event_call->data; + + if (trace_trigger_soft_disabled(file)) { + ret = -EINVAL; + goto out; + } + + fields_size = trace_state->event->n_u64 * sizeof(u64); + + /* + * Avoid ring buffer recursion detection, as this event + * is being performed within another event. + */ + trace_state->buffer = file->tr->array_buffer.buffer; + ring_buffer_nest_start(trace_state->buffer); + + entry = trace_event_buffer_reserve(&trace_state->fbuffer, file, + sizeof(*entry) + fields_size); + if (!entry) { + ret = -EINVAL; + goto out; + } + + trace_state->entry = entry; +out: + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_trace_start); + +static int __synth_event_add_val(const char *field_name, u64 val, + struct synth_event_trace_state *trace_state) +{ + struct synth_field *field = NULL; + struct synth_trace_event *entry; + struct synth_event *event; + int i, ret = 0; + + if (!trace_state) { + ret = -EINVAL; + goto out; + } + + /* can't mix add_next_synth_val() with add_synth_val() */ + if (field_name) { + if (trace_state->add_next) { + ret = -EINVAL; + goto out; + } + trace_state->add_name = true; + } else { + if (trace_state->add_name) { + ret = -EINVAL; + goto out; + } + trace_state->add_next = true; + } + + if (!trace_state->enabled) + goto out; + + event = trace_state->event; + if (trace_state->add_name) { + for (i = 0; i < event->n_fields; i++) { + field = event->fields[i]; + if (strcmp(field->name, field_name) == 0) + break; + } + if (!field) { + ret = -EINVAL; + goto out; + } + } else { + if (trace_state->cur_field >= event->n_fields) { + ret = -EINVAL; + goto out; + } + field = event->fields[trace_state->cur_field++]; + } + + entry = trace_state->entry; + if (field->is_string) { + char *str_val = (char *)(long)val; + char *str_field; + + if (!str_val) { + ret = -EINVAL; + goto out; + } + + str_field = (char *)&entry->fields[field->offset]; + strscpy(str_field, str_val, STR_VAR_LEN_MAX); + } else + entry->fields[field->offset] = val; + out: + return ret; +} + +/** + * synth_event_add_next_val - Add the next field's value to an open synth trace + * @val: The value to set the next field to + * @trace_state: A pointer to object tracking the piecewise trace state + * + * Set the value of the next field in an event that's been opened by + * synth_event_trace_start(). + * + * The val param should be the value cast to u64. If the value points + * to a string, the val param should be a char * cast to u64. + * + * This function assumes all the fields in an event are to be set one + * after another - successive calls to this function are made, one for + * each field, in the order of the fields in the event, until all + * fields have been set. If you'd rather set each field individually + * without regard to ordering, synth_event_add_val() can be used + * instead. + * + * Note however that synth_event_add_next_val() and + * synth_event_add_val() can't be intermixed for a given event trace - + * one or the other but not both can be used at the same time. + * + * Note also that synth_event_trace_end() must be called after all + * values have been added for each event trace, regardless of whether + * adding all field values succeeded or not. + * + * Return: 0 on success, err otherwise. + */ +int synth_event_add_next_val(u64 val, + struct synth_event_trace_state *trace_state) +{ + return __synth_event_add_val(NULL, val, trace_state); +} +EXPORT_SYMBOL_GPL(synth_event_add_next_val); + +/** + * synth_event_add_val - Add a named field's value to an open synth trace + * @field_name: The name of the synthetic event field value to set + * @val: The value to set the next field to + * @trace_state: A pointer to object tracking the piecewise trace state + * + * Set the value of the named field in an event that's been opened by + * synth_event_trace_start(). + * + * The val param should be the value cast to u64. If the value points + * to a string, the val param should be a char * cast to u64. + * + * This function looks up the field name, and if found, sets the field + * to the specified value. This lookup makes this function more + * expensive than synth_event_add_next_val(), so use that or the + * none-piecewise synth_event_trace() instead if efficiency is more + * important. + * + * Note however that synth_event_add_next_val() and + * synth_event_add_val() can't be intermixed for a given event trace - + * one or the other but not both can be used at the same time. + * + * Note also that synth_event_trace_end() must be called after all + * values have been added for each event trace, regardless of whether + * adding all field values succeeded or not. + * + * Return: 0 on success, err otherwise. + */ +int synth_event_add_val(const char *field_name, u64 val, + struct synth_event_trace_state *trace_state) +{ + return __synth_event_add_val(field_name, val, trace_state); +} +EXPORT_SYMBOL_GPL(synth_event_add_val); + +/** + * synth_event_trace_end - End piecewise synthetic event trace + * @trace_state: A pointer to object tracking the piecewise trace state + * + * End the trace of a synthetic event opened by + * synth_event_trace__start(). + * + * This function 'closes' an event trace, which basically means that + * it commits the reserved event and cleans up other loose ends. + * + * A pointer to a trace_state object is passed in, which will keep + * track of the current event trace state opened with + * synth_event_trace_start(). + * + * Note that this function must be called after all values have been + * added for each event trace, regardless of whether adding all field + * values succeeded or not. + * + * Return: 0 on success, err otherwise. + */ +int synth_event_trace_end(struct synth_event_trace_state *trace_state) +{ + if (!trace_state) + return -EINVAL; + + trace_event_buffer_commit(&trace_state->fbuffer); + + ring_buffer_nest_end(trace_state->buffer); + + return 0; +} +EXPORT_SYMBOL_GPL(synth_event_trace_end); + +static int create_synth_event(int argc, const char **argv) { const char *name = argv[0]; int len; @@ -1766,11 +2613,13 @@ static struct hist_field *find_var(struct hist_trigger_data *hist_data, struct event_trigger_data *test; struct hist_field *hist_field; + lockdep_assert_held(&event_mutex); + hist_field = find_var_field(hist_data, var_name); if (hist_field) return hist_field; - list_for_each_entry_rcu(test, &file->triggers, list) { + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { test_data = test->private_data; hist_field = find_var_field(test_data, var_name); @@ -1820,7 +2669,9 @@ static struct hist_field *find_file_var(struct trace_event_file *file, struct event_trigger_data *test; struct hist_field *hist_field; - list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { test_data = test->private_data; hist_field = find_var_field(test_data, var_name); @@ -2030,12 +2881,6 @@ static int parse_map_size(char *str) unsigned long size, map_bits; int ret; - strsep(&str, "="); - if (!str) { - ret = -EINVAL; - goto out; - } - ret = kstrtoul(str, 0, &size); if (ret) goto out; @@ -2095,25 +2940,25 @@ static int parse_action(char *str, struct hist_trigger_attrs *attrs) static int parse_assignment(struct trace_array *tr, char *str, struct hist_trigger_attrs *attrs) { - int ret = 0; + int len, ret = 0; - if ((str_has_prefix(str, "key=")) || - (str_has_prefix(str, "keys="))) { - attrs->keys_str = kstrdup(str, GFP_KERNEL); + if ((len = str_has_prefix(str, "key=")) || + (len = str_has_prefix(str, "keys="))) { + attrs->keys_str = kstrdup(str + len, GFP_KERNEL); if (!attrs->keys_str) { ret = -ENOMEM; goto out; } - } else if ((str_has_prefix(str, "val=")) || - (str_has_prefix(str, "vals=")) || - (str_has_prefix(str, "values="))) { - attrs->vals_str = kstrdup(str, GFP_KERNEL); + } else if ((len = str_has_prefix(str, "val=")) || + (len = str_has_prefix(str, "vals=")) || + (len = str_has_prefix(str, "values="))) { + attrs->vals_str = kstrdup(str + len, GFP_KERNEL); if (!attrs->vals_str) { ret = -ENOMEM; goto out; } - } else if (str_has_prefix(str, "sort=")) { - attrs->sort_key_str = kstrdup(str, GFP_KERNEL); + } else if ((len = str_has_prefix(str, "sort="))) { + attrs->sort_key_str = kstrdup(str + len, GFP_KERNEL); if (!attrs->sort_key_str) { ret = -ENOMEM; goto out; @@ -2124,12 +2969,8 @@ static int parse_assignment(struct trace_array *tr, ret = -ENOMEM; goto out; } - } else if (str_has_prefix(str, "clock=")) { - strsep(&str, "="); - if (!str) { - ret = -EINVAL; - goto out; - } + } else if ((len = str_has_prefix(str, "clock="))) { + str += len; str = strstrip(str); attrs->clock = kstrdup(str, GFP_KERNEL); @@ -2137,8 +2978,8 @@ static int parse_assignment(struct trace_array *tr, ret = -ENOMEM; goto out; } - } else if (str_has_prefix(str, "size=")) { - int map_bits = parse_map_size(str); + } else if ((len = str_has_prefix(str, "size="))) { + int map_bits = parse_map_size(str + len); if (map_bits < 0) { ret = map_bits; @@ -2178,8 +3019,15 @@ parse_hist_trigger_attrs(struct trace_array *tr, char *trigger_str) while (trigger_str) { char *str = strsep(&trigger_str, ":"); + char *rhs; - if (strchr(str, '=')) { + rhs = strchr(str, '='); + if (rhs) { + if (!strlen(++rhs)) { + ret = -EINVAL; + hist_err(tr, HIST_ERR_EMPTY_ASSIGNMENT, errpos(str)); + goto free; + } ret = parse_assignment(tr, str, attrs); if (ret) goto free; @@ -2423,8 +3271,16 @@ static int contains_operator(char *str) return field_op; } +static void get_hist_field(struct hist_field *hist_field) +{ + hist_field->ref++; +} + static void __destroy_hist_field(struct hist_field *hist_field) { + if (--hist_field->ref > 1) + return; + kfree(hist_field->var.name); kfree(hist_field->name); kfree(hist_field->type); @@ -2466,6 +3322,8 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, if (!hist_field) return NULL; + hist_field->ref = 1; + hist_field->hist_data = hist_data; if (flags & HIST_FIELD_FL_EXPR || flags & HIST_FIELD_FL_ALIAS) @@ -2640,6 +3498,22 @@ static int init_var_ref(struct hist_field *ref_field, goto out; } +static int find_var_ref_idx(struct hist_trigger_data *hist_data, + struct hist_field *var_field) +{ + struct hist_field *ref_field; + int i; + + for (i = 0; i < hist_data->n_var_refs; i++) { + ref_field = hist_data->var_refs[i]; + if (ref_field->var.idx == var_field->var.idx && + ref_field->var.hist_data == var_field->hist_data) + return i; + } + + return -ENOENT; +} + /** * create_var_ref - Create a variable reference and attach it to trigger * @hist_data: The trigger that will be referencing the variable @@ -2661,6 +3535,17 @@ static struct hist_field *create_var_ref(struct hist_trigger_data *hist_data, { unsigned long flags = HIST_FIELD_FL_VAR_REF; struct hist_field *ref_field; + int i; + + /* Check if the variable already exists */ + for (i = 0; i < hist_data->n_var_refs; i++) { + ref_field = hist_data->var_refs[i]; + if (ref_field->var.idx == var_field->var.idx && + ref_field->var.hist_data == var_field->hist_data) { + get_hist_field(ref_field); + return ref_field; + } + } ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL); if (ref_field) { @@ -3115,7 +4000,9 @@ static char *find_trigger_filter(struct hist_trigger_data *hist_data, { struct event_trigger_data *test; - list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { if (test->private_data == hist_data) return test->filter_str; @@ -3166,9 +4053,11 @@ find_compatible_hist(struct hist_trigger_data *target_hist_data, struct event_trigger_data *test; unsigned int n_keys; + lockdep_assert_held(&event_mutex); + n_keys = target_hist_data->n_fields - target_hist_data->n_vals; - list_for_each_entry_rcu(test, &file->triggers, list) { + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { hist_data = test->private_data; @@ -4110,8 +4999,11 @@ static int check_synth_field(struct synth_event *event, field = event->fields[field_pos]; - if (strcmp(field->type, hist_field->type) != 0) - return -EINVAL; + if (strcmp(field->type, hist_field->type) != 0) { + if (field->size != hist_field->size || + field->is_signed != hist_field->is_signed) + return -EINVAL; + } return 0; } @@ -4198,11 +5090,11 @@ static int trace_action_create(struct hist_trigger_data *hist_data, struct trace_array *tr = hist_data->event_file->tr; char *event_name, *param, *system = NULL; struct hist_field *hist_field, *var_ref; - unsigned int i, var_ref_idx; + unsigned int i; unsigned int field_pos = 0; struct synth_event *event; char *synth_event_name; - int ret = 0; + int var_ref_idx, ret = 0; lockdep_assert_held(&event_mutex); @@ -4219,8 +5111,6 @@ static int trace_action_create(struct hist_trigger_data *hist_data, event->ref++; - var_ref_idx = hist_data->n_var_refs; - for (i = 0; i < data->n_params; i++) { char *p; @@ -4269,6 +5159,14 @@ static int trace_action_create(struct hist_trigger_data *hist_data, goto err; } + var_ref_idx = find_var_ref_idx(hist_data, var_ref); + if (WARN_ON(var_ref_idx < 0)) { + ret = var_ref_idx; + goto err; + } + + data->var_ref_idx[i] = var_ref_idx; + field_pos++; kfree(p); continue; @@ -4287,7 +5185,6 @@ static int trace_action_create(struct hist_trigger_data *hist_data, } data->synth_event = event; - data->var_ref_idx = var_ref_idx; out: return ret; err: @@ -4506,10 +5403,6 @@ static int create_val_fields(struct hist_trigger_data *hist_data, if (!fields_str) goto out; - strsep(&fields_str, "="); - if (!fields_str) - goto out; - for (i = 0, j = 1; i < TRACING_MAP_VALS_MAX && j < TRACING_MAP_VALS_MAX; i++) { field_str = strsep(&fields_str, ","); @@ -4604,10 +5497,6 @@ static int create_key_fields(struct hist_trigger_data *hist_data, if (!fields_str) goto out; - strsep(&fields_str, "="); - if (!fields_str) - goto out; - for (i = n_vals; i < n_vals + TRACING_MAP_KEYS_MAX; i++) { field_str = strsep(&fields_str, ","); if (!field_str) @@ -4739,7 +5628,7 @@ static int create_hist_fields(struct hist_trigger_data *hist_data, return ret; } -static int is_descending(const char *str) +static int is_descending(struct trace_array *tr, const char *str) { if (!str) return 0; @@ -4750,11 +5639,14 @@ static int is_descending(const char *str) if (strcmp(str, "ascending") == 0) return 0; + hist_err(tr, HIST_ERR_INVALID_SORT_MODIFIER, errpos((char *)str)); + return -EINVAL; } static int create_sort_keys(struct hist_trigger_data *hist_data) { + struct trace_array *tr = hist_data->event_file->tr; char *fields_str = hist_data->attrs->sort_key_str; struct tracing_map_sort_key *sort_key; int descending, ret = 0; @@ -4765,12 +5657,6 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) if (!fields_str) goto out; - strsep(&fields_str, "="); - if (!fields_str) { - ret = -EINVAL; - goto out; - } - for (i = 0; i < TRACING_MAP_SORT_KEYS_MAX; i++) { struct hist_field *hist_field; char *field_str, *field_name; @@ -4779,25 +5665,30 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) sort_key = &hist_data->sort_keys[i]; field_str = strsep(&fields_str, ","); - if (!field_str) { - if (i == 0) - ret = -EINVAL; + if (!field_str) + break; + + if (!*field_str) { + ret = -EINVAL; + hist_err(tr, HIST_ERR_EMPTY_SORT_FIELD, errpos("sort=")); break; } if ((i == TRACING_MAP_SORT_KEYS_MAX - 1) && fields_str) { + hist_err(tr, HIST_ERR_TOO_MANY_SORT_FIELDS, errpos("sort=")); ret = -EINVAL; break; } field_name = strsep(&field_str, "."); - if (!field_name) { + if (!field_name || !*field_name) { ret = -EINVAL; + hist_err(tr, HIST_ERR_EMPTY_SORT_FIELD, errpos("sort=")); break; } if (strcmp(field_name, "hitcount") == 0) { - descending = is_descending(field_str); + descending = is_descending(tr, field_str); if (descending < 0) { ret = descending; break; @@ -4819,7 +5710,7 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) if (strcmp(field_name, test_name) == 0) { sort_key->field_idx = idx; - descending = is_descending(field_str); + descending = is_descending(tr, field_str); if (descending < 0) { ret = descending; goto out; @@ -4830,6 +5721,7 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) } if (j == hist_data->n_fields) { ret = -EINVAL; + hist_err(tr, HIST_ERR_INVALID_SORT_FIELD, errpos(field_name)); break; } } @@ -5528,7 +6420,7 @@ static int hist_show(struct seq_file *m, void *v) goto out_unlock; } - list_for_each_entry_rcu(data, &event_file->triggers, list) { + list_for_each_entry(data, &event_file->triggers, list) { if (data->cmd_ops->trigger_type == ETT_EVENT_HIST) hist_trigger_show(m, data, n++); } @@ -5921,7 +6813,9 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, if (hist_data->attrs->name && !named_data) goto new; - list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { if (!hist_trigger_match(data, test, named_data, false)) continue; @@ -6005,10 +6899,12 @@ static bool have_hist_trigger_match(struct event_trigger_data *data, struct event_trigger_data *test, *named_data = NULL; bool match = false; + lockdep_assert_held(&event_mutex); + if (hist_data->attrs->name) named_data = find_named_trigger(hist_data->attrs->name); - list_for_each_entry_rcu(test, &file->triggers, list) { + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { if (hist_trigger_match(data, test, named_data, false)) { match = true; @@ -6026,10 +6922,12 @@ static bool hist_trigger_check_refs(struct event_trigger_data *data, struct hist_trigger_data *hist_data = data->private_data; struct event_trigger_data *test, *named_data = NULL; + lockdep_assert_held(&event_mutex); + if (hist_data->attrs->name) named_data = find_named_trigger(hist_data->attrs->name); - list_for_each_entry_rcu(test, &file->triggers, list) { + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { if (!hist_trigger_match(data, test, named_data, false)) continue; @@ -6051,10 +6949,12 @@ static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *test, *named_data = NULL; bool unregistered = false; + lockdep_assert_held(&event_mutex); + if (hist_data->attrs->name) named_data = find_named_trigger(hist_data->attrs->name); - list_for_each_entry_rcu(test, &file->triggers, list) { + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { if (!hist_trigger_match(data, test, named_data, false)) continue; @@ -6080,7 +6980,9 @@ static bool hist_file_check_refs(struct trace_event_file *file) struct hist_trigger_data *hist_data; struct event_trigger_data *test; - list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { hist_data = test->private_data; if (check_var_refs(hist_data)) @@ -6323,7 +7225,8 @@ hist_enable_trigger(struct event_trigger_data *data, void *rec, struct enable_trigger_data *enable_data = data->private_data; struct event_trigger_data *test; - list_for_each_entry_rcu(test, &enable_data->file->triggers, list) { + list_for_each_entry_rcu(test, &enable_data->file->triggers, list, + lockdep_is_held(&event_mutex)) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { if (enable_data->enable) test->paused = false; diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 2cd53ca21b51..dd34a1b46a86 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -116,9 +116,10 @@ static void *trigger_next(struct seq_file *m, void *t, loff_t *pos) { struct trace_event_file *event_file = event_file_data(m->private); - if (t == SHOW_AVAILABLE_TRIGGERS) + if (t == SHOW_AVAILABLE_TRIGGERS) { + (*pos)++; return NULL; - + } return seq_list_next(t, &event_file->triggers, pos); } @@ -213,7 +214,7 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file) return ret; } -static int trigger_process_regex(struct trace_event_file *file, char *buff) +int trigger_process_regex(struct trace_event_file *file, char *buff) { char *command, *next = buff; struct event_command *p; @@ -501,7 +502,9 @@ void update_cond_flag(struct trace_event_file *file) struct event_trigger_data *data; bool set_cond = false; - list_for_each_entry_rcu(data, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(data, &file->triggers, list) { if (data->filter || event_command_post_trigger(data->cmd_ops) || event_command_needs_rec(data->cmd_ops)) { set_cond = true; @@ -536,7 +539,9 @@ static int register_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *test; int ret = 0; - list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == data->cmd_ops->trigger_type) { ret = -EEXIST; goto out; @@ -581,7 +586,9 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data; bool unregistered = false; - list_for_each_entry_rcu(data, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(data, &file->triggers, list) { if (data->cmd_ops->trigger_type == test->cmd_ops->trigger_type) { unregistered = true; list_del_rcu(&data->list); @@ -1497,7 +1504,9 @@ int event_enable_register_trigger(char *glob, struct event_trigger_data *test; int ret = 0; - list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { test_enable_data = test->private_data; if (test_enable_data && (test->cmd_ops->trigger_type == @@ -1537,7 +1546,9 @@ void event_enable_unregister_trigger(char *glob, struct event_trigger_data *data; bool unregistered = false; - list_for_each_entry_rcu(data, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(data, &file->triggers, list) { enable_data = data->private_data; if (enable_data && (data->cmd_ops->trigger_type == diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 2e6d2e9741cc..77ce5a3b6773 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -29,10 +29,8 @@ static int ftrace_event_register(struct trace_event_call *call, * function and thus become accesible via perf. */ #undef FTRACE_ENTRY_REG -#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \ - filter, regfn) \ - FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ - filter) +#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, regfn) \ + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) /* not needed for this file */ #undef __field_struct @@ -41,6 +39,9 @@ static int ftrace_event_register(struct trace_event_call *call, #undef __field #define __field(type, item) type item; +#undef __field_fn +#define __field_fn(type, item) type item; + #undef __field_desc #define __field_desc(type, container, item) type item; @@ -60,7 +61,7 @@ static int ftrace_event_register(struct trace_event_call *call, #define F_printk(fmt, args...) fmt, args #undef FTRACE_ENTRY -#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ +#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ struct ____ftrace_##name { \ tstruct \ }; \ @@ -73,76 +74,46 @@ static void __always_unused ____ftrace_check_##name(void) \ } #undef FTRACE_ENTRY_DUP -#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print, filter) \ - FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ - filter) +#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \ + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) #include "trace_entries.h" +#undef __field_ext +#define __field_ext(_type, _item, _filter_type) { \ + .type = #_type, .name = #_item, \ + .size = sizeof(_type), .align = __alignof__(_type), \ + is_signed_type(_type), .filter_type = _filter_type }, + #undef __field -#define __field(type, item) \ - ret = trace_define_field(event_call, #type, #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item), \ - is_signed_type(type), filter_type); \ - if (ret) \ - return ret; +#define __field(_type, _item) __field_ext(_type, _item, FILTER_OTHER) + +#undef __field_fn +#define __field_fn(_type, _item) __field_ext(_type, _item, FILTER_TRACE_FN) #undef __field_desc -#define __field_desc(type, container, item) \ - ret = trace_define_field(event_call, #type, #item, \ - offsetof(typeof(field), \ - container.item), \ - sizeof(field.container.item), \ - is_signed_type(type), filter_type); \ - if (ret) \ - return ret; +#define __field_desc(_type, _container, _item) __field_ext(_type, _item, FILTER_OTHER) #undef __array -#define __array(type, item, len) \ - do { \ - char *type_str = #type"["__stringify(len)"]"; \ - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - ret = trace_define_field(event_call, type_str, #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item), \ - is_signed_type(type), filter_type); \ - if (ret) \ - return ret; \ - } while (0); +#define __array(_type, _item, _len) { \ + .type = #_type"["__stringify(_len)"]", .name = #_item, \ + .size = sizeof(_type[_len]), .align = __alignof__(_type), \ + is_signed_type(_type), .filter_type = FILTER_OTHER }, #undef __array_desc -#define __array_desc(type, container, item, len) \ - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - ret = trace_define_field(event_call, #type "[" #len "]", #item, \ - offsetof(typeof(field), \ - container.item), \ - sizeof(field.container.item), \ - is_signed_type(type), filter_type); \ - if (ret) \ - return ret; +#define __array_desc(_type, _container, _item, _len) __array(_type, _item, _len) #undef __dynamic_array -#define __dynamic_array(type, item) \ - ret = trace_define_field(event_call, #type "[]", #item, \ - offsetof(typeof(field), item), \ - 0, is_signed_type(type), filter_type);\ - if (ret) \ - return ret; +#define __dynamic_array(_type, _item) { \ + .type = #_type "[]", .name = #_item, \ + .size = 0, .align = __alignof__(_type), \ + is_signed_type(_type), .filter_type = FILTER_OTHER }, #undef FTRACE_ENTRY -#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ -static int __init \ -ftrace_define_fields_##name(struct trace_event_call *event_call) \ -{ \ - struct struct_name field; \ - int ret; \ - int filter_type = filter; \ - \ - tstruct; \ - \ - return ret; \ -} +#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ +static struct trace_event_fields ftrace_event_fields_##name[] = { \ + tstruct \ + {} }; #include "trace_entries.h" @@ -152,6 +123,9 @@ ftrace_define_fields_##name(struct trace_event_call *event_call) \ #undef __field #define __field(type, item) +#undef __field_fn +#define __field_fn(type, item) + #undef __field_desc #define __field_desc(type, container, item) @@ -168,12 +142,10 @@ ftrace_define_fields_##name(struct trace_event_call *event_call) \ #define F_printk(fmt, args...) __stringify(fmt) ", " __stringify(args) #undef FTRACE_ENTRY_REG -#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\ - regfn) \ - \ +#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, regfn) \ static struct trace_event_class __refdata event_class_ftrace_##call = { \ .system = __stringify(TRACE_SYSTEM), \ - .define_fields = ftrace_define_fields_##call, \ + .fields_array = ftrace_event_fields_##call, \ .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ .reg = regfn, \ }; \ @@ -191,9 +163,9 @@ static struct trace_event_call __used \ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; #undef FTRACE_ENTRY -#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print, filter) \ +#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \ FTRACE_ENTRY_REG(call, struct_name, etype, \ - PARAMS(tstruct), PARAMS(print), filter, NULL) + PARAMS(tstruct), PARAMS(print), NULL) bool ftrace_event_is_function(struct trace_event_call *call) { diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index b611cd36e22d..8a4c8d5c2c98 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -101,7 +101,7 @@ static int function_trace_init(struct trace_array *tr) ftrace_init_array_ops(tr, func); - tr->trace_buffer.cpu = get_cpu(); + tr->array_buffer.cpu = get_cpu(); put_cpu(); tracing_start_cmdline_record(); @@ -118,7 +118,7 @@ static void function_trace_reset(struct trace_array *tr) static void function_trace_start(struct trace_array *tr) { - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); } static void @@ -143,7 +143,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, goto out; cpu = smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); if (!atomic_read(&data->disabled)) { local_save_flags(flags); trace_function(tr, ip, parent_ip, flags, pc); @@ -192,7 +192,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, */ local_irq_save(flags); cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 78af97163147..7d71546ba00a 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -101,7 +101,7 @@ int __trace_graph_entry(struct trace_array *tr, { struct trace_event_call *call = &event_funcgraph_entry; struct ring_buffer_event *event; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ftrace_graph_ent_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, @@ -171,7 +171,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) local_irq_save(flags); cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); @@ -221,7 +221,7 @@ void __trace_graph_return(struct trace_array *tr, { struct trace_event_call *call = &event_funcgraph_exit; struct ring_buffer_event *event; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ftrace_graph_ret_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, @@ -252,7 +252,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace) local_irq_save(flags); cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); @@ -444,9 +444,9 @@ get_return_for_leaf(struct trace_iterator *iter, * We need to consume the current entry to see * the next one. */ - ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, + ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, NULL, NULL); - event = ring_buffer_peek(iter->trace_buffer->buffer, iter->cpu, + event = ring_buffer_peek(iter->array_buffer->buffer, iter->cpu, NULL, NULL); } @@ -503,7 +503,7 @@ print_graph_rel_time(struct trace_iterator *iter, struct trace_seq *s) { unsigned long long usecs; - usecs = iter->ts - iter->trace_buffer->time_start; + usecs = iter->ts - iter->array_buffer->time_start; do_div(usecs, NSEC_PER_USEC); trace_seq_printf(s, "%9llu us | ", usecs); diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 6638d63f0921..a48808c43249 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -104,7 +104,7 @@ static void trace_hwlat_sample(struct hwlat_sample *sample) { struct trace_array *tr = hwlat_trace; struct trace_event_call *call = &event_hwlat; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct hwlat_entry *entry; unsigned long flags; @@ -556,7 +556,7 @@ static int init_tracefs(void) return 0; err: - tracefs_remove_recursive(top_dir); + tracefs_remove(top_dir); return -ENOMEM; } diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index a745b0cee5d3..10bbb0f381d5 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -122,7 +122,7 @@ static int func_prolog_dec(struct trace_array *tr, if (!irqs_disabled_flags(*flags) && !preempt_count()) return 0; - *data = per_cpu_ptr(tr->trace_buffer.data, cpu); + *data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&(*data)->disabled); if (likely(disabled == 1)) @@ -167,7 +167,7 @@ static int irqsoff_display_graph(struct trace_array *tr, int set) per_cpu(tracing_cpu, cpu) = 0; tr->max_latency = 0; - tracing_reset_online_cpus(&irqsoff_trace->trace_buffer); + tracing_reset_online_cpus(&irqsoff_trace->array_buffer); return start_irqsoff_tracer(irqsoff_trace, set); } @@ -382,7 +382,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) if (per_cpu(tracing_cpu, cpu)) return; - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); if (unlikely(!data) || atomic_read(&data->disabled)) return; @@ -420,7 +420,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) if (!tracer_enabled || !tracing_is_enabled()) return; - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); if (unlikely(!data) || !data->critical_start || atomic_read(&data->disabled)) diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index cca65044c14c..9da76104f7a2 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -43,7 +43,7 @@ static void ftrace_dump_buf(int skip_entries, long cpu_file) if (cpu_file == RING_BUFFER_ALL_CPUS) { for_each_tracing_cpu(cpu) { iter.buffer_iter[cpu] = - ring_buffer_read_prepare(iter.trace_buffer->buffer, + ring_buffer_read_prepare(iter.array_buffer->buffer, cpu, GFP_ATOMIC); ring_buffer_read_start(iter.buffer_iter[cpu]); tracing_iter_reset(&iter, cpu); @@ -51,7 +51,7 @@ static void ftrace_dump_buf(int skip_entries, long cpu_file) } else { iter.cpu_file = cpu_file; iter.buffer_iter[cpu_file] = - ring_buffer_read_prepare(iter.trace_buffer->buffer, + ring_buffer_read_prepare(iter.array_buffer->buffer, cpu_file, GFP_ATOMIC); ring_buffer_read_start(iter.buffer_iter[cpu_file]); tracing_iter_reset(&iter, cpu_file); @@ -124,7 +124,7 @@ static int kdb_ftdump(int argc, const char **argv) iter.buffer_iter = buffer_iter; for_each_tracing_cpu(cpu) { - atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); } /* A negative skip_entries means skip all but the last entries */ @@ -139,7 +139,7 @@ static int kdb_ftdump(int argc, const char **argv) ftrace_dump_buf(skip_entries, cpu_file); for_each_tracing_cpu(cpu) { - atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); } kdb_trap_printk--; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 7f890262c8a3..d8264ebb9581 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -22,7 +22,6 @@ #define KPROBE_EVENT_SYSTEM "kprobes" #define KRETPROBE_MAXACTIVE_MAX 4096 -#define MAX_KPROBE_CMDLINE_SIZE 1024 /* Kprobe early definition from command line */ static char kprobe_boot_events_buf[COMMAND_LINE_SIZE] __initdata; @@ -290,7 +289,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group, INIT_HLIST_NODE(&tk->rp.kp.hlist); INIT_LIST_HEAD(&tk->rp.kp.list); - ret = trace_probe_init(&tk->tp, event, group); + ret = trace_probe_init(&tk->tp, event, group, false); if (ret < 0) goto error; @@ -902,6 +901,167 @@ static int create_or_delete_trace_kprobe(int argc, char **argv) return ret == -ECANCELED ? -EINVAL : ret; } +static int trace_kprobe_run_command(struct dynevent_cmd *cmd) +{ + return trace_run_command(cmd->seq.buffer, create_or_delete_trace_kprobe); +} + +/** + * kprobe_event_cmd_init - Initialize a kprobe event command object + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @buf: A pointer to the buffer used to build the command + * @maxlen: The length of the buffer passed in @buf + * + * Initialize a synthetic event command object. Use this before + * calling any of the other kprobe_event functions. + */ +void kprobe_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen) +{ + dynevent_cmd_init(cmd, buf, maxlen, DYNEVENT_TYPE_KPROBE, + trace_kprobe_run_command); +} +EXPORT_SYMBOL_GPL(kprobe_event_cmd_init); + +/** + * __kprobe_event_gen_cmd_start - Generate a kprobe event command from arg list + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @name: The name of the kprobe event + * @loc: The location of the kprobe event + * @kretprobe: Is this a return probe? + * @args: Variable number of arg (pairs), one pair for each field + * + * NOTE: Users normally won't want to call this function directly, but + * rather use the kprobe_event_gen_cmd_start() wrapper, which automatically + * adds a NULL to the end of the arg list. If this function is used + * directly, make sure the last arg in the variable arg list is NULL. + * + * Generate a kprobe event command to be executed by + * kprobe_event_gen_cmd_end(). This function can be used to generate the + * complete command or only the first part of it; in the latter case, + * kprobe_event_add_fields() can be used to add more fields following this. + * + * Return: 0 if successful, error otherwise. + */ +int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe, + const char *name, const char *loc, ...) +{ + char buf[MAX_EVENT_NAME_LEN]; + struct dynevent_arg arg; + va_list args; + int ret; + + if (cmd->type != DYNEVENT_TYPE_KPROBE) + return -EINVAL; + + if (kretprobe) + snprintf(buf, MAX_EVENT_NAME_LEN, "r:kprobes/%s", name); + else + snprintf(buf, MAX_EVENT_NAME_LEN, "p:kprobes/%s", name); + + ret = dynevent_str_add(cmd, buf); + if (ret) + return ret; + + dynevent_arg_init(&arg, 0); + arg.str = loc; + ret = dynevent_arg_add(cmd, &arg, NULL); + if (ret) + return ret; + + va_start(args, loc); + for (;;) { + const char *field; + + field = va_arg(args, const char *); + if (!field) + break; + + if (++cmd->n_fields > MAX_TRACE_ARGS) { + ret = -EINVAL; + break; + } + + arg.str = field; + ret = dynevent_arg_add(cmd, &arg, NULL); + if (ret) + break; + } + va_end(args); + + return ret; +} +EXPORT_SYMBOL_GPL(__kprobe_event_gen_cmd_start); + +/** + * __kprobe_event_add_fields - Add probe fields to a kprobe command from arg list + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @args: Variable number of arg (pairs), one pair for each field + * + * NOTE: Users normally won't want to call this function directly, but + * rather use the kprobe_event_add_fields() wrapper, which + * automatically adds a NULL to the end of the arg list. If this + * function is used directly, make sure the last arg in the variable + * arg list is NULL. + * + * Add probe fields to an existing kprobe command using a variable + * list of args. Fields are added in the same order they're listed. + * + * Return: 0 if successful, error otherwise. + */ +int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...) +{ + struct dynevent_arg arg; + va_list args; + int ret; + + if (cmd->type != DYNEVENT_TYPE_KPROBE) + return -EINVAL; + + dynevent_arg_init(&arg, 0); + + va_start(args, cmd); + for (;;) { + const char *field; + + field = va_arg(args, const char *); + if (!field) + break; + + if (++cmd->n_fields > MAX_TRACE_ARGS) { + ret = -EINVAL; + break; + } + + arg.str = field; + ret = dynevent_arg_add(cmd, &arg, NULL); + if (ret) + break; + } + va_end(args); + + return ret; +} +EXPORT_SYMBOL_GPL(__kprobe_event_add_fields); + +/** + * kprobe_event_delete - Delete a kprobe event + * @name: The name of the kprobe event to delete + * + * Delete a kprobe event with the give @name from kernel code rather + * than directly from the command line. + * + * Return: 0 if successful, error otherwise. + */ +int kprobe_event_delete(const char *name) +{ + char buf[MAX_EVENT_NAME_LEN]; + + snprintf(buf, MAX_EVENT_NAME_LEN, "-:%s", name); + + return trace_run_command(buf, create_or_delete_trace_kprobe); +} +EXPORT_SYMBOL_GPL(kprobe_event_delete); + static int trace_kprobe_release(struct dyn_event *ev) { struct trace_kprobe *tk = to_trace_kprobe(ev); @@ -1175,35 +1335,35 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, struct trace_event_file *trace_file) { struct kprobe_trace_entry_head *entry; - struct ring_buffer_event *event; - struct ring_buffer *buffer; - int size, dsize, pc; - unsigned long irq_flags; struct trace_event_call *call = trace_probe_event_call(&tk->tp); + struct trace_event_buffer fbuffer; + int dsize; WARN_ON(call != trace_file->event_call); if (trace_trigger_soft_disabled(trace_file)) return; - local_save_flags(irq_flags); - pc = preempt_count(); + local_save_flags(fbuffer.flags); + fbuffer.pc = preempt_count(); + fbuffer.trace_file = trace_file; dsize = __get_data_size(&tk->tp, regs); - size = sizeof(*entry) + tk->tp.size + dsize; - event = trace_event_buffer_lock_reserve(&buffer, trace_file, - call->event.type, - size, irq_flags, pc); - if (!event) + fbuffer.event = + trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, + call->event.type, + sizeof(*entry) + tk->tp.size + dsize, + fbuffer.flags, fbuffer.pc); + if (!fbuffer.event) return; - entry = ring_buffer_event_data(event); + fbuffer.regs = regs; + entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); entry->ip = (unsigned long)tk->rp.kp.addr; store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); - event_trigger_unlock_commit_regs(trace_file, buffer, event, - entry, irq_flags, pc, regs); + trace_event_buffer_commit(&fbuffer); } static void @@ -1223,36 +1383,35 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct trace_event_file *trace_file) { struct kretprobe_trace_entry_head *entry; - struct ring_buffer_event *event; - struct ring_buffer *buffer; - int size, pc, dsize; - unsigned long irq_flags; + struct trace_event_buffer fbuffer; struct trace_event_call *call = trace_probe_event_call(&tk->tp); + int dsize; WARN_ON(call != trace_file->event_call); if (trace_trigger_soft_disabled(trace_file)) return; - local_save_flags(irq_flags); - pc = preempt_count(); + local_save_flags(fbuffer.flags); + fbuffer.pc = preempt_count(); + fbuffer.trace_file = trace_file; dsize = __get_data_size(&tk->tp, regs); - size = sizeof(*entry) + tk->tp.size + dsize; - - event = trace_event_buffer_lock_reserve(&buffer, trace_file, - call->event.type, - size, irq_flags, pc); - if (!event) + fbuffer.event = + trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, + call->event.type, + sizeof(*entry) + tk->tp.size + dsize, + fbuffer.flags, fbuffer.pc); + if (!fbuffer.event) return; - entry = ring_buffer_event_data(event); + fbuffer.regs = regs; + entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); entry->func = (unsigned long)tk->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); - event_trigger_unlock_commit_regs(trace_file, buffer, event, - entry, irq_flags, pc, regs); + trace_event_buffer_commit(&fbuffer); } static void @@ -1555,16 +1714,28 @@ static struct trace_event_functions kprobe_funcs = { .trace = print_kprobe_event }; +static struct trace_event_fields kretprobe_fields_array[] = { + { .type = TRACE_FUNCTION_TYPE, + .define_fields = kretprobe_event_define_fields }, + {} +}; + +static struct trace_event_fields kprobe_fields_array[] = { + { .type = TRACE_FUNCTION_TYPE, + .define_fields = kprobe_event_define_fields }, + {} +}; + static inline void init_trace_event_call(struct trace_kprobe *tk) { struct trace_event_call *call = trace_probe_event_call(&tk->tp); if (trace_kprobe_is_return(tk)) { call->event.funcs = &kretprobe_funcs; - call->class->define_fields = kretprobe_event_define_fields; + call->class->fields_array = kretprobe_fields_array; } else { call->event.funcs = &kprobe_funcs; - call->class->define_fields = kprobe_event_define_fields; + call->class->fields_array = kprobe_fields_array; } call->flags = TRACE_EVENT_FL_KPROBE; @@ -1686,11 +1857,12 @@ static __init void setup_boot_kprobe_events(void) enable_boot_kprobe_events(); } -/* Make a tracefs interface for controlling probe points */ -static __init int init_kprobe_trace(void) +/* + * Register dynevent at subsys_initcall. This allows kernel to setup kprobe + * events in fs_initcall without tracefs. + */ +static __init int init_kprobe_trace_early(void) { - struct dentry *d_tracer; - struct dentry *entry; int ret; ret = dyn_event_register(&trace_kprobe_ops); @@ -1700,6 +1872,16 @@ static __init int init_kprobe_trace(void) if (register_module_notifier(&trace_kprobe_module_nb)) return -EINVAL; + return 0; +} +subsys_initcall(init_kprobe_trace_early); + +/* Make a tracefs interface for controlling probe points */ +static __init int init_kprobe_trace(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + d_tracer = tracing_init_dentry(); if (IS_ERR(d_tracer)) return 0; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index b0388016b687..84582bf1ed5f 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -32,7 +32,7 @@ static void mmio_reset_data(struct trace_array *tr) overrun_detected = false; prev_overruns = 0; - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); } static int mmio_trace_init(struct trace_array *tr) @@ -122,7 +122,7 @@ static void mmio_close(struct trace_iterator *iter) static unsigned long count_overruns(struct trace_iterator *iter) { unsigned long cnt = atomic_xchg(&dropped_count, 0); - unsigned long over = ring_buffer_overruns(iter->trace_buffer->buffer); + unsigned long over = ring_buffer_overruns(iter->array_buffer->buffer); if (over > prev_overruns) cnt += over - prev_overruns; @@ -297,7 +297,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, struct mmiotrace_rw *rw) { struct trace_event_call *call = &event_mmiotrace_rw; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; int pc = preempt_count(); @@ -318,7 +318,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, void mmio_trace_rw(struct mmiotrace_rw *rw) { struct trace_array *tr = mmio_trace_array; - struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, smp_processor_id()); + struct trace_array_cpu *data = per_cpu_ptr(tr->array_buffer.data, smp_processor_id()); __trace_mmiotrace_rw(tr, data, rw); } @@ -327,7 +327,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, struct mmiotrace_map *map) { struct trace_event_call *call = &event_mmiotrace_map; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; int pc = preempt_count(); @@ -351,7 +351,7 @@ void mmio_trace_mapping(struct mmiotrace_map *map) struct trace_array_cpu *data; preempt_disable(); - data = per_cpu_ptr(tr->trace_buffer.data, smp_processor_id()); + data = per_cpu_ptr(tr->array_buffer.data, smp_processor_id()); __trace_mmiotrace_map(tr, data, map); preempt_enable(); } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index d9b4b7c22db4..b4909082f6a4 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -538,7 +538,7 @@ lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) struct trace_array *tr = iter->tr; unsigned long verbose = tr->trace_flags & TRACE_ITER_VERBOSE; unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS; - unsigned long long abs_ts = iter->ts - iter->trace_buffer->time_start; + unsigned long long abs_ts = iter->ts - iter->array_buffer->time_start; unsigned long long rel_ts = next_ts - iter->ts; struct trace_seq *s = &iter->seq; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 905b10af5d5c..ab8b6436d53f 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -876,7 +876,8 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, for (i = 0; i < tp->nr_args; i++) { parg = tp->args + i; if (parg->count) { - if (strcmp(parg->type->name, "string") == 0) + if ((strcmp(parg->type->name, "string") == 0) || + (strcmp(parg->type->name, "ustring") == 0)) fmt = ", __get_str(%s[%d])"; else fmt = ", REC->%s[%d]"; @@ -884,7 +885,8 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, pos += snprintf(buf + pos, LEN_OR_ZERO, fmt, parg->name, j); } else { - if (strcmp(parg->type->name, "string") == 0) + if ((strcmp(parg->type->name, "string") == 0) || + (strcmp(parg->type->name, "ustring") == 0)) fmt = ", __get_str(%s)"; else fmt = ", REC->%s"; @@ -984,15 +986,19 @@ void trace_probe_cleanup(struct trace_probe *tp) } int trace_probe_init(struct trace_probe *tp, const char *event, - const char *group) + const char *group, bool alloc_filter) { struct trace_event_call *call; + size_t size = sizeof(struct trace_probe_event); int ret = 0; if (!event || !group) return -EINVAL; - tp->event = kzalloc(sizeof(struct trace_probe_event), GFP_KERNEL); + if (alloc_filter) + size += sizeof(struct trace_uprobe_filter); + + tp->event = kzalloc(size, GFP_KERNEL); if (!tp->event) return -ENOMEM; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 4ee703728aec..a0ff9e200ef6 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -223,6 +223,12 @@ struct probe_arg { const struct fetch_type *type; /* Type of this argument */ }; +struct trace_uprobe_filter { + rwlock_t rwlock; + int nr_systemwide; + struct list_head perf_events; +}; + /* Event call and class holder */ struct trace_probe_event { unsigned int flags; /* For TP_FLAG_* */ @@ -230,6 +236,7 @@ struct trace_probe_event { struct trace_event_call call; struct list_head files; struct list_head probes; + struct trace_uprobe_filter filter[0]; }; struct trace_probe { @@ -322,7 +329,7 @@ static inline bool trace_probe_has_single_file(struct trace_probe *tp) } int trace_probe_init(struct trace_probe *tp, const char *event, - const char *group); + const char *group, bool alloc_filter); void trace_probe_cleanup(struct trace_probe *tp); int trace_probe_append(struct trace_probe *tp, struct trace_probe *to); void trace_probe_unlink(struct trace_probe *tp); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index e288168661e1..e304196d7c28 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -89,8 +89,10 @@ static void tracing_sched_unregister(void) static void tracing_start_sched_switch(int ops) { - bool sched_register = (!sched_cmdline_ref && !sched_tgid_ref); + bool sched_register; + mutex_lock(&sched_register_mutex); + sched_register = (!sched_cmdline_ref && !sched_tgid_ref); switch (ops) { case RECORD_CMDLINE: diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 617e297f46dc..97b10bb31a1f 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -82,7 +82,7 @@ func_prolog_preempt_disable(struct trace_array *tr, if (cpu != wakeup_current_cpu) goto out_enable; - *data = per_cpu_ptr(tr->trace_buffer.data, cpu); + *data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&(*data)->disabled); if (unlikely(disabled != 1)) goto out; @@ -378,7 +378,7 @@ tracing_sched_switch_trace(struct trace_array *tr, unsigned long flags, int pc) { struct trace_event_call *call = &event_context_switch; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -408,7 +408,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct trace_event_call *call = &event_wakeup; struct ring_buffer_event *event; struct ctx_switch_entry *entry; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, sizeof(*entry), flags, pc); @@ -459,7 +459,7 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, /* disable local data, not wakeup_cpu data */ cpu = raw_smp_processor_id(); - disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); + disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); if (likely(disabled != 1)) goto out; @@ -471,7 +471,7 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, goto out_unlock; /* The task we are waiting for is waking up */ - data = per_cpu_ptr(wakeup_trace->trace_buffer.data, wakeup_cpu); + data = per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); @@ -494,7 +494,7 @@ out_unlock: arch_spin_unlock(&wakeup_lock); local_irq_restore(flags); out: - atomic_dec(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); + atomic_dec(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); } static void __wakeup_reset(struct trace_array *tr) @@ -513,7 +513,7 @@ static void wakeup_reset(struct trace_array *tr) { unsigned long flags; - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); local_irq_save(flags); arch_spin_lock(&wakeup_lock); @@ -551,7 +551,7 @@ probe_wakeup(void *ignore, struct task_struct *p) return; pc = preempt_count(); - disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); + disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); if (unlikely(disabled != 1)) goto out; @@ -583,7 +583,7 @@ probe_wakeup(void *ignore, struct task_struct *p) local_save_flags(flags); - data = per_cpu_ptr(wakeup_trace->trace_buffer.data, wakeup_cpu); + data = per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); data->preempt_timestamp = ftrace_now(cpu); tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); __trace_stack(wakeup_trace, flags, 0, pc); @@ -598,7 +598,7 @@ probe_wakeup(void *ignore, struct task_struct *p) out_locked: arch_spin_unlock(&wakeup_lock); out: - atomic_dec(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); + atomic_dec(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); } static void start_wakeup_tracer(struct trace_array *tr) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 69ee8ef12cee..b5e3496cf803 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -23,7 +23,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) return 0; } -static int trace_test_buffer_cpu(struct trace_buffer *buf, int cpu) +static int trace_test_buffer_cpu(struct array_buffer *buf, int cpu) { struct ring_buffer_event *event; struct trace_entry *entry; @@ -60,7 +60,7 @@ static int trace_test_buffer_cpu(struct trace_buffer *buf, int cpu) * Test the trace buffer to see if all the elements * are still sane. */ -static int __maybe_unused trace_test_buffer(struct trace_buffer *buf, unsigned long *count) +static int __maybe_unused trace_test_buffer(struct array_buffer *buf, unsigned long *count) { unsigned long flags, cnt = 0; int cpu, ret = 0; @@ -362,7 +362,7 @@ static int trace_selftest_startup_dynamic_tracing(struct tracer *trace, msleep(100); /* we should have nothing in the buffer */ - ret = trace_test_buffer(&tr->trace_buffer, &count); + ret = trace_test_buffer(&tr->array_buffer, &count); if (ret) goto out; @@ -383,7 +383,7 @@ static int trace_selftest_startup_dynamic_tracing(struct tracer *trace, ftrace_enabled = 0; /* check the trace buffer */ - ret = trace_test_buffer(&tr->trace_buffer, &count); + ret = trace_test_buffer(&tr->array_buffer, &count); ftrace_enabled = 1; tracing_start(); @@ -682,7 +682,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ftrace_enabled = 0; /* check the trace buffer */ - ret = trace_test_buffer(&tr->trace_buffer, &count); + ret = trace_test_buffer(&tr->array_buffer, &count); ftrace_enabled = 1; trace->reset(tr); @@ -768,7 +768,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, * Simulate the init() callback but we attach a watchdog callback * to detect and recover from possible hangs */ - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); set_graph_array(tr); ret = register_ftrace_graph(&fgraph_ops); if (ret) { @@ -790,7 +790,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, tracing_stop(); /* check the trace buffer */ - ret = trace_test_buffer(&tr->trace_buffer, &count); + ret = trace_test_buffer(&tr->array_buffer, &count); /* Need to also simulate the tr->reset to remove this fgraph_ops */ tracing_stop_cmdline_record(); @@ -848,7 +848,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(&tr->trace_buffer, NULL); + ret = trace_test_buffer(&tr->array_buffer, NULL); if (!ret) ret = trace_test_buffer(&tr->max_buffer, &count); trace->reset(tr); @@ -910,7 +910,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(&tr->trace_buffer, NULL); + ret = trace_test_buffer(&tr->array_buffer, NULL); if (!ret) ret = trace_test_buffer(&tr->max_buffer, &count); trace->reset(tr); @@ -976,7 +976,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(&tr->trace_buffer, NULL); + ret = trace_test_buffer(&tr->array_buffer, NULL); if (ret) goto out; @@ -1006,7 +1006,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(&tr->trace_buffer, NULL); + ret = trace_test_buffer(&tr->array_buffer, NULL); if (ret) goto out; @@ -1136,7 +1136,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(&tr->trace_buffer, NULL); + ret = trace_test_buffer(&tr->array_buffer, NULL); if (!ret) ret = trace_test_buffer(&tr->max_buffer, &count); @@ -1177,7 +1177,7 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) /* stop the tracing. */ tracing_stop(); /* check the trace buffer */ - ret = trace_test_buffer(&tr->trace_buffer, &count); + ret = trace_test_buffer(&tr->array_buffer, &count); trace->reset(tr); tracing_start(); diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index 87de6edafd14..1d84fcc78e3e 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -30,9 +30,6 @@ /* How much buffer is left on the trace_seq? */ #define TRACE_SEQ_BUF_LEFT(s) seq_buf_buffer_left(&(s)->seq) -/* How much buffer is written? */ -#define TRACE_SEQ_BUF_USED(s) seq_buf_used(&(s)->seq) - /* * trace_seq should work with being initialized with 0s. */ diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 874f1274cf99..d1fa19773cc8 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -280,18 +280,22 @@ static int tracing_stat_init(void) d_tracing = tracing_init_dentry(); if (IS_ERR(d_tracing)) - return 0; + return -ENODEV; stat_dir = tracefs_create_dir("trace_stat", d_tracing); - if (!stat_dir) + if (!stat_dir) { pr_warn("Could not create tracefs 'trace_stat' entry\n"); + return -ENOMEM; + } return 0; } static int init_stat_file(struct stat_session *session) { - if (!stat_dir && tracing_stat_init()) - return -ENODEV; + int ret; + + if (!stat_dir && (ret = tracing_stat_init())) + return ret; session->file = tracefs_create_file(session->ts->name, 0644, stat_dir, @@ -304,7 +308,7 @@ static int init_stat_file(struct stat_session *session) int register_stat_tracer(struct tracer_stat *trace) { struct stat_session *session, *node; - int ret; + int ret = -EINVAL; if (!trace) return -EINVAL; @@ -315,17 +319,15 @@ int register_stat_tracer(struct tracer_stat *trace) /* Already registered? */ mutex_lock(&all_stat_sessions_mutex); list_for_each_entry(node, &all_stat_sessions, session_list) { - if (node->ts == trace) { - mutex_unlock(&all_stat_sessions_mutex); - return -EINVAL; - } + if (node->ts == trace) + goto out; } - mutex_unlock(&all_stat_sessions_mutex); + ret = -ENOMEM; /* Init the session */ session = kzalloc(sizeof(*session), GFP_KERNEL); if (!session) - return -ENOMEM; + goto out; session->ts = trace; INIT_LIST_HEAD(&session->session_list); @@ -334,15 +336,16 @@ int register_stat_tracer(struct tracer_stat *trace) ret = init_stat_file(session); if (ret) { destroy_session(session); - return ret; + goto out; } + ret = 0; /* Register */ - mutex_lock(&all_stat_sessions_mutex); list_add_tail(&session->session_list, &all_stat_sessions); + out: mutex_unlock(&all_stat_sessions_mutex); - return 0; + return ret; } void unregister_stat_tracer(struct tracer_stat *trace) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 16fa218556fa..d85a2f0f316b 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -203,11 +203,10 @@ print_syscall_exit(struct trace_iterator *iter, int flags, extern char *__bad_type_size(void); -#define SYSCALL_FIELD(type, field, name) \ - sizeof(type) != sizeof(trace.field) ? \ - __bad_type_size() : \ - #type, #name, offsetof(typeof(trace), field), \ - sizeof(trace.field), is_signed_type(type) +#define SYSCALL_FIELD(_type, _name) { \ + .type = #_type, .name = #_name, \ + .size = sizeof(_type), .align = __alignof__(_type), \ + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER } static int __init __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) @@ -274,42 +273,23 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call) { struct syscall_trace_enter trace; struct syscall_metadata *meta = call->data; - int ret; - int i; int offset = offsetof(typeof(trace), args); - - ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr), - FILTER_OTHER); - if (ret) - return ret; + int ret = 0; + int i; for (i = 0; i < meta->nb_args; i++) { ret = trace_define_field(call, meta->types[i], meta->args[i], offset, sizeof(unsigned long), 0, FILTER_OTHER); + if (ret) + break; offset += sizeof(unsigned long); } return ret; } -static int __init syscall_exit_define_fields(struct trace_event_call *call) -{ - struct syscall_trace_exit trace; - int ret; - - ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr), - FILTER_OTHER); - if (ret) - return ret; - - ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret), - FILTER_OTHER); - - return ret; -} - static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) { struct trace_array *tr = data; @@ -317,7 +297,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) struct syscall_trace_enter *entry; struct syscall_metadata *sys_data; struct ring_buffer_event *event; - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long irq_flags; unsigned long args[6]; int pc; @@ -345,7 +325,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) local_save_flags(irq_flags); pc = preempt_count(); - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, sys_data->enter_event->event.type, size, irq_flags, pc); if (!event) @@ -367,7 +347,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) struct syscall_trace_exit *entry; struct syscall_metadata *sys_data; struct ring_buffer_event *event; - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long irq_flags; int pc; int syscall_nr; @@ -391,7 +371,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) local_save_flags(irq_flags); pc = preempt_count(); - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, sys_data->exit_event->event.type, sizeof(*entry), irq_flags, pc); @@ -507,6 +487,13 @@ static int __init init_syscall_trace(struct trace_event_call *call) return id; } +static struct trace_event_fields __refdata syscall_enter_fields_array[] = { + SYSCALL_FIELD(int, __syscall_nr), + { .type = TRACE_FUNCTION_TYPE, + .define_fields = syscall_enter_define_fields }, + {} +}; + struct trace_event_functions enter_syscall_print_funcs = { .trace = print_syscall_enter, }; @@ -518,7 +505,7 @@ struct trace_event_functions exit_syscall_print_funcs = { struct trace_event_class __refdata event_class_syscall_enter = { .system = "syscalls", .reg = syscall_enter_register, - .define_fields = syscall_enter_define_fields, + .fields_array = syscall_enter_fields_array, .get_fields = syscall_get_enter_fields, .raw_init = init_syscall_trace, }; @@ -526,7 +513,11 @@ struct trace_event_class __refdata event_class_syscall_enter = { struct trace_event_class __refdata event_class_syscall_exit = { .system = "syscalls", .reg = syscall_exit_register, - .define_fields = syscall_exit_define_fields, + .fields_array = (struct trace_event_fields[]){ + SYSCALL_FIELD(int, __syscall_nr), + SYSCALL_FIELD(long, ret), + {} + }, .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields), .raw_init = init_syscall_trace, }; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 352073d36585..18d16f3ef980 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -34,12 +34,6 @@ struct uprobe_trace_entry_head { #define DATAOF_TRACE_ENTRY(entry, is_return) \ ((void*)(entry) + SIZEOF_TRACE_ENTRY(is_return)) -struct trace_uprobe_filter { - rwlock_t rwlock; - int nr_systemwide; - struct list_head perf_events; -}; - static int trace_uprobe_create(int argc, const char **argv); static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev); static int trace_uprobe_release(struct dyn_event *ev); @@ -60,7 +54,6 @@ static struct dyn_event_operations trace_uprobe_ops = { */ struct trace_uprobe { struct dyn_event devent; - struct trace_uprobe_filter filter; struct uprobe_consumer consumer; struct path path; struct inode *inode; @@ -351,7 +344,7 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret) if (!tu) return ERR_PTR(-ENOMEM); - ret = trace_probe_init(&tu->tp, event, group); + ret = trace_probe_init(&tu->tp, event, group, true); if (ret < 0) goto error; @@ -359,7 +352,7 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret) tu->consumer.handler = uprobe_dispatcher; if (is_ret) tu->consumer.ret_handler = uretprobe_dispatcher; - init_trace_uprobe_filter(&tu->filter); + init_trace_uprobe_filter(tu->tp.event->filter); return tu; error: @@ -938,8 +931,8 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, struct trace_event_file *trace_file) { struct uprobe_trace_entry_head *entry; + struct trace_buffer *buffer; struct ring_buffer_event *event; - struct ring_buffer *buffer; void *data; int size, esize; struct trace_event_call *call = trace_probe_event_call(&tu->tp); @@ -1067,13 +1060,14 @@ static void __probe_event_disable(struct trace_probe *tp) struct trace_probe *pos; struct trace_uprobe *tu; + tu = container_of(tp, struct trace_uprobe, tp); + WARN_ON(!uprobe_filter_is_empty(tu->tp.event->filter)); + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { tu = container_of(pos, struct trace_uprobe, tp); if (!tu->inode) continue; - WARN_ON(!uprobe_filter_is_empty(&tu->filter)); - uprobe_unregister(tu->inode, tu->offset, &tu->consumer); tu->inode = NULL; } @@ -1108,7 +1102,7 @@ static int probe_event_enable(struct trace_event_call *call, } tu = container_of(tp, struct trace_uprobe, tp); - WARN_ON(!uprobe_filter_is_empty(&tu->filter)); + WARN_ON(!uprobe_filter_is_empty(tu->tp.event->filter)); if (enabled) return 0; @@ -1205,39 +1199,39 @@ __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm) } static inline bool -uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event) +trace_uprobe_filter_event(struct trace_uprobe_filter *filter, + struct perf_event *event) { - return __uprobe_perf_filter(&tu->filter, event->hw.target->mm); + return __uprobe_perf_filter(filter, event->hw.target->mm); } -static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) +static bool trace_uprobe_filter_remove(struct trace_uprobe_filter *filter, + struct perf_event *event) { bool done; - write_lock(&tu->filter.rwlock); + write_lock(&filter->rwlock); if (event->hw.target) { list_del(&event->hw.tp_list); - done = tu->filter.nr_systemwide || + done = filter->nr_systemwide || (event->hw.target->flags & PF_EXITING) || - uprobe_filter_event(tu, event); + trace_uprobe_filter_event(filter, event); } else { - tu->filter.nr_systemwide--; - done = tu->filter.nr_systemwide; + filter->nr_systemwide--; + done = filter->nr_systemwide; } - write_unlock(&tu->filter.rwlock); + write_unlock(&filter->rwlock); - if (!done) - return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); - - return 0; + return done; } -static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) +/* This returns true if the filter always covers target mm */ +static bool trace_uprobe_filter_add(struct trace_uprobe_filter *filter, + struct perf_event *event) { bool done; - int err; - write_lock(&tu->filter.rwlock); + write_lock(&filter->rwlock); if (event->hw.target) { /* * event->parent != NULL means copy_process(), we can avoid @@ -1247,28 +1241,21 @@ static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) * attr.enable_on_exec means that exec/mmap will install the * breakpoints we need. */ - done = tu->filter.nr_systemwide || + done = filter->nr_systemwide || event->parent || event->attr.enable_on_exec || - uprobe_filter_event(tu, event); - list_add(&event->hw.tp_list, &tu->filter.perf_events); + trace_uprobe_filter_event(filter, event); + list_add(&event->hw.tp_list, &filter->perf_events); } else { - done = tu->filter.nr_systemwide; - tu->filter.nr_systemwide++; + done = filter->nr_systemwide; + filter->nr_systemwide++; } - write_unlock(&tu->filter.rwlock); + write_unlock(&filter->rwlock); - err = 0; - if (!done) { - err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); - if (err) - uprobe_perf_close(tu, event); - } - return err; + return done; } -static int uprobe_perf_multi_call(struct trace_event_call *call, - struct perf_event *event, - int (*op)(struct trace_uprobe *tu, struct perf_event *event)) +static int uprobe_perf_close(struct trace_event_call *call, + struct perf_event *event) { struct trace_probe *pos, *tp; struct trace_uprobe *tu; @@ -1278,25 +1265,59 @@ static int uprobe_perf_multi_call(struct trace_event_call *call, if (WARN_ON_ONCE(!tp)) return -ENODEV; + tu = container_of(tp, struct trace_uprobe, tp); + if (trace_uprobe_filter_remove(tu->tp.event->filter, event)) + return 0; + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { tu = container_of(pos, struct trace_uprobe, tp); - ret = op(tu, event); + ret = uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); if (ret) break; } return ret; } + +static int uprobe_perf_open(struct trace_event_call *call, + struct perf_event *event) +{ + struct trace_probe *pos, *tp; + struct trace_uprobe *tu; + int err = 0; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return -ENODEV; + + tu = container_of(tp, struct trace_uprobe, tp); + if (trace_uprobe_filter_add(tu->tp.event->filter, event)) + return 0; + + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); + if (err) { + uprobe_perf_close(call, event); + break; + } + } + + return err; +} + static bool uprobe_perf_filter(struct uprobe_consumer *uc, enum uprobe_filter_ctx ctx, struct mm_struct *mm) { + struct trace_uprobe_filter *filter; struct trace_uprobe *tu; int ret; tu = container_of(uc, struct trace_uprobe, consumer); - read_lock(&tu->filter.rwlock); - ret = __uprobe_perf_filter(&tu->filter, mm); - read_unlock(&tu->filter.rwlock); + filter = tu->tp.event->filter; + + read_lock(&filter->rwlock); + ret = __uprobe_perf_filter(filter, mm); + read_unlock(&filter->rwlock); return ret; } @@ -1419,10 +1440,10 @@ trace_uprobe_register(struct trace_event_call *event, enum trace_reg type, return 0; case TRACE_REG_PERF_OPEN: - return uprobe_perf_multi_call(event, data, uprobe_perf_open); + return uprobe_perf_open(event, data); case TRACE_REG_PERF_CLOSE: - return uprobe_perf_multi_call(event, data, uprobe_perf_close); + return uprobe_perf_close(event, data); #endif default: @@ -1507,12 +1528,17 @@ static struct trace_event_functions uprobe_funcs = { .trace = print_uprobe_event }; +static struct trace_event_fields uprobe_fields_array[] = { + { .type = TRACE_FUNCTION_TYPE, + .define_fields = uprobe_event_define_fields }, + {} +}; + static inline void init_trace_event_call(struct trace_uprobe *tu) { struct trace_event_call *call = trace_probe_event_call(&tu->tp); - call->event.funcs = &uprobe_funcs; - call->class->define_fields = uprobe_event_define_fields; + call->class->fields_array = uprobe_fields_array; call->flags = TRACE_EVENT_FL_UPROBE | TRACE_EVENT_FL_CAP_ANY; call->class->reg = trace_uprobe_register; |