Diffstat (limited to 'tools/perf/builtin-trace.c')
-rw-r--r--	tools/perf/builtin-trace.c	637
1 file changed, 498 insertions, 139 deletions
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 4f0bbffee05f..0f633f0d6be8 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1,4 +1,3 @@
-// SPDX-License-Identifier: GPL-2.0-only
 /*
  * builtin-trace.c
  *
@@ -15,6 +14,7 @@
  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
  */
 
+#include "util/record.h"
 #include <traceevent/event-parse.h>
 #include <api/fs/tracing_path.h>
 #include <bpf/bpf.h>
@@ -25,9 +25,12 @@
 #include "util/color.h"
 #include "util/config.h"
 #include "util/debug.h"
+#include "util/dso.h"
 #include "util/env.h"
 #include "util/event.h"
 #include "util/evlist.h"
+#include "util/evswitch.h"
+#include <subcmd/pager.h>
 #include <subcmd/exec-cmd.h>
 #include "util/machine.h"
 #include "util/map.h"
@@ -40,6 +43,8 @@
 #include "util/intlist.h"
 #include "util/thread_map.h"
 #include "util/stat.h"
+#include "util/tool.h"
+#include "util/util.h"
 #include "trace/beauty/beauty.h"
 #include "trace-event.h"
 #include "util/parse-events.h"
@@ -49,6 +54,7 @@
 #include "string2.h"
 #include "syscalltbl.h"
 #include "rb_resort.h"
+#include "../perf.h"
 
 #include <errno.h>
 #include <inttypes.h>
@@ -80,28 +86,34 @@ struct trace {
 	struct perf_tool	tool;
 	struct syscalltbl	*sctbl;
 	struct {
-		int		max;
 		struct syscall  *table;
 		struct bpf_map  *map;
+		struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY
+			struct bpf_map  *sys_enter,
+					*sys_exit;
+		}		prog_array;
 		struct {
-			struct perf_evsel *sys_enter,
+			struct evsel *sys_enter,
 				     *sys_exit,
 				     *augmented;
 		}		events;
+		struct bpf_program *unaugmented_prog;
 	} syscalls;
 	struct {
 		struct bpf_map *map;
 	} dump;
 	struct record_opts	opts;
-	struct perf_evlist	*evlist;
+	struct evlist		*evlist;
 	struct machine		*host;
 	struct thread		*current;
+	struct bpf_object	*bpf_obj;
 	struct cgroup		*cgroup;
 	u64			base_time;
 	FILE			*output;
 	unsigned long		nr_events;
 	unsigned long		nr_events_printed;
 	unsigned long		max_events;
+	struct evswitch		evswitch;
 	struct strlist		*ev_qualifier;
 	struct {
 		size_t		nr;
@@ -122,6 +134,7 @@ struct trace {
 	unsigned int		min_stack;
 	int			raw_augmented_syscalls_args_size;
 	bool			raw_augmented_syscalls;
+	bool			fd_path_disabled;
 	bool			sort_events;
 	bool			not_ev_qualifier;
 	bool			live;
@@ -237,7 +250,7 @@ struct syscall_tp {
 	};
 };
 
-static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
+static int perf_evsel__init_tp_uint_field(struct evsel *evsel,
 					  struct tp_field *field,
 					  const char *name)
 {
@@ -253,7 +266,7 @@ static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
 	({ struct syscall_tp *sc = evsel->priv;\
 	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
 
-static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
+static int perf_evsel__init_tp_ptr_field(struct evsel *evsel,
 					 struct tp_field *field,
 					 const char *name)
 {
@@ -269,13 +282,13 @@ static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
 	({ struct syscall_tp *sc = evsel->priv;\
 	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
 
-static void perf_evsel__delete_priv(struct perf_evsel *evsel)
+static void evsel__delete_priv(struct evsel *evsel)
 {
 	zfree(&evsel->priv);
-	perf_evsel__delete(evsel);
+	evsel__delete(evsel);
 }
 
-static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel)
+static int perf_evsel__init_syscall_tp(struct evsel *evsel)
 {
 	struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
 
@@ -292,7 +305,7 @@ out_delete:
 	return -ENOENT;
 }
 
-static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel, struct perf_evsel *tp)
+static int perf_evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
 {
 	struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
 
@@ -314,21 +327,21 @@ out_delete:
 	return -EINVAL;
 }
 
-static int perf_evsel__init_augmented_syscall_tp_args(struct perf_evsel *evsel)
+static int perf_evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
 {
 	struct syscall_tp *sc = evsel->priv;
 
 	return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
 }
 
-static int perf_evsel__init_augmented_syscall_tp_ret(struct perf_evsel *evsel)
+static int perf_evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
 {
 	struct syscall_tp *sc = evsel->priv;
 
 	return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
 }
 
-static int perf_evsel__init_raw_syscall_tp(struct perf_evsel *evsel, void *handler)
+static int perf_evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
 {
 	evsel->priv = malloc(sizeof(struct syscall_tp));
 	if (evsel->priv != NULL) {
@@ -346,9 +359,9 @@ out_delete:
 	return -ENOENT;
 }
 
-static struct perf_evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
+static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
 {
-	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
+	struct evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
 
 	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
 	if (IS_ERR(evsel))
@@ -363,7 +376,7 @@ static struct perf_evsel *perf_evsel__raw_syscall_newtp(const char *direction, v
 	return evsel;
 
 out_delete:
-	perf_evsel__delete_priv(evsel);
+	evsel__delete_priv(evsel);
 	return NULL;
}
 
@@ -688,6 +701,10 @@ struct syscall_arg_fmt {
 static struct syscall_fmt {
 	const char *name;
 	const char *alias;
+	struct {
+		const char *sys_enter,
+			   *sys_exit;
+	}	   bpf_prog_name;
 	struct syscall_arg_fmt arg[6];
 	u8	   nr_args;
 	bool	   errpid;
@@ -700,7 +717,9 @@
 	  .arg = { [0] = { .scnprintf = SCA_X86_ARCH_PRCTL_CODE, /* code */ },
 		   [1] = { .scnprintf = SCA_PTR, /* arg2 */ }, }, },
 	{ .name	    = "bind",
-	  .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ }, }, },
+	  .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
+		   [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ },
+		   [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
 	{ .name	    = "bpf",
 	  .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
 	{ .name	    = "brk",	    .hexret = true,
@@ -716,7 +735,9 @@ static struct syscall_fmt {
 	{ .name	    = "close",
 	  .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
 	{ .name	    = "connect",
-	  .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ }, }, },
+	  .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
+		   [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ },
+		   [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
 	{ .name	    = "epoll_ctl",
 	  .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
 	{ .name	    = "eventfd2",
@@ -882,6 +903,7 @@ static struct syscall_fmt {
 	  .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP,	   /* op */ },
 		   [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
 	{ .name	    = "select", .timeout = true, },
+	{ .name	    = "sendfile", .alias = "sendfile64", },
 	{ .name	    = "sendmmsg",
 	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
 	{ .name	    = "sendmsg",
@@ -960,13 +982,19 @@ static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
  * is_exit: is this "exit" or "exit_group"?
  * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
  * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc.
+ * nonexistent: Just a hole in the syscall table, syscall id not allocated
  */
 struct syscall {
 	struct tep_event    *tp_format;
 	int		    nr_args;
 	int		    args_size;
+	struct {
+		struct bpf_program *sys_enter,
+				   *sys_exit;
+	}		    bpf_prog;
 	bool		    is_exit;
 	bool		    is_open;
+	bool		    nonexistent;
 	struct tep_format_field *args;
 	const char	    *name;
 	struct syscall_fmt  *fmt;
@@ -1163,7 +1191,7 @@ static const char *thread__fd_path(struct thread *thread, int fd,
 {
 	struct thread_trace *ttrace = thread__priv(thread);
 
-	if (ttrace == NULL)
+	if (ttrace == NULL || trace->fd_path_disabled)
 		return NULL;
 
 	if (fd < 0)
@@ -1359,7 +1387,7 @@ static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long l
 
 	if (symbol_conf.kptr_restrict) {
 		pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
-			   "Check /proc/sys/kernel/kptr_restrict.\n\n"
+			   "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
			   "Kernel samples will not be resolved.\n");
 		machine->kptr_restrict_warned = true;
 		return NULL;
@@ -1368,7 +1396,7 @@ static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long l
 	return machine__resolve_kernel_addr(vmachine, addrp, modp);
 }
 
-static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
+static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
 {
 	int err = symbol__init(NULL);
 
@@ -1384,7 +1412,7 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 		goto out;
 
 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
-					    evlist->threads, trace__tool_process, false,
+					    evlist->core.threads, trace__tool_process, false,
 					    1);
 out:
 	if (err)
@@ -1471,29 +1499,22 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 	struct syscall *sc;
 	const char *name = syscalltbl__name(trace->sctbl, id);
 
-	if (name == NULL)
-		return -1;
-
-	if (id > trace->syscalls.max) {
-		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
-
-		if (nsyscalls == NULL)
-			return -1;
+	if (trace->syscalls.table == NULL) {
+		trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
+		if (trace->syscalls.table == NULL)
+			return -ENOMEM;
+	}
 
-		if (trace->syscalls.max != -1) {
-			memset(nsyscalls + trace->syscalls.max + 1, 0,
-			       (id - trace->syscalls.max) * sizeof(*sc));
-		} else {
-			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
-		}
+	sc = trace->syscalls.table + id;
+	if (sc->nonexistent)
+		return 0;
 
-		trace->syscalls.table = nsyscalls;
-		trace->syscalls.max = id;
+	if (name == NULL) {
+		sc->nonexistent = true;
+		return 0;
 	}
 
-	sc = trace->syscalls.table + id;
 	sc->name = name;
-
 	sc->fmt  = syscall_fmt__find(sc->name);
 
 	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
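The hunk above drops the grow-on-demand realloc() of the syscall table for a single calloc() sized to the syscall table's max_id, and adds a "nonexistent" flag so ids that are holes in the table are only probed once. A minimal standalone sketch of that lazy-init plus negative-caching pattern — toy names throughout, with id__name() standing in for syscalltbl__name():

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	const char *name;
	bool nonexistent;	/* negative cache: this id is a known hole */
};

static struct entry *table;

/* toy stand-in for syscalltbl__name(); ids 3 and 4 are holes */
static const char *id__name(int id)
{
	static const char * const names[] = { "read", "write", "open", NULL, NULL, "creat" };

	return (id >= 0 && id <= 5) ? names[id] : NULL;
}

static int table__read_info(int id, int max_id)
{
	struct entry *e;

	if (table == NULL) {
		table = calloc(max_id + 1, sizeof(*table));	/* one shot, no realloc */
		if (table == NULL)
			return -ENOMEM;
	}

	e = table + id;
	if (e->nonexistent)
		return 0;	/* probed before, still a hole: don't look it up again */

	e->name = id__name(id);
	if (e->name == NULL)
		e->nonexistent = true;

	return 0;
}

int main(void)
{
	const int max_id = 5;

	for (int id = 0; id <= max_id; ++id) {
		if (table__read_info(id, max_id) == 0 && table[id].name)
			printf("%d -> %s\n", id, table[id].name);
	}
	free(table);
	return 0;
}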
@@ -1505,10 +1526,10 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 	}
 
 	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
-		return -1;
+		return -ENOMEM;
 
 	if (IS_ERR(sc->tp_format))
-		return -1;
+		return PTR_ERR(sc->tp_format);
 
 	sc->args = sc->tp_format->format.fields;
 	/*
@@ -1527,6 +1548,13 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 	return syscall__set_arg_fmts(sc);
 }
 
+static int intcmp(const void *a, const void *b)
+{
+	const int *one = a, *another = b;
+
+	return *one - *another;
+}
+
 static int trace__validate_ev_qualifier(struct trace *trace)
 {
 	int err = 0;
@@ -1590,6 +1618,7 @@ matches:
 	}
 
 	trace->ev_qualifier_ids.nr = nr_used;
+	qsort(trace->ev_qualifier_ids.entries, nr_used, sizeof(int), intcmp);
 out:
 	if (printed_invalid_prefix)
 		pr_debug("\n");
@@ -1600,6 +1629,22 @@ out_free:
 	goto out;
 }
 
+static __maybe_unused bool trace__syscall_enabled(struct trace *trace, int id)
+{
+	bool in_ev_qualifier;
+
+	if (trace->ev_qualifier_ids.nr == 0)
+		return true;
+
+	in_ev_qualifier = bsearch(&id, trace->ev_qualifier_ids.entries,
+				  trace->ev_qualifier_ids.nr, sizeof(int), intcmp) != NULL;
+
+	if (in_ev_qualifier)
+		return !trace->not_ev_qualifier;
+
+	return trace->not_ev_qualifier;
+}
+
 /*
  * args is to be interpreted as a series of longs but we need to handle
  * 8-byte unaligned accesses. args points to raw_data within the event
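The qualifier ids are now qsort()ed once with intcmp() so that trace__syscall_enabled() can answer membership queries with bsearch(). A self-contained sketch of that sorted-set idiom; names are illustrative, and the subtraction in the comparator is only safe because syscall ids are small non-negative ints:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static int intcmp(const void *a, const void *b)
{
	const int *one = a, *another = b;

	return *one - *another;	/* fine for small ids; would overflow for extreme ints */
}

static bool id_set__contains(const int *entries, size_t nr, int id)
{
	return bsearch(&id, entries, nr, sizeof(int), intcmp) != NULL;
}

int main(void)
{
	int ids[] = { 257, 0, 2, 59 };	/* e.g. openat, read, open, execve on x86_64 */
	const size_t nr = sizeof(ids) / sizeof(ids[0]);

	qsort(ids, nr, sizeof(int), intcmp);	/* bsearch() requires sorted input */

	printf("59 enabled?  %s\n", id_set__contains(ids, nr, 59) ? "yes" : "no");
	printf("100 enabled? %s\n", id_set__contains(ids, nr, 100) ? "yes" : "no");
	return 0;
}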
@@ -1738,13 +1783,14 @@ next_arg:
 	return printed;
 }
 
-typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
+typedef int (*tracepoint_handler)(struct trace *trace, struct evsel *evsel,
 				  union perf_event *event,
 				  struct perf_sample *sample);
 
 static struct syscall *trace__syscall_info(struct trace *trace,
-					   struct perf_evsel *evsel, int id)
+					   struct evsel *evsel, int id)
 {
+	int err = 0;
+
 	if (id < 0) {
@@ -1766,19 +1812,28 @@ static struct syscall *trace__syscall_info(struct trace *trace,
 		return NULL;
 	}
 
-	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
-	    trace__read_syscall_info(trace, id))
+	err = -EINVAL;
+
+	if (id > trace->sctbl->syscalls.max_id)
+		goto out_cant_read;
+
+	if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
+	    (err = trace__read_syscall_info(trace, id)) != 0)
 		goto out_cant_read;
 
-	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
+	if (trace->syscalls.table[id].name == NULL) {
+		if (trace->syscalls.table[id].nonexistent)
+			return NULL;
 		goto out_cant_read;
+	}
 
 	return &trace->syscalls.table[id];
 
 out_cant_read:
 	if (verbose > 0) {
-		fprintf(trace->output, "Problems reading syscall %d", id);
-		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
+		char sbuf[STRERR_BUFSIZE];
+		fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf)));
+		if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL)
 			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
 		fputs(" information\n", trace->output);
 	}
@@ -1839,7 +1894,7 @@ static int trace__printf_interrupted_entry(struct trace *trace)
 	return printed;
 }
 
-static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
+static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
 				 struct perf_sample *sample, struct thread *thread)
 {
 	int printed = 0;
@@ -1882,7 +1937,7 @@ static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sam
 	return augmented_args;
 }
 
-static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
+static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
 			    union perf_event *event __maybe_unused,
 			    struct perf_sample *sample)
 {
@@ -1961,7 +2016,7 @@ out_put:
 	return err;
 }
 
-static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evsel,
+static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
 				    struct perf_sample *sample)
 {
 	struct thread_trace *ttrace;
@@ -1994,13 +2049,13 @@ out_put:
 	return err;
 }
 
-static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
+static int trace__resolve_callchain(struct trace *trace, struct evsel *evsel,
 				    struct perf_sample *sample,
 				    struct callchain_cursor *cursor)
 {
 	struct addr_location al;
-	int max_stack = evsel->attr.sample_max_stack ?
-			evsel->attr.sample_max_stack :
+	int max_stack = evsel->core.attr.sample_max_stack ?
+			evsel->core.attr.sample_max_stack :
 			trace->max_stack;
 	int err;
@@ -2022,7 +2077,7 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam
 	return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
 }
 
-static const char *errno_to_name(struct perf_evsel *evsel, int err)
+static const char *errno_to_name(struct evsel *evsel, int err)
 {
 	struct perf_env *env = perf_evsel__env(evsel);
 	const char *arch_name = perf_env__arch(env);
@@ -2030,7 +2085,7 @@ static const char *errno_to_name(struct perf_evsel *evsel, int err)
 	return arch_syscalls__strerrno(arch_name, err);
 }
 
-static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
+static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
 			   union perf_event *event __maybe_unused,
 			   struct perf_sample *sample)
 {
@@ -2058,7 +2113,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 
 	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
 
-	if (sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
+	if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
 		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
 		ttrace->filename.pending_open = false;
 		++trace->stats.vfs_getname;
@@ -2164,7 +2219,7 @@ out_put:
 	return err;
 }
 
-static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
+static int trace__vfs_getname(struct trace *trace, struct evsel *evsel,
 			      union perf_event *event __maybe_unused,
 			      struct perf_sample *sample)
 {
@@ -2225,7 +2280,7 @@ out:
 	return 0;
 }
 
-static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
+static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel,
 				     union perf_event *event __maybe_unused,
 				     struct perf_sample *sample)
 {
@@ -2287,7 +2342,7 @@ static void bpf_output__fprintf(struct trace *trace,
 	++trace->nr_events_printed;
 }
 
-static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
+static int trace__event_handler(struct trace *trace, struct evsel *evsel,
 				union perf_event *event __maybe_unused,
 				struct perf_sample *sample)
 {
@@ -2353,8 +2408,8 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 			++trace->nr_events_printed;
 
 			if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
-				perf_evsel__disable(evsel);
-				perf_evsel__close(evsel);
+				evsel__disable(evsel);
+				evsel__close(evsel);
 			}
 		}
 	}
@@ -2389,7 +2444,7 @@ static void print_location(FILE *f, struct perf_sample *sample,
 }
 
 static int trace__pgfault(struct trace *trace,
-			  struct perf_evsel *evsel,
+			  struct evsel *evsel,
 			  union perf_event *event __maybe_unused,
 			  struct perf_sample *sample)
 {
@@ -2415,7 +2470,7 @@ static int trace__pgfault(struct trace *trace,
 	if (ttrace == NULL)
 		goto out_put;
 
-	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
+	if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
 		ttrace->pfmaj++;
 	else
 		ttrace->pfmin++;
@@ -2428,7 +2483,7 @@ static int trace__pgfault(struct trace *trace,
 	trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
 
 	fprintf(trace->output, "%sfault [",
-		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
+		evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
 		"maj" : "min");
 
 	print_location(trace->output, sample, &al, false, true);
@@ -2464,7 +2519,7 @@ out_put:
 }
 
 static void trace__set_base_time(struct trace *trace,
-				 struct perf_evsel *evsel,
+				 struct evsel *evsel,
 				 struct perf_sample *sample)
 {
 	/*
@@ -2476,14 +2531,14 @@ static void trace__set_base_time(struct trace *trace,
 	 * appears in our event stream (vfs_getname comes to mind).
 	 */
 	if (trace->base_time == 0 && !trace->full_time &&
-	    (evsel->attr.sample_type & PERF_SAMPLE_TIME))
+	    (evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
 		trace->base_time = sample->time;
 }
 
 static int trace__process_sample(struct perf_tool *tool,
 				 union perf_event *event,
 				 struct perf_sample *sample,
-				 struct perf_evsel *evsel,
+				 struct evsel *evsel,
 				 struct machine *machine __maybe_unused)
 {
 	struct trace *trace = container_of(tool, struct trace, tool);
@@ -2569,10 +2624,10 @@ static int trace__record(struct trace *trace, int argc, const char **argv)
 
 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
 
-static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
+static bool evlist__add_vfs_getname(struct evlist *evlist)
 {
 	bool found = false;
-	struct perf_evsel *evsel, *tmp;
+	struct evsel *evsel, *tmp;
 	struct parse_events_error err = { .idx = 0, };
 	int ret = parse_events(evlist, "probe:vfs_getname*", &err);
@@ -2589,17 +2644,17 @@ static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
 			continue;
 		}
 
-		list_del_init(&evsel->node);
+		list_del_init(&evsel->core.node);
 		evsel->evlist = NULL;
-		perf_evsel__delete(evsel);
+		evsel__delete(evsel);
 	}
 
 	return found;
 }
 
-static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
+static struct evsel *perf_evsel__new_pgfault(u64 config)
 {
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 	struct perf_event_attr attr = {
 		.type = PERF_TYPE_SOFTWARE,
 		.mmap_data = 1,
@@ -2610,7 +2665,7 @@ static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
 
 	event_attr_init(&attr);
 
-	evsel = perf_evsel__new(&attr);
+	evsel = evsel__new(&attr);
 	if (evsel)
 		evsel->handler = trace__pgfault;
@@ -2620,7 +2675,7 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
 {
 	const u32 type = event->header.type;
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 
 	if (type != PERF_RECORD_SAMPLE) {
 		trace__process_event(trace, trace->host, event, sample);
@@ -2633,9 +2688,12 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
 		return;
 	}
 
+	if (evswitch__discard(&trace->evswitch, evsel))
+		return;
+
 	trace__set_base_time(trace, evsel, sample);
 
-	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
+	if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
 	    sample->raw_data == NULL) {
 		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
 			perf_evsel__name(evsel), sample->tid,
@@ -2652,8 +2710,8 @@ static int trace__add_syscall_newtp(struct trace *trace)
 {
 	int ret = -1;
-	struct perf_evlist *evlist = trace->evlist;
-	struct perf_evsel *sys_enter, *sys_exit;
+	struct evlist *evlist = trace->evlist;
+	struct evsel *sys_enter, *sys_exit;
 
 	sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
 	if (sys_enter == NULL)
@@ -2672,8 +2730,8 @@ static int trace__add_syscall_newtp(struct trace *trace)
 	perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
 	perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
 
-	perf_evlist__add(evlist, sys_enter);
-	perf_evlist__add(evlist, sys_exit);
+	evlist__add(evlist, sys_enter);
+	evlist__add(evlist, sys_exit);
 
 	if (callchain_param.enabled && !trace->kernel_syscallchains) {
@@ -2681,7 +2739,7 @@ static int trace__add_syscall_newtp(struct trace *trace)
 		 * leading to the syscall, allow overriding that for
 		 * debugging reasons using --kernel_syscall_callchains
 		 */
-		sys_exit->attr.exclude_callchain_kernel = 1;
+		sys_exit->core.attr.exclude_callchain_kernel = 1;
 	}
 
 	trace->syscalls.events.sys_enter = sys_enter;
@@ -2692,16 +2750,16 @@ out:
 	return ret;
 
 out_delete_sys_exit:
-	perf_evsel__delete_priv(sys_exit);
+	evsel__delete_priv(sys_exit);
 out_delete_sys_enter:
-	perf_evsel__delete_priv(sys_enter);
+	evsel__delete_priv(sys_enter);
 	goto out;
 }
 
 static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
 {
 	int err = -1;
-	struct perf_evsel *sys_exit;
+	struct evsel *sys_exit;
 	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
 						trace->ev_qualifier_ids.nr,
 						trace->ev_qualifier_ids.entries);
@@ -2724,6 +2782,70 @@ out_enomem:
 }
 
 #ifdef HAVE_LIBBPF_SUPPORT
+static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
+{
+	if (trace->bpf_obj == NULL)
+		return NULL;
+
+	return bpf_object__find_program_by_title(trace->bpf_obj, name);
+}
+
+static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc,
+							const char *prog_name, const char *type)
+{
+	struct bpf_program *prog;
+
+	if (prog_name == NULL) {
+		char default_prog_name[256];
+		scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name);
+		prog = trace__find_bpf_program_by_title(trace, default_prog_name);
+		if (prog != NULL)
+			goto out_found;
+		if (sc->fmt && sc->fmt->alias) {
+			scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias);
+			prog = trace__find_bpf_program_by_title(trace, default_prog_name);
+			if (prog != NULL)
+				goto out_found;
+		}
+		goto out_unaugmented;
+	}
+
+	prog = trace__find_bpf_program_by_title(trace, prog_name);
+
+	if (prog != NULL) {
+out_found:
+		return prog;
+	}
+
+	pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
+		 prog_name, type, sc->name);
+out_unaugmented:
+	return trace->syscalls.unaugmented_prog;
+}
+
+static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
+{
+	struct syscall *sc = trace__syscall_info(trace, NULL, id);
+
+	if (sc == NULL)
+		return;
+
+	sc->bpf_prog.sys_enter = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_enter : NULL, "enter");
+	sc->bpf_prog.sys_exit  = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_exit  : NULL, "exit");
+}
+
+static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
+{
+	struct syscall *sc = trace__syscall_info(trace, NULL, id);
+
+	return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog);
+}
+
+static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
+{
+	struct syscall *sc = trace__syscall_info(trace, NULL, id);
+
+	return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
+}
+
 static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
 {
 	struct syscall *sc = trace__syscall_info(trace, NULL, id);
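trace__find_syscall_bpf_prog() above encodes a lookup convention: try a program titled "!syscalls:sys_{enter,exit}_<name>", retry with the syscall's alias, and finally fall back to the catch-all unaugmented program so the caller never sees NULL. A runnable toy version of that chain, with find_prog() standing in for bpf_object__find_program_by_title() and all titles illustrative:

#include <stdio.h>
#include <string.h>

struct prog { const char *title; };

static struct prog progs[] = {
	{ "!syscalls:sys_enter_open" },
	{ "!syscalls:sys_enter_sendfile64" },
};
static struct prog unaugmented = { "!raw_syscalls:unaugmented" };

static struct prog *find_prog(const char *title)
{
	for (size_t i = 0; i < sizeof(progs) / sizeof(progs[0]); ++i) {
		if (strcmp(progs[i].title, title) == 0)
			return &progs[i];
	}
	return NULL;
}

static struct prog *find_augmenter(const char *name, const char *alias, const char *type)
{
	char title[256];
	struct prog *prog;

	snprintf(title, sizeof(title), "!syscalls:sys_%s_%s", type, name);
	prog = find_prog(title);
	if (prog == NULL && alias != NULL) {	/* retry with the syscall's alias */
		snprintf(title, sizeof(title), "!syscalls:sys_%s_%s", type, alias);
		prog = find_prog(title);
	}

	return prog ? prog : &unaugmented;	/* never NULL: default to the catch-all */
}

int main(void)
{
	/* "sendfile" has no direct augmenter, but its "sendfile64" alias does: */
	printf("%s\n", find_augmenter("sendfile", "sendfile64", "enter")->title);
	/* no match at all falls back to the catch-all: */
	printf("%s\n", find_augmenter("close", NULL, "enter")->title);
	return 0;
}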
@@ -2755,8 +2877,10 @@ static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
 	for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
 		int key = trace->ev_qualifier_ids.entries[i];
 
-		if (value.enabled)
+		if (value.enabled) {
 			trace__init_bpf_map_syscall_args(trace, key, &value);
+			trace__init_syscall_bpf_progs(trace, key);
+		}
 
 		err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
 		if (err)
@@ -2795,6 +2919,186 @@ static int trace__init_syscalls_bpf_map(struct trace *trace)
 	return __trace__init_syscalls_bpf_map(trace, enabled);
 }
 
+static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
+{
+	struct tep_format_field *field, *candidate_field;
+	int id;
+
+	/*
+	 * We're only interested in syscalls that have a pointer:
+	 */
+	for (field = sc->args; field; field = field->next) {
+		if (field->flags & TEP_FIELD_IS_POINTER)
+			goto try_to_find_pair;
+	}
+
+	return NULL;
+
+try_to_find_pair:
+	for (id = 0; id < trace->sctbl->syscalls.nr_entries; ++id) {
+		struct syscall *pair = trace__syscall_info(trace, NULL, id);
+		struct bpf_program *pair_prog;
+		bool is_candidate = false;
+
+		if (pair == NULL || pair == sc ||
+		    pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog)
+			continue;
+
+		for (field = sc->args, candidate_field = pair->args;
+		     field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
+			bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
+			     candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;
+
+			if (is_pointer) {
+				if (!candidate_is_pointer) {
+					// The candidate just doesn't copy our pointer arg, but it might copy other pointers we want.
+					continue;
+				}
+			} else {
+				if (candidate_is_pointer) {
+					// The candidate might copy a pointer we don't have, skip it.
+					goto next_candidate;
+				}
+				continue;
+			}
+
+			if (strcmp(field->type, candidate_field->type))
+				goto next_candidate;
+
+			is_candidate = true;
+		}
+
+		if (!is_candidate)
+			goto next_candidate;
+
+		/*
+		 * Check if the tentative pair syscall augmenter has more pointers, if it has,
+		 * then it may be collecting that and we then can't use it, as it would collect
+		 * more than what is common to the two syscalls.
+		 */
+		if (candidate_field) {
+			for (candidate_field = candidate_field->next; candidate_field; candidate_field = candidate_field->next)
+				if (candidate_field->flags & TEP_FIELD_IS_POINTER)
+					goto next_candidate;
+		}
+
+		pair_prog = pair->bpf_prog.sys_enter;
+		/*
+		 * If the pair isn't enabled, then its bpf_prog.sys_enter will not
+		 * have been searched for, so search it here and if it returns the
+		 * unaugmented one, then ignore it, otherwise we'll reuse that BPF
+		 * program for a filtered syscall on a non-filtered one.
+		 *
+		 * For instance, we have "!syscalls:sys_enter_renameat" and that is
+		 * useful for "renameat2".
+		 */
+		if (pair_prog == NULL) {
+			pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
+			if (pair_prog == trace->syscalls.unaugmented_prog)
+				goto next_candidate;
+		}
+
+		pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
+		return pair_prog;
+	next_candidate:
+		continue;
+	}
+
+	return NULL;
+}
+
+static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
+{
+	int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter),
+	    map_exit_fd  = bpf_map__fd(trace->syscalls.prog_array.sys_exit);
+	int err = 0, key;
+
+	for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
+		int prog_fd;
+
+		if (!trace__syscall_enabled(trace, key))
+			continue;
+
+		trace__init_syscall_bpf_progs(trace, key);
+
+		// It'll get at least the "!raw_syscalls:unaugmented"
+		prog_fd = trace__bpf_prog_sys_enter_fd(trace, key);
+		err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
+		if (err)
+			break;
+		prog_fd = trace__bpf_prog_sys_exit_fd(trace, key);
+		err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY);
+		if (err)
+			break;
+	}
+
+	/*
+	 * Now let's do a second pass looking for enabled syscalls without
+	 * an augmenter that have a signature that is a superset of another
+	 * syscall with an augmenter so that we can auto-reuse it.
+	 *
+	 * I.e. if we have an augmenter for the "open" syscall that has
+	 * this signature:
+	 *
+	 *   int open(const char *pathname, int flags, mode_t mode);
+	 *
+	 * I.e. that will collect just the first string argument, then we
+	 * can reuse it for the 'creat' syscall, that has this signature:
+	 *
+	 *   int creat(const char *pathname, mode_t mode);
+	 *
+	 * and for:
+	 *
+	 *   int stat(const char *pathname, struct stat *statbuf);
+	 *   int lstat(const char *pathname, struct stat *statbuf);
+	 *
+	 * Because the 'open' augmenter will collect the first arg as a string,
+	 * and leave alone all the other args, which already helps with
+	 * beautifying 'stat' and 'lstat''s pathname arg.
+	 *
+	 * Then, in time, when 'stat' gets an augmenter that collects both
+	 * first and second arg (this one on the raw_syscalls:sys_exit prog
+	 * array tail call), then that one will be used.
+	 */
+	for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
+		struct syscall *sc = trace__syscall_info(trace, NULL, key);
+		struct bpf_program *pair_prog;
+		int prog_fd;
+
+		if (sc == NULL || sc->bpf_prog.sys_enter == NULL)
+			continue;
+
+		/*
+		 * For now we're just reusing the sys_enter prog, and if it
+		 * already has an augmenter, we don't need to find one.
+		 */
+		if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog)
+			continue;
+
+		/*
+		 * Look at all the other syscalls for one that has a signature
+		 * that is close enough that we can share:
+		 */
+		pair_prog = trace__find_usable_bpf_prog_entry(trace, sc);
+		if (pair_prog == NULL)
+			continue;
+
+		sc->bpf_prog.sys_enter = pair_prog;
+
+		/*
+		 * Update the BPF_MAP_TYPE_PROG_ARRAY for raw_syscalls:sys_enter
+		 * with the fd for the program we're reusing:
+		 */
+		prog_fd = bpf_program__fd(sc->bpf_prog.sys_enter);
+		err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
+		if (err)
+			break;
+	}
+
+
+	return err;
+}
 #else
 static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
 {
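trace__init_syscalls_bpf_prog_array_maps() above stores one program fd per syscall id in BPF_MAP_TYPE_PROG_ARRAY maps; the BPF side then dispatches to the per-syscall augmenter with bpf_tail_call(). A hedged sketch of that userspace plumbing with libbpf — the object file name, map name, and program title are assumptions lifted from this patch, not a stable interface, and error handling is trimmed:

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int prog_array__set(struct bpf_object *obj, const char *map_name,
			   int key, const char *prog_title)
{
	struct bpf_map *map = bpf_object__find_map_by_name(obj, map_name);
	struct bpf_program *prog = bpf_object__find_program_by_title(obj, prog_title);
	int prog_fd;

	if (map == NULL || prog == NULL)
		return -1;

	prog_fd = bpf_program__fd(prog);
	/* BPF_ANY: create the slot or overwrite a previously set program fd */
	return bpf_map_update_elem(bpf_map__fd(map), &key, &prog_fd, BPF_ANY);
}

int main(void)
{
	struct bpf_object *obj = bpf_object__open("augmented_raw_syscalls.o");

	if (libbpf_get_error(obj) || bpf_object__load(obj))
		return 1;

	/* route syscall id 257 (openat on x86_64) to its sys_enter augmenter */
	return prog_array__set(obj, "syscalls_sys_enter", 257,
			       "!syscalls:sys_enter_openat") ? 1 : 0;
}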
@@ -2805,6 +3109,17 @@ static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused)
 {
 	return 0;
 }
+
+static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused,
+							    const char *name __maybe_unused)
+{
+	return NULL;
+}
+
+static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused)
+{
+	return 0;
+}
 #endif // HAVE_LIBBPF_SUPPORT
 
 static int trace__set_ev_qualifier_filter(struct trace *trace)
@@ -2879,7 +3194,7 @@ static int trace__set_filter_pids(struct trace *trace)
 			err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr,
 						       trace->filter_pids.entries);
 		}
-	} else if (thread_map__pid(trace->evlist->threads, 0) == -1) {
+	} else if (perf_thread_map__pid(trace->evlist->core.threads, 0) == -1) {
 		err = trace__set_filter_loop_pids(trace);
 	}
@@ -2888,7 +3203,7 @@ static int trace__set_filter_pids(struct trace *trace)
 
 static int __trace__deliver_event(struct trace *trace, union perf_event *event)
 {
-	struct perf_evlist *evlist = trace->evlist;
+	struct evlist *evlist = trace->evlist;
 	struct perf_sample sample;
 	int err;
@@ -2946,8 +3261,8 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
 
 static int trace__run(struct trace *trace, int argc, const char **argv)
 {
-	struct perf_evlist *evlist = trace->evlist;
-	struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
+	struct evlist *evlist = trace->evlist;
+	struct evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
 	int err = -1, i;
 	unsigned long before;
 	const bool forks = argc > 0;
@@ -2960,7 +3275,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 			goto out_error_raw_syscalls;
 
 		if (trace->trace_syscalls)
-			trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
+			trace->vfs_getname = evlist__add_vfs_getname(evlist);
 	}
 
 	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
@@ -2968,7 +3283,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		if (pgfault_maj == NULL)
 			goto out_error_mem;
 		perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
-		perf_evlist__add(evlist, pgfault_maj);
+		evlist__add(evlist, pgfault_maj);
 	}
 
 	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
@@ -2976,7 +3291,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		if (pgfault_min == NULL)
 			goto out_error_mem;
 		perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
-		perf_evlist__add(evlist, pgfault_min);
+		evlist__add(evlist, pgfault_min);
 	}
 
 	if (trace->sched &&
@@ -3038,7 +3353,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		}
 	}
 
-	err = perf_evlist__open(evlist);
+	err = evlist__open(evlist);
 	if (err < 0)
 		goto out_error_open;
@@ -3059,6 +3374,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	if (trace->syscalls.map)
 		trace__init_syscalls_bpf_map(trace);
 
+	if (trace->syscalls.prog_array.sys_enter)
+		trace__init_syscalls_bpf_prog_array_maps(trace);
+
 	if (trace->ev_qualifier_ids.nr > 0) {
 		err = trace__set_ev_qualifier_filter(trace);
 		if (err < 0)
@@ -3070,6 +3388,19 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		}
 	}
 
+	/*
+	 * If the "close" syscall is not traced, then we will not have the
+	 * opportunity to, in syscall_arg__scnprintf_close_fd(), invalidate the
+	 * fd->pathname table and we end up showing the last value set by
+	 * syscalls opening a pathname and associating it with a descriptor or
+	 * reading it from /proc/pid/fd/ in cases where that doesn't make
+	 * sense.
+	 *
+	 * So just disable this beautifier (SCA_FD, SCA_FDAT) when 'close' is
+	 * not in use.
+	 */
+	trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close"));
+
 	err = perf_evlist__apply_filters(evlist, &evsel);
 	if (err < 0)
 		goto out_error_apply_filters;
@@ -3082,30 +3413,30 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		goto out_error_mmap;
 
 	if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
-		perf_evlist__enable(evlist);
+		evlist__enable(evlist);
 
 	if (forks)
 		perf_evlist__start_workload(evlist);
 
 	if (trace->opts.initial_delay) {
 		usleep(trace->opts.initial_delay * 1000);
-		perf_evlist__enable(evlist);
+		evlist__enable(evlist);
 	}
 
-	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
-				  evlist->threads->nr > 1 ||
-				  perf_evlist__first(evlist)->attr.inherit;
+	trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
+				  evlist->core.threads->nr > 1 ||
+				  perf_evlist__first(evlist)->core.attr.inherit;
 
 	/*
-	 * Now that we already used evsel->attr to ask the kernel to setup the
-	 * events, lets reuse evsel->attr.sample_max_stack as the limit in
+	 * Now that we already used evsel->core.attr to ask the kernel to setup the
+	 * events, let's reuse evsel->core.attr.sample_max_stack as the limit in
 	 * trace__resolve_callchain(), allowing per-event max-stack settings
 	 * to override an explicitly set --max-stack global setting.
 	 */
 	evlist__for_each_entry(evlist, evsel) {
 		if (evsel__has_callchain(evsel) &&
-		    evsel->attr.sample_max_stack == 0)
-			evsel->attr.sample_max_stack = trace->max_stack;
+		    evsel->core.attr.sample_max_stack == 0)
+			evsel->core.attr.sample_max_stack = trace->max_stack;
 	}
 again:
 	before = trace->nr_events;
@@ -3131,7 +3462,7 @@ again:
 			goto out_disable;
 
 		if (done && !draining) {
-			perf_evlist__disable(evlist);
+			evlist__disable(evlist);
 			draining = true;
 		}
 	}
@@ -3157,7 +3488,7 @@ again:
 out_disable:
 	thread__zput(trace->current);
 
-	perf_evlist__disable(evlist);
+	evlist__disable(evlist);
 
 	if (trace->sort_events)
 		ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);
@@ -3178,7 +3509,7 @@ out_disable:
 
 out_delete_evlist:
 	trace__symbols__exit(trace);
 
-	perf_evlist__delete(evlist);
+	evlist__delete(evlist);
 	cgroup__put(trace->cgroup);
 	trace->evlist = NULL;
 	trace->live = false;
@@ -3223,7 +3554,7 @@ out_errno:
 
 static int trace__replay(struct trace *trace)
 {
-	const struct perf_evsel_str_handler handlers[] = {
+	const struct evsel_str_handler handlers[] = {
 		{ "probe:vfs_getname", trace__vfs_getname, },
 	};
 	struct perf_data data = {
@@ -3232,7 +3563,7 @@ static int trace__replay(struct trace *trace)
 		.force = trace->force,
 	};
 	struct perf_session *session;
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 	int err = -1;
 
 	trace->tool.sample = trace__process_sample;
@@ -3298,10 +3629,10 @@ static int trace__replay(struct trace *trace)
 	}
 
 	evlist__for_each_entry(session->evlist, evsel) {
-		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
-		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
-		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
-		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
+		if (evsel->core.attr.type == PERF_TYPE_SOFTWARE &&
+		    (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
+		     evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
+		     evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS))
 			evsel->handler = trace__pgfault;
 	}
@@ -3523,17 +3854,17 @@ static int parse_pagefaults(const struct option *opt, const char *str,
 	return 0;
 }
 
-static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
+static void evlist__set_evsel_handler(struct evlist *evlist, void *handler)
 {
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 
 	evlist__for_each_entry(evlist, evsel)
 		evsel->handler = handler;
 }
 
-static int evlist__set_syscall_tp_fields(struct perf_evlist *evlist)
+static int evlist__set_syscall_tp_fields(struct evlist *evlist)
 {
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 
 	evlist__for_each_entry(evlist, evsel) {
 		if (evsel->priv || !evsel->tp_format)
@@ -3660,7 +3991,7 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u
 {
 	struct trace *trace = opt->value;
 
-	if (!list_empty(&trace->evlist->entries))
+	if (!list_empty(&trace->evlist->core.entries))
 		return parse_cgroups(opt, str, unset);
 
 	trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
@@ -3668,28 +3999,24 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u
 	return 0;
 }
 
-static struct bpf_map *bpf__find_map_by_name(const char *name)
+static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
 {
-	struct bpf_object *obj, *tmp;
-
-	bpf_object__for_each_safe(obj, tmp) {
-		struct bpf_map *map = bpf_object__find_map_by_name(obj, name);
-		if (map)
-			return map;
-
-	}
+	if (trace->bpf_obj == NULL)
+		return NULL;
 
-	return NULL;
+	return bpf_object__find_map_by_name(trace->bpf_obj, name);
 }
 
 static void trace__set_bpf_map_filtered_pids(struct trace *trace)
 {
-	trace->filter_pids.map = bpf__find_map_by_name("pids_filtered");
+	trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
 }
 
 static void trace__set_bpf_map_syscalls(struct trace *trace)
 {
-	trace->syscalls.map = bpf__find_map_by_name("syscalls");
+	trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
+	trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
+	trace->syscalls.prog_array.sys_exit  = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
 }
 
 static int trace__config(const char *var, const char *value, void *arg)
@@ -3745,9 +4072,6 @@ int cmd_trace(int argc, const char **argv)
 		NULL
 	};
 	struct trace trace = {
-		.syscalls = {
-			.max = -1,
-		},
 		.opts = {
 			.target = {
 				.uid	   = UINT_MAX,
@@ -3844,11 +4168,12 @@ int cmd_trace(int argc, const char **argv)
 	OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
 		     "ms to wait before starting measurement after program start"),
+	OPTS_EVSWITCH(&trace.evswitch),
 	OPT_END()
 	};
 	bool __maybe_unused max_stack_user_set = true;
 	bool mmap_pages_user_set = true;
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 	const char * const trace_subcommands[] = { "record", NULL };
 	int err = -1;
 	char bf[BUFSIZ];
@@ -3856,7 +4181,7 @@ int cmd_trace(int argc, const char **argv)
 	signal(SIGSEGV, sighandler_dump_stack);
 	signal(SIGFPE, sighandler_dump_stack);
 
-	trace.evlist = perf_evlist__new();
+	trace.evlist = evlist__new();
 	trace.sctbl = syscalltbl__new();
 
 	if (trace.evlist == NULL || trace.sctbl == NULL) {
@@ -3895,8 +4220,23 @@ int cmd_trace(int argc, const char **argv)
 
 	if (evsel) {
 		trace.syscalls.events.augmented = evsel;
+
+		evsel = perf_evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter");
+		if (evsel == NULL) {
+			pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n");
+			goto out;
+		}
+
+		if (evsel->bpf_obj == NULL) {
+			pr_err("ERROR: raw_syscalls:sys_enter not associated to a BPF object\n");
+			goto out;
+		}
+
+		trace.bpf_obj = evsel->bpf_obj;
+
 		trace__set_bpf_map_filtered_pids(&trace);
 		trace__set_bpf_map_syscalls(&trace);
+		trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented");
 	}
 
 	err = bpf__setup_stdout(trace.evlist);
@@ -3909,7 +4249,7 @@ int cmd_trace(int argc, const char **argv)
 	err = -1;
 
 	if (map_dump_str) {
-		trace.dump.map = bpf__find_map_by_name(map_dump_str);
+		trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str);
 		if (trace.dump.map == NULL) {
 			pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str);
 			goto out;
@@ -3942,7 +4282,7 @@ int cmd_trace(int argc, const char **argv)
 		symbol_conf.use_callchain = true;
 	}
 
-	if (trace.evlist->nr_entries > 0) {
+	if (trace.evlist->core.nr_entries > 0) {
 		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
 		if (evlist__set_syscall_tp_fields(trace.evlist)) {
 			perror("failed to set syscalls:* tracepoint fields");
@@ -3977,11 +4317,26 @@ int cmd_trace(int argc, const char **argv)
 
 		if (trace.syscalls.events.augmented->priv == NULL &&
 		    strstr(perf_evsel__name(evsel), "syscalls:sys_enter")) {
-			struct perf_evsel *augmented = trace.syscalls.events.augmented;
+			struct evsel *augmented = trace.syscalls.events.augmented;
 			if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) ||
 			    perf_evsel__init_augmented_syscall_tp_args(augmented))
 				goto out;
+			/*
+			 * Augmented is the __augmented_syscalls__ BPF_OUTPUT event.
+			 * Above we made sure we can get from the payload the tp fields
+			 * that we get from the syscalls:sys_enter tracefs format file.
+			 */
 			augmented->handler = trace__sys_enter;
+			/*
+			 * Now we do the same for the *syscalls:sys_enter event so that
+			 * if we handle it directly, i.e. if the BPF prog returns 0 so
+			 * as not to filter it, then we'll handle it just like we would
+			 * for the BPF_OUTPUT one:
+			 */
+			if (perf_evsel__init_augmented_syscall_tp(evsel, evsel) ||
+			    perf_evsel__init_augmented_syscall_tp_args(evsel))
+				goto out;
+			evsel->handler = trace__sys_enter;
 		}
 
 		if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
@@ -4025,7 +4380,7 @@ init_augmented_syscall_tp:
 	trace.summary = trace.summary_only;
 
 	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
-	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
+	    trace.evlist->core.nr_entries == 0 /* Was --events used? */) {
 		trace.trace_syscalls = true;
 	}
@@ -4037,6 +4392,10 @@ init_augmented_syscall_tp:
 		}
 	}
 
+	err = evswitch__init(&trace.evswitch, trace.evlist, stderr);
+	if (err)
+		goto out_close;
+
 	err = target__validate(&trace.opts.target);
 	if (err) {
 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
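The OPTS_EVSWITCH()/evswitch__init() additions above wire an on/off switch into the event stream: evswitch__discard() drops samples until the on-event shows up and starts dropping again after the off-event. A rough, self-contained model of that behavior — an illustration of the idea only, since perf's evswitch keys on evsel pointers and has a show-on-off-events knob this toy omits:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct evswitch {
	const char *on, *off;	/* event names acting as on/off switches */
	bool discarding;
};

static bool evswitch__discard(struct evswitch *sw, const char *evname)
{
	if (sw->discarding) {
		if (sw->on == NULL || strcmp(evname, sw->on) != 0)
			return true;	/* still switched off: drop this sample */
		sw->discarding = false;	/* the on-event itself is dropped too */
		return true;
	}

	if (sw->off != NULL && strcmp(evname, sw->off) == 0) {
		sw->discarding = true;	/* off-event: drop it and what follows */
		return true;
	}

	return false;
}

int main(void)
{
	struct evswitch sw = { .on = "probe:start", .off = "probe:stop", .discarding = true };
	const char *stream[] = { "syscalls:sys_enter_read", "probe:start",
				 "syscalls:sys_enter_write", "probe:stop",
				 "syscalls:sys_enter_close" };

	for (int i = 0; i < 5; ++i)
		if (!evswitch__discard(&sw, stream[i]))
			printf("%s\n", stream[i]);	/* only the write gets through */
	return 0;
}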