diff options
Diffstat (limited to 'tools/perf/builtin-record.c')
-rw-r--r-- | tools/perf/builtin-record.c | 186 |
1 files changed, 163 insertions, 23 deletions
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1ab349abe904..e108d90ae2ed 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -34,6 +34,7 @@ #include "util/tsc.h" #include "util/parse-branch-options.h" #include "util/parse-regs-options.h" +#include "util/perf_api_probe.h" #include "util/llvm-utils.h" #include "util/bpf-loader.h" #include "util/trigger.h" @@ -43,6 +44,8 @@ #include "util/time-utils.h" #include "util/units.h" #include "util/bpf-event.h" +#include "util/util.h" +#include "util/pfm.h" #include "asm/bug.h" #include "perf.h" @@ -50,9 +53,13 @@ #include <inttypes.h> #include <locale.h> #include <poll.h> +#include <pthread.h> #include <unistd.h> #include <sched.h> #include <signal.h> +#ifdef HAVE_EVENTFD_SUPPORT +#include <sys/eventfd.h> +#endif #include <sys/mman.h> #include <sys/wait.h> #include <sys/types.h> @@ -84,7 +91,10 @@ struct record { struct auxtrace_record *itr; struct evlist *evlist; struct perf_session *session; + struct evlist *sb_evlist; + pthread_t thread_id; int realtime_prio; + bool switch_output_event_set; bool no_buildid; bool no_buildid_set; bool no_buildid_cache; @@ -503,6 +513,20 @@ static int process_synthesized_event(struct perf_tool *tool, return record__write(rec, NULL, event, event->header.size); } +static int process_locked_synthesized_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER; + int ret; + + pthread_mutex_lock(&synth_lock); + ret = process_synthesized_event(tool, event, sample, machine); + pthread_mutex_unlock(&synth_lock); + return ret; +} + static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) { struct record *rec = to; @@ -518,6 +542,9 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) static volatile int signr = -1; static volatile int child_finished; +#ifdef HAVE_EVENTFD_SUPPORT +static int done_fd = -1; +#endif static void sig_handler(int sig) { @@ -527,6 +554,21 @@ static void sig_handler(int sig) signr = sig; done = 1; +#ifdef HAVE_EVENTFD_SUPPORT +{ + u64 tmp = 1; + /* + * It is possible for this signal handler to run after done is checked + * in the main loop, but before the perf counter fds are polled. If this + * happens, the poll() will continue to wait even though done is set, + * and will only break out if either another signal is received, or the + * counters are ready for read. To ensure the poll() doesn't sleep when + * done is set, use an eventfd (done_fd) to wake up the poll(). + */ + if (write(done_fd, &tmp, sizeof(tmp)) < 0) + pr_err("failed to signal wakeup fd, error: %m\n"); +} +#endif // HAVE_EVENTFD_SUPPORT } static void sigsegv_handler(int sig) @@ -805,19 +847,28 @@ static int record__open(struct record *rec) int rc = 0; /* - * For initial_delay we need to add a dummy event so that we can track - * PERF_RECORD_MMAP while we wait for the initial delay to enable the - * real events, the ones asked by the user. + * For initial_delay or system wide, we need to add a dummy event so + * that we can track PERF_RECORD_MMAP to cover the delay of waiting or + * event synthesis. */ - if (opts->initial_delay) { + if (opts->initial_delay || target__has_cpu(&opts->target)) { if (perf_evlist__add_dummy(evlist)) return -ENOMEM; + /* Disable tracking of mmaps on lead event. */ pos = evlist__first(evlist); pos->tracking = 0; + /* Set up dummy event. */ pos = evlist__last(evlist); pos->tracking = 1; - pos->core.attr.enable_on_exec = 1; + /* + * Enable the dummy event when the process is forked for + * initial_delay, immediately for system wide. + */ + if (opts->initial_delay) + pos->core.attr.enable_on_exec = 1; + else + pos->immediate = 1; } perf_evlist__config(evlist, opts, &callchain_param); @@ -825,7 +876,7 @@ static int record__open(struct record *rec) evlist__for_each_entry(evlist, pos) { try_again: if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { - if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { + if (evsel__fallback(pos, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); goto try_again; @@ -837,8 +888,7 @@ try_again: goto try_again; } rc = -errno; - perf_evsel__open_strerror(pos, &opts->target, - errno, msg, sizeof(msg)); + evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); ui__error("%s\n", msg); goto out; } @@ -859,7 +909,7 @@ try_again: if (perf_evlist__apply_filters(evlist, &pos)) { pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", - pos->filter, perf_evsel__name(pos), errno, + pos->filter, evsel__name(pos), errno, str_error_r(errno, msg, sizeof(msg))); rc = -1; goto out; @@ -1288,6 +1338,7 @@ static int record__synthesize(struct record *rec, bool tail) struct perf_tool *tool = &rec->tool; int fd = perf_data__fd(data); int err = 0; + event_op f = process_synthesized_event; if (rec->opts.tail_synthesize != tail) return 0; @@ -1402,13 +1453,67 @@ static int record__synthesize(struct record *rec, bool tail) if (err < 0) pr_warning("Couldn't synthesize cgroup events.\n"); + if (rec->opts.nr_threads_synthesize > 1) { + perf_set_multithreaded(); + f = process_locked_synthesized_event; + } + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads, - process_synthesized_event, opts->sample_address, - 1); + f, opts->sample_address, + rec->opts.nr_threads_synthesize); + + if (rec->opts.nr_threads_synthesize > 1) + perf_set_singlethreaded(); + out: return err; } +static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) +{ + struct record *rec = data; + pthread_kill(rec->thread_id, SIGUSR2); + return 0; +} + +static int record__setup_sb_evlist(struct record *rec) +{ + struct record_opts *opts = &rec->opts; + + if (rec->sb_evlist != NULL) { + /* + * We get here if --switch-output-event populated the + * sb_evlist, so associate a callback that will send a SIGUSR2 + * to the main thread. + */ + evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); + rec->thread_id = pthread_self(); + } + + if (!opts->no_bpf_event) { + if (rec->sb_evlist == NULL) { + rec->sb_evlist = evlist__new(); + + if (rec->sb_evlist == NULL) { + pr_err("Couldn't create side band evlist.\n."); + return -1; + } + } + + if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { + pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); + return -1; + } + } + + if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { + pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); + opts->no_bpf_event = true; + } + + return 0; +} + static int __cmd_record(struct record *rec, int argc, const char **argv) { int err; @@ -1420,7 +1525,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) struct perf_data *data = &rec->data; struct perf_session *session; bool disabled = false, draining = false; - struct evlist *sb_evlist = NULL; int fd; float ratio = 0; @@ -1465,6 +1569,20 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) pr_err("Compression initialization failed.\n"); return -1; } +#ifdef HAVE_EVENTFD_SUPPORT + done_fd = eventfd(0, EFD_NONBLOCK); + if (done_fd < 0) { + pr_err("Failed to create wakeup eventfd, error: %m\n"); + status = -1; + goto out_delete_session; + } + err = evlist__add_pollfd(rec->evlist, done_fd); + if (err < 0) { + pr_err("Failed to add wakeup eventfd to poll list\n"); + status = err; + goto out_delete_session; + } +#endif // HAVE_EVENTFD_SUPPORT session->header.env.comp_type = PERF_COMP_ZSTD; session->header.env.comp_level = rec->opts.comp_level; @@ -1546,21 +1664,17 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) goto out_child; } + err = -1; if (!rec->no_buildid && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { pr_err("Couldn't generate buildids. " "Use --no-buildid to profile anyway.\n"); - err = -1; goto out_child; } - if (!opts->no_bpf_event) - bpf_event__add_sb_event(&sb_evlist, &session->header.env); - - if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) { - pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); - opts->no_bpf_event = true; - } + err = record__setup_sb_evlist(rec); + if (err) + goto out_child; err = record__synthesize(rec, false); if (err < 0) @@ -1827,11 +1941,15 @@ out_child: } out_delete_session: +#ifdef HAVE_EVENTFD_SUPPORT + if (done_fd >= 0) + close(done_fd); +#endif zstd_fini(&session->zstd_data); perf_session__delete(session); if (!opts->no_bpf_event) - perf_evlist__stop_sb_thread(sb_evlist); + perf_evlist__stop_sb_thread(rec->sb_evlist); return status; } @@ -2142,10 +2260,19 @@ static int switch_output_setup(struct record *rec) }; unsigned long val; + /* + * If we're using --switch-output-events, then we imply its + * --switch-output=signal, as we'll send a SIGUSR2 from the side band + * thread to its parent. + */ + if (rec->switch_output_event_set) + goto do_signal; + if (!s->set) return 0; if (!strcmp(s->str, "signal")) { +do_signal: s->signal = true; pr_debug("switch-output with SIGUSR2 signal\n"); goto enabled; @@ -2232,6 +2359,7 @@ static struct record record = { .default_per_cpu = true, }, .mmap_flush = MMAP_FLUSH_DEFAULT, + .nr_threads_synthesize = 1, }, .tool = { .sample = process_sample_event, @@ -2374,8 +2502,9 @@ static struct option __record_options[] = { "Record namespaces events"), OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, "Record cgroup events"), - OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, - "Record context switch events"), + OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, + &record.opts.record_switch_events_set, + "Record context switch events"), OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, "Configure all used events to run in kernel space.", PARSE_OPT_EXCLUSIVE), @@ -2402,6 +2531,9 @@ static struct option __record_options[] = { &record.switch_output.set, "signal or size[BKMG] or time[smhd]", "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", "signal"), + OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event", + "switch output event selector. use 'perf list' to list available events", + parse_events_option_new_evlist), OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, "Limit number of switch output generated files"), OPT_BOOLEAN(0, "dry-run", &dry_run, @@ -2421,6 +2553,14 @@ static struct option __record_options[] = { #endif OPT_CALLBACK(0, "max-size", &record.output_max_size, "size", "Limit the maximum size of the output file", parse_output_max_size), + OPT_UINTEGER(0, "num-thread-synthesize", + &record.opts.nr_threads_synthesize, + "number of threads to run for event synthesis"), +#ifdef HAVE_LIBPFM + OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", + "libpfm4 event selector. use 'perf list' to list available events", + parse_libpfm_events_option), +#endif OPT_END() }; |