Diffstat (limited to 'tools/perf/util')
74 files changed, 2475 insertions, 1062 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index c0cf8dff694e..ca07a162d602 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -10,6 +10,7 @@ perf-y += db-export.o
 perf-y += env.o
 perf-y += event.o
 perf-y += evlist.o
+perf-y += sideband_evlist.o
 perf-y += evsel.o
 perf-y += evsel_fprintf.o
 perf-y += perf_event_attr_fprintf.o
@@ -88,6 +89,7 @@ perf-y += counts.o
 perf-y += stat.o
 perf-y += stat-shadow.o
 perf-y += stat-display.o
+perf-y += perf_api_probe.o
 perf-y += record.o
 perf-y += srcline.o
 perf-y += srccode.o
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index f1ea0d61eb5b..d828c2d2edee 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1191,7 +1191,7 @@ static struct disasm_line *disasm_line__new(struct annotate_args *args)
 	struct disasm_line *dl = NULL;
 	int nr = 1;
 
-	if (perf_evsel__is_group_event(args->evsel))
+	if (evsel__is_group_event(args->evsel))
 		nr = args->evsel->core.nr_members;
 
 	dl = zalloc(disasm_line_size(nr));
@@ -1437,7 +1437,7 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
 	if (queue)
 		return -1;
 
-	if (perf_evsel__is_group_event(evsel))
+	if (evsel__is_group_event(evsel))
 		width *= evsel->core.nr_members;
 
 	if (!*al->line)
@@ -1821,6 +1821,24 @@ static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused,
 }
 #endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
 
+static int
+symbol__disassemble_bpf_image(struct symbol *sym,
+			      struct annotate_args *args)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct disasm_line *dl;
+
+	args->offset = -1;
+	args->line = strdup("to be implemented");
+	args->line_nr = 0;
+	dl = disasm_line__new(args);
+	if (dl)
+		annotation_line__add(&dl->al, &notes->src->source);
+
+	free(args->line);
+	return 0;
+}
+
 /*
  * Possibly create a new version of line with tabs expanded. Returns the
  * existing or new line, storage is updated if a new line is allocated. If
@@ -1920,6 +1938,8 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 
 	if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) {
 		return symbol__disassemble_bpf(sym, args);
+	} else if (dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE) {
+		return symbol__disassemble_bpf_image(sym, args);
 	} else if (dso__is_kcore(dso)) {
 		kce.kcore_filename = symfs_filename;
 		kce.addr = map__rip_2objdump(map, sym->start);
@@ -2136,7 +2156,7 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
 		.evsel		= evsel,
 		.options	= options,
 	};
-	struct perf_env *env = perf_evsel__env(evsel);
+	struct perf_env *env = evsel__env(evsel);
 	const char *arch_name = perf_env__arch(env);
 	struct arch *arch;
 	int err;
@@ -2324,7 +2344,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel,
 	struct dso *dso = map->dso;
 	char *filename;
 	const char *d_filename;
-	const char *evsel_name = perf_evsel__name(evsel);
+	const char *evsel_name = evsel__name(evsel);
 	struct annotation *notes = symbol__annotation(sym);
 	struct sym_hist *h = annotation__histogram(notes, evsel->idx);
 	struct annotation_line *pos, *queue = NULL;
@@ -2348,9 +2368,9 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel,
 
 	len = symbol__size(sym);
 
-	if (perf_evsel__is_group_event(evsel)) {
+	if (evsel__is_group_event(evsel)) {
 		width *= evsel->core.nr_members;
-		perf_evsel__group_desc(evsel, buf, sizeof(buf));
+		evsel__group_desc(evsel, buf, sizeof(buf));
 		evsel_name = buf;
 	}
@@ -2485,7 +2505,7 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp,
 int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel,
 				struct annotation_options *opts)
 {
-	const char *ev_name = perf_evsel__name(evsel);
+	const char *ev_name = evsel__name(evsel);
 	char buf[1024];
 	char *filename;
 	int err = -1;
@@ -2498,8 +2518,8 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel,
 	if (fp == NULL)
 		goto out_free_filename;
 
-	if (perf_evsel__is_group_event(evsel)) {
-		perf_evsel__group_desc(evsel, buf, sizeof(buf));
+	if (evsel__is_group_event(evsel)) {
+		evsel__group_desc(evsel, buf, sizeof(buf));
 		ev_name = buf;
 	}
@@ -3044,7 +3064,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
 	if (notes->offsets == NULL)
 		return ENOMEM;
 
-	if (perf_evsel__is_group_event(evsel))
+	if (evsel__is_group_event(evsel))
 		nr_pcnt = evsel->core.nr_members;
 
 	err = symbol__annotate(ms, evsel, options, parch);
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 53be12b23ff4..875a0dd540e5 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -176,6 +176,14 @@ static void arm_spe_free(struct perf_session *session)
 	free(spe);
 }
 
+static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
+				      struct evsel *evsel)
+{
+	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
+
+	return evsel->core.attr.type == spe->pmu_type;
+}
+
 static const char * const arm_spe_info_fmts[] = {
 	[ARM_SPE_PMU_TYPE]	= "  PMU Type           %"PRId64"\n",
 };
@@ -218,6 +226,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
 	spe->auxtrace.flush_events = arm_spe_flush;
 	spe->auxtrace.free_events = arm_spe_free_events;
 	spe->auxtrace.free = arm_spe_free;
+	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
 	session->auxtrace = &spe->auxtrace;
 
 	arm_spe_print_info(&auxtrace_info->priv[0]);
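Note on the new hook: evsel_is_auxtrace lets the generic auxtrace layer ask a trace backend whether a given evsel belongs to its PMU; arm_spe_evsel_is_auxtrace above simply compares PMU types. Below is a minimal, self-contained sketch of the same NULL-guarded optional-callback pattern. The types and names here (auxtrace_ops, spe_like_match) are invented for illustration and are not perf code:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Simplified stand-ins for perf's session/evsel types. */
    struct evsel { unsigned int type; };

    struct auxtrace_ops {
        /* Optional hook, mirroring the new ->evsel_is_auxtrace() member. */
        bool (*evsel_is_auxtrace)(const struct auxtrace_ops *ops,
                                  const struct evsel *evsel);
        unsigned int pmu_type;
    };

    /* NULL-guarded dispatch, like auxtrace__evsel_is_auxtrace() in this diff. */
    static bool evsel_is_auxtrace(const struct auxtrace_ops *ops,
                                  const struct evsel *evsel)
    {
        if (!ops || !ops->evsel_is_auxtrace)
            return false;
        return ops->evsel_is_auxtrace(ops, evsel);
    }

    /* A backend matches purely by PMU type, as arm_spe_evsel_is_auxtrace() does. */
    static bool spe_like_match(const struct auxtrace_ops *ops,
                               const struct evsel *evsel)
    {
        return evsel->type == ops->pmu_type;
    }

    int main(void)
    {
        struct auxtrace_ops ops = { .evsel_is_auxtrace = spe_like_match,
                                    .pmu_type = 8 };
        struct evsel evsels[] = { { .type = 8 }, { .type = 1 } };

        for (size_t i = 0; i < 2; i++)
            printf("evsel %zu: %s\n", i,
                   evsel_is_auxtrace(&ops, &evsels[i]) ? "auxtrace" : "regular");
        return 0;
    }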
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 3571ce72ca28..749487a41cc7 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -33,6 +33,7 @@
 #include "evsel.h"
 #include "evsel_config.h"
 #include "symbol.h"
+#include "util/perf_api_probe.h"
 #include "util/synthetic-events.h"
 #include "thread_map.h"
 #include "asm/bug.h"
@@ -58,25 +59,6 @@
 #include "symbol/kallsyms.h"
 #include <internal/lib.h>
 
-static struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel)
-{
-	struct perf_pmu *pmu = NULL;
-
-	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
-		if (pmu->type == evsel->core.attr.type)
-			break;
-	}
-
-	return pmu;
-}
-
-static bool perf_evsel__is_aux_event(struct evsel *evsel)
-{
-	struct perf_pmu *pmu = perf_evsel__find_pmu(evsel);
-
-	return pmu && pmu->auxtrace;
-}
-
 /*
  * Make a group from 'leader' to 'last', requiring that the events were not
  * already grouped to a different leader.
@@ -88,7 +70,7 @@ static int perf_evlist__regroup(struct evlist *evlist,
 	struct evsel *evsel;
 	bool grp;
 
-	if (!perf_evsel__is_group_leader(leader))
+	if (!evsel__is_group_leader(leader))
 		return -EINVAL;
 
 	grp = false;
@@ -703,8 +685,8 @@ static int auxtrace_validate_aux_sample_size(struct evlist *evlist,
 
 	evlist__for_each_entry(evlist, evsel) {
 		sz = evsel->core.attr.aux_sample_size;
-		if (perf_evsel__is_group_leader(evsel)) {
-			has_aux_leader = perf_evsel__is_aux_event(evsel);
+		if (evsel__is_group_leader(evsel)) {
+			has_aux_leader = evsel__is_aux_event(evsel);
 			if (sz) {
 				if (has_aux_leader)
 					pr_err("Cannot add AUX area sampling to an AUX area event\n");
@@ -723,10 +705,10 @@ static int auxtrace_validate_aux_sample_size(struct evlist *evlist,
 				pr_err("Cannot add AUX area sampling because group leader is not an AUX area event\n");
 				return -EINVAL;
 			}
-			perf_evsel__set_sample_bit(evsel, AUX);
+			evsel__set_sample_bit(evsel, AUX);
 			opts->auxtrace_sample_mode = true;
 		} else {
-			perf_evsel__reset_sample_bit(evsel, AUX);
+			evsel__reset_sample_bit(evsel, AUX);
 		}
 	}
@@ -777,8 +759,8 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr,
 
 	/* Set aux_sample_size based on --aux-sample option */
 	evlist__for_each_entry(evlist, evsel) {
-		if (perf_evsel__is_group_leader(evsel)) {
-			has_aux_leader = perf_evsel__is_aux_event(evsel);
+		if (evsel__is_group_leader(evsel)) {
+			has_aux_leader = evsel__is_aux_event(evsel);
 		} else if (has_aux_leader) {
 			evsel->core.attr.aux_sample_size = sz;
 		}
@@ -787,7 +769,7 @@ no_opt:
 	aux_evsel = NULL;
 	/* Override with aux_sample_size from config term */
 	evlist__for_each_entry(evlist, evsel) {
-		if (perf_evsel__is_aux_event(evsel))
+		if (evsel__is_aux_event(evsel))
 			aux_evsel = evsel;
 		term = perf_evsel__get_config_term(evsel, AUX_SAMPLE_SIZE);
 		if (term) {
@@ -1234,29 +1216,79 @@ out_free:
 	return err;
 }
 
+static void unleader_evsel(struct evlist *evlist, struct evsel *leader)
+{
+	struct evsel *new_leader = NULL;
+	struct evsel *evsel;
+
+	/* Find new leader for the group */
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->leader != leader || evsel == leader)
+			continue;
+		if (!new_leader)
+			new_leader = evsel;
+		evsel->leader = new_leader;
+	}
+
+	/* Update group information */
+	if (new_leader) {
+		zfree(&new_leader->group_name);
+		new_leader->group_name = leader->group_name;
+		leader->group_name = NULL;
+
+		new_leader->core.nr_members = leader->core.nr_members - 1;
+		leader->core.nr_members = 1;
+	}
+}
+
+static void unleader_auxtrace(struct perf_session *session)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(session->evlist, evsel) {
+		if (auxtrace__evsel_is_auxtrace(session, evsel) &&
+		    evsel__is_group_leader(evsel)) {
+			unleader_evsel(session->evlist, evsel);
+		}
+	}
+}
+
 int perf_event__process_auxtrace_info(struct perf_session *session,
 				      union perf_event *event)
 {
 	enum auxtrace_type type = event->auxtrace_info.type;
+	int err;
 
 	if (dump_trace)
 		fprintf(stdout, " type: %u\n", type);
 
 	switch (type) {
 	case PERF_AUXTRACE_INTEL_PT:
-		return intel_pt_process_auxtrace_info(event, session);
+		err = intel_pt_process_auxtrace_info(event, session);
+		break;
 	case PERF_AUXTRACE_INTEL_BTS:
-		return intel_bts_process_auxtrace_info(event, session);
+		err = intel_bts_process_auxtrace_info(event, session);
+		break;
 	case PERF_AUXTRACE_ARM_SPE:
-		return arm_spe_process_auxtrace_info(event, session);
+		err = arm_spe_process_auxtrace_info(event, session);
+		break;
 	case PERF_AUXTRACE_CS_ETM:
-		return cs_etm__process_auxtrace_info(event, session);
+		err = cs_etm__process_auxtrace_info(event, session);
+		break;
 	case PERF_AUXTRACE_S390_CPUMSF:
-		return s390_cpumsf_process_auxtrace_info(event, session);
+		err = s390_cpumsf_process_auxtrace_info(event, session);
+		break;
 	case PERF_AUXTRACE_UNKNOWN:
 	default:
 		return -EINVAL;
 	}
+
+	if (err)
+		return err;
+
+	unleader_auxtrace(session);
+
+	return 0;
 }
 
 s64 perf_event__process_auxtrace(struct perf_session *session,
@@ -1412,8 +1444,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 			synth_opts->branches = true;
 			synth_opts->returns = true;
 			break;
+		case 'G':
 		case 'g':
-			synth_opts->callchain = true;
+			if (p[-1] == 'G')
+				synth_opts->add_callchain = true;
+			else
+				synth_opts->callchain = true;
 			synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
 			while (*p == ' ' || *p == ',')
@@ -1428,8 +1464,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 				synth_opts->callchain_sz = val;
 			}
 			break;
+		case 'L':
 		case 'l':
-			synth_opts->last_branch = true;
+			if (p[-1] == 'L')
+				synth_opts->add_last_branch = true;
+			else
+				synth_opts->last_branch = true;
 			synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
 			while (*p == ' ' || *p == ',')
@@ -2482,7 +2522,7 @@ static int parse_addr_filter(struct evsel *evsel, const char *filter,
 			goto out_exit;
 		}
 
-		if (perf_evsel__append_addr_filter(evsel, new_filter)) {
+		if (evsel__append_addr_filter(evsel, new_filter)) {
 			err = -ENOMEM;
 			goto out_exit;
 		}
@@ -2500,9 +2540,9 @@ out_exit:
 	return err;
 }
 
-static int perf_evsel__nr_addr_filter(struct evsel *evsel)
+static int evsel__nr_addr_filter(struct evsel *evsel)
 {
-	struct perf_pmu *pmu = perf_evsel__find_pmu(evsel);
+	struct perf_pmu *pmu = evsel__find_pmu(evsel);
 	int nr_addr_filters = 0;
 
 	if (!pmu)
@@ -2521,7 +2561,7 @@ int auxtrace_parse_filters(struct evlist *evlist)
 
 	evlist__for_each_entry(evlist, evsel) {
 		filter = evsel->filter;
-		max_nr = perf_evsel__nr_addr_filter(evsel);
+		max_nr = evsel__nr_addr_filter(evsel);
 		if (!filter || !max_nr)
 			continue;
 		evsel->filter = NULL;
@@ -2577,3 +2617,12 @@ void auxtrace__free(struct perf_session *session)
 
 	return session->auxtrace->free(session);
 }
+
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
+				 struct evsel *evsel)
+{
+	if (!session->auxtrace || !session->auxtrace->evsel_is_auxtrace)
+		return false;
+
+	return session->auxtrace->evsel_is_auxtrace(session, evsel);
+}
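unleader_evsel() above promotes the first remaining member of a group to leader and hands over the group name and member count, so an AUX-area event no longer leads a sample group. A simplified, runnable model of that hand-off (a plain array instead of perf's evlist; all names invented):

    #include <stdio.h>

    struct ev {
        const char *name;
        struct ev *leader;
        int nr_members;
    };

    /* Model of unleader_evsel(): retire 'leader', promote the first follower. */
    static void unleader(struct ev *evs, int n, struct ev *leader)
    {
        struct ev *new_leader = NULL;

        for (int i = 0; i < n; i++) {
            if (evs[i].leader != leader || &evs[i] == leader)
                continue;
            if (!new_leader)
                new_leader = &evs[i];     /* first follower becomes leader */
            evs[i].leader = new_leader;
        }
        if (new_leader) {
            new_leader->nr_members = leader->nr_members - 1;
            leader->nr_members = 1;       /* old leader now stands alone */
        }
    }

    int main(void)
    {
        struct ev evs[3] = {
            { "intel_pt", NULL, 3 }, { "cycles", NULL, 0 }, { "insns", NULL, 0 },
        };
        for (int i = 0; i < 3; i++)
            evs[i].leader = &evs[0];      /* the auxtrace event leads the group */

        unleader(evs, 3, &evs[0]);        /* drop it as leader */

        for (int i = 0; i < 3; i++)
            printf("%s -> leader %s (nr_members=%d)\n", evs[i].name,
                   evs[i].leader->name, evs[i].nr_members);
        return 0;
    }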
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index e58ef160b599..0220a2e86c16 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -21,6 +21,7 @@
 union perf_event;
 struct perf_session;
 struct evlist;
+struct evsel;
 struct perf_tool;
 struct mmap;
 struct perf_sample;
@@ -73,8 +74,10 @@ enum itrace_period_type {
 * @calls: limit branch samples to calls (can be combined with @returns)
 * @returns: limit branch samples to returns (can be combined with @calls)
 * @callchain: add callchain to 'instructions' events
+ * @add_callchain: add callchain to existing event records
 * @thread_stack: feed branches to the thread_stack
 * @last_branch: add branch context to 'instruction' events
+ * @add_last_branch: add branch context to existing event records
 * @callchain_sz: maximum callchain size
 * @last_branch_sz: branch context size
 * @period: 'instructions' events period
@@ -100,8 +103,10 @@ struct itrace_synth_opts {
	bool			calls;
	bool			returns;
	bool			callchain;
+	bool			add_callchain;
	bool			thread_stack;
	bool			last_branch;
+	bool			add_last_branch;
	unsigned int		callchain_sz;
	unsigned int		last_branch_sz;
	unsigned long long	period;
@@ -166,6 +171,8 @@ struct auxtrace {
			    struct perf_tool *tool);
	void (*free_events)(struct perf_session *session);
	void (*free)(struct perf_session *session);
+	bool (*evsel_is_auxtrace)(struct perf_session *session,
+				  struct evsel *evsel);
 };
@@ -584,6 +591,8 @@ void auxtrace__dump_auxtrace_sample(struct perf_session *session,
 int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool);
 void auxtrace__free_events(struct perf_session *session);
 void auxtrace__free(struct perf_session *session);
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
+				 struct evsel *evsel);
 
 #define ITRACE_HELP \
 "		i:			synthesize instructions events\n" \
@@ -750,6 +759,13 @@ void auxtrace_index__free(struct list_head *head __maybe_unused)
 }
 
 static inline
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session __maybe_unused,
+				 struct evsel *evsel __maybe_unused)
+{
+	return false;
+}
+
+static inline
 int auxtrace_parse_filters(struct evlist *evlist __maybe_unused)
 {
	return 0;
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index a3207d900339..3742511a08d1 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -6,6 +6,9 @@
 #include <bpf/libbpf.h>
 #include <linux/btf.h>
 #include <linux/err.h>
+#include <linux/string.h>
+#include <internal/lib.h>
+#include <symbol/kallsyms.h>
 #include "bpf-event.h"
 #include "debug.h"
 #include "dso.h"
@@ -290,11 +293,82 @@ out:
	return err ? -1 : 0;
 }
 
+struct kallsyms_parse {
+	union perf_event	*event;
+	perf_event__handler_t	 process;
+	struct machine		*machine;
+	struct perf_tool	*tool;
+};
+
+static int
+process_bpf_image(char *name, u64 addr, struct kallsyms_parse *data)
+{
+	struct machine *machine = data->machine;
+	union perf_event *event = data->event;
+	struct perf_record_ksymbol *ksymbol;
+	int len;
+
+	ksymbol = &event->ksymbol;
+
+	*ksymbol = (struct perf_record_ksymbol) {
+		.header = {
+			.type = PERF_RECORD_KSYMBOL,
+			.size = offsetof(struct perf_record_ksymbol, name),
+		},
+		.addr = addr,
+		.len = page_size,
+		.ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF,
+		.flags = 0,
+	};
+
+	len = scnprintf(ksymbol->name, KSYM_NAME_LEN, "%s", name);
+	ksymbol->header.size += PERF_ALIGN(len + 1, sizeof(u64));
+	memset((void *) event + event->header.size, 0, machine->id_hdr_size);
+	event->header.size += machine->id_hdr_size;
+
+	return perf_tool__process_synth_event(data->tool, event, machine,
+					      data->process);
+}
+
+static int
+kallsyms_process_symbol(void *data, const char *_name,
+			char type __maybe_unused, u64 start)
+{
+	char disp[KSYM_NAME_LEN];
+	char *module, *name;
+	unsigned long id;
+	int err = 0;
+
+	module = strchr(_name, '\t');
+	if (!module)
+		return 0;
+
+	/* We are going after [bpf] module ... */
+	if (strcmp(module + 1, "[bpf]"))
+		return 0;
+
+	name = memdup(_name, (module - _name) + 1);
+	if (!name)
+		return -ENOMEM;
+
+	name[module - _name] = 0;
+
+	/* .. and only for trampolines and dispatchers */
+	if ((sscanf(name, "bpf_trampoline_%lu", &id) == 1) ||
+	    (sscanf(name, "bpf_dispatcher_%s", disp) == 1))
+		err = process_bpf_image(name, start, data);
+
+	free(name);
+	return err;
+}
+
 int perf_event__synthesize_bpf_events(struct perf_session *session,
				      perf_event__handler_t process,
				      struct machine *machine,
				      struct record_opts *opts)
 {
+	const char *kallsyms_filename = "/proc/kallsyms";
+	struct kallsyms_parse arg;
	union perf_event *event;
	__u32 id = 0;
	int err;
@@ -303,6 +377,8 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
	event = malloc(sizeof(event->bpf) + KSYM_NAME_LEN + machine->id_hdr_size);
	if (!event)
		return -1;
+
+	/* Synthesize all the bpf programs in system. */
	while (true) {
		err = bpf_prog_get_next_id(id, &id);
		if (err) {
@@ -335,6 +411,23 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
			break;
		}
	}
+
+	/* Synthesize all the bpf images - trampolines/dispatchers. */
+	if (symbol_conf.kallsyms_name != NULL)
+		kallsyms_filename = symbol_conf.kallsyms_name;
+
+	arg = (struct kallsyms_parse) {
+		.event   = event,
+		.process = process,
+		.machine = machine,
+		.tool    = session->tool,
+	};
+
+	if (kallsyms__parse(kallsyms_filename, &arg, kallsyms_process_symbol)) {
+		pr_err("%s: failed to synthesize bpf images: %s\n",
+		       __func__, strerror(errno));
+	}
+
	free(event);
	return err;
 }
@@ -416,8 +509,7 @@ static int bpf_event__sb_cb(union perf_event *event, void *data)
	return 0;
 }
 
-int bpf_event__add_sb_event(struct evlist **evlist,
-			    struct perf_env *env)
+int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env)
 {
	struct perf_event_attr attr = {
		.type	          = PERF_TYPE_SOFTWARE,
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
index 81fdc88e6c1a..68f315c3df5b 100644
--- a/tools/perf/util/bpf-event.h
+++ b/tools/perf/util/bpf-event.h
@@ -33,8 +33,7 @@ struct btf_node {
 #ifdef HAVE_LIBBPF_SUPPORT
 int machine__process_bpf(struct machine *machine, union perf_event *event,
			 struct perf_sample *sample);
-int bpf_event__add_sb_event(struct evlist **evlist,
-			    struct perf_env *env);
+int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env);
 void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
				    struct perf_env *env,
				    FILE *fp);
@@ -46,8 +45,8 @@ static inline int machine__process_bpf(struct machine *machine __maybe_unused,
	return 0;
 }
 
-static inline int bpf_event__add_sb_event(struct evlist **evlist __maybe_unused,
-					  struct perf_env *env __maybe_unused)
+static inline int evlist__add_bpf_sb_event(struct evlist *evlist __maybe_unused,
+					   struct perf_env *env __maybe_unused)
 {
	return 0;
 }
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 10c187b8b8ea..83bfb8768235 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -1430,7 +1430,7 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
		return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH;
	}
 
-	if (perf_evsel__is_bpf_output(evsel))
+	if (evsel__is_bpf_output(evsel))
		check_pass = true;
	if (attr->type == PERF_TYPE_RAW)
		check_pass = true;
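kallsyms_process_symbol() above receives the symbol field of a /proc/kallsyms line, in which a module suffix follows a tab, and matches only trampoline/dispatcher names inside the [bpf] pseudo-module. A self-contained sketch of the same matching logic; the sample inputs and the is_bpf_image_symbol() helper are invented for illustration:

    #include <stdio.h>
    #include <string.h>

    /* Mirror of the matching logic in kallsyms_process_symbol() (simplified). */
    static int is_bpf_image_symbol(const char *_name, char *out, size_t outsz)
    {
        const char *module = strchr(_name, '\t');
        char disp[128];
        unsigned long id;

        if (!module || strcmp(module + 1, "[bpf]"))
            return 0;               /* not in the [bpf] pseudo-module */

        /* copy just the symbol name, without the "\t[bpf]" suffix */
        snprintf(out, outsz, "%.*s", (int)(module - _name), _name);

        return sscanf(out, "bpf_trampoline_%lu", &id) == 1 ||
               sscanf(out, "bpf_dispatcher_%127s", disp) == 1;
    }

    int main(void)
    {
        char name[128];
        /* example symbol fields, as kallsyms__parse() would hand them over */
        const char *fields[] = {
            "bpf_trampoline_6\t[bpf]",
            "bpf_dispatcher_xdp\t[bpf]",
            "bpf_prog_6deef7357e7b4530\t[bpf]",
        };

        for (int i = 0; i < 3; i++)
            printf("%-36s -> %s\n", fields[i],
                   is_bpf_image_symbol(fields[i], name, sizeof(name)) ?
                   "image (trampoline/dispatcher)" : "skip");
        return 0;
    }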
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 154a05cd03af..4d3f02fa223d 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -15,13 +15,18 @@
 #include "event.h"
 
 struct branch_flags {
-	u64 mispred:1;
-	u64 predicted:1;
-	u64 in_tx:1;
-	u64 abort:1;
-	u64 cycles:16;
-	u64 type:4;
-	u64 reserved:40;
+	union {
+		u64 value;
+		struct {
+			u64 mispred:1;
+			u64 predicted:1;
+			u64 in_tx:1;
+			u64 abort:1;
+			u64 cycles:16;
+			u64 type:4;
+			u64 reserved:40;
+		};
+	};
 };
 
 struct branch_info {
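Wrapping the branch_flags bit-fields in an anonymous union with a u64 view lets callers copy or compare a whole flag set in one operation instead of field by field. A minimal sketch, using uint64_t in place of the kernel's u64 typedef:

    #include <stdint.h>
    #include <stdio.h>

    struct branch_flags {
        union {
            uint64_t value;          /* whole-word view of all flags */
            struct {
                uint64_t mispred:1;
                uint64_t predicted:1;
                uint64_t in_tx:1;
                uint64_t abort:1;
                uint64_t cycles:16;
                uint64_t type:4;
                uint64_t reserved:40;
            };
        };
    };

    int main(void)
    {
        struct branch_flags a = { .value = 0 }, b = { .value = 0 };

        a.predicted = 1;
        a.cycles = 12;
        b.value = a.value;           /* one-shot copy of every field */

        printf("equal: %s\n", a.value == b.value ? "yes" : "no");
        printf("cycles: %u\n", (unsigned)b.cycles);
        return 0;
    }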
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 706bb7bbe1e1..8f668ee29f25 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -143,6 +143,9 @@ struct callchain_cursor_node {
	u64				ip;
	struct map_symbol		ms;
	const char			*srcline;
+	/* Indicate valid cursor node for LBR stitch */
+	bool				valid;
+
	bool				branch;
	struct branch_flags		branch_flags;
	u64				branch_from;
@@ -151,6 +154,11 @@ struct callchain_cursor_node {
	struct callchain_cursor_node	*next;
 };
 
+struct stitch_list {
+	struct list_head		node;
+	struct callchain_cursor_node	cursor;
+};
+
 struct callchain_cursor {
	u64				nr;
	struct callchain_cursor_node	*first;
diff --git a/tools/perf/util/cap.h b/tools/perf/util/cap.h
index 051dc590ceee..ae52878c0b2e 100644
--- a/tools/perf/util/cap.h
+++ b/tools/perf/util/cap.h
@@ -29,4 +29,8 @@ static inline bool perf_cap__capable(int cap __maybe_unused)
 #define CAP_SYSLOG	34
 #endif
 
+#ifndef CAP_PERFMON
+#define CAP_PERFMON	38
+#endif
+
 #endif /* __PERF_CAP_H */
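CAP_PERFMON is new in Linux 5.8; the #ifndef fallback above keeps perf building against older UAPI headers that do not define it. Callers would typically probe the narrow capability first and fall back to CAP_SYS_ADMIN; the snippet below sketches that pattern with a stubbed perf_cap__capable() (the fallback chain is an assumption about typical use, not code from this patch):

    #include <stdbool.h>
    #include <stdio.h>

    #ifndef CAP_SYS_ADMIN
    #define CAP_SYS_ADMIN	21
    #endif
    #ifndef CAP_PERFMON
    #define CAP_PERFMON	38	/* same fallback define as cap.h above */
    #endif

    /* Stub standing in for perf's libcap-backed perf_cap__capable(). */
    static bool perf_cap__capable(int cap)
    {
        (void)cap;
        return false;		/* pretend we are unprivileged */
    }

    static bool privileged_for_perf(void)
    {
        /* Prefer the narrow capability, fall back to the broad one. */
        return perf_cap__capable(CAP_PERFMON) ||
               perf_cap__capable(CAP_SYS_ADMIN);
    }

    int main(void)
    {
        printf("privileged: %s\n", privileged_for_perf() ? "yes" : "no");
        return 0;
    }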
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index b73fb7823048..050dea9f1e88 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -107,7 +107,8 @@ found:
 
 static void cgroup__delete(struct cgroup *cgroup)
 {
-	close(cgroup->fd);
+	if (cgroup->fd >= 0)
+		close(cgroup->fd);
	zfree(&cgroup->name);
	free(cgroup);
 }
diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c
index a12872f2856a..6b3988a7aba8 100644
--- a/tools/perf/util/cloexec.c
+++ b/tools/perf/util/cloexec.c
@@ -28,7 +28,7 @@ int __weak sched_getcpu(void)
 
 static int perf_flag_probe(void)
 {
-	/* use 'safest' configuration as used in perf_evsel__fallback() */
+	/* use 'safest' configuration as used in evsel__fallback() */
	struct perf_event_attr attr = {
		.type = PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_CPU_CLOCK,
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index cd92a99eb89d..cd007cc9c283 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -564,6 +564,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
		resp = cs_etm_decoder__set_tid(etmq, packet_queue,
					       elem, trace_chan_id);
		break;
+	/* Unused packet types */
+	case OCSD_GEN_TRC_ELEM_I_RANGE_NOPATH:
	case OCSD_GEN_TRC_ELEM_ADDR_NACC:
	case OCSD_GEN_TRC_ELEM_CYCLE_COUNT:
	case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN:
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 62d2f9b9ce1b..c283223fb31f 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -94,6 +94,9 @@ struct cs_etm_queue {
	struct cs_etm_traceid_queue **traceid_queues;
 };
 
+/* RB tree for quick conversion between traceID and metadata pointers */
+static struct intlist *traceid_list;
+
 static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
 static int cs_etm__process_queues(struct cs_etm_auxtrace *etm);
 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
@@ -631,6 +634,16 @@ static void cs_etm__free(struct perf_session *session)
	zfree(&aux);
 }
 
+static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
+				      struct evsel *evsel)
+{
+	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
+						   struct cs_etm_auxtrace,
+						   auxtrace);
+
+	return evsel->core.attr.type == aux->pmu_type;
+}
+
 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
 {
	struct machine *machine;
@@ -2618,6 +2631,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
	etm->auxtrace.flush_events = cs_etm__flush_events;
	etm->auxtrace.free_events = cs_etm__free_events;
	etm->auxtrace.free = cs_etm__free;
+	etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
	session->auxtrace = &etm->auxtrace;
 
	etm->unknown_thread = thread__new(999999999, 999999999);
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 650ecc2a6349..4ad925d6d799 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -114,9 +114,6 @@ enum cs_etm_isa {
	CS_ETM_ISA_T32,
 };
 
-/* RB tree for quick conversion between traceID and metadata pointers */
-struct intlist *traceid_list;
-
 struct cs_etm_queue;
 
 struct cs_etm_packet {
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index dbc772bfb04e..5f36fc6a5578 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -835,7 +835,7 @@ static int process_sample_event(struct perf_tool *tool,
		return -1;
	}
 
-	if (perf_evsel__is_bpf_output(evsel)) {
+	if (evsel__is_bpf_output(evsel)) {
		ret = add_bpf_output_values(event_class, event, sample);
		if (ret)
			return -1;
@@ -1155,7 +1155,7 @@ static int add_event(struct ctf_writer *cw, struct evsel *evsel)
 {
	struct bt_ctf_event_class *event_class;
	struct evsel_priv *priv;
-	const char *name = perf_evsel__name(evsel);
+	const char *name = evsel__name(evsel);
	int ret;
 
	pr("Adding event '%s' (type %d)\n", name, evsel->core.attr.type);
@@ -1174,7 +1174,7 @@ static int add_event(struct ctf_writer *cw, struct evsel *evsel)
		goto err;
	}
 
-	if (perf_evsel__is_bpf_output(evsel)) {
+	if (evsel__is_bpf_output(evsel)) {
		ret = add_bpf_output_types(cw, event_class);
		if (ret)
			goto err;
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 91f21239608b..f338990e0fe6 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -191,6 +191,7 @@ int dso__read_binary_type_filename(const struct dso *dso,
	case DSO_BINARY_TYPE__GUEST_KALLSYMS:
	case DSO_BINARY_TYPE__JAVA_JIT:
	case DSO_BINARY_TYPE__BPF_PROG_INFO:
+	case DSO_BINARY_TYPE__BPF_IMAGE:
	case DSO_BINARY_TYPE__NOT_FOUND:
		ret = -1;
		break;
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 2db64b79617a..9553a1fd9e8a 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -40,6 +40,7 @@ enum dso_binary_type {
	DSO_BINARY_TYPE__GUEST_KCORE,
	DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
	DSO_BINARY_TYPE__BPF_PROG_INFO,
+	DSO_BINARY_TYPE__BPF_IMAGE,
	DSO_BINARY_TYPE__NOT_FOUND,
 };
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 7632075a8792..1ab2682d5d2b 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -48,6 +48,7 @@ struct perf_env {
	char			*cpuid;
	unsigned long long	total_mem;
	unsigned int		msr_pmu_type;
+	unsigned int		max_branches;
 
	int			nr_cmdline;
	int			nr_sibling_cores;
@@ -57,12 +58,14 @@ struct perf_env {
	int			nr_memory_nodes;
	int			nr_pmu_mappings;
	int			nr_groups;
+	int			nr_cpu_pmu_caps;
	char			*cmdline;
	const char		**cmdline_argv;
	char			*sibling_cores;
	char			*sibling_dies;
	char			*sibling_threads;
	char			*pmu_mappings;
+	char			*cpu_pmu_caps;
	struct cpu_topology_map	*cpu;
	struct cpu_cache_level	*caches;
	int			 caches_cnt;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index dc0e11214ae1..f581550a3015 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -626,7 +626,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
			ret = strlist__has_entry(symbol_conf.sym_list,
						al->sym->name);
		}
-		if (!(ret && al->sym)) {
+		if (!ret && al->sym) {
			snprintf(al_addr_str, sz, "0x%"PRIx64,
				al->map->unmap_ip(al->map, al->sym->start));
			ret = strlist__has_entry(symbol_conf.sym_list,
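The event.c change is a logic fix rather than a rename: the old condition !(ret && al->sym) also fired when al->sym was NULL and the block then dereferenced it, while !ret && al->sym only falls back to the address lookup when the name lookup failed and a symbol actually exists. The truth-table difference, in isolation:

    #include <stdio.h>

    int main(void)
    {
        /* ret: name found in sym_list; sym: al->sym is non-NULL */
        for (int ret = 0; ret <= 1; ret++)
            for (int sym = 0; sym <= 1; sym++)
                printf("ret=%d sym=%d  old:!(ret&&sym)=%d  new:!ret&&sym=%d\n",
                       ret, sym, !(ret && sym), !ret && sym);
        return 0;
    }

The old form evaluates true for ret=0, sym=0, which is exactly the case where dereferencing al->sym would crash; the new form is false there.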
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 1548237b6558..0a0b760d6948 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -23,6 +23,7 @@
 #include "asm/bug.h"
 #include "bpf-event.h"
 #include "util/string2.h"
+#include "util/perf_api_probe.h"
 #include <signal.h>
 #include <unistd.h>
 #include <sched.h>
@@ -118,7 +119,7 @@ static void perf_evlist__update_id_pos(struct evlist *evlist)
	struct evsel *evsel;
 
	evlist__for_each_entry(evlist, evsel)
-		perf_evsel__calc_id_pos(evsel);
+		evsel__calc_id_pos(evsel);
 
	perf_evlist__set_id_pos(evlist);
 }
@@ -390,14 +391,14 @@ void evlist__disable(struct evlist *evlist)
		evlist__for_each_entry(evlist, pos) {
			if (evsel__cpu_iter_skip(pos, cpu))
				continue;
-			if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
+			if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
				continue;
			evsel__disable_cpu(pos, pos->cpu_iter - 1);
		}
	}
	affinity__cleanup(&affinity);
	evlist__for_each_entry(evlist, pos) {
-		if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
+		if (!evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		pos->disabled = true;
	}
@@ -420,14 +421,14 @@ void evlist__enable(struct evlist *evlist)
		evlist__for_each_entry(evlist, pos) {
			if (evsel__cpu_iter_skip(pos, cpu))
				continue;
-			if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
+			if (!evsel__is_group_leader(pos) || !pos->core.fd)
				continue;
			evsel__enable_cpu(pos, pos->cpu_iter - 1);
		}
	}
	affinity__cleanup(&affinity);
	evlist__for_each_entry(evlist, pos) {
-		if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
+		if (!evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		pos->disabled = false;
	}
@@ -947,7 +948,7 @@ void __perf_evlist__set_sample_bit(struct evlist *evlist,
	struct evsel *evsel;
 
	evlist__for_each_entry(evlist, evsel)
-		__perf_evsel__set_sample_bit(evsel, bit);
+		__evsel__set_sample_bit(evsel, bit);
 }
 
 void __perf_evlist__reset_sample_bit(struct evlist *evlist,
@@ -956,7 +957,7 @@ void __perf_evlist__reset_sample_bit(struct evlist *evlist,
	struct evsel *evsel;
 
	evlist__for_each_entry(evlist, evsel)
-		__perf_evsel__reset_sample_bit(evsel, bit);
+		__evsel__reset_sample_bit(evsel, bit);
 }
 
 int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
@@ -994,7 +995,7 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;
 
-		err = perf_evsel__set_filter(evsel, filter);
+		err = evsel__set_filter(evsel, filter);
		if (err)
			break;
	}
@@ -1014,7 +1015,7 @@ int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter)
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;
 
-		err = perf_evsel__append_tp_filter(evsel, filter);
+		err = evsel__append_tp_filter(evsel, filter);
		if (err)
			break;
	}
@@ -1131,8 +1132,10 @@ bool perf_evlist__valid_read_format(struct evlist *evlist)
	u64 sample_type = first->core.attr.sample_type;
 
	evlist__for_each_entry(evlist, pos) {
-		if (read_format != pos->core.attr.read_format)
-			return false;
+		if (read_format != pos->core.attr.read_format) {
+			pr_debug("Read format differs %#" PRIx64 " vs %#" PRIx64 "\n",
+				 read_format, (u64)pos->core.attr.read_format);
+		}
	}
 
	/* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */
@@ -1436,7 +1439,7 @@ int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
	if (!evsel)
		return -EFAULT;
-	return perf_evsel__parse_sample(evsel, event, sample);
+	return evsel__parse_sample(evsel, event, sample);
 }
 
 int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
@@ -1447,7 +1450,7 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
	if (!evsel)
		return -EFAULT;
-	return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
+	return evsel__parse_sample_timestamp(evsel, event, timestamp);
 }
 
 int perf_evlist__strerror_open(struct evlist *evlist,
@@ -1701,133 +1704,3 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
	}
	return leader;
 }
-
-int perf_evlist__add_sb_event(struct evlist **evlist,
-			      struct perf_event_attr *attr,
-			      perf_evsel__sb_cb_t cb,
-			      void *data)
-{
-	struct evsel *evsel;
-	bool new_evlist = (*evlist) == NULL;
-
-	if (*evlist == NULL)
-		*evlist = evlist__new();
-	if (*evlist == NULL)
-		return -1;
-
-	if (!attr->sample_id_all) {
-		pr_warning("enabling sample_id_all for all side band events\n");
-		attr->sample_id_all = 1;
-	}
-
-	evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries);
-	if (!evsel)
-		goto out_err;
-
-	evsel->side_band.cb = cb;
-	evsel->side_band.data = data;
-	evlist__add(*evlist, evsel);
-	return 0;
-
-out_err:
-	if (new_evlist) {
-		evlist__delete(*evlist);
-		*evlist = NULL;
-	}
-	return -1;
-}
-
-static void *perf_evlist__poll_thread(void *arg)
-{
-	struct evlist *evlist = arg;
-	bool draining = false;
-	int i, done = 0;
-	/*
-	 * In order to read symbols from other namespaces perf to needs to call
-	 * setns(2).  This isn't permitted if the struct_fs has multiple users.
-	 * unshare(2) the fs so that we may continue to setns into namespaces
-	 * that we're observing when, for instance, reading the build-ids at
-	 * the end of a 'perf record' session.
-	 */
-	unshare(CLONE_FS);
-
-	while (!done) {
-		bool got_data = false;
-
-		if (evlist->thread.done)
-			draining = true;
-
-		if (!draining)
-			evlist__poll(evlist, 1000);
-
-		for (i = 0; i < evlist->core.nr_mmaps; i++) {
-			struct mmap *map = &evlist->mmap[i];
-			union perf_event *event;
-
-			if (perf_mmap__read_init(&map->core))
-				continue;
-			while ((event = perf_mmap__read_event(&map->core)) != NULL) {
-				struct evsel *evsel = perf_evlist__event2evsel(evlist, event);
-
-				if (evsel && evsel->side_band.cb)
-					evsel->side_band.cb(event, evsel->side_band.data);
-				else
-					pr_warning("cannot locate proper evsel for the side band event\n");
-
-				perf_mmap__consume(&map->core);
-				got_data = true;
-			}
-			perf_mmap__read_done(&map->core);
-		}
-
-		if (draining && !got_data)
-			break;
-	}
-	return NULL;
-}
-
-int perf_evlist__start_sb_thread(struct evlist *evlist,
-				 struct target *target)
-{
-	struct evsel *counter;
-
-	if (!evlist)
-		return 0;
-
-	if (perf_evlist__create_maps(evlist, target))
-		goto out_delete_evlist;
-
-	evlist__for_each_entry(evlist, counter) {
-		if (evsel__open(counter, evlist->core.cpus,
-				evlist->core.threads) < 0)
-			goto out_delete_evlist;
-	}
-
-	if (evlist__mmap(evlist, UINT_MAX))
-		goto out_delete_evlist;
-
-	evlist__for_each_entry(evlist, counter) {
-		if (evsel__enable(counter))
-			goto out_delete_evlist;
-	}
-
-	evlist->thread.done = 0;
-	if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
-		goto out_delete_evlist;
-
-	return 0;
-
-out_delete_evlist:
-	evlist__delete(evlist);
-	evlist = NULL;
-	return -1;
-}
-
-void perf_evlist__stop_sb_thread(struct evlist *evlist)
-{
-	if (!evlist)
-		return;
-	evlist->thread.done = 1;
-	pthread_join(evlist->thread.th, NULL);
-	evlist__delete(evlist);
-}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index f5bd5c386df1..b6f325dfb4d2 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -107,10 +107,11 @@ int __perf_evlist__add_default_attrs(struct evlist *evlist,
 
 int perf_evlist__add_dummy(struct evlist *evlist);
 
-int perf_evlist__add_sb_event(struct evlist **evlist,
+int perf_evlist__add_sb_event(struct evlist *evlist,
			      struct perf_event_attr *attr,
-			      perf_evsel__sb_cb_t cb,
+			      evsel__sb_cb_t cb,
			      void *data);
+void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data);
 int perf_evlist__start_sb_thread(struct evlist *evlist,
				 struct target *target);
 void perf_evlist__stop_sb_thread(struct evlist *evlist);
@@ -173,10 +174,6 @@ void evlist__close(struct evlist *evlist);
 struct callchain_param;
 
 void perf_evlist__set_id_pos(struct evlist *evlist);
-bool perf_can_sample_identifier(void);
-bool perf_can_record_switch_events(void);
-bool perf_can_record_cpu_wide(void);
-bool perf_can_aux_sample(void);
 void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
			 struct callchain_param *callchain);
 int record_opts__config(struct record_opts *opts);
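The side-band machinery deleted here moves to the new sideband_evlist.c (added to the Build file at the top of this diff). Its core is a drain-then-exit loop: once the done flag is set, stop polling but keep reading until a pass finds no data. A condensed, runnable model of that pattern, with an int queue standing in for the mmap ring buffers (all names invented):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <unistd.h>

    static int queue[64];
    static int head, tail;              /* single producer, single consumer */
    static volatile int done;

    static void *poll_thread(void *arg)
    {
        bool draining = false;

        (void)arg;
        while (1) {
            bool got_data = false;

            if (done)
                draining = true;        /* stop waiting, drain what is left */
            if (!draining)
                usleep(1000);           /* stands in for evlist__poll() */

            while (head < tail) {       /* stands in for reading the mmaps */
                printf("consumed event %d\n", queue[head++]);
                got_data = true;
            }
            if (draining && !got_data)
                break;                  /* one empty pass after done: exit */
        }
        return NULL;
    }

    int main(void)
    {
        pthread_t th;

        pthread_create(&th, NULL, poll_thread, NULL);
        for (int i = 0; i < 5; i++)
            queue[tail++] = i;          /* producer side */
        done = 1;                       /* like perf_evlist__stop_sb_thread() */
        pthread_join(th, NULL);
        return 0;
    }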
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index eb880efbce16..f3e60c45d59a 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -102,7 +102,7 @@ set_methods:
 
 #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
 
-int __perf_evsel__sample_size(u64 sample_type)
+int __evsel__sample_size(u64 sample_type)
 {
	u64 mask = sample_type & PERF_SAMPLE_MASK;
	int size = 0;
@@ -178,53 +178,53 @@ static int __perf_evsel__calc_is_pos(u64 sample_type)
	return idx;
 }
 
-void perf_evsel__calc_id_pos(struct evsel *evsel)
+void evsel__calc_id_pos(struct evsel *evsel)
 {
	evsel->id_pos = __perf_evsel__calc_id_pos(evsel->core.attr.sample_type);
	evsel->is_pos = __perf_evsel__calc_is_pos(evsel->core.attr.sample_type);
 }
 
-void __perf_evsel__set_sample_bit(struct evsel *evsel,
+void __evsel__set_sample_bit(struct evsel *evsel,
				  enum perf_event_sample_format bit)
 {
	if (!(evsel->core.attr.sample_type & bit)) {
		evsel->core.attr.sample_type |= bit;
		evsel->sample_size += sizeof(u64);
-		perf_evsel__calc_id_pos(evsel);
+		evsel__calc_id_pos(evsel);
	}
 }
 
-void __perf_evsel__reset_sample_bit(struct evsel *evsel,
+void __evsel__reset_sample_bit(struct evsel *evsel,
				    enum perf_event_sample_format bit)
 {
	if (evsel->core.attr.sample_type & bit) {
		evsel->core.attr.sample_type &= ~bit;
		evsel->sample_size -= sizeof(u64);
-		perf_evsel__calc_id_pos(evsel);
+		evsel__calc_id_pos(evsel);
	}
 }
 
-void perf_evsel__set_sample_id(struct evsel *evsel,
+void evsel__set_sample_id(struct evsel *evsel,
			       bool can_sample_identifier)
 {
	if (can_sample_identifier) {
-		perf_evsel__reset_sample_bit(evsel, ID);
-		perf_evsel__set_sample_bit(evsel, IDENTIFIER);
+		evsel__reset_sample_bit(evsel, ID);
+		evsel__set_sample_bit(evsel, IDENTIFIER);
	} else {
-		perf_evsel__set_sample_bit(evsel, ID);
+		evsel__set_sample_bit(evsel, ID);
	}
	evsel->core.attr.read_format |= PERF_FORMAT_ID;
 }
 
 /**
- * perf_evsel__is_function_event - Return whether given evsel is a function
+ * evsel__is_function_event - Return whether given evsel is a function
 * trace event
 *
 * @evsel - evsel selector to be tested
 *
 * Return %true if event is function trace event
 */
-bool perf_evsel__is_function_event(struct evsel *evsel)
+bool evsel__is_function_event(struct evsel *evsel)
 {
 #define FUNCTION_EVENT "ftrace:function"
@@ -249,8 +249,8 @@ void evsel__init(struct evsel *evsel,
	evsel->bpf_fd	   = -1;
	INIT_LIST_HEAD(&evsel->config_terms);
	perf_evsel__object.init(evsel);
-	evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
-	perf_evsel__calc_id_pos(evsel);
+	evsel->sample_size = __evsel__sample_size(attr->sample_type);
+	evsel__calc_id_pos(evsel);
	evsel->cmdline_group_boundary = false;
	evsel->metric_expr   = NULL;
	evsel->metric_name   = NULL;
@@ -267,13 +267,13 @@ struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
		return NULL;
	evsel__init(evsel, attr, idx);
 
-	if (perf_evsel__is_bpf_output(evsel)) {
+	if (evsel__is_bpf_output(evsel)) {
		evsel->core.attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
					    PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
		evsel->core.attr.sample_period = 1;
	}
 
-	if (perf_evsel__is_clock(evsel)) {
+	if (evsel__is_clock(evsel)) {
		/*
		 * The evsel->unit points to static alias->unit
		 * so it's ok to use static string in here.
@@ -385,7 +385,7 @@ const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
	"ref-cycles",
 };
 
-static const char *__perf_evsel__hw_name(u64 config)
+static const char *__evsel__hw_name(u64 config)
 {
	if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])
		return perf_evsel__hw_names[config];
@@ -429,9 +429,9 @@ static int perf_evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
	return r;
 }
 
-static int perf_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
 {
-	int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->core.attr.config));
+	int r = scnprintf(bf, size, "%s", __evsel__hw_name(evsel->core.attr.config));
	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
 }
@@ -448,20 +448,20 @@ const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
	"dummy",
 };
 
-static const char *__perf_evsel__sw_name(u64 config)
+static const char *__evsel__sw_name(u64 config)
 {
	if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config])
		return perf_evsel__sw_names[config];
	return "unknown-software";
 }
 
-static int perf_evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
 {
-	int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->core.attr.config));
+	int r = scnprintf(bf, size, "%s", __evsel__sw_name(evsel->core.attr.config));
	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
 }
 
-static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
+static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
 {
	int r;
@@ -479,10 +479,10 @@ static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
	return r;
 }
 
-static int perf_evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
 {
	struct perf_event_attr *attr = &evsel->core.attr;
-	int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
+	int r = __evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
 }
@@ -531,7 +531,7 @@ static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
 [C(NODE)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 };
 
-bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
+bool evsel__is_cache_op_valid(u8 type, u8 op)
 {
	if (perf_evsel__hw_cache_stat[type] & COP(op))
		return true;	/* valid */
@@ -539,8 +539,7 @@ bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
	return false;	/* invalid */
 }
 
-int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
-					    char *bf, size_t size)
+int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size)
 {
	if (result) {
		return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0],
@@ -552,7 +551,7 @@ int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
			 perf_evsel__hw_cache_op[op][1]);
 }
 
-static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
+static int __evsel__hw_cache_name(u64 config, char *bf, size_t size)
 {
	u8 op, result, type = (config >>  0) & 0xff;
	const char *err = "unknown-ext-hardware-cache-type";
@@ -571,33 +570,33 @@ static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
		goto out_err;
 
	err = "invalid-cache";
-	if (!perf_evsel__is_cache_op_valid(type, op))
+	if (!evsel__is_cache_op_valid(type, op))
		goto out_err;
 
-	return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
+	return __evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
 out_err:
	return scnprintf(bf, size, "%s", err);
 }
 
-static int perf_evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
 {
-	int ret = __perf_evsel__hw_cache_name(evsel->core.attr.config, bf, size);
+	int ret = __evsel__hw_cache_name(evsel->core.attr.config, bf, size);
	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
 }
 
-static int perf_evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
 {
	int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config);
	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
 }
 
-static int perf_evsel__tool_name(char *bf, size_t size)
+static int evsel__tool_name(char *bf, size_t size)
 {
	int ret = scnprintf(bf, size, "duration_time");
	return ret;
 }
 
-const char *perf_evsel__name(struct evsel *evsel)
+const char *evsel__name(struct evsel *evsel)
 {
	char bf[128];
@@ -609,22 +608,22 @@ const char *perf_evsel__name(struct evsel *evsel)
 
	switch (evsel->core.attr.type) {
	case PERF_TYPE_RAW:
-		perf_evsel__raw_name(evsel, bf, sizeof(bf));
+		evsel__raw_name(evsel, bf, sizeof(bf));
		break;
 
	case PERF_TYPE_HARDWARE:
-		perf_evsel__hw_name(evsel, bf, sizeof(bf));
+		evsel__hw_name(evsel, bf, sizeof(bf));
		break;
 
	case PERF_TYPE_HW_CACHE:
-		perf_evsel__hw_cache_name(evsel, bf, sizeof(bf));
+		evsel__hw_cache_name(evsel, bf, sizeof(bf));
		break;
 
	case PERF_TYPE_SOFTWARE:
		if (evsel->tool_event)
-			perf_evsel__tool_name(bf, sizeof(bf));
+			evsel__tool_name(bf, sizeof(bf));
		else
-			perf_evsel__sw_name(evsel, bf, sizeof(bf));
+			evsel__sw_name(evsel, bf, sizeof(bf));
		break;
 
	case PERF_TYPE_TRACEPOINT:
@@ -632,7 +631,7 @@ const char *perf_evsel__name(struct evsel *evsel)
		break;
 
	case PERF_TYPE_BREAKPOINT:
-		perf_evsel__bp_name(evsel, bf, sizeof(bf));
+		evsel__bp_name(evsel, bf, sizeof(bf));
		break;
 
	default:
@@ -649,7 +648,7 @@ out_unknown:
	return "unknown";
 }
 
-const char *perf_evsel__group_name(struct evsel *evsel)
+const char *evsel__group_name(struct evsel *evsel)
 {
	return evsel->group_name ?: "anon group";
 }
@@ -664,21 +663,19 @@ const char *perf_evsel__group_name(struct evsel *evsel)
 * For record -e 'cycles,instructions' and report --group
 *  'cycles:u, instructions:u'
 */
-int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
+int evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
 {
	int ret = 0;
	struct evsel *pos;
-	const char *group_name = perf_evsel__group_name(evsel);
+	const char *group_name = evsel__group_name(evsel);
 
	if (!evsel->forced_leader)
		ret = scnprintf(buf, size, "%s { ", group_name);
 
-	ret += scnprintf(buf + ret, size - ret, "%s",
-			 perf_evsel__name(evsel));
+	ret += scnprintf(buf + ret, size - ret, "%s", evsel__name(evsel));
 
	for_each_group_member(pos, evsel)
-		ret += scnprintf(buf + ret, size - ret, ", %s",
-				 perf_evsel__name(pos));
+		ret += scnprintf(buf + ret, size - ret, ", %s", evsel__name(pos));
 
	if (!evsel->forced_leader)
		ret += scnprintf(buf + ret, size - ret, " }");
@@ -686,14 +683,13 @@ int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
	return ret;
 }
 
-static void __perf_evsel__config_callchain(struct evsel *evsel,
-					   struct record_opts *opts,
-					   struct callchain_param *param)
+static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
+				      struct callchain_param *param)
 {
-	bool function = perf_evsel__is_function_event(evsel);
+	bool function = evsel__is_function_event(evsel);
	struct perf_event_attr *attr = &evsel->core.attr;
 
-	perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+	evsel__set_sample_bit(evsel, CALLCHAIN);
 
	attr->sample_max_stack = param->max_stack;
@@ -708,7 +704,7 @@ static void __perf_evsel__config_callchain(struct evsel *evsel,
				   "to get user callchain information. "
				   "Falling back to framepointers.\n");
		} else {
-			perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+			evsel__set_sample_bit(evsel, BRANCH_STACK);
			attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
						PERF_SAMPLE_BRANCH_CALL_STACK |
						PERF_SAMPLE_BRANCH_NO_CYCLES |
@@ -722,8 +718,8 @@ static void __perf_evsel__config_callchain(struct evsel *evsel,
 
	if (param->record_mode == CALLCHAIN_DWARF) {
		if (!function) {
-			perf_evsel__set_sample_bit(evsel, REGS_USER);
-			perf_evsel__set_sample_bit(evsel, STACK_USER);
+			evsel__set_sample_bit(evsel, REGS_USER);
+			evsel__set_sample_bit(evsel, STACK_USER);
			if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) {
				attr->sample_regs_user |= DWARF_MINIMAL_REGS;
				pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
@@ -746,12 +742,11 @@ static void __perf_evsel__config_callchain(struct evsel *evsel,
	}
 }
 
-void perf_evsel__config_callchain(struct evsel *evsel,
-				  struct record_opts *opts,
-				  struct callchain_param *param)
+void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
+			     struct callchain_param *param)
 {
	if (param->enabled)
-		return __perf_evsel__config_callchain(evsel, opts, param);
+		return __evsel__config_callchain(evsel, opts, param);
 }
 
 static void
@@ -760,16 +755,16 @@ perf_evsel__reset_callgraph(struct evsel *evsel,
 {
	struct perf_event_attr *attr = &evsel->core.attr;
 
-	perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+	evsel__reset_sample_bit(evsel, CALLCHAIN);
	if (param->record_mode == CALLCHAIN_LBR) {
-		perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+		evsel__reset_sample_bit(evsel, BRANCH_STACK);
		attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
					      PERF_SAMPLE_BRANCH_CALL_STACK |
					      PERF_SAMPLE_BRANCH_HW_INDEX);
	}
	if (param->record_mode == CALLCHAIN_DWARF) {
-		perf_evsel__reset_sample_bit(evsel, REGS_USER);
-		perf_evsel__reset_sample_bit(evsel, STACK_USER);
+		evsel__reset_sample_bit(evsel, REGS_USER);
+		evsel__reset_sample_bit(evsel, STACK_USER);
	}
 }
@@ -793,32 +788,32 @@ static void apply_config_terms(struct evsel *evsel,
			if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
				attr->sample_period = term->val.period;
				attr->freq = 0;
-				perf_evsel__reset_sample_bit(evsel, PERIOD);
+				evsel__reset_sample_bit(evsel, PERIOD);
			}
			break;
		case PERF_EVSEL__CONFIG_TERM_FREQ:
			if (!(term->weak && opts->user_freq != UINT_MAX)) {
				attr->sample_freq = term->val.freq;
				attr->freq = 1;
-				perf_evsel__set_sample_bit(evsel, PERIOD);
+				evsel__set_sample_bit(evsel, PERIOD);
			}
			break;
		case PERF_EVSEL__CONFIG_TERM_TIME:
			if (term->val.time)
-				perf_evsel__set_sample_bit(evsel, TIME);
+				evsel__set_sample_bit(evsel, TIME);
			else
-				perf_evsel__reset_sample_bit(evsel, TIME);
+				evsel__reset_sample_bit(evsel, TIME);
			break;
		case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
			callgraph_buf = term->val.str;
			break;
		case PERF_EVSEL__CONFIG_TERM_BRANCH:
			if (term->val.str && strcmp(term->val.str, "no")) {
-				perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+				evsel__set_sample_bit(evsel, BRANCH_STACK);
				parse_branch_str(term->val.str,
						 &attr->branch_sample_type);
			} else
-				perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+				evsel__reset_sample_bit(evsel, BRANCH_STACK);
			break;
		case PERF_EVSEL__CONFIG_TERM_STACK_USER:
			dump_size = term->val.stack_user;
			break;
@@ -832,7 +827,7 @@ static void apply_config_terms(struct evsel *evsel,
		case PERF_EVSEL__CONFIG_TERM_INHERIT:
			/*
			 * attr->inherit should has already been set by
-			 * perf_evsel__config. If user explicitly set
+			 * evsel__config. If user explicitly set
			 * inherit using config terms, override global
			 * opt->no_inherit setting.
			 */
@@ -897,11 +892,11 @@ static void apply_config_terms(struct evsel *evsel,
		/* set perf-event callgraph */
		if (param.enabled) {
			if (sample_address) {
-				perf_evsel__set_sample_bit(evsel, ADDR);
-				perf_evsel__set_sample_bit(evsel, DATA_SRC);
+				evsel__set_sample_bit(evsel, ADDR);
+				evsel__set_sample_bit(evsel, DATA_SRC);
				evsel->core.attr.mmap_data = track;
			}
-			perf_evsel__config_callchain(evsel, opts, &param);
+			evsel__config_callchain(evsel, opts, &param);
		}
	}
 }
@@ -953,8 +948,8 @@ struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel
 * enable/disable events specifically, as there's no
 * initial traced exec call.
 */
-void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
-			struct callchain_param *callchain)
+void evsel__config(struct evsel *evsel, struct record_opts *opts,
+		   struct callchain_param *callchain)
 {
	struct evsel *leader = evsel->leader;
	struct perf_event_attr *attr = &evsel->core.attr;
@@ -965,17 +960,17 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
	attr->inherit	    = !opts->no_inherit;
	attr->write_backward = opts->overwrite ? 1 : 0;
 
-	perf_evsel__set_sample_bit(evsel, IP);
-	perf_evsel__set_sample_bit(evsel, TID);
+	evsel__set_sample_bit(evsel, IP);
+	evsel__set_sample_bit(evsel, TID);
 
	if (evsel->sample_read) {
-		perf_evsel__set_sample_bit(evsel, READ);
+		evsel__set_sample_bit(evsel, READ);
 
		/*
		 * We need ID even in case of single event, because
		 * PERF_SAMPLE_READ process ID specific data.
		 */
-		perf_evsel__set_sample_id(evsel, false);
+		evsel__set_sample_id(evsel, false);
 
		/*
		 * Apply group format only if we belong to group
@@ -994,7 +989,7 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
	if (!attr->sample_period || (opts->user_freq != UINT_MAX ||
				     opts->user_interval != ULLONG_MAX)) {
		if (opts->freq) {
-			perf_evsel__set_sample_bit(evsel, PERIOD);
+			evsel__set_sample_bit(evsel, PERIOD);
			attr->freq		= 1;
			attr->sample_freq	= opts->freq;
		} else {
@@ -1002,25 +997,6 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
		}
	}
 
-	/*
-	 * Disable sampling for all group members other
-	 * than leader in case leader 'leads' the sampling.
-	 */
-	if ((leader != evsel) && leader->sample_read) {
-		attr->freq           = 0;
-		attr->sample_freq    = 0;
-		attr->sample_period  = 0;
-		attr->write_backward = 0;
-
-		/*
-		 * We don't get sample for slave events, we make them
-		 * when delivering group leader sample. Set the slave
-		 * event to follow the master sample_type to ease up
-		 * report.
-		 */
-		attr->sample_type = leader->core.attr.sample_type;
-	}
-
	if (opts->no_samples)
		attr->sample_freq = 0;
@@ -1033,7 +1009,7 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
	}
 
	if (opts->sample_address) {
-		perf_evsel__set_sample_bit(evsel, ADDR);
+		evsel__set_sample_bit(evsel, ADDR);
		attr->mmap_data = track;
	}
@@ -1042,24 +1018,24 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
	 * event, due to issues with page faults while tracing page
	 * fault handler and its overall trickiness nature.
	 */
-	if (perf_evsel__is_function_event(evsel))
+	if (evsel__is_function_event(evsel))
		evsel->core.attr.exclude_callchain_user = 1;
 
	if (callchain && callchain->enabled && !evsel->no_aux_samples)
-		perf_evsel__config_callchain(evsel, opts, callchain);
+		evsel__config_callchain(evsel, opts, callchain);
 
	if (opts->sample_intr_regs) {
		attr->sample_regs_intr = opts->sample_intr_regs;
-		perf_evsel__set_sample_bit(evsel, REGS_INTR);
+		evsel__set_sample_bit(evsel, REGS_INTR);
	}
 
	if (opts->sample_user_regs) {
		attr->sample_regs_user |= opts->sample_user_regs;
-		perf_evsel__set_sample_bit(evsel, REGS_USER);
+		evsel__set_sample_bit(evsel, REGS_USER);
	}
 
	if (target__has_cpu(&opts->target) || opts->sample_cpu)
-		perf_evsel__set_sample_bit(evsel, CPU);
+		evsel__set_sample_bit(evsel, CPU);
 
	/*
	 * When the user explicitly disabled time don't force it here.
@@ -1068,31 +1044,31 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
	    (!perf_missing_features.sample_id_all &&
	    (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
	     opts->sample_time_set)))
-		perf_evsel__set_sample_bit(evsel, TIME);
+		evsel__set_sample_bit(evsel, TIME);
 
	if (opts->raw_samples && !evsel->no_aux_samples) {
-		perf_evsel__set_sample_bit(evsel, TIME);
-		perf_evsel__set_sample_bit(evsel, RAW);
-		perf_evsel__set_sample_bit(evsel, CPU);
+		evsel__set_sample_bit(evsel, TIME);
+		evsel__set_sample_bit(evsel, RAW);
+		evsel__set_sample_bit(evsel, CPU);
	}
 
	if (opts->sample_address)
-		perf_evsel__set_sample_bit(evsel, DATA_SRC);
+		evsel__set_sample_bit(evsel, DATA_SRC);
 
	if (opts->sample_phys_addr)
-		perf_evsel__set_sample_bit(evsel, PHYS_ADDR);
+		evsel__set_sample_bit(evsel, PHYS_ADDR);
 
	if (opts->no_buffering) {
		attr->watermark = 0;
		attr->wakeup_events = 1;
	}
 
	if (opts->branch_stack && !evsel->no_aux_samples) {
-		perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+		evsel__set_sample_bit(evsel, BRANCH_STACK);
		attr->branch_sample_type = opts->branch_stack;
	}
 
	if (opts->sample_weight)
-		perf_evsel__set_sample_bit(evsel, WEIGHT);
+		evsel__set_sample_bit(evsel, WEIGHT);
 
	attr->task  = track;
	attr->mmap  = track;
@@ -1106,14 +1082,14 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
 
	if (opts->record_cgroup) {
		attr->cgroup = track && !perf_missing_features.cgroup;
-		perf_evsel__set_sample_bit(evsel, CGROUP);
+		evsel__set_sample_bit(evsel, CGROUP);
	}
 
	if (opts->record_switch_events)
		attr->context_switch = track;
 
	if (opts->sample_transaction)
-		perf_evsel__set_sample_bit(evsel, TRANSACTION);
+		evsel__set_sample_bit(evsel, TRANSACTION);
 
	if (opts->running_time) {
		evsel->core.attr.read_format |=
@@ -1127,15 +1103,15 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
	 * Disabling only independent events or group leaders,
	 * keeping group members enabled.
	 */
-	if (perf_evsel__is_group_leader(evsel))
+	if (evsel__is_group_leader(evsel))
		attr->disabled = 1;
 
	/*
	 * Setting enable_on_exec for independent events and
	 * group leaders for traced executed by perf.
	 */
-	if (target__none(&opts->target) && perf_evsel__is_group_leader(evsel) &&
-	    !opts->initial_delay)
+	if (target__none(&opts->target) && evsel__is_group_leader(evsel) &&
+	    !opts->initial_delay)
		attr->enable_on_exec = 1;
 
	if (evsel->immediate) {
@@ -1176,9 +1152,9 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
	/* The --period option takes the precedence. */
	if (opts->period_set) {
		if (opts->period)
-			perf_evsel__set_sample_bit(evsel, PERIOD);
+			evsel__set_sample_bit(evsel, PERIOD);
		else
-			perf_evsel__reset_sample_bit(evsel, PERIOD);
+			evsel__reset_sample_bit(evsel, PERIOD);
	}
 
	/*
@@ -1187,10 +1163,10 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
	 * if BRANCH_STACK bit is set.
	 */
	if (opts->initial_delay && is_dummy_event(evsel))
-		perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+		evsel__reset_sample_bit(evsel, BRANCH_STACK);
 }
 
-int perf_evsel__set_filter(struct evsel *evsel, const char *filter)
+int evsel__set_filter(struct evsel *evsel, const char *filter)
 {
	char *new_filter = strdup(filter);
@@ -1203,13 +1179,12 @@ int perf_evsel__set_filter(struct evsel *evsel, const char *filter)
	return -1;
 }
 
-static int perf_evsel__append_filter(struct evsel *evsel,
-				     const char *fmt, const char *filter)
+static int evsel__append_filter(struct evsel *evsel, const char *fmt, const char *filter)
 {
	char *new_filter;
 
	if (evsel->filter == NULL)
-		return perf_evsel__set_filter(evsel, filter);
+		return evsel__set_filter(evsel, filter);
 
	if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
		free(evsel->filter);
@@ -1220,14 +1195,14 @@ static int perf_evsel__append_filter(struct evsel *evsel,
	return -1;
 }
 
-int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter)
+int evsel__append_tp_filter(struct evsel *evsel, const char *filter)
 {
-	return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter);
+	return evsel__append_filter(evsel, "(%s) && (%s)", filter);
 }
 
-int perf_evsel__append_addr_filter(struct evsel *evsel, const char *filter)
+int evsel__append_addr_filter(struct evsel *evsel, const char *filter)
 {
-	return perf_evsel__append_filter(evsel, "%s,%s", filter);
+	return evsel__append_filter(evsel, "%s,%s", filter);
 }
 
 /* Caller has to clear disabled after going through all CPUs. */
@@ -1278,7 +1253,7 @@ static void perf_evsel__free_config_terms(struct evsel *evsel)
	}
 }
 
-void perf_evsel__exit(struct evsel *evsel)
+void evsel__exit(struct evsel *evsel)
 {
	assert(list_empty(&evsel->core.node));
	assert(evsel->evlist == NULL);
@@ -1298,12 +1273,12 @@ void perf_evsel__exit(struct evsel *evsel)
 
 void evsel__delete(struct evsel *evsel)
 {
-	perf_evsel__exit(evsel);
+	evsel__exit(evsel);
	free(evsel);
 }
 
-void perf_evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
-				struct perf_counts_values *count)
+void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
+			   struct perf_counts_values *count)
 {
	struct perf_counts_values tmp;
@@ -1342,8 +1317,7 @@ void perf_counts_values__scale(struct perf_counts_values *count,
		*pscaled = scaled;
 }
 
-static int
-perf_evsel__read_one(struct evsel *evsel, int cpu, int thread)
+static int evsel__read_one(struct evsel *evsel, int cpu, int thread)
 {
	struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread);
@@ -1403,8 +1377,7 @@ perf_evsel__process_group_data(struct evsel *leader,
	return 0;
 }
 
-static int
-perf_evsel__read_group(struct evsel *leader, int cpu, int thread)
+static int evsel__read_group(struct evsel *leader, int cpu, int thread)
 {
	struct perf_stat_evsel *ps = leader->stats;
	u64 read_format = leader->core.attr.read_format;
@@ -1414,7 +1387,7 @@ perf_evsel__read_group(struct evsel *leader, int cpu, int thread)
	if (!(read_format & PERF_FORMAT_ID))
		return -EINVAL;
 
-	if (!perf_evsel__is_group_leader(leader))
+	if (!evsel__is_group_leader(leader))
		return -EINVAL;
 
	if (!data) {
@@ -1434,18 +1407,17 @@ perf_evsel__read_group(struct evsel *leader, int cpu, int thread)
	return perf_evsel__process_group_data(leader, cpu, thread, data);
 }
 
-int perf_evsel__read_counter(struct evsel *evsel, int cpu, int thread)
+int evsel__read_counter(struct evsel *evsel, int cpu, int thread)
 {
	u64 read_format = evsel->core.attr.read_format;
 
	if (read_format & PERF_FORMAT_GROUP)
-		return perf_evsel__read_group(evsel, cpu, thread);
-	else
-		return perf_evsel__read_one(evsel, cpu, thread);
+		return evsel__read_group(evsel, cpu, thread);
+
+	return evsel__read_one(evsel, cpu, thread);
 }
 
-int __perf_evsel__read_on_cpu(struct evsel *evsel,
-			      int cpu, int thread, bool scale)
+int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale)
 {
	struct perf_counts_values count;
	size_t nv = scale ? 3 : 1;
@@ -1459,7 +1431,7 @@ int __perf_evsel__read_on_cpu(struct evsel *evsel,
	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0)
		return -errno;
 
-	perf_evsel__compute_deltas(evsel, cpu, thread, &count);
+	evsel__compute_deltas(evsel, cpu, thread, &count);
	perf_counts_values__scale(&count, scale, NULL);
	*perf_counts(evsel->counts, cpu, thread) = count;
	return 0;
@@ -1470,7 +1442,7 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread)
	struct evsel *leader = evsel->leader;
	int fd;
 
-	if (perf_evsel__is_group_leader(evsel))
+	if (evsel__is_group_leader(evsel))
		return -1;
 
	/*
@@ -1749,8 +1721,7 @@ retry_open:
 
	/*
	 * If we succeeded but had to kill clockid, fail and
-	 * have perf_evsel__open_strerror() print us a nice
-	 * error.
+	 * have evsel__open_strerror() print us a nice error.
	 */
	if (perf_missing_features.clockid ||
	    perf_missing_features.clockid_wrong) {
@@ -1854,7 +1825,7 @@ try_fallback:
	} else if (!perf_missing_features.group_read &&
		    evsel->core.attr.inherit &&
		   (evsel->core.attr.read_format & PERF_FORMAT_GROUP) &&
-		   perf_evsel__is_group_leader(evsel)) {
+		   evsel__is_group_leader(evsel)) {
		perf_missing_features.group_read = true;
		pr_debug2_peo("switching off group read\n");
		goto fallback_missing_features;
@@ -1888,9 +1859,7 @@ void evsel__close(struct evsel *evsel)
	perf_evsel__free_id(&evsel->core);
 }
 
-int perf_evsel__open_per_cpu(struct evsel *evsel,
-			     struct perf_cpu_map *cpus,
-			     int cpu)
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu)
 {
	if (cpu == -1)
		return evsel__open_cpu(evsel, cpus, NULL, 0,
@@ -1899,8 +1868,7 @@ int perf_evsel__open_per_cpu(struct evsel *evsel,
	return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1);
 }
 
-int perf_evsel__open_per_thread(struct evsel *evsel,
-				struct perf_thread_map *threads)
+int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads)
 {
	return evsel__open(evsel, NULL, threads);
 }
@@ -1995,8 +1963,8 @@ perf_event__check_size(union perf_event *event, unsigned int sample_size)
	return 0;
 }
 
-int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
-			     struct perf_sample *data)
+int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
+			struct perf_sample *data)
 {
	u64 type = evsel->core.attr.sample_type;
	bool swapped = evsel->needs_swap;
@@ -2136,7 +2104,7 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
		}
	}
 
-	if (evsel__has_callchain(evsel)) {
+	if (type & PERF_SAMPLE_CALLCHAIN) {
		const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
 
		OVERFLOW_CHECK_u64(array);
@@ -2190,7 +2158,7 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
			return -EFAULT;
 
		sz = data->branch_stack->nr * sizeof(struct branch_entry);
-		if (perf_evsel__has_branch_hw_idx(evsel))
+		if (evsel__has_branch_hw_idx(evsel))
			sz += sizeof(u64);
		else
			data->no_hw_idx = true;
@@ -2298,9 +2266,8 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
	return 0;
 }
 
-int perf_evsel__parse_sample_timestamp(struct evsel *evsel,
-				       union perf_event *event,
-				       u64 *timestamp)
+int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
+				  u64 *timestamp)
 {
	u64 type = evsel->core.attr.sample_type;
	const __u64 *array;
@@ -2342,15 +2309,14 @@ int perf_evsel__parse_sample_timestamp(struct evsel *evsel,
	return 0;
 }
 
-struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name)
+struct tep_format_field *evsel__field(struct evsel *evsel, const char *name)
 {
	return tep_find_field(evsel->tp_format, name);
 }
 
-void *perf_evsel__rawptr(struct evsel *evsel, struct perf_sample *sample,
-			 const char *name)
+void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name)
 {
-	struct tep_format_field *field = perf_evsel__field(evsel, name);
+	struct tep_format_field *field = evsel__field(evsel, name);
	int offset;
 
	if (!field)
@@ -2405,10 +2371,9 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
	return 0;
 }
 
-u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample,
-		       const char *name)
+u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name)
 {
-	struct tep_format_field *field = perf_evsel__field(evsel, name);
+	struct tep_format_field *field = evsel__field(evsel, name);
 
	if (!field)
		return 0;
@@ -2416,8
+2381,7 @@ u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample, return field ? format_field__intval(field, sample, evsel->needs_swap) : 0; } -bool perf_evsel__fallback(struct evsel *evsel, int err, - char *msg, size_t msgsize) +bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) { int paranoid; @@ -2442,10 +2406,14 @@ bool perf_evsel__fallback(struct evsel *evsel, int err, return true; } else if (err == EACCES && !evsel->core.attr.exclude_kernel && (paranoid = perf_event_paranoid()) > 1) { - const char *name = perf_evsel__name(evsel); + const char *name = evsel__name(evsel); char *new_name; const char *sep = ":"; + /* If event has exclude user then don't exclude kernel. */ + if (evsel->core.attr.exclude_user) + return false; + /* Is there already the separator in the name. */ if (strchr(name, '/') || strchr(name, ':')) @@ -2505,8 +2473,8 @@ static bool find_process(const char *name) return ret ? false : true; } -int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, - int err, char *msg, size_t size) +int evsel__open_strerror(struct evsel *evsel, struct target *target, + int err, char *msg, size_t size) { char sbuf[STRERR_BUFSIZE]; int printed = 0; @@ -2516,28 +2484,26 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, case EACCES: if (err == EPERM) printed = scnprintf(msg, size, - "No permission to enable %s event.\n\n", - perf_evsel__name(evsel)); + "No permission to enable %s event.\n\n", evsel__name(evsel)); return scnprintf(msg + printed, size - printed, "You may not have permission to collect %sstats.\n\n" "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n" "which controls use of the performance events system by\n" - "unprivileged users (without CAP_SYS_ADMIN).\n\n" + "unprivileged users (without CAP_PERFMON or CAP_SYS_ADMIN).\n\n" "The current value is %d:\n\n" " -1: Allow use of (almost) all events by all users\n" " Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n" - ">= 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN\n" - " Disallow raw tracepoint access by users without CAP_SYS_ADMIN\n" - ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n" - ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN\n\n" + ">= 0: Disallow ftrace function tracepoint by users without CAP_PERFMON or CAP_SYS_ADMIN\n" + " Disallow raw tracepoint access by users without CAP_PERFMON or CAP_SYS_ADMIN\n" + ">= 1: Disallow CPU event access by users without CAP_PERFMON or CAP_SYS_ADMIN\n" + ">= 2: Disallow kernel profiling by users without CAP_PERFMON or CAP_SYS_ADMIN\n\n" "To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n" " kernel.perf_event_paranoid = -1\n" , target->system_wide ? "system-wide " : "", perf_event_paranoid()); case ENOENT: - return scnprintf(msg, size, "The %s event is not supported.", - perf_evsel__name(evsel)); + return scnprintf(msg, size, "The %s event is not supported.", evsel__name(evsel)); case EMFILE: return scnprintf(msg, size, "%s", "Too many events are opened.\n" @@ -2561,7 +2527,7 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, if (evsel->core.attr.sample_period != 0) return scnprintf(msg, size, "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'", - perf_evsel__name(evsel)); + evsel__name(evsel)); if (evsel->core.attr.precise_ip) return scnprintf(msg, size, "%s", "\'precise\' request may not be supported. 
Try removing 'p' modifier."); @@ -2594,11 +2560,10 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n" "/bin/dmesg | grep -i perf may provide additional information.\n", - err, str_error_r(err, sbuf, sizeof(sbuf)), - perf_evsel__name(evsel)); + err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel)); } -struct perf_env *perf_evsel__env(struct evsel *evsel) +struct perf_env *evsel__env(struct evsel *evsel) { if (evsel && evsel->evlist) return evsel->evlist->env; @@ -2623,7 +2588,7 @@ static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist) return 0; } -int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist) +int evsel__store_ids(struct evsel *evsel, struct evlist *evlist) { struct perf_cpu_map *cpus = evsel->core.cpus; struct perf_thread_map *threads = evsel->core.threads; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 53187c501ee8..351c0aaf2a11 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -18,7 +18,7 @@ struct perf_counts; struct perf_stat_evsel; union perf_event; -typedef int (perf_evsel__sb_cb_t)(union perf_event *event, void *data); +typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); enum perf_tool_event { PERF_TOOL_NONE = 0, @@ -101,9 +101,17 @@ struct evsel { int cpu_iter; const char *pmu_name; struct { - perf_evsel__sb_cb_t *cb; - void *data; + evsel__sb_cb_t *cb; + void *data; } side_band; + /* + * For reporting purposes, an evsel sample can have a callchain + * synthesized from AUX area data. Keep track of synthesized sample + * types here. Note, the recorded sample_type cannot be changed because + * it is needed to continue to parse events. + * See also evsel__has_callchain(). 
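+ * For example, the Intel PT code sets PERF_SAMPLE_CALLCHAIN and/or + * PERF_SAMPLE_BRANCH_STACK here when it synthesizes those fields from + * decoded AUX data, so report code can treat recorded and synthesized + * samples uniformly.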
+ */ + __u64 synth_sample_type; }; struct perf_missing_features { @@ -135,7 +143,7 @@ static inline struct perf_cpu_map *evsel__cpus(struct evsel *evsel) return perf_evsel__cpus(&evsel->core); } -static inline int perf_evsel__nr_cpus(struct evsel *evsel) +static inline int evsel__nr_cpus(struct evsel *evsel) { return evsel__cpus(evsel)->nr; } @@ -143,13 +151,16 @@ static inline int perf_evsel__nr_cpus(struct evsel *evsel) void perf_counts_values__scale(struct perf_counts_values *count, bool scale, s8 *pscaled); -void perf_evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, - struct perf_counts_values *count); +void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, + struct perf_counts_values *count); int perf_evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel), void (*fini)(struct evsel *evsel)); +struct perf_pmu *evsel__find_pmu(struct evsel *evsel); +bool evsel__is_aux_event(struct evsel *evsel); + struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx); static inline struct evsel *evsel__new(struct perf_event_attr *attr) @@ -172,22 +183,20 @@ struct evsel *perf_evsel__new_cycles(bool precise); struct tep_event *event_format__new(const char *sys, const char *name); void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx); -void perf_evsel__exit(struct evsel *evsel); +void evsel__exit(struct evsel *evsel); void evsel__delete(struct evsel *evsel); struct callchain_param; -void perf_evsel__config(struct evsel *evsel, - struct record_opts *opts, - struct callchain_param *callchain); -void perf_evsel__config_callchain(struct evsel *evsel, - struct record_opts *opts, - struct callchain_param *callchain); +void evsel__config(struct evsel *evsel, struct record_opts *opts, + struct callchain_param *callchain); +void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts, + struct callchain_param *callchain); -int __perf_evsel__sample_size(u64 sample_type); -void perf_evsel__calc_id_pos(struct evsel *evsel); +int __evsel__sample_size(u64 sample_type); +void evsel__calc_id_pos(struct evsel *evsel); -bool perf_evsel__is_cache_op_valid(u8 type, u8 op); +bool evsel__is_cache_op_valid(u8 type, u8 op); #define PERF_EVSEL__MAX_ALIASES 8 @@ -199,177 +208,153 @@ extern const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] [PERF_EVSEL__MAX_ALIASES]; extern const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX]; extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX]; -int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, - char *bf, size_t size); -const char *perf_evsel__name(struct evsel *evsel); +int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); +const char *evsel__name(struct evsel *evsel); -const char *perf_evsel__group_name(struct evsel *evsel); -int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size); +const char *evsel__group_name(struct evsel *evsel); +int evsel__group_desc(struct evsel *evsel, char *buf, size_t size); -void __perf_evsel__set_sample_bit(struct evsel *evsel, - enum perf_event_sample_format bit); -void __perf_evsel__reset_sample_bit(struct evsel *evsel, - enum perf_event_sample_format bit); +void __evsel__set_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit); +void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit); -#define perf_evsel__set_sample_bit(evsel, bit) \ - __perf_evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit) +#define 
evsel__set_sample_bit(evsel, bit) \ + __evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit) -#define perf_evsel__reset_sample_bit(evsel, bit) \ - __perf_evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit) +#define evsel__reset_sample_bit(evsel, bit) \ + __evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit) -void perf_evsel__set_sample_id(struct evsel *evsel, - bool use_sample_identifier); +void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier); -int perf_evsel__set_filter(struct evsel *evsel, const char *filter); -int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter); -int perf_evsel__append_addr_filter(struct evsel *evsel, - const char *filter); +int evsel__set_filter(struct evsel *evsel, const char *filter); +int evsel__append_tp_filter(struct evsel *evsel, const char *filter); +int evsel__append_addr_filter(struct evsel *evsel, const char *filter); int evsel__enable_cpu(struct evsel *evsel, int cpu); int evsel__enable(struct evsel *evsel); int evsel__disable(struct evsel *evsel); int evsel__disable_cpu(struct evsel *evsel, int cpu); -int perf_evsel__open_per_cpu(struct evsel *evsel, - struct perf_cpu_map *cpus, - int cpu); -int perf_evsel__open_per_thread(struct evsel *evsel, - struct perf_thread_map *threads); +int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu); +int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads); int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); void evsel__close(struct evsel *evsel); struct perf_sample; -void *perf_evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, - const char *name); -u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample, - const char *name); +void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name); +u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name); -static inline char *perf_evsel__strval(struct evsel *evsel, - struct perf_sample *sample, - const char *name) +static inline char *evsel__strval(struct evsel *evsel, struct perf_sample *sample, const char *name) { - return perf_evsel__rawptr(evsel, sample, name); + return evsel__rawptr(evsel, sample, name); } struct tep_format_field; u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample, bool needs_swap); -struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name); +struct tep_format_field *evsel__field(struct evsel *evsel, const char *name); -#define perf_evsel__match(evsel, t, c) \ +#define evsel__match(evsel, t, c) \ (evsel->core.attr.type == PERF_TYPE_##t && \ evsel->core.attr.config == PERF_COUNT_##c) -static inline bool perf_evsel__match2(struct evsel *e1, - struct evsel *e2) +static inline bool evsel__match2(struct evsel *e1, struct evsel *e2) { return (e1->core.attr.type == e2->core.attr.type) && (e1->core.attr.config == e2->core.attr.config); } -#define perf_evsel__cmp(a, b) \ - ((a) && \ - (b) && \ - (a)->core.attr.type == (b)->core.attr.type && \ - (a)->core.attr.config == (b)->core.attr.config) - -int perf_evsel__read_counter(struct evsel *evsel, int cpu, int thread); +int evsel__read_counter(struct evsel *evsel, int cpu, int thread); -int __perf_evsel__read_on_cpu(struct evsel *evsel, - int cpu, int thread, bool scale); +int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale); /** - * perf_evsel__read_on_cpu - Read out the results on a CPU and thread + * evsel__read_on_cpu - Read out 
the results on a CPU and thread * * @evsel - event selector to read value * @cpu - CPU of interest * @thread - thread of interest */ -static inline int perf_evsel__read_on_cpu(struct evsel *evsel, - int cpu, int thread) +static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread) { - return __perf_evsel__read_on_cpu(evsel, cpu, thread, false); + return __evsel__read_on_cpu(evsel, cpu, thread, false); } /** - * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled + * evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled * * @evsel - event selector to read value * @cpu - CPU of interest * @thread - thread of interest */ -static inline int perf_evsel__read_on_cpu_scaled(struct evsel *evsel, - int cpu, int thread) +static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu, int thread) { - return __perf_evsel__read_on_cpu(evsel, cpu, thread, true); + return __evsel__read_on_cpu(evsel, cpu, thread, true); } -int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *sample); +int evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *sample); -int perf_evsel__parse_sample_timestamp(struct evsel *evsel, - union perf_event *event, - u64 *timestamp); +int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, + u64 *timestamp); -static inline struct evsel *perf_evsel__next(struct evsel *evsel) +static inline struct evsel *evsel__next(struct evsel *evsel) { return list_entry(evsel->core.node.next, struct evsel, core.node); } -static inline struct evsel *perf_evsel__prev(struct evsel *evsel) +static inline struct evsel *evsel__prev(struct evsel *evsel) { return list_entry(evsel->core.node.prev, struct evsel, core.node); } /** - * perf_evsel__is_group_leader - Return whether given evsel is a leader event + * evsel__is_group_leader - Return whether given evsel is a leader event * * @evsel - evsel selector to be tested * * Return %true if @evsel is a group leader or a stand-alone event */ -static inline bool perf_evsel__is_group_leader(const struct evsel *evsel) +static inline bool evsel__is_group_leader(const struct evsel *evsel) { return evsel->leader == evsel; } /** - * perf_evsel__is_group_event - Return whether given evsel is a group event + * evsel__is_group_event - Return whether given evsel is a group event * * @evsel - evsel selector to be tested * * Return %true iff event group view is enabled and @evsel is an actual group * leader which has other members in the group */ -static inline bool perf_evsel__is_group_event(struct evsel *evsel) +static inline bool evsel__is_group_event(struct evsel *evsel) { if (!symbol_conf.event_group) return false; - return perf_evsel__is_group_leader(evsel) && evsel->core.nr_members > 1; + return evsel__is_group_leader(evsel) && evsel->core.nr_members > 1; } -bool perf_evsel__is_function_event(struct evsel *evsel); +bool evsel__is_function_event(struct evsel *evsel); -static inline bool perf_evsel__is_bpf_output(struct evsel *evsel) +static inline bool evsel__is_bpf_output(struct evsel *evsel) { - return perf_evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT); + return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT); } -static inline bool perf_evsel__is_clock(struct evsel *evsel) +static inline bool evsel__is_clock(struct evsel *evsel) { - return perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || - perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK); + return evsel__match(evsel, SOFTWARE, 
SW_CPU_CLOCK) || + evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK); } -bool perf_evsel__fallback(struct evsel *evsel, int err, - char *msg, size_t msgsize); -int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, - int err, char *msg, size_t size); +bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize); +int evsel__open_strerror(struct evsel *evsel, struct target *target, + int err, char *msg, size_t size); -static inline int perf_evsel__group_idx(struct evsel *evsel) +static inline int evsel__group_idx(struct evsel *evsel) { return evsel->idx - evsel->leader->idx; } @@ -386,22 +371,37 @@ for ((_evsel) = _leader; \ (_evsel) && (_evsel)->leader == (_leader); \ (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node)) -static inline bool perf_evsel__has_branch_callstack(const struct evsel *evsel) +static inline bool evsel__has_branch_callstack(const struct evsel *evsel) { return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; } -static inline bool perf_evsel__has_branch_hw_idx(const struct evsel *evsel) +static inline bool evsel__has_branch_hw_idx(const struct evsel *evsel) { return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; } static inline bool evsel__has_callchain(const struct evsel *evsel) { - return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0; + /* + * For reporting purposes, an evsel sample can have a recorded callchain + * or a callchain synthesized from AUX area data. + */ + return evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN || + evsel->synth_sample_type & PERF_SAMPLE_CALLCHAIN; +} + +static inline bool evsel__has_br_stack(const struct evsel *evsel) +{ + /* + * For reporting purposes, an evsel sample can have a recorded branch + * stack or a branch stack synthesized from AUX area data. + */ + return evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK || + evsel->synth_sample_type & PERF_SAMPLE_BRANCH_STACK; } -struct perf_env *perf_evsel__env(struct evsel *evsel); +struct perf_env *evsel__env(struct evsel *evsel); -int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist); +int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h index e026ab67b008..f8938916577c 100644 --- a/tools/perf/util/evsel_config.h +++ b/tools/perf/util/evsel_config.h @@ -7,7 +7,7 @@ /* * The 'struct perf_evsel_config_term' is used to pass event - * specific configuration data to perf_evsel__config routine. + * specific configuration data to evsel__config routine. * It is allocated within event parsing and attached to * perf_evsel::config_terms list head. 
*/ diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 3b4842840db0..99aed708bd5a 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -44,22 +44,22 @@ int perf_evsel__fprintf(struct evsel *evsel, if (details->event_group) { struct evsel *pos; - if (!perf_evsel__is_group_leader(evsel)) + if (!evsel__is_group_leader(evsel)) return 0; if (evsel->core.nr_members > 1) printed += fprintf(fp, "%s{", evsel->group_name ?: ""); - printed += fprintf(fp, "%s", perf_evsel__name(evsel)); + printed += fprintf(fp, "%s", evsel__name(evsel)); for_each_group_member(pos, evsel) - printed += fprintf(fp, ",%s", perf_evsel__name(pos)); + printed += fprintf(fp, ",%s", evsel__name(pos)); if (evsel->core.nr_members > 1) printed += fprintf(fp, "}"); goto out; } - printed += fprintf(fp, "%s", perf_evsel__name(evsel)); + printed += fprintf(fp, "%s", evsel__name(evsel)); if (details->verbose) { printed += perf_event_attr__fprintf(fp, &evsel->core.attr, diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index fd192ddf93c1..aa631e37ad1e 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -3,7 +3,6 @@ #include <assert.h> #include "expr.h" #include "expr-bison.h" -#define YY_EXTRA_TYPE int #include "expr-flex.h" #ifdef PARSER_DEBUG @@ -11,7 +10,7 @@ extern int expr_debug; #endif /* Caller must make sure id is allocated */ -void expr__add_id(struct parse_ctx *ctx, const char *name, double val) +void expr__add_id(struct expr_parse_ctx *ctx, const char *name, double val) { int idx; @@ -21,20 +20,24 @@ void expr__add_id(struct parse_ctx *ctx, const char *name, double val) ctx->ids[idx].val = val; } -void expr__ctx_init(struct parse_ctx *ctx) +void expr__ctx_init(struct expr_parse_ctx *ctx) { ctx->num_ids = 0; } static int -__expr__parse(double *val, struct parse_ctx *ctx, const char *expr, - int start) +__expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, + int start, int runtime) { + struct expr_scanner_ctx scanner_ctx = { + .start_token = start, + .runtime = runtime, + }; YY_BUFFER_STATE buffer; void *scanner; int ret; - ret = expr_lex_init_extra(start, &scanner); + ret = expr_lex_init_extra(&scanner_ctx, &scanner); if (ret) return ret; @@ -52,9 +55,9 @@ __expr__parse(double *val, struct parse_ctx *ctx, const char *expr, return ret; } -int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr) +int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime) { - return __expr__parse(final_val, ctx, expr, EXPR_PARSE) ? -1 : 0; + return __expr__parse(final_val, ctx, expr, EXPR_PARSE, runtime) ? 
-1 : 0; } static bool @@ -72,13 +75,13 @@ already_seen(const char *val, const char *one, const char **other, } int expr__find_other(const char *expr, const char *one, const char ***other, - int *num_other) + int *num_other, int runtime) { int err, i = 0, j = 0; - struct parse_ctx ctx; + struct expr_parse_ctx ctx; expr__ctx_init(&ctx); - err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER); + err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER, runtime); if (err) return -1; diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 9377538f4097..87d627bb699b 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -5,20 +5,25 @@ #define EXPR_MAX_OTHER 20 #define MAX_PARSE_ID EXPR_MAX_OTHER -struct parse_id { +struct expr_parse_id { const char *name; double val; }; -struct parse_ctx { +struct expr_parse_ctx { int num_ids; - struct parse_id ids[MAX_PARSE_ID]; + struct expr_parse_id ids[MAX_PARSE_ID]; }; -void expr__ctx_init(struct parse_ctx *ctx); -void expr__add_id(struct parse_ctx *ctx, const char *id, double val); -int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr); +struct expr_scanner_ctx { + int start_token; + int runtime; +}; + +void expr__ctx_init(struct expr_parse_ctx *ctx); +void expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val); +int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime); int expr__find_other(const char *expr, const char *one, const char ***other, - int *num_other); + int *num_other, int runtime); #endif diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index eaad29243c23..74b9b59b1aa5 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -35,7 +35,7 @@ static int value(yyscan_t scanner, int base) * Allow @ instead of / to be able to specify pmu/event/ without * conflicts with normal division. 
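+ * A '?' in an event specification is replaced with the decimal value of + * the runtime parameter (see the runtime argument added to normalize() + * below).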
*/ -static char *normalize(char *str) +static char *normalize(char *str, int runtime) { char *ret = str; char *dst = str; @@ -45,6 +45,19 @@ static char *normalize(char *str) *dst++ = '/'; else if (*str == '\\') *dst++ = *++str; + else if (*str == '?') { + char *paramval; + int i = 0; + int size = asprintf(¶mval, "%d", runtime); + + if (size < 0) + *dst++ = '0'; + else { + while (i < size) + *dst++ = paramval[i++]; + free(paramval); + } + } else *dst++ = *str; str++; @@ -54,16 +67,16 @@ static char *normalize(char *str) return ret; } -static int str(yyscan_t scanner, int token) +static int str(yyscan_t scanner, int token, int runtime) { YYSTYPE *yylval = expr_get_lval(scanner); char *text = expr_get_text(scanner); - yylval->str = normalize(strdup(text)); + yylval->str = normalize(strdup(text), runtime); if (!yylval->str) return EXPR_ERROR; - yylval->str = normalize(yylval->str); + yylval->str = normalize(yylval->str, runtime); return token; } %} @@ -72,17 +85,17 @@ number [0-9]+ sch [-,=] spec \\{sch} -sym [0-9a-zA-Z_\.:@]+ -symbol {spec}*{sym}*{spec}*{sym}* +sym [0-9a-zA-Z_\.:@?]+ +symbol {spec}*{sym}*{spec}*{sym}*{spec}*{sym} %% - { - int start_token; + struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner); - start_token = expr_get_extra(yyscanner); + { + int start_token = sctx->start_token; - if (start_token) { - expr_set_extra(NULL, yyscanner); + if (sctx->start_token) { + sctx->start_token = 0; return start_token; } } @@ -93,7 +106,7 @@ if { return IF; } else { return ELSE; } #smt_on { return SMT_ON; } {number} { return value(yyscanner, 10); } -{symbol} { return str(yyscanner, ID); } +{symbol} { return str(yyscanner, ID, sctx->runtime); } "|" { return '|'; } "^" { return '^'; } "&" { return '&'; } diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 4720cbe79357..cd17486c1c5d 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -15,7 +15,7 @@ %define api.pure full %parse-param { double *final_val } -%parse-param { struct parse_ctx *ctx } +%parse-param { struct expr_parse_ctx *ctx } %parse-param {void *scanner} %lex-param {void* scanner} @@ -39,14 +39,14 @@ %{ static void expr_error(double *final_val __maybe_unused, - struct parse_ctx *ctx __maybe_unused, + struct expr_parse_ctx *ctx __maybe_unused, void *scanner, const char *s) { pr_debug("%s\n", s); } -static int lookup_id(struct parse_ctx *ctx, char *id, double *val) +static int lookup_id(struct expr_parse_ctx *ctx, char *id, double *val) { int i; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index acbd046bf95c..0ce47283a8a1 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -525,7 +525,7 @@ static int write_event_desc(struct feat_fd *ff, /* * write event string as passed on cmdline */ - ret = do_write_string(ff, perf_evsel__name(evsel)); + ret = do_write_string(ff, evsel__name(evsel)); if (ret < 0) return ret; /* @@ -783,8 +783,7 @@ static int write_group_desc(struct feat_fd *ff, return ret; evlist__for_each_entry(evlist, evsel) { - if (perf_evsel__is_group_leader(evsel) && - evsel->core.nr_members > 1) { + if (evsel__is_group_leader(evsel) && evsel->core.nr_members > 1) { const char *name = evsel->group_name ?: "{anon_group}"; u32 leader_idx = evsel->idx; u32 nr_members = evsel->core.nr_members; @@ -1395,6 +1394,38 @@ static int write_compressed(struct feat_fd *ff __maybe_unused, return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len)); } +static int write_cpu_pmu_caps(struct feat_fd *ff, + struct evlist *evlist __maybe_unused) +{ 
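+ /* + * On-disk layout of the CPU_PMU_CAPS feature: a u32 capability count, + * then one (name, value) pair of strings per capability, each written + * with do_write_string(). process_cpu_pmu_caps() below reads back the + * same layout. + */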
+ struct perf_pmu *cpu_pmu = perf_pmu__find("cpu"); + struct perf_pmu_caps *caps = NULL; + int nr_caps; + int ret; + + if (!cpu_pmu) + return -ENOENT; + + nr_caps = perf_pmu__caps_parse(cpu_pmu); + if (nr_caps < 0) + return nr_caps; + + ret = do_write(ff, &nr_caps, sizeof(nr_caps)); + if (ret < 0) + return ret; + + list_for_each_entry(caps, &cpu_pmu->caps, list) { + ret = do_write_string(ff, caps->name); + if (ret < 0) + return ret; + + ret = do_write_string(ff, caps->value); + if (ret < 0) + return ret; + } + + return ret; +} + static void print_hostname(struct feat_fd *ff, FILE *fp) { fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); @@ -1809,6 +1840,27 @@ static void print_compressed(struct feat_fd *ff, FILE *fp) ff->ph->env.comp_level, ff->ph->env.comp_ratio); } +static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp) +{ + const char *delimiter = "# cpu pmu capabilities: "; + u32 nr_caps = ff->ph->env.nr_cpu_pmu_caps; + char *str; + + if (!nr_caps) { + fprintf(fp, "# cpu pmu capabilities: not available\n"); + return; + } + + str = ff->ph->env.cpu_pmu_caps; + while (nr_caps--) { + fprintf(fp, "%s%s", delimiter, str); + delimiter = ", "; + str += strlen(str) + 1; + } + + fprintf(fp, "\n"); +} + static void print_pmu_mappings(struct feat_fd *ff, FILE *fp) { const char *delimiter = "# pmu mappings: "; @@ -1854,14 +1906,12 @@ static void print_group_desc(struct feat_fd *ff, FILE *fp) session = container_of(ff->ph, struct perf_session, header); evlist__for_each_entry(session->evlist, evsel) { - if (perf_evsel__is_group_leader(evsel) && - evsel->core.nr_members > 1) { - fprintf(fp, "# group: %s{%s", evsel->group_name ?: "", - perf_evsel__name(evsel)); + if (evsel__is_group_leader(evsel) && evsel->core.nr_members > 1) { + fprintf(fp, "# group: %s{%s", evsel->group_name ?: "", evsel__name(evsel)); nr = evsel->core.nr_members - 1; } else if (nr) { - fprintf(fp, ",%s", perf_evsel__name(evsel)); + fprintf(fp, ",%s", evsel__name(evsel)); if (--nr == 0) fprintf(fp, "}\n"); @@ -2846,6 +2896,60 @@ static int process_compressed(struct feat_fd *ff, return 0; } +static int process_cpu_pmu_caps(struct feat_fd *ff, + void *data __maybe_unused) +{ + char *name, *value; + struct strbuf sb; + u32 nr_caps; + + if (do_read_u32(ff, &nr_caps)) + return -1; + + if (!nr_caps) { + pr_debug("cpu pmu capabilities not available\n"); + return 0; + } + + ff->ph->env.nr_cpu_pmu_caps = nr_caps; + + if (strbuf_init(&sb, 128) < 0) + return -1; + + while (nr_caps--) { + name = do_read_string(ff); + if (!name) + goto error; + + value = do_read_string(ff); + if (!value) + goto free_name; + + if (strbuf_addf(&sb, "%s=%s", name, value) < 0) + goto free_value; + + /* include a NULL character at the end */ + if (strbuf_add(&sb, "", 1) < 0) + goto free_value; + + if (!strcmp(name, "branches")) + ff->ph->env.max_branches = atoi(value); + + free(value); + free(name); + } + ff->ph->env.cpu_pmu_caps = strbuf_detach(&sb, NULL); + return 0; + +free_value: + free(value); +free_name: + free(name); +error: + strbuf_release(&sb); + return -1; +} + #define FEAT_OPR(n, func, __full_only) \ [HEADER_##n] = { \ .name = __stringify(n), \ @@ -2903,6 +3007,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false), FEAT_OPR(BPF_BTF, bpf_btf, false), FEAT_OPR(COMPRESSED, compressed, false), + FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false), }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 840f95cee349..650bd1c7a99b 100644 --- 
a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -43,6 +43,7 @@ enum { HEADER_BPF_PROG_INFO, HEADER_BPF_BTF, HEADER_COMPRESSED, + HEADER_CPU_PMU_CAPS, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 283a69ff6a3d..12b65d00cf65 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1070,6 +1070,20 @@ iter_next_cumulative_entry(struct hist_entry_iter *iter, return fill_callchain_info(al, node, iter->hide_unresolved); } +static bool +hist_entry__fast__sym_diff(struct hist_entry *left, + struct hist_entry *right) +{ + struct symbol *sym_l = left->ms.sym; + struct symbol *sym_r = right->ms.sym; + + if (!sym_l && !sym_r) + return left->ip != right->ip; + + return !!_sort__sym_cmp(sym_l, sym_r); +} + + static int iter_add_next_cumulative_entry(struct hist_entry_iter *iter, struct addr_location *al) @@ -1096,6 +1110,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, }; int i; struct callchain_cursor cursor; + bool fast = hists__has(he_tmp.hists, sym); callchain_cursor_snapshot(&cursor, &callchain_cursor); @@ -1106,6 +1121,14 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, * It's possible that it has cycles or recursive calls. */ for (i = 0; i < iter->curr; i++) { + /* + * For most cases, there are no duplicate entries in callchain. + * The symbols are usually different. Do a quick check for + * symbols first. + */ + if (fast && hist_entry__fast__sym_diff(he_cache[i], &he_tmp)) + continue; + if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) { /* to avoid calling callback function */ iter->he = NULL; @@ -2637,7 +2660,7 @@ size_t perf_evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp) size_t ret = 0; evlist__for_each_entry(evlist, pos) { - ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos)); + ret += fprintf(fp, "%s stats:\n", evsel__name(pos)); ret += events_stats__fprintf(&evsel__hists(pos)->stats, fp); } @@ -2661,7 +2684,7 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; u64 nr_events = hists->stats.total_period; struct evsel *evsel = hists_to_evsel(hists); - const char *ev_name = perf_evsel__name(evsel); + const char *ev_name = evsel__name(evsel); char buf[512], sample_freq_str[64] = ""; size_t buflen = sizeof(buf); char ref[30] = " show reference callgraph, "; @@ -2672,10 +2695,10 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh nr_events = hists->stats.total_non_filtered_period; } - if (perf_evsel__is_group_event(evsel)) { + if (evsel__is_group_event(evsel)) { struct evsel *pos; - perf_evsel__group_desc(evsel, buf, buflen); + evsel__group_desc(evsel, buf, buflen); ev_name = buf; for_each_group_member(pos, evsel) { diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 34cb380d19a3..af1e78d76228 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -432,7 +432,7 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq, le64_to_cpu(branch->from), le64_to_cpu(branch->to), btsq->intel_pt_insn.length, - buffer->buffer_nr + 1); + buffer->buffer_nr + 1, true, 0, 0); if (filter && !(filter & btsq->sample_flags)) continue; err = intel_bts_synth_branch_sample(btsq, branch); @@ -728,6 +728,15 @@ static void intel_bts_free(struct perf_session *session) free(bts); } +static bool intel_bts_evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + struct intel_bts 
*bts = container_of(session->auxtrace, struct intel_bts, + auxtrace); + + return evsel->core.attr.type == bts->pmu_type; +} + struct intel_bts_synth { struct perf_tool dummy_tool; struct perf_session *session; @@ -816,10 +825,10 @@ static int intel_bts_synth_events(struct intel_bts *bts, bts->branches_id = id; /* * We only use sample types from PERF_SAMPLE_MASK so we can use - * __perf_evsel__sample_size() here. + * __evsel__sample_size() here. */ bts->branches_event_size = sizeof(struct perf_record_sample) + - __perf_evsel__sample_size(attr.sample_type); + __evsel__sample_size(attr.sample_type); } return 0; @@ -883,6 +892,7 @@ int intel_bts_process_auxtrace_info(union perf_event *event, bts->auxtrace.flush_events = intel_bts_flush; bts->auxtrace.free_events = intel_bts_free_events; bts->auxtrace.free = intel_bts_free; + bts->auxtrace.evsel_is_auxtrace = intel_bts_evsel_is_auxtrace; session->auxtrace = &bts->auxtrace; intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE, diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index 0ccf10a0bf44..4ce109993e74 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -552,7 +552,7 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len, break; default: break; - }; + } if (!(byte & BIT(0))) { if (byte == 0) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 23c8289c2472..f17b1e769ae4 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -33,6 +33,7 @@ #include "tsc.h" #include "intel-pt.h" #include "config.h" +#include "util/perf_api_probe.h" #include "util/synthetic-events.h" #include "time-utils.h" @@ -68,6 +69,10 @@ struct intel_pt { bool est_tsc; bool sync_switch; bool mispred_all; + bool use_thread_stack; + bool callstack; + unsigned int br_stack_sz; + unsigned int br_stack_sz_plus; int have_sched_switch; u32 pmu_type; u64 kernel_start; @@ -124,6 +129,9 @@ struct intel_pt { struct range *time_ranges; unsigned int range_cnt; + + struct ip_callchain *chain; + struct branch_stack *br_stack; }; enum switch_state { @@ -143,8 +151,6 @@ struct intel_pt_queue { const struct intel_pt_state *state; struct ip_callchain *chain; struct branch_stack *last_branch; - struct branch_stack *last_branch_rb; - size_t last_branch_pos; union perf_event *event_buf; bool on_heap; bool stop; @@ -868,6 +874,83 @@ static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) pt->tc.time_mult; } +static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt) +{ + size_t sz = sizeof(struct ip_callchain); + + /* Add 1 to callchain_sz for callchain context */ + sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); + return zalloc(sz); +} + +static int intel_pt_callchain_init(struct intel_pt *pt) +{ + struct evsel *evsel; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN)) + evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN; + } + + pt->chain = intel_pt_alloc_chain(pt); + if (!pt->chain) + return -ENOMEM; + + return 0; +} + +static void intel_pt_add_callchain(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct thread *thread = machine__findnew_thread(pt->machine, + sample->pid, + sample->tid); + + thread_stack__sample_late(thread, sample->cpu, pt->chain, + pt->synth_opts.callchain_sz + 1, sample->ip, + pt->kernel_start); + + sample->callchain = pt->chain; +} + 
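/*
 * Illustrative sketch only, not part of the patch; the helper name is
 * hypothetical. It condenses how the two halves above cooperate:
 * intel_pt_callchain_init() runs once at setup to tag evsels via
 * synth_sample_type and allocate pt->chain, while intel_pt_add_callchain()
 * runs per sample, late, after the trace has been decoded. Both call sites
 * appear verbatim later in this patch, in intel_pt_process_auxtrace_info()
 * and intel_pt_process_event(). The branch-stack pair below works the same
 * way.
 */
static int example_add_callchain_flow(struct intel_pt *pt,
				      union perf_event *event,
				      struct perf_sample *sample)
{
	int err;

	/* Setup: allocate pt->chain and mark evsels (done once). */
	if (pt->synth_opts.add_callchain) {
		err = intel_pt_callchain_init(pt);
		if (err)
			return err;
	}

	/* Per event: only samples without a recorded callchain get one. */
	if (event->header.type == PERF_RECORD_SAMPLE &&
	    pt->synth_opts.add_callchain && !sample->callchain)
		intel_pt_add_callchain(pt, sample);

	return 0;
}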
+static struct branch_stack *intel_pt_alloc_br_stack(struct intel_pt *pt) +{ + size_t sz = sizeof(struct branch_stack); + + sz += pt->br_stack_sz * sizeof(struct branch_entry); + return zalloc(sz); +} + +static int intel_pt_br_stack_init(struct intel_pt *pt) +{ + struct evsel *evsel; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK)) + evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK; + } + + pt->br_stack = intel_pt_alloc_br_stack(pt); + if (!pt->br_stack) + return -ENOMEM; + + return 0; +} + +static void intel_pt_add_br_stack(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct thread *thread = machine__findnew_thread(pt->machine, + sample->pid, + sample->tid); + + thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack, + pt->br_stack_sz, sample->ip, + pt->kernel_start); + + sample->branch_stack = pt->br_stack; +} + static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, unsigned int queue_nr) { @@ -880,26 +963,15 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, return NULL; if (pt->synth_opts.callchain) { - size_t sz = sizeof(struct ip_callchain); - - /* Add 1 to callchain_sz for callchain context */ - sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); - ptq->chain = zalloc(sz); + ptq->chain = intel_pt_alloc_chain(pt); if (!ptq->chain) goto out_free; } if (pt->synth_opts.last_branch) { - size_t sz = sizeof(struct branch_stack); - - sz += pt->synth_opts.last_branch_sz * - sizeof(struct branch_entry); - ptq->last_branch = zalloc(sz); + ptq->last_branch = intel_pt_alloc_br_stack(pt); if (!ptq->last_branch) goto out_free; - ptq->last_branch_rb = zalloc(sz); - if (!ptq->last_branch_rb) - goto out_free; } ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); @@ -968,7 +1040,6 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, out_free: zfree(&ptq->event_buf); zfree(&ptq->last_branch); - zfree(&ptq->last_branch_rb); zfree(&ptq->chain); free(ptq); return NULL; @@ -984,7 +1055,6 @@ static void intel_pt_free_queue(void *priv) intel_pt_decoder_free(ptq->decoder); zfree(&ptq->event_buf); zfree(&ptq->last_branch); - zfree(&ptq->last_branch_rb); zfree(&ptq->chain); free(ptq); } @@ -1152,58 +1222,6 @@ static int intel_pt_setup_queues(struct intel_pt *pt) return 0; } -static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq) -{ - struct branch_stack *bs_src = ptq->last_branch_rb; - struct branch_stack *bs_dst = ptq->last_branch; - size_t nr = 0; - - bs_dst->nr = bs_src->nr; - - if (!bs_src->nr) - return; - - nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos; - memcpy(&bs_dst->entries[0], - &bs_src->entries[ptq->last_branch_pos], - sizeof(struct branch_entry) * nr); - - if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) { - memcpy(&bs_dst->entries[nr], - &bs_src->entries[0], - sizeof(struct branch_entry) * ptq->last_branch_pos); - } -} - -static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq) -{ - ptq->last_branch_pos = 0; - ptq->last_branch_rb->nr = 0; -} - -static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) -{ - const struct intel_pt_state *state = ptq->state; - struct branch_stack *bs = ptq->last_branch_rb; - struct branch_entry *be; - - if (!ptq->last_branch_pos) - ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz; - - ptq->last_branch_pos -= 1; - - be = &bs->entries[ptq->last_branch_pos]; - be->from = state->from_ip; - be->to = state->to_ip; - be->flags.abort = 
!!(state->flags & INTEL_PT_ABORT_TX); - be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX); - /* No support for mispredict */ - be->flags.mispred = ptq->pt->mispred_all; - - if (bs->nr < ptq->pt->synth_opts.last_branch_sz) - bs->nr += 1; -} - static inline bool intel_pt_skip_event(struct intel_pt *pt) { return pt->synth_opts.initial_skip && @@ -1271,9 +1289,9 @@ static inline int intel_pt_opt_inject(struct intel_pt *pt, return intel_pt_inject_event(event, sample, type); } -static int intel_pt_deliver_synth_b_event(struct intel_pt *pt, - union perf_event *event, - struct perf_sample *sample, u64 type) +static int intel_pt_deliver_synth_event(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample, u64 type) { int ret; @@ -1333,8 +1351,8 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt; } - return intel_pt_deliver_synth_b_event(pt, event, &sample, - pt->branches_sample_type); + return intel_pt_deliver_synth_event(pt, event, &sample, + pt->branches_sample_type); } static void intel_pt_prep_sample(struct intel_pt *pt, @@ -1352,27 +1370,12 @@ static void intel_pt_prep_sample(struct intel_pt *pt, } if (pt->synth_opts.last_branch) { - intel_pt_copy_last_branch_rb(ptq); + thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch, + pt->br_stack_sz); sample->branch_stack = ptq->last_branch; } } -static inline int intel_pt_deliver_synth_event(struct intel_pt *pt, - struct intel_pt_queue *ptq, - union perf_event *event, - struct perf_sample *sample, - u64 type) -{ - int ret; - - ret = intel_pt_deliver_synth_b_event(pt, event, sample, type); - - if (pt->synth_opts.last_branch) - intel_pt_reset_last_branch_rb(ptq); - - return ret; -} - static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; @@ -1397,7 +1400,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) ptq->last_insn_cnt = ptq->state->tot_insn_cnt; - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->instructions_sample_type); } @@ -1415,7 +1418,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) sample.id = ptq->pt->transactions_id; sample.stream_id = ptq->pt->transactions_id; - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->transactions_sample_type); } @@ -1456,7 +1459,7 @@ static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->ptwrites_sample_type); } @@ -1486,7 +1489,7 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1511,7 +1514,7 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1536,7 +1539,7 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue 
*ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1561,7 +1564,7 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1586,7 +1589,7 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1680,15 +1683,14 @@ static u64 intel_pt_lbr_flags(u64 info) union { struct branch_flags flags; u64 result; - } u = { - .flags = { - .mispred = !!(info & LBR_INFO_MISPRED), - .predicted = !(info & LBR_INFO_MISPRED), - .in_tx = !!(info & LBR_INFO_IN_TX), - .abort = !!(info & LBR_INFO_ABORT), - .cycles = info & LBR_INFO_CYCLES, - } - }; + } u; + + u.result = 0; + u.flags.mispred = !!(info & LBR_INFO_MISPRED); + u.flags.predicted = !(info & LBR_INFO_MISPRED); + u.flags.in_tx = !!(info & LBR_INFO_IN_TX); + u.flags.abort = !!(info & LBR_INFO_ABORT); + u.flags.cycles = info & LBR_INFO_CYCLES; return u.result; } @@ -1807,7 +1809,9 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) intel_pt_add_lbrs(&br.br_stack, items); sample.branch_stack = &br.br_stack; } else if (pt->synth_opts.last_branch) { - intel_pt_copy_last_branch_rb(ptq); + thread_stack__br_sample(ptq->thread, ptq->cpu, + ptq->last_branch, + pt->br_stack_sz); sample.branch_stack = ptq->last_branch; } else { br.br_stack.nr = 0; @@ -1842,7 +1846,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) sample.transaction = txn; } - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type); + return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); } static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, @@ -1992,12 +1996,15 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) if (!(state->type & INTEL_PT_BRANCH)) return 0; - if (pt->synth_opts.callchain || pt->synth_opts.thread_stack) - thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip, - state->to_ip, ptq->insn_len, - state->trace_nr); - else + if (pt->use_thread_stack) { + thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, + state->from_ip, state->to_ip, ptq->insn_len, + state->trace_nr, pt->callstack, + pt->br_stack_sz_plus, + pt->mispred_all); + } else { thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr); + } if (pt->sample_branches) { err = intel_pt_synth_branch_sample(ptq); @@ -2005,9 +2012,6 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) return err; } - if (pt->synth_opts.last_branch) - intel_pt_update_last_branch_rb(ptq); - if (!ptq->sync_switch) return 0; @@ -2484,7 +2488,7 @@ static int intel_pt_process_switch(struct intel_pt *pt, if (evsel != pt->switch_evsel) return 0; - tid = perf_evsel__intval(evsel, sample, "next_pid"); + tid = evsel__intval(evsel, sample, "next_pid"); cpu = sample->cpu; intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", @@ -2639,6 +2643,13 @@ static int intel_pt_process_event(struct perf_session 
*session, if (err) return err; + if (event->header.type == PERF_RECORD_SAMPLE) { + if (pt->synth_opts.add_callchain && !sample->callchain) + intel_pt_add_callchain(pt, sample); + if (pt->synth_opts.add_last_branch && !sample->branch_stack) + intel_pt_add_br_stack(pt, sample); + } + if (event->header.type == PERF_RECORD_AUX && (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && pt->synth_opts.errors) { @@ -2710,11 +2721,21 @@ static void intel_pt_free(struct perf_session *session) session->auxtrace = NULL; thread__put(pt->unknown_thread); addr_filters__exit(&pt->filts); + zfree(&pt->chain); zfree(&pt->filter); zfree(&pt->time_ranges); free(pt); } +static bool intel_pt_evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + + return evsel->core.attr.type == pt->pmu_type; +} + static int intel_pt_process_auxtrace_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool __maybe_unused) @@ -3016,7 +3037,7 @@ static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist) struct evsel *evsel; evlist__for_each_entry_reverse(evlist, evsel) { - const char *name = perf_evsel__name(evsel); + const char *name = evsel__name(evsel); if (!strcmp(name, "sched:sched_switch")) return evsel; @@ -3310,6 +3331,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->auxtrace.flush_events = intel_pt_flush; pt->auxtrace.free_events = intel_pt_free_events; pt->auxtrace.free = intel_pt_free; + pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace; session->auxtrace = &pt->auxtrace; if (dump_trace) @@ -3338,6 +3360,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, !session->itrace_synth_opts->inject) { pt->synth_opts.branches = false; pt->synth_opts.callchain = true; + pt->synth_opts.add_callchain = true; } pt->synth_opts.thread_stack = session->itrace_synth_opts->thread_stack; @@ -3370,14 +3393,54 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->branches_filter |= PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_BEGIN; - if (pt->synth_opts.callchain && !symbol_conf.use_callchain) { + if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) && + !symbol_conf.use_callchain) { symbol_conf.use_callchain = true; if (callchain_register_param(&callchain_param) < 0) { symbol_conf.use_callchain = false; pt->synth_opts.callchain = false; + pt->synth_opts.add_callchain = false; } } + if (pt->synth_opts.add_callchain) { + err = intel_pt_callchain_init(pt); + if (err) + goto err_delete_thread; + } + + if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) { + pt->br_stack_sz = pt->synth_opts.last_branch_sz; + pt->br_stack_sz_plus = pt->br_stack_sz; + } + + if (pt->synth_opts.add_last_branch) { + err = intel_pt_br_stack_init(pt); + if (err) + goto err_delete_thread; + /* + * Additional branch stack size to cater for tracing from the + * actual sample ip to where the sample time is recorded. + * Measured at about 200 branches, but generously set to 1024. + * If kernel space is not being traced, then add just 1 for the + * branch to kernel space. 
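+ * For example, with a 64-entry synthesized branch stack this gives + * br_stack_sz_plus = 64 + 1024 = 1088 when kernel space is traced, or + * 64 + 1 = 65 when it is not.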
+ */ + if (intel_pt_tracing_kernel(pt)) + pt->br_stack_sz_plus += 1024; + else + pt->br_stack_sz_plus += 1; + } + + pt->use_thread_stack = pt->synth_opts.callchain || + pt->synth_opts.add_callchain || + pt->synth_opts.thread_stack || + pt->synth_opts.last_branch || + pt->synth_opts.add_last_branch; + + pt->callstack = pt->synth_opts.callchain || + pt->synth_opts.add_callchain || + pt->synth_opts.thread_stack; + err = intel_pt_synth_events(pt, session); if (err) goto err_delete_thread; @@ -3400,6 +3463,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, return 0; err_delete_thread: + zfree(&pt->chain); thread__zput(pt->unknown_thread); err_free_queues: intel_pt_log_disable(); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 97142e9671be..8ed2135893bb 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -736,6 +736,12 @@ int machine__process_switch_event(struct machine *machine __maybe_unused, return 0; } +static int is_bpf_image(const char *name) +{ + return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 || + strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0; +} + static int machine__process_ksymbol_register(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) @@ -759,6 +765,12 @@ static int machine__process_ksymbol_register(struct machine *machine, map->start = event->ksymbol.addr; map->end = map->start + event->ksymbol.len; maps__insert(&machine->kmaps, map); + dso__set_loaded(dso); + + if (is_bpf_image(event->ksymbol.name)) { + dso->binary_type = DSO_BINARY_TYPE__BPF_IMAGE; + dso__set_long_name(dso, "", false); + } } sym = symbol__new(map->map_ip(map, map->start), @@ -2178,6 +2190,303 @@ static int remove_loops(struct branch_entry *l, int nr, return nr; } +static int lbr_callchain_add_kernel_ip(struct thread *thread, + struct callchain_cursor *cursor, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + u64 branch_from, + bool callee, int end) +{ + struct ip_callchain *chain = sample->callchain; + u8 cpumode = PERF_RECORD_MISC_USER; + int err, i; + + if (callee) { + for (i = 0; i < end + 1; i++) { + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, chain->ips[i], + false, NULL, NULL, branch_from); + if (err) + return err; + } + return 0; + } + + for (i = end; i >= 0; i--) { + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, chain->ips[i], + false, NULL, NULL, branch_from); + if (err) + return err; + } + + return 0; +} + +static void save_lbr_cursor_node(struct thread *thread, + struct callchain_cursor *cursor, + int idx) +{ + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + + if (!lbr_stitch) + return; + + if (cursor->pos == cursor->nr) { + lbr_stitch->prev_lbr_cursor[idx].valid = false; + return; + } + + if (!cursor->curr) + cursor->curr = cursor->first; + else + cursor->curr = cursor->curr->next; + memcpy(&lbr_stitch->prev_lbr_cursor[idx], cursor->curr, + sizeof(struct callchain_cursor_node)); + + lbr_stitch->prev_lbr_cursor[idx].valid = true; + cursor->pos++; +} +
+static int lbr_callchain_add_lbr_ip(struct thread *thread, + struct callchain_cursor *cursor, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + u64 *branch_from, + bool callee) +{ + struct branch_stack *lbr_stack = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); + u8 cpumode = PERF_RECORD_MISC_USER; + int lbr_nr = lbr_stack->nr; + struct branch_flags *flags; + int err, i; + u64 ip; + + /* + * curr and pos are not used in a writing session. They are cleared + * in callchain_cursor_commit() when the writing session is closed. + * Use curr and pos here to track the current cursor node. + */ + if (thread->lbr_stitch) { + cursor->curr = NULL; + cursor->pos = cursor->nr; + if (cursor->nr) { + cursor->curr = cursor->first; + for (i = 0; i < (int)(cursor->nr - 1); i++) + cursor->curr = cursor->curr->next; + } + } + + if (callee) { + /* Add LBR ip from first entries.to */ + ip = entries[0].to; + flags = &entries[0].flags; + *branch_from = entries[0].from; + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + true, flags, NULL, + *branch_from); + if (err) + return err; + + /* + * The number of cursor nodes increases. + * Move the current cursor node, but there is no need to save + * the cursor node for entry 0: it is impossible to stitch the + * whole LBRs of the previous sample. + */ + if (thread->lbr_stitch && (cursor->pos != cursor->nr)) { + if (!cursor->curr) + cursor->curr = cursor->first; + else + cursor->curr = cursor->curr->next; + cursor->pos++; + } + + /* Add LBR ip from entries.from one by one. */ + for (i = 0; i < lbr_nr; i++) { + ip = entries[i].from; + flags = &entries[i].flags; + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + true, flags, NULL, + *branch_from); + if (err) + return err; + save_lbr_cursor_node(thread, cursor, i); + } + return 0; + } + + /* Add LBR ip from entries.from one by one. */ + for (i = lbr_nr - 1; i >= 0; i--) { + ip = entries[i].from; + flags = &entries[i].flags; + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + true, flags, NULL, + *branch_from); + if (err) + return err; + save_lbr_cursor_node(thread, cursor, i); + } + + /* Add LBR ip from first entries.to */ + ip = entries[0].to; + flags = &entries[0].flags; + *branch_from = entries[0].from; + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + true, flags, NULL, + *branch_from); + if (err) + return err; + + return 0; +} + +static int lbr_callchain_add_stitched_lbr_ip(struct thread *thread, + struct callchain_cursor *cursor) +{ + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + struct callchain_cursor_node *cnode; + struct stitch_list *stitch_node; + int err; + + list_for_each_entry(stitch_node, &lbr_stitch->lists, node) { + cnode = &stitch_node->cursor; + + err = callchain_cursor_append(cursor, cnode->ip, + &cnode->ms, + cnode->branch, + &cnode->branch_flags, + cnode->nr_loop_iter, + cnode->iter_cycles, + cnode->branch_from, + cnode->srcline); + if (err) + return err; + } + return 0; +} + +static struct stitch_list *get_stitch_node(struct thread *thread) +{ + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + struct stitch_list *stitch_node; + + if (!list_empty(&lbr_stitch->free_lists)) { + stitch_node = list_first_entry(&lbr_stitch->free_lists, + struct stitch_list, node); + list_del(&stitch_node->node); + + return stitch_node; + } + + return malloc(sizeof(struct stitch_list)); +} +
+static bool has_stitched_lbr(struct thread *thread, + struct perf_sample *cur, + struct perf_sample *prev, + unsigned int max_lbr, + bool callee) +{ + struct branch_stack *cur_stack = cur->branch_stack; + struct branch_entry *cur_entries = perf_sample__branch_entries(cur); + struct branch_stack *prev_stack = prev->branch_stack; + struct branch_entry *prev_entries = perf_sample__branch_entries(prev); + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + int i, j, nr_identical_branches = 0; + struct stitch_list *stitch_node; + u64 cur_base, distance; + + if (!cur_stack || !prev_stack) + return false; + + /* Find the physical index of the base-of-stack for current sample. */ + cur_base = max_lbr - cur_stack->nr + cur_stack->hw_idx + 1; + + distance = (prev_stack->hw_idx > cur_base) ? (prev_stack->hw_idx - cur_base) : + (max_lbr + prev_stack->hw_idx - cur_base); + /* Previous sample has shorter stack. Nothing can be stitched. */ + if (distance + 1 > prev_stack->nr) + return false; + + /* + * Check if there are identical LBRs between two samples. + * Identical LBRs must have the same from, to and flags values. Also, + * they have to be saved in the same LBR registers (same physical + * index). + * + * Start from the base-of-stack of the current sample. + */ + for (i = distance, j = cur_stack->nr - 1; (i >= 0) && (j >= 0); i--, j--) { + if ((prev_entries[i].from != cur_entries[j].from) || + (prev_entries[i].to != cur_entries[j].to) || + (prev_entries[i].flags.value != cur_entries[j].flags.value)) + break; + nr_identical_branches++; + } + + if (!nr_identical_branches) + return false; + + /* + * Save the LBRs between the base-of-stack of previous sample + * and the base-of-stack of current sample into lbr_stitch->lists. + * These LBRs will be stitched later. + */ + for (i = prev_stack->nr - 1; i > (int)distance; i--) { + + if (!lbr_stitch->prev_lbr_cursor[i].valid) + continue; + + stitch_node = get_stitch_node(thread); + if (!stitch_node) + return false; + + memcpy(&stitch_node->cursor, &lbr_stitch->prev_lbr_cursor[i], + sizeof(struct callchain_cursor_node)); + + if (callee) + list_add(&stitch_node->node, &lbr_stitch->lists); + else + list_add_tail(&stitch_node->node, &lbr_stitch->lists); + } + + return true; +} + +static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr) +{ + if (thread->lbr_stitch) + return true; + + thread->lbr_stitch = zalloc(sizeof(*thread->lbr_stitch)); + if (!thread->lbr_stitch) + goto err; + + thread->lbr_stitch->prev_lbr_cursor = calloc(max_lbr + 1, sizeof(struct callchain_cursor_node)); + if (!thread->lbr_stitch->prev_lbr_cursor) + goto free_lbr_stitch; + + INIT_LIST_HEAD(&thread->lbr_stitch->lists); + INIT_LIST_HEAD(&thread->lbr_stitch->free_lists); + + return true; + +free_lbr_stitch: + zfree(&thread->lbr_stitch); +err: + pr_warning("Failed to allocate space for stitched LBRs. Disable LBR stitch\n"); + thread->lbr_stitch_enable = false; + return false; +} +
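The index arithmetic in has_stitched_lbr() is easier to check with concrete numbers: with max_lbr = 32, a current sample of 10 entries with hw_idx = 5 gives cur_base = 32 - 10 + 5 + 1 = 28; a previous sample with hw_idx = 15 wraps around, so distance = 32 + 15 - 28 = 19, and a previous stack of 25 entries (at least distance + 1) can overlap. A standalone sketch of just this computation, with stand-in types for the perf structs:

#include <assert.h>

/* Stand-ins for the perf sample fields used by has_stitched_lbr(). */
struct stack { unsigned long long hw_idx; unsigned int nr; };

/* Distance, in physical LBR slots, from the current sample's
 * base-of-stack to the previous sample's base-of-stack. */
static unsigned long long stitch_distance(const struct stack *cur,
					  const struct stack *prev,
					  unsigned int max_lbr)
{
	unsigned long long cur_base = max_lbr - cur->nr + cur->hw_idx + 1;

	return (prev->hw_idx > cur_base) ? prev->hw_idx - cur_base :
					   max_lbr + prev->hw_idx - cur_base;
}

int main(void)
{
	struct stack cur = { .hw_idx = 5, .nr = 10 };
	struct stack prev = { .hw_idx = 15, .nr = 25 };

	/* cur_base = 28; 15 < 28, so the index wraps: distance = 19.
	 * distance + 1 <= prev.nr, so the two stacks overlap and the
	 * identical-branch comparison loop is worth running. */
	assert(stitch_distance(&cur, &prev, 32) == 19);
	return 0;
}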
/* * Resolve LBR callchain sample * Return: @@ -2190,12 +2499,16 @@ static int resolve_lbr_callchain_sample(struct thread *thread, struct perf_sample *sample, struct symbol **parent, struct addr_location *root_al, - int max_stack) + int max_stack, + unsigned int max_lbr) { + bool callee = (callchain_param.order == ORDER_CALLEE); struct ip_callchain *chain = sample->callchain; int chain_nr = min(max_stack, (int)chain->nr), i; - u8 cpumode = PERF_RECORD_MISC_USER; - u64 ip, branch_from = 0; + struct lbr_stitch *lbr_stitch; + bool stitched_lbr = false; + u64 branch_from = 0; + int err; for (i = 0; i < chain_nr; i++) { if (chain->ips[i] == PERF_CONTEXT_USER) @@ -2203,71 +2516,65 @@ static int resolve_lbr_callchain_sample(struct thread *thread, } /* LBR only affects the user callchain */ - if (i != chain_nr) { - struct branch_stack *lbr_stack = sample->branch_stack; - struct branch_entry *entries = perf_sample__branch_entries(sample); - int lbr_nr = lbr_stack->nr, j, k; - bool branch; - struct branch_flags *flags; - /* - * LBR callstack can only get user call chain. - * The mix_chain_nr is kernel call chain - * number plus LBR user call chain number. - * i is kernel call chain number, - * 1 is PERF_CONTEXT_USER, - * lbr_nr + 1 is the user call chain number. - * For details, please refer to the comments - * in callchain__printf - */ - int mix_chain_nr = i + 1 + lbr_nr + 1; + if (i == chain_nr) + return 0; - for (j = 0; j < mix_chain_nr; j++) { - int err; - branch = false; - flags = NULL; + if (thread->lbr_stitch_enable && !sample->no_hw_idx && + (max_lbr > 0) && alloc_lbr_stitch(thread, max_lbr)) { + lbr_stitch = thread->lbr_stitch; - if (callchain_param.order == ORDER_CALLEE) { - if (j < i + 1) - ip = chain->ips[j]; - else if (j > i + 1) { - k = j - i - 2; - ip = entries[k].from; - branch = true; - flags = &entries[k].flags; - } else { - ip = entries[0].to; - branch = true; - flags = &entries[0].flags; - branch_from = entries[0].from; - } - } else { - if (j < lbr_nr) { - k = lbr_nr - j - 1; - ip = entries[k].from; - branch = true; - flags = &entries[k].flags; - } - else if (j > lbr_nr) - ip = chain->ips[i + 1 - (j - lbr_nr)]; - else { - ip = entries[0].to; - branch = true; - flags = &entries[0].flags; - branch_from = entries[0].from; - } - } + stitched_lbr = has_stitched_lbr(thread, sample, + &lbr_stitch->prev_sample, + max_lbr, callee); - err = add_callchain_ip(thread, cursor, parent, - root_al, &cpumode, ip, - branch, flags, NULL, - branch_from); + if (!stitched_lbr && !list_empty(&lbr_stitch->lists)) { + list_replace_init(&lbr_stitch->lists, + &lbr_stitch->free_lists); + } + memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample)); + } + + if (callee) { + /* Add kernel ip */ + err = lbr_callchain_add_kernel_ip(thread, cursor, sample, + parent, root_al, branch_from, + true, i); + if (err) + goto error; + + err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent, + root_al, &branch_from, true); + if (err) + goto error; + + if (stitched_lbr) { + err = lbr_callchain_add_stitched_lbr_ip(thread, cursor); if (err) - return (err < 0) ?
err : 0; + goto error; } - return 1; + + } else { + if (stitched_lbr) { + err = lbr_callchain_add_stitched_lbr_ip(thread, cursor); + if (err) + goto error; + } + err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent, + root_al, &branch_from, false); + if (err) + goto error; + + /* Add kernel ip */ + err = lbr_callchain_add_kernel_ip(thread, cursor, sample, + parent, root_al, branch_from, + false, i); + if (err) + goto error; } + return 1; - return 0; +error: + return (err < 0) ? err : 0; } static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, @@ -2311,9 +2618,12 @@ static int thread__resolve_callchain_sample(struct thread *thread, if (chain) chain_nr = chain->nr; - if (perf_evsel__has_branch_callstack(evsel)) { + if (evsel__has_branch_callstack(evsel)) { + struct perf_env *env = evsel__env(evsel); + err = resolve_lbr_callchain_sample(thread, cursor, sample, parent, - root_al, max_stack); + root_al, max_stack, + !env ? 0 : env->max_branches); if (err) return (err < 0) ? err : 0; } diff --git a/tools/perf/util/mem2node.c b/tools/perf/util/mem2node.c index 797d86a1ab09..c84f5841c7ab 100644 --- a/tools/perf/util/mem2node.c +++ b/tools/perf/util/mem2node.c @@ -1,5 +1,6 @@ #include <errno.h> #include <inttypes.h> +#include <asm/bug.h> #include <linux/bitmap.h> #include <linux/kernel.h> #include <linux/zalloc.h> @@ -95,7 +96,7 @@ int mem2node__init(struct mem2node *map, struct perf_env *env) /* Cut unused entries, due to merging. */ tmp_entries = realloc(entries, sizeof(*entries) * j); - if (tmp_entries) + if (tmp_entries || WARN_ON_ONCE(j == 0)) entries = tmp_entries; for (i = 0; i < j; i++) { diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 926449a7cdbf..b071df373f8b 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -90,6 +90,7 @@ struct egroup { const char *metric_name; const char *metric_expr; const char *metric_unit; + int runtime; }; static struct evsel *find_evsel_group(struct evlist *perf_evlist, @@ -202,6 +203,7 @@ static int metricgroup__setup_events(struct list_head *groups, expr->metric_name = eg->metric_name; expr->metric_unit = eg->metric_unit; expr->metric_events = metric_events; + expr->runtime = eg->runtime; list_add(&expr->nd, &me->head); } @@ -485,6 +487,45 @@ static bool metricgroup__has_constraint(struct pmu_event *pe) return false; } +int __weak arch_get_runtimeparam(void) +{ + return 1; +} + +static int __metricgroup__add_metric(struct strbuf *events, + struct list_head *group_list, struct pmu_event *pe, int runtime) +{ + + const char **ids; + int idnum; + struct egroup *eg; + + if (expr__find_other(pe->metric_expr, NULL, &ids, &idnum, runtime) < 0) + return -EINVAL; + + if (events->len > 0) + strbuf_addf(events, ","); + + if (metricgroup__has_constraint(pe)) + metricgroup__add_metric_non_group(events, ids, idnum); + else + metricgroup__add_metric_weak_group(events, ids, idnum); + + eg = malloc(sizeof(*eg)); + if (!eg) + return -ENOMEM; + + eg->ids = ids; + eg->idnum = idnum; + eg->metric_name = pe->metric_name; + eg->metric_expr = pe->metric_expr; + eg->metric_unit = pe->unit; + eg->runtime = runtime; + list_add_tail(&eg->nd, group_list); + + return 0; +} + static int metricgroup__add_metric(const char *metric, struct strbuf *events, struct list_head *group_list) { @@ -504,35 +545,26 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, continue; if (match_metric(pe->metric_group, metric) || match_metric(pe->metric_name, metric)) { - const char 
**ids; - int idnum; - struct egroup *eg; pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); - if (expr__find_other(pe->metric_expr, - NULL, &ids, &idnum) < 0) - continue; - if (events->len > 0) - strbuf_addf(events, ","); + if (!strstr(pe->metric_expr, "?")) { + ret = __metricgroup__add_metric(events, group_list, pe, 1); + } else { + int j, count; - if (metricgroup__has_constraint(pe)) - metricgroup__add_metric_non_group(events, ids, idnum); - else - metricgroup__add_metric_weak_group(events, ids, idnum); + count = arch_get_runtimeparam(); - eg = malloc(sizeof(struct egroup)); - if (!eg) { - ret = -ENOMEM; - break; + /* Create multiple events, one per runtime + * parameter value (0..count-1), and add + * them to group_list. + */ + + for (j = 0; j < count; j++) + ret = __metricgroup__add_metric(events, group_list, pe, j); } - eg->ids = ids; - eg->idnum = idnum; - eg->metric_name = pe->metric_name; - eg->metric_expr = pe->metric_expr; - eg->metric_unit = pe->unit; - list_add_tail(&eg->nd, group_list); - ret = 0; + if (ret == -ENOMEM) + break; } } return ret; } diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 475c7f912864..6b09eb30b4ec 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -22,6 +22,7 @@ struct metric_expr { const char *metric_name; const char *metric_unit; struct evsel **metric_events; + int runtime; }; struct metric_event *metricgroup__lookup(struct rblist *metric_events, @@ -34,4 +35,5 @@ int metricgroup__parse_groups(const struct option *opt, void metricgroup__print(bool metrics, bool groups, char *filter, bool raw, bool details); bool metricgroup__has_metric(const char *metric); +int arch_get_runtimeparam(void); #endif diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 359db2b1fcef..48c8f609441b 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -314,7 +314,7 @@ static int __ordered_events__flush(struct ordered_events *oe, enum oe_flush how, case OE_FLUSH__NONE: default: break; - }; + } pr_oe_time(oe->next_flush, "next_flush - ordered_events__flush PRE %s, nr_events %u\n", str[how], oe->nr_events); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 10107747b361..b7a0518d607d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -461,7 +461,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, cache_op = parse_aliases(str, perf_evsel__hw_cache_op, PERF_COUNT_HW_CACHE_OP_MAX); if (cache_op >= 0) { - if (!perf_evsel__is_cache_op_valid(cache_type, cache_op)) + if (!evsel__is_cache_op_valid(cache_type, cache_op)) return -EINVAL; continue; } @@ -1482,6 +1482,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, list_for_each_entry_safe(pos, tmp, &config_terms, list) { list_del_init(&pos->list); + zfree(&pos->val.str); free(pos); } return -EINVAL; @@ -1870,7 +1871,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) evsel->precise_max = mod.precise_max; evsel->weak_group = mod.weak; - if (perf_evsel__is_group_leader(evsel)) + if (evsel__is_group_leader(evsel)) evsel->core.attr.pinned = mod.pinned; } @@ -2190,6 +2191,29 @@ int parse_events_option(const struct option *opt, const char *str, return ret; } +int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset) +{ + struct evlist **evlistp = opt->value; + int ret; + + if (*evlistp == NULL) { + *evlistp = evlist__new(); + + if
(*evlistp == NULL) { + fprintf(stderr, "Not enough memory to create evlist\n"); + return -1; + } + } + + ret = parse_events_option(opt, str, unset); + if (ret) { + evlist__delete(*evlistp); + *evlistp = NULL; + } + + return ret; +} + static int foreach_evsel_in_last_glob(struct evlist *evlist, int (*func)(struct evsel *evsel, @@ -2237,7 +2261,7 @@ static int set_filter(struct evsel *evsel, const void *arg) } if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) { - if (perf_evsel__append_tp_filter(evsel, str) < 0) { + if (evsel__append_tp_filter(evsel, str) < 0) { fprintf(stderr, "not enough memory to hold filter string\n"); return -1; @@ -2262,7 +2286,7 @@ static int set_filter(struct evsel *evsel, const void *arg) return -1; } - if (perf_evsel__append_addr_filter(evsel, str) < 0) { + if (evsel__append_addr_filter(evsel, str) < 0) { fprintf(stderr, "not enough memory to hold filter string\n"); return -1; @@ -2293,7 +2317,7 @@ static int add_exclude_perf_filter(struct evsel *evsel, snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid()); - if (perf_evsel__append_tp_filter(evsel, new_filter) < 0) { + if (evsel__append_tp_filter(evsel, new_filter) < 0) { fprintf(stderr, "not enough memory to hold filter string\n"); return -1; @@ -2603,12 +2627,11 @@ restart: for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { /* skip invalid cache type */ - if (!perf_evsel__is_cache_op_valid(type, op)) + if (!evsel__is_cache_op_valid(type, op)) continue; for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - __perf_evsel__hw_cache_type_op_res_name(type, op, i, - name, sizeof(name)); + __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); if (event_glob != NULL && !strglobmatch(name, event_glob)) continue; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 27596cbd0ba0..6ead9661238c 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -31,6 +31,7 @@ bool have_tracepoints(struct list_head *evlist); const char *event_type(int type); int parse_events_option(const struct option *opt, const char *str, int unset); +int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset); int parse_events(struct evlist *evlist, const char *str, struct parse_events_error *error); int parse_events_terms(struct list_head *terms, const char *str); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index baa48f28d57d..c589fc42f058 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -286,6 +286,7 @@ no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); } percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); } aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } +r{num_raw_hex} { return raw(yyscanner); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 94f8bcd83582..c4ca932d092d 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -44,7 +44,7 @@ static void free_list_evsel(struct list_head* list_evsel) list_for_each_entry_safe(evsel, tmp, list_evsel, core.node) { list_del_init(&evsel->core.node); - perf_evsel__delete(evsel); + evsel__delete(evsel); } free(list_evsel); } @@ -326,6 +326,7 @@ PE_NAME 
opt_pmu_config } parse_events_terms__delete($2); parse_events_terms__delete(orig_terms); + free(pattern); free($1); $$ = list; #undef CLEANUP_YYABORT @@ -706,6 +707,15 @@ event_term } event_term: +PE_RAW +{ + struct parse_events_term *term; + + ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_CONFIG, + NULL, $1, false, &@1, NULL)); + $$ = term; +} +| PE_NAME '=' PE_NAME { struct parse_events_term *term; diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c new file mode 100644 index 000000000000..1337965673d7 --- /dev/null +++ b/tools/perf/util/perf_api_probe.c @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "perf-sys.h" +#include "util/cloexec.h" +#include "util/evlist.h" +#include "util/evsel.h" +#include "util/parse-events.h" +#include "util/perf_api_probe.h" +#include <perf/cpumap.h> +#include <errno.h> + +typedef void (*setup_probe_fn_t)(struct evsel *evsel); + +static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) +{ + struct evlist *evlist; + struct evsel *evsel; + unsigned long flags = perf_event_open_cloexec_flag(); + int err = -EAGAIN, fd; + static pid_t pid = -1; + + evlist = evlist__new(); + if (!evlist) + return -ENOMEM; + + if (parse_events(evlist, str, NULL)) + goto out_delete; + + evsel = evlist__first(evlist); + + while (1) { + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); + if (fd < 0) { + if (pid == -1 && errno == EACCES) { + pid = 0; + continue; + } + goto out_delete; + } + break; + } + close(fd); + + fn(evsel); + + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); + if (fd < 0) { + if (errno == EINVAL) + err = -EINVAL; + goto out_delete; + } + close(fd); + err = 0; + +out_delete: + evlist__delete(evlist); + return err; +} + +static bool perf_probe_api(setup_probe_fn_t fn) +{ + const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL}; + struct perf_cpu_map *cpus; + int cpu, ret, i = 0; + + cpus = perf_cpu_map__new(NULL); + if (!cpus) + return false; + cpu = cpus->map[0]; + perf_cpu_map__put(cpus); + + do { + ret = perf_do_probe_api(fn, cpu, try[i++]); + if (!ret) + return true; + } while (ret == -EAGAIN && try[i]); + + return false; +} + +static void perf_probe_sample_identifier(struct evsel *evsel) +{ + evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER; +} + +static void perf_probe_comm_exec(struct evsel *evsel) +{ + evsel->core.attr.comm_exec = 1; +} + +static void perf_probe_context_switch(struct evsel *evsel) +{ + evsel->core.attr.context_switch = 1; +} + +bool perf_can_sample_identifier(void) +{ + return perf_probe_api(perf_probe_sample_identifier); +} + +bool perf_can_comm_exec(void) +{ + return perf_probe_api(perf_probe_comm_exec); +} + +bool perf_can_record_switch_events(void) +{ + return perf_probe_api(perf_probe_context_switch); +} + +bool perf_can_record_cpu_wide(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + .exclude_kernel = 1, + }; + struct perf_cpu_map *cpus; + int cpu, fd; + + cpus = perf_cpu_map__new(NULL); + if (!cpus) + return false; + cpu = cpus->map[0]; + perf_cpu_map__put(cpus); + + fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); + if (fd < 0) + return false; + close(fd); + + return true; +} + +/* + * Architectures are expected to know if AUX area sampling is supported by the + * hardware. Here we check for kernel support. 
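Each perf_can_*() helper here reduces to perf_do_probe_api() above: open a throwaway event, flip one attribute bit, re-open, and treat EINVAL on the second open as lack of kernel support. Callers simply branch on the boolean; a minimal, illustrative gate (record_switch_events is the existing record_opts field, but the wiring shown is a sketch, not code from this patch):

#include <stdbool.h>
#include "util/perf_api_probe.h"
#include "util/record.h"

/* Illustrative gate: only request context-switch records when the
 * running kernel can produce them. */
static void record__maybe_switch_events(struct record_opts *opts)
{
	if (perf_can_record_switch_events())
		opts->record_switch_events = true;
}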
+ */ +bool perf_can_aux_sample(void) +{ + struct perf_event_attr attr = { + .size = sizeof(struct perf_event_attr), + .exclude_kernel = 1, + /* + * Non-zero value causes the kernel to calculate the effective + * attribute size up to that byte. + */ + .aux_sample_size = 1, + }; + int fd; + + fd = sys_perf_event_open(&attr, -1, 0, -1, 0); + /* + * If the kernel attribute is big enough to contain aux_sample_size + * then we assume that it is supported. We are relying on the kernel to + * validate the attribute size before anything else that could be wrong. + */ + if (fd < 0 && errno == E2BIG) + return false; + if (fd >= 0) + close(fd); + + return true; +} diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h new file mode 100644 index 000000000000..706c3c6426e2 --- /dev/null +++ b/tools/perf/util/perf_api_probe.h @@ -0,0 +1,14 @@ + +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_API_PROBE_H +#define __PERF_API_PROBE_H + +#include <stdbool.h> + +bool perf_can_aux_sample(void); +bool perf_can_comm_exec(void); +bool perf_can_record_cpu_wide(void); +bool perf_can_record_switch_events(void); +bool perf_can_sample_identifier(void); + +#endif // __PERF_API_PROBE_H diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ef6a63f3d386..92bd7fafcce6 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -18,6 +18,7 @@ #include <regex.h> #include <perf/cpumap.h> #include "debug.h" +#include "evsel.h" #include "pmu.h" #include "parse-events.h" #include "header.h" @@ -849,6 +850,7 @@ static struct perf_pmu *pmu_lookup(const char *name) INIT_LIST_HEAD(&pmu->format); INIT_LIST_HEAD(&pmu->aliases); + INIT_LIST_HEAD(&pmu->caps); list_splice(&format, &pmu->format); list_splice(&aliases, &pmu->aliases); list_add_tail(&pmu->list, &pmus); @@ -869,6 +871,17 @@ static struct perf_pmu *pmu_find(const char *name) return NULL; } +struct perf_pmu *perf_pmu__find_by_type(unsigned int type) +{ + struct perf_pmu *pmu; + + list_for_each_entry(pmu, &pmus, list) + if (pmu->type == type) + return pmu; + + return NULL; +} + struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu) { /* @@ -884,6 +897,25 @@ struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu) return NULL; } +struct perf_pmu *evsel__find_pmu(struct evsel *evsel) +{ + struct perf_pmu *pmu = NULL; + + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + if (pmu->type == evsel->core.attr.type) + break; + } + + return pmu; +} + +bool evsel__is_aux_event(struct evsel *evsel) +{ + struct perf_pmu *pmu = evsel__find_pmu(evsel); + + return pmu && pmu->auxtrace; +} + struct perf_pmu *perf_pmu__find(const char *name) { struct perf_pmu *pmu; @@ -1574,3 +1606,84 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, va_end(args); return ret; } + +static int perf_pmu__new_caps(struct list_head *list, char *name, char *value) +{ + struct perf_pmu_caps *caps = zalloc(sizeof(*caps)); + + if (!caps) + return -ENOMEM; + + caps->name = strdup(name); + if (!caps->name) + goto free_caps; + caps->value = strndup(value, strlen(value) - 1); + if (!caps->value) + goto free_name; + list_add_tail(&caps->list, list); + return 0; + +free_name: + zfree(&caps->name); +free_caps: + free(caps); + + return -ENOMEM; +} +
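perf_pmu__new_caps() fills the caps list that perf_pmu__caps_parse(), defined just below, populates from sysfs. A consumer might walk the parsed list like this (illustrative only, not part of the patch):

#include <stdio.h>
#include <linux/list.h>
#include "util/pmu.h"

/* Illustrative: print every capability the kernel exports for a PMU. */
static void pmu__print_caps(struct perf_pmu *pmu)
{
	struct perf_pmu_caps *caps;

	if (perf_pmu__caps_parse(pmu) <= 0)
		return;

	list_for_each_entry(caps, &pmu->caps, list)
		printf("%s/caps/%s: %s\n", pmu->name, caps->name, caps->value);
}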
+/* + * Reading/parsing the given pmu capabilities, which should be located at: + * /sys/bus/event_source/devices/<dev>/caps as sysfs group attributes. + * Return the number of capabilities + */ +int perf_pmu__caps_parse(struct perf_pmu *pmu) +{ + struct stat st; + char caps_path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + DIR *caps_dir; + struct dirent *evt_ent; + int nr_caps = 0; + + if (!sysfs) + return -1; + + snprintf(caps_path, PATH_MAX, + "%s" EVENT_SOURCE_DEVICE_PATH "%s/caps", sysfs, pmu->name); + + if (stat(caps_path, &st) < 0) + return 0; /* no error if caps does not exist */ + + caps_dir = opendir(caps_path); + if (!caps_dir) + return -EINVAL; + + while ((evt_ent = readdir(caps_dir)) != NULL) { + char path[PATH_MAX + NAME_MAX + 1]; + char *name = evt_ent->d_name; + char value[128]; + FILE *file; + + if (!strcmp(name, ".") || !strcmp(name, "..")) + continue; + + snprintf(path, sizeof(path), "%s/%s", caps_path, name); + + file = fopen(path, "r"); + if (!file) + continue; + + if (!fgets(value, sizeof(value), file) || + (perf_pmu__new_caps(&pmu->caps, name, value) < 0)) { + fclose(file); + continue; + } + + nr_caps++; + fclose(file); + } + + closedir(caps_dir); + + return nr_caps; +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 5fb3f16828df..cb6fbec50313 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -22,6 +22,12 @@ enum { struct perf_event_attr; +struct perf_pmu_caps { + char *name; + char *value; + struct list_head list; +}; + struct perf_pmu { char *name; __u32 type; @@ -33,6 +39,7 @@ struct perf_pmu { struct perf_cpu_map *cpus; struct list_head format; /* HEAD struct perf_pmu_format -> list */ struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ + struct list_head caps; /* HEAD struct perf_pmu_caps -> list */ struct list_head list; /* ELEM */ }; @@ -65,6 +72,7 @@ struct perf_pmu_alias { }; struct perf_pmu *perf_pmu__find(const char *name); +struct perf_pmu *perf_pmu__find_by_type(unsigned int type); int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, struct list_head *head_terms, struct parse_events_error *error); @@ -107,4 +115,6 @@ bool pmu_uncore_alias_match(const char *pmu_name, const char *name); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); +int perf_pmu__caps_parse(struct perf_pmu *pmu); + #endif /* __PMU_H */ diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 83212c65848b..75a9b1d62bba 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -801,7 +801,7 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel, static void pyrf_evsel__delete(struct pyrf_evsel *pevsel) { - perf_evsel__exit(&pevsel->evsel); + evsel__exit(&pevsel->evsel); Py_TYPE(pevsel)->tp_free((PyObject*)pevsel); } @@ -1044,7 +1044,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, pevent->evsel = evsel; - err = perf_evsel__parse_sample(evsel, event, &pevent->sample); + err = evsel__parse_sample(evsel, event, &pevent->sample); /* Consume the event only after we parsed it out. */ perf_mmap__consume(&md->core); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 7def66168503..a4cc11592f6b 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -10,161 +10,64 @@ #include <subcmd/parse-options.h> #include <perf/cpumap.h> #include "cloexec.h" +#include "util/perf_api_probe.h" #include "record.h" #include "../perf-sys.h" -typedef void (*setup_probe_fn_t)(struct evsel *evsel); - -static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) +/* + * evsel__config_leader_sampling() uses special rules for leader sampling.
+ * However, if the leader is an AUX area event, then assume the event to sample + * is the next event. + */ +static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evlist) { - struct evlist *evlist; - struct evsel *evsel; - unsigned long flags = perf_event_open_cloexec_flag(); - int err = -EAGAIN, fd; - static pid_t pid = -1; - - evlist = evlist__new(); - if (!evlist) - return -ENOMEM; - - if (parse_events(evlist, str, NULL)) - goto out_delete; - - evsel = evlist__first(evlist); + struct evsel *leader = evsel->leader; - while (1) { - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); - if (fd < 0) { - if (pid == -1 && errno == EACCES) { - pid = 0; - continue; - } - goto out_delete; + if (evsel__is_aux_event(leader)) { + evlist__for_each_entry(evlist, evsel) { + if (evsel->leader == leader && evsel != evsel->leader) + return evsel; } - break; - } - close(fd); - - fn(evsel); - - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); - if (fd < 0) { - if (errno == EINVAL) - err = -EINVAL; - goto out_delete; } - close(fd); - err = 0; - -out_delete: - evlist__delete(evlist); - return err; -} - -static bool perf_probe_api(setup_probe_fn_t fn) -{ - const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL}; - struct perf_cpu_map *cpus; - int cpu, ret, i = 0; - - cpus = perf_cpu_map__new(NULL); - if (!cpus) - return false; - cpu = cpus->map[0]; - perf_cpu_map__put(cpus); - - do { - ret = perf_do_probe_api(fn, cpu, try[i++]); - if (!ret) - return true; - } while (ret == -EAGAIN && try[i]); - return false; + return leader; } -static void perf_probe_sample_identifier(struct evsel *evsel) +static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist) { - evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER; -} - -static void perf_probe_comm_exec(struct evsel *evsel) -{ - evsel->core.attr.comm_exec = 1; -} - -static void perf_probe_context_switch(struct evsel *evsel) -{ - evsel->core.attr.context_switch = 1; -} - -bool perf_can_sample_identifier(void) -{ - return perf_probe_api(perf_probe_sample_identifier); -} + struct perf_event_attr *attr = &evsel->core.attr; + struct evsel *leader = evsel->leader; + struct evsel *read_sampler; -static bool perf_can_comm_exec(void) -{ - return perf_probe_api(perf_probe_comm_exec); -} + if (!leader->sample_read) + return; -bool perf_can_record_switch_events(void) -{ - return perf_probe_api(perf_probe_context_switch); -} + read_sampler = evsel__read_sampler(evsel, evlist); -bool perf_can_record_cpu_wide(void) -{ - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_CPU_CLOCK, - .exclude_kernel = 1, - }; - struct perf_cpu_map *cpus; - int cpu, fd; - - cpus = perf_cpu_map__new(NULL); - if (!cpus) - return false; - cpu = cpus->map[0]; - perf_cpu_map__put(cpus); + if (evsel == read_sampler) + return; - fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); - if (fd < 0) - return false; - close(fd); - - return true; -} - -/* - * Architectures are expected to know if AUX area sampling is supported by the - * hardware. Here we check for kernel support. - */ -bool perf_can_aux_sample(void) -{ - struct perf_event_attr attr = { - .size = sizeof(struct perf_event_attr), - .exclude_kernel = 1, - /* - * Non-zero value causes the kernel to calculate the effective - * attribute size up to that byte. 
- */ - .aux_sample_size = 1, - }; - int fd; - - fd = sys_perf_event_open(&attr, -1, 0, -1, 0); /* - * If the kernel attribute is big enough to contain aux_sample_size - * then we assume that it is supported. We are relying on the kernel to - * validate the attribute size before anything else that could be wrong. + * Disable sampling for all group members other than the leader in + * case the leader 'leads' the sampling, except when the leader is an + * AUX area event, in which case the 2nd event in the group is the one + * that 'leads' the sampling. */ - if (fd < 0 && errno == E2BIG) - return false; - if (fd >= 0) - close(fd); + attr->freq = 0; + attr->sample_freq = 0; + attr->sample_period = 0; + attr->write_backward = 0; - return true; + /* + * We don't get a sample for slave events, we make them when delivering + * the group leader sample. Set the slave event to follow the master + * sample_type to ease up reporting. + * An AUX area event also has sample_type requirements, so also include + * the sample type bits from the leader's sample_type to cover that + * case. + */ + attr->sample_type = read_sampler->core.attr.sample_type | + leader->core.attr.sample_type; } void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, @@ -188,11 +91,15 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, use_comm_exec = perf_can_comm_exec(); evlist__for_each_entry(evlist, evsel) { - perf_evsel__config(evsel, opts, callchain); + evsel__config(evsel, opts, callchain); if (evsel->tracking && use_comm_exec) evsel->core.attr.comm_exec = 1; } + /* Configure leader sampling here now that the sample type is known */ + evlist__for_each_entry(evlist, evsel) + evsel__config_leader_sampling(evsel, evlist); + if (opts->full_auxtrace) { /* * Need to be able to synthesize and parse selected events with @@ -215,7 +122,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, if (sample_id) { evlist__for_each_entry(evlist, evsel) - perf_evsel__set_sample_id(evsel, use_sample_identifier); + evsel__set_sample_id(evsel, use_sample_identifier); } perf_evlist__set_id_pos(evlist); diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 24316458be20..923565c3b155 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -68,6 +68,7 @@ struct record_opts { int affinity; int mmap_flush; unsigned int comp_level; + unsigned int nr_threads_synthesize; }; extern const char * const *record_usage; diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h index d4356030b504..f55ca07f3ca1 100644 --- a/tools/perf/util/s390-cpumcf-kernel.h +++ b/tools/perf/util/s390-cpumcf-kernel.h @@ -11,6 +11,7 @@ #define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */ #define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */ +#define PERF_EVENT_CPUM_SF_DIAG 0xBD000 /* Event: Combined-sampling */ struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ unsigned int def:16; /* 0-15 Data Entry Format */ diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 6785cd87aa4d..f8861998e5bd 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -1047,6 +1047,14 @@ static void s390_cpumsf_free(struct perf_session *session) free(sf); } +static bool +s390_cpumsf_evsel_is_auxtrace(struct perf_session *session __maybe_unused, + struct evsel *evsel) +{ + return evsel->core.attr.type == PERF_TYPE_RAW && + evsel->core.attr.config == PERF_EVENT_CPUM_SF_DIAG; +} + 
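arm-spe, intel_pt and s390 cpumsf all install this same evsel_is_auxtrace hook in this series, which lets the session layer ask whether an evsel belongs to the active AUX area decoder without knowing the backend. A sketch of the generic wrapper (the series adds an equivalent helper on the auxtrace side; treat this exact form as illustrative):

#include <stdbool.h>
#include "util/auxtrace.h"
#include "util/session.h"

/* Ask the active AUX area decoder, if any, whether it owns this evsel. */
static bool session__evsel_is_auxtrace(struct perf_session *session,
				       struct evsel *evsel)
{
	if (!session->auxtrace || !session->auxtrace->evsel_is_auxtrace)
		return false;

	return session->auxtrace->evsel_is_auxtrace(session, evsel);
}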
static int s390_cpumsf_get_type(const char *cpuid) { int ret, family = 0; @@ -1071,7 +1079,8 @@ static bool check_auxtrace_itrace(struct itrace_synth_opts *itops) itops->pwr_events || itops->errors || itops->dont_decode || itops->calls || itops->returns || itops->callchain || itops->thread_stack || - itops->last_branch; + itops->last_branch || itops->add_callchain || + itops->add_last_branch; if (!ison) return true; pr_err("Unsupported --itrace options specified\n"); @@ -1142,6 +1151,7 @@ int s390_cpumsf_process_auxtrace_info(union perf_event *event, sf->auxtrace.flush_events = s390_cpumsf_flush; sf->auxtrace.free_events = s390_cpumsf_free_events; sf->auxtrace.free = s390_cpumsf_free; + sf->auxtrace.evsel_is_auxtrace = s390_cpumsf_evsel_is_auxtrace; session->auxtrace = &sf->auxtrace; if (dump_trace) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 2c372cf5495e..739516fdf6e3 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -741,7 +741,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, if (!dict_sample) Py_FatalError("couldn't create Python dictionary"); - pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel))); + pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(evsel__name(evsel))); pydict_set_item_string_decref(dict, "attr", _PyBytes_FromStringAndSize((const char *)&evsel->core.attr, sizeof(evsel->core.attr))); pydict_set_item_string_decref(dict_sample, "pid", @@ -968,7 +968,7 @@ static int python_export_evsel(struct db_export *dbe, struct evsel *evsel) t = tuple_new(2); tuple_set_u64(t, 0, evsel->db_id); - tuple_set_string(t, 1, perf_evsel__name(evsel)); + tuple_set_string(t, 1, evsel__name(evsel)); call_object(tables->evsel_handler, t, "evsel_table"); @@ -1349,7 +1349,7 @@ static void get_handler_name(char *str, size_t size, { char *p = str; - scnprintf(str, size, "stat__%s", perf_evsel__name(evsel)); + scnprintf(str, size, "stat__%s", evsel__name(evsel)); while ((p = strchr(p, ':'))) { *p = '_'; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0b0bfe5bef17..c11d89e0ee55 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1059,7 +1059,7 @@ static void callchain__printf(struct evsel *evsel, unsigned int i; struct ip_callchain *callchain = sample->callchain; - if (perf_evsel__has_branch_callstack(evsel)) + if (evsel__has_branch_callstack(evsel)) callchain__lbr_callstack_printf(sample); printf("... 
FP chain: nr:%" PRIu64 "\n", callchain->nr); @@ -1243,8 +1243,8 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, if (evsel__has_callchain(evsel)) callchain__printf(evsel, sample); - if (sample_type & PERF_SAMPLE_BRANCH_STACK) - branch_stack__printf(sample, perf_evsel__has_branch_callstack(evsel)); + if (evsel__has_br_stack(evsel)) + branch_stack__printf(sample, evsel__has_branch_callstack(evsel)); if (sample_type & PERF_SAMPLE_REGS_USER) regs_user__printf(sample); @@ -1280,8 +1280,7 @@ static void dump_read(struct evsel *evsel, union perf_event *event) return; printf(": %d %d %s %" PRI_lu64 "\n", event->read.pid, event->read.tid, - perf_evsel__name(evsel), - event->read.value); + evsel__name(evsel), event->read.value); if (!evsel) return; diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c new file mode 100644 index 000000000000..1580a3cbec2d --- /dev/null +++ b/tools/perf/util/sideband_evlist.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "util/debug.h" +#include "util/evlist.h" +#include "util/evsel.h" +#include "util/mmap.h" +#include "util/perf_api_probe.h" +#include <perf/mmap.h> +#include <linux/perf_event.h> +#include <limits.h> +#include <pthread.h> +#include <sched.h> +#include <stdbool.h> + +int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, + evsel__sb_cb_t cb, void *data) +{ + struct evsel *evsel; + + if (!attr->sample_id_all) { + pr_warning("enabling sample_id_all for all side band events\n"); + attr->sample_id_all = 1; + } + + evsel = perf_evsel__new_idx(attr, evlist->core.nr_entries); + if (!evsel) + return -1; + + evsel->side_band.cb = cb; + evsel->side_band.data = data; + evlist__add(evlist, evsel); + return 0; +} +
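perf_evlist__add_sb_event() pairs with perf_evlist__start_sb_thread() and perf_evlist__stop_sb_thread() further below. A client wiring up, say, BPF sideband records would look roughly like this (the callback and attribute choice are illustrative, not taken from this patch):

#include <errno.h>
#include <linux/perf_event.h>
#include "util/evlist.h"
#include "util/target.h"

static void bpf_event__cb(union perf_event *event, void *data); /* user-supplied */

/* Illustrative client: open a dummy software event that carries BPF
 * sideband records and hand the evlist to the poll thread below. */
static int start_bpf_sideband(struct evlist **sb, struct target *target)
{
	struct perf_event_attr attr = {
		.type          = PERF_TYPE_SOFTWARE,
		.config        = PERF_COUNT_SW_DUMMY,
		.bpf_event     = 1,
		.sample_id_all = 1,
	};

	*sb = evlist__new();
	if (!*sb)
		return -ENOMEM;

	if (perf_evlist__add_sb_event(*sb, &attr, bpf_event__cb, NULL) ||
	    perf_evlist__start_sb_thread(*sb, target))
		return -1;

	return 0;
}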
+static void *perf_evlist__poll_thread(void *arg) +{ + struct evlist *evlist = arg; + bool draining = false; + int i, done = 0; + /* + * In order to read symbols from other namespaces perf needs to call + * setns(2). This isn't permitted if the fs_struct has multiple users. + * unshare(2) the fs so that we may continue to setns into namespaces + * that we're observing when, for instance, reading the build-ids at + * the end of a 'perf record' session. + */ + unshare(CLONE_FS); + + while (!done) { + bool got_data = false; + + if (evlist->thread.done) + draining = true; + + if (!draining) + evlist__poll(evlist, 1000); + + for (i = 0; i < evlist->core.nr_mmaps; i++) { + struct mmap *map = &evlist->mmap[i]; + union perf_event *event; + + if (perf_mmap__read_init(&map->core)) + continue; + while ((event = perf_mmap__read_event(&map->core)) != NULL) { + struct evsel *evsel = perf_evlist__event2evsel(evlist, event); + + if (evsel && evsel->side_band.cb) + evsel->side_band.cb(event, evsel->side_band.data); + else + pr_warning("cannot locate proper evsel for the side band event\n"); + + perf_mmap__consume(&map->core); + got_data = true; + } + perf_mmap__read_done(&map->core); + } + + if (draining && !got_data) + break; + } + return NULL; +} + +void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.sample_id_all = 1; + evsel->core.attr.watermark = 1; + evsel->core.attr.wakeup_watermark = 1; + evsel->side_band.cb = cb; + evsel->side_band.data = data; + } +} + +int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target) +{ + struct evsel *counter; + + if (!evlist) + return 0; + + if (perf_evlist__create_maps(evlist, target)) + goto out_delete_evlist; + + if (evlist->core.nr_entries > 1) { + bool can_sample_identifier = perf_can_sample_identifier(); + + evlist__for_each_entry(evlist, counter) + evsel__set_sample_id(counter, can_sample_identifier); + + perf_evlist__set_id_pos(evlist); + } + + evlist__for_each_entry(evlist, counter) { + if (evsel__open(counter, evlist->core.cpus, evlist->core.threads) < 0) + goto out_delete_evlist; + } + + if (evlist__mmap(evlist, UINT_MAX)) + goto out_delete_evlist; + + evlist__for_each_entry(evlist, counter) { + if (evsel__enable(counter)) + goto out_delete_evlist; + } + + evlist->thread.done = 0; + if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist)) + goto out_delete_evlist; + + return 0; + +out_delete_evlist: + evlist__delete(evlist); + evlist = NULL; + return -1; +} + +void perf_evlist__stop_sb_thread(struct evlist *evlist) +{ + if (!evlist) + return; + evlist->thread.done = 1; + pthread_join(evlist->thread.th, NULL); + evlist__delete(evlist); +} diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c index 3b791ef2cd50..20bacd5972ad 100644 --- a/tools/perf/util/smt.c +++ b/tools/perf/util/smt.c @@ -15,6 +15,9 @@ int smt_on(void) if (cached) return cached_result; + if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) > 0) + goto done; + ncpu = sysconf(_SC_NPROCESSORS_CONF); for (cpu = 0; cpu < ncpu; cpu++) { unsigned long long siblings; @@ -24,13 +27,13 @@ int smt_on(void) snprintf(fn, sizeof fn, "devices/system/cpu/cpu%d/topology/core_cpus", cpu); - if (access(fn, F_OK) == -1) { + if (sysfs__read_str(fn, &str, &strlen) < 0) { snprintf(fn, sizeof fn, "devices/system/cpu/cpu%d/topology/thread_siblings", cpu); + if (sysfs__read_str(fn, &str, &strlen) < 0) + continue; } - if (sysfs__read_str(fn, &str, &strlen) < 0) - continue; /* Entry is hex, but does not have 0x, so need custom parser */ siblings = strtoull(str, NULL, 16); free(str); @@ -42,6 +45,7 @@ int smt_on(void) } if (!cached) { cached_result = 0; +done: cached = true; } return cached_result; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index f14cc728c358..c1f8879f92cc 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -237,7 +237,7 @@ static int64_t
_sort__addr_cmp(u64 left_ip, u64 right_ip) return (int64_t)(right_ip - left_ip); } -static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r) +int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r) { if (!sym_l || !sym_r) return cmp_null(sym_l, sym_r); @@ -300,8 +300,14 @@ static int _hist_entry__sym_snprintf(struct map_symbol *ms, if (verbose > 0) { char o = map ? dso__symtab_origin(map->dso) : '!'; + u64 rip = ip; + + if (map && map->dso && map->dso->kernel + && map->dso->adjust_symbols) + rip = map->unmap_ip(map, ip); + ret += repsep_snprintf(bf, size, "%-#*llx %c ", - BITS_PER_LONG / 4 + 2, ip, o); + BITS_PER_LONG / 4 + 2, rip, o); } ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level); @@ -2354,7 +2360,7 @@ static struct evsel *find_evsel(struct evlist *evlist, char *event_name) evsel = evlist__first(evlist); while (--nr > 0) - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); return evsel; } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index cfa6ac6f7d06..66d39c4cfe2b 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -311,5 +311,7 @@ int64_t sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right); int64_t sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right); +int64_t +_sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r); char *hist_entry__srcline(struct hist_entry *he); #endif /* __PERF_SORT_H */ diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 9e757d18d713..3c6976f7574c 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -237,8 +237,6 @@ static bool valid_only_metric(const char *unit) if (!unit) return false; if (strstr(unit, "/sec") || - strstr(unit, "hz") || - strstr(unit, "Hz") || strstr(unit, "CPUs utilized")) return false; return true; @@ -248,7 +246,7 @@ static const char *fixunit(char *buf, struct evsel *evsel, const char *unit) { if (!strncmp(unit, "of all", 6)) { - snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), + snprintf(buf, 1024, "%s %s", evsel__name(evsel), unit); return buf; } @@ -335,7 +333,7 @@ static int first_shadow_cpu(struct perf_stat_config *config, if (config->aggr_mode == AGGR_GLOBAL) return 0; - for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { + for (i = 0; i < evsel__nr_cpus(evsel); i++) { int cpu2 = evsel__cpus(evsel)->map[i]; if (config->aggr_get_id(config, evlist->core.cpus, cpu2) == id) @@ -369,7 +367,7 @@ static void abs_printout(struct perf_stat_config *config, config->csv_output ? 0 : config->unit_width, evsel->unit, config->csv_sep); - fprintf(output, "%-*s", config->csv_output ? 0 : 25, perf_evsel__name(evsel)); + fprintf(output, "%-*s", config->csv_output ? 0 : 25, evsel__name(evsel)); print_cgroup(config, evsel); } @@ -463,8 +461,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, counter->unit, config->csv_sep); fprintf(config->output, "%*s", - config->csv_output ? 0 : -25, - perf_evsel__name(counter)); + config->csv_output ? 
0 : -25, evsel__name(counter)); print_cgroup(config, counter); @@ -510,7 +507,7 @@ static void aggr_update_shadow(struct perf_stat_config *config, id = config->aggr_map->map[s]; evlist__for_each_entry(evlist, counter) { val = 0; - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { s2 = config->aggr_get_id(config, evlist->core.cpus, cpu); if (s2 != id) continue; @@ -561,11 +558,11 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c alias = list_prepare_entry(counter, &(evlist->core.entries), core.node); list_for_each_entry_continue (alias, &evlist->core.entries, core.node) { - if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || + if (strcmp(evsel__name(alias), evsel__name(counter)) || alias->scale != counter->scale || alias->cgrp != counter->cgrp || strcmp(alias->unit, counter->unit) || - perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter) || + evsel__is_clock(alias) != evsel__is_clock(counter) || !strcmp(alias->pmu_name, counter->pmu_name)) break; alias->merged_stat = true; @@ -601,7 +598,7 @@ static void aggr_cb(struct perf_stat_config *config, struct aggr_data *ad = data; int cpu, s2; - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { struct perf_counts_values *counts; s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu); @@ -849,7 +846,7 @@ static void print_counter(struct perf_stat_config *config, double uval; int cpu; - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { struct aggr_data ad = { .cpu = cpu }; if (!collect_data(config, counter, counter_cb, &ad)) @@ -1150,7 +1147,7 @@ static void print_percore_thread(struct perf_stat_config *config, int s, s2, id; bool first = true; - for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) { + for (int i = 0; i < evsel__nr_cpus(counter); i++) { s2 = config->aggr_get_id(config, evsel__cpus(counter), i); for (s = 0; s < config->aggr_map->nr; s++) { id = config->aggr_map->map[s]; diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 03ecb8cd0eec..129b8c5f2538 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -216,9 +216,9 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, count *= counter->scale; - if (perf_evsel__is_clock(counter)) + if (evsel__is_clock(counter)) update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns); - else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) + else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); @@ -241,25 +241,25 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu, count); - else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) + else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, ctx, cpu, count); - else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) + else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, ctx, cpu, count); - else if (perf_evsel__match(counter, HARDWARE, 
HW_BRANCH_INSTRUCTIONS)) + else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); - else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) + else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) + else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) + else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) + else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL)) update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) + else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) + else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); else if (perf_stat_evsel__is(counter, SMI_NUM)) update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); @@ -336,7 +336,7 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) metric_events = counter->metric_events; if (!metric_events) { if (expr__find_other(counter->metric_expr, counter->name, - &metric_names, &num_metric_names) < 0) + &metric_names, &num_metric_names, 1) < 0) continue; metric_events = calloc(sizeof(struct evsel *), @@ -723,13 +723,14 @@ static void generic_metric(struct perf_stat_config *config, char *name, const char *metric_name, const char *metric_unit, + int runtime, double avg, int cpu, struct perf_stat_output_ctx *out, struct runtime_stat *st) { print_metric_t print_metric = out->print_metric; - struct parse_ctx pctx; + struct expr_parse_ctx pctx; double ratio, scale; int i; void *ctxp = out->ctx; @@ -777,7 +778,7 @@ static void generic_metric(struct perf_stat_config *config, } if (!metric_events[i]) { - if (expr__parse(&ratio, &pctx, metric_expr) == 0) { + if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) { char *unit; char metric_bf[64]; @@ -786,9 +787,13 @@ static void generic_metric(struct perf_stat_config *config, &unit, &scale) >= 0) { ratio *= scale; } - - scnprintf(metric_bf, sizeof(metric_bf), + if (strstr(metric_expr, "?")) + scnprintf(metric_bf, sizeof(metric_bf), + "%s %s_%d", unit, metric_name, runtime); + else + scnprintf(metric_bf, sizeof(metric_bf), "%s %s", unit, metric_name); + print_metric(config, ctxp, NULL, "%8.1f", metric_bf, ratio); } else { @@ -828,7 +833,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct metric_event *me; int num = 1; - if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { + if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) { @@ -853,7 +858,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, "stalled cycles per insn", ratio); } - } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { + } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) print_branch_misses(config, cpu, evsel, avg, out, st); else @@ -908,7 +913,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, 
print_ll_cache_misses(config, cpu, evsel, avg, out, st); else print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0); - } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { + } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); if (total) @@ -919,11 +924,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, "of all cache refs", ratio); else print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); - } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { + } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st); - } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { + } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { print_stalled_cycles_backend(config, cpu, evsel, avg, out, st); - } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { + } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) { @@ -974,7 +979,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ratio = total / avg; print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio); - } else if (perf_evsel__is_clock(evsel)) { + } else if (evsel__is_clock(evsel)) { if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", avg / (ratio * evsel->scale)); @@ -1022,7 +1027,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name, - evsel->metric_name, NULL, avg, cpu, out, st); + evsel->metric_name, NULL, 1, avg, cpu, out, st); } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { char unit = 'M'; char unit_buf[10]; @@ -1051,7 +1056,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, out->new_line(config, ctxp); generic_metric(config, mexp->metric_expr, mexp->metric_events, evsel->name, mexp->metric_name, - mexp->metric_unit, avg, cpu, out, st); + mexp->metric_unit, mexp->runtime, avg, cpu, out, st); } } if (num == 0) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 5f26137b8d60..774468341851 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -108,7 +108,7 @@ static void perf_stat_evsel_id_init(struct evsel *evsel) /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */ for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) { - if (!strcmp(perf_evsel__name(evsel), id_str[i])) { + if (!strcmp(evsel__name(evsel), id_str[i])) { ps->id = i; break; } @@ -173,7 +173,7 @@ static void perf_evsel__reset_prev_raw_counts(struct evsel *evsel) static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) { - int ncpus = perf_evsel__nr_cpus(evsel); + int ncpus = evsel__nr_cpus(evsel); int nthreads = perf_thread_map__nr(evsel->core.threads); if (perf_evsel__alloc_stat_priv(evsel) < 0 || @@ -302,7 +302,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, case AGGR_NODE: case AGGR_NONE: if (!evsel->snapshot) - perf_evsel__compute_deltas(evsel, cpu, thread, count); + evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, config->scale, NULL); if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) { perf_stat__update_shadow_stats(evsel, count->val, @@ 
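/*
 * process_counter_values() below relies on perf_counts_values__scale(); a
 * self-contained sketch of the usual multiplexing estimate, assuming the
 * classic val * enabled / running scale-up (helper name and typedef are
 * illustrative, not perf API).
 */
typedef unsigned long long u64_t;

static u64_t scaled_count(u64_t val, u64_t ena, u64_t run)
{
	if (run == 0)		/* never scheduled: no usable count */
		return 0;
	if (run < ena)		/* multiplexed: extrapolate */
		return (u64_t)((double)val * ena / run);
	return val;		/* ran the whole time: exact */
}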
-334,7 +334,7 @@ static int process_counter_maps(struct perf_stat_config *config, struct evsel *counter) { int nthreads = perf_thread_map__nr(counter->core.threads); - int ncpus = perf_evsel__nr_cpus(counter); + int ncpus = evsel__nr_cpus(counter); int cpu, thread; if (counter->core.system_wide) @@ -368,8 +368,10 @@ int perf_stat_process_counter(struct perf_stat_config *config, * interval mode, otherwise overall avg running * averages will be shown for each interval. */ - if (config->interval) - init_stats(ps->res_stats); + if (config->interval) { + for (i = 0; i < 3; i++) + init_stats(&ps->res_stats[i]); + } if (counter->per_pkg) zero_per_pkg(counter); @@ -382,7 +384,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, return 0; if (!counter->snapshot) - perf_evsel__compute_deltas(counter, -1, -1, aggr); + evsel__compute_deltas(counter, -1, -1, aggr); perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); for (i = 0; i < 3; i++) @@ -390,7 +392,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, if (verbose > 0) { fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", - perf_evsel__name(counter), count[0], count[1], count[2]); + evsel__name(counter), count[0], count[1], count[2]); } /* @@ -507,7 +509,7 @@ int create_perf_stat_counter(struct evsel *evsel, * either manually by us or by kernel via enable_on_exec * set later. */ - if (perf_evsel__is_group_leader(evsel)) { + if (evsel__is_group_leader(evsel)) { attr->disabled = 1; /* @@ -519,7 +521,7 @@ int create_perf_stat_counter(struct evsel *evsel, } if (target__has_cpu(target) && !target__has_per_thread(target)) - return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu); + return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu); - return perf_evsel__open_per_thread(evsel, evsel->core.threads); + return evsel__open_per_thread(evsel, evsel->core.threads); } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 26bc6a0096ce..381da6b39f89 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -566,6 +566,20 @@ void dso__sort_by_name(struct dso *dso) return symbols__sort_by_name(&dso->symbol_names, &dso->symbols); } +/* + * While we find nice hex chars, build a long_val. + * Return number of chars processed. 
+ */ +static int hex2u64(const char *ptr, u64 *long_val) +{ + char *p; + + *long_val = strtoull(ptr, &p, 16); + + return p - ptr; +} + + int modules__parse(const char *filename, void *arg, int (*process_module)(void *arg, const char *name, u64 start, u64 size)) @@ -1544,6 +1558,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, return true; case DSO_BINARY_TYPE__BPF_PROG_INFO: + case DSO_BINARY_TYPE__BPF_IMAGE: case DSO_BINARY_TYPE__NOT_FOUND: default: return false; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index a661b122d9d8..89b390623b63 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -37,6 +37,7 @@ #include <string.h> #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ #include <api/fs/fs.h> +#include <api/io.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> @@ -71,7 +72,6 @@ int perf_tool__process_synth_event(struct perf_tool *tool, static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, pid_t *tgid, pid_t *ppid) { - char filename[PATH_MAX]; char bf[4096]; int fd; size_t size = 0; @@ -81,11 +81,11 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, *tgid = -1; *ppid = -1; - snprintf(filename, sizeof(filename), "/proc/%d/status", pid); + snprintf(bf, sizeof(bf), "/proc/%d/status", pid); - fd = open(filename, O_RDONLY); + fd = open(bf, O_RDONLY); if (fd < 0) { - pr_debug("couldn't open %s\n", filename); + pr_debug("couldn't open %s\n", bf); return -1; } @@ -274,6 +274,79 @@ static int perf_event__synthesize_fork(struct perf_tool *tool, return 0; } +static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end, + u32 *prot, u32 *flags, __u64 *offset, + u32 *maj, u32 *min, + __u64 *inode, + ssize_t pathname_size, char *pathname) +{ + __u64 temp; + int ch; + char *start_pathname = pathname; + + if (io__get_hex(io, start) != '-') + return false; + if (io__get_hex(io, end) != ' ') + return false; + + /* map protection and flags bits */ + *prot = 0; + ch = io__get_char(io); + if (ch == 'r') + *prot |= PROT_READ; + else if (ch != '-') + return false; + ch = io__get_char(io); + if (ch == 'w') + *prot |= PROT_WRITE; + else if (ch != '-') + return false; + ch = io__get_char(io); + if (ch == 'x') + *prot |= PROT_EXEC; + else if (ch != '-') + return false; + ch = io__get_char(io); + if (ch == 's') + *flags = MAP_SHARED; + else if (ch == 'p') + *flags = MAP_PRIVATE; + else + return false; + if (io__get_char(io) != ' ') + return false; + + if (io__get_hex(io, offset) != ' ') + return false; + + if (io__get_hex(io, &temp) != ':') + return false; + *maj = temp; + if (io__get_hex(io, &temp) != ' ') + return false; + *min = temp; + + ch = io__get_dec(io, inode); + if (ch != ' ') { + *pathname = '\0'; + return ch == '\n'; + } + do { + ch = io__get_char(io); + } while (ch == ' '); + while (true) { + if (ch < 0) + return false; + if (ch == '\0' || ch == '\n' || + (pathname + 1 - start_pathname) >= pathname_size) { + *pathname = '\0'; + return true; + } + *pathname++ = ch; + ch = io__get_char(io); + } +} + int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, @@ -281,9 +354,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, struct machine *machine, bool mmap_data) { - char filename[PATH_MAX]; - FILE *fp; unsigned long long t; + char bf[BUFSIZ]; + struct io io; bool truncation = false; unsigned long long timeout = 
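/*
 * read_proc_maps_line() above leans on the io__get_hex() contract of
 * returning the first non-hex character, which lets the caller check the
 * expected delimiter in one step. A stand-in over stdio with the same
 * contract (hypothetical helper, for illustration only):
 */
#include <stdio.h>

static int get_hex(FILE *fp, unsigned long long *val)
{
	int ch;

	*val = 0;
	for (;;) {
		ch = fgetc(fp);
		if (ch >= '0' && ch <= '9')
			*val = *val * 16 + ch - '0';
		else if (ch >= 'a' && ch <= 'f')
			*val = *val * 16 + ch - 'a' + 10;
		else if (ch >= 'A' && ch <= 'F')
			*val = *val * 16 + ch - 'A' + 10;
		else
			return ch;	/* delimiter, e.g. '-' or ' ' */
	}
}

/* Parsing "00400000-0040c000 ...": get_hex() == '-', then get_hex() == ' '. */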
proc_map_timeout * 1000000ULL; int rc = 0; @@ -293,59 +366,52 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, if (machine__is_default_guest(machine)) return 0; - snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps", - machine->root_dir, pid, pid); + snprintf(bf, sizeof(bf), "%s/proc/%d/task/%d/maps", + machine->root_dir, pid, pid); - fp = fopen(filename, "r"); - if (fp == NULL) { + io.fd = open(bf, O_RDONLY, 0); + if (io.fd < 0) { /* * We raced with a task exiting - just return: */ - pr_debug("couldn't open %s\n", filename); + pr_debug("couldn't open %s\n", bf); return -1; } + io__init(&io, io.fd, bf, sizeof(bf)); event->header.type = PERF_RECORD_MMAP2; t = rdclock(); - while (1) { - char bf[BUFSIZ]; - char prot[5]; - char execname[PATH_MAX]; - char anonstr[] = "//anon"; - unsigned int ino; + while (!io.eof) { + static const char anonstr[] = "//anon"; size_t size; - ssize_t n; - if (fgets(bf, sizeof(bf), fp) == NULL) - break; + /* ensure null termination since stack will be reused. */ + event->mmap2.filename[0] = '\0'; + + /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ + if (!read_proc_maps_line(&io, + &event->mmap2.start, + &event->mmap2.len, + &event->mmap2.prot, + &event->mmap2.flags, + &event->mmap2.pgoff, + &event->mmap2.maj, + &event->mmap2.min, + &event->mmap2.ino, + sizeof(event->mmap2.filename), + event->mmap2.filename)) + continue; if ((rdclock() - t) > timeout) { - pr_warning("Reading %s time out. " + pr_warning("Reading %s/proc/%d/task/%d/maps time out. " "You may want to increase " "the time limit by --proc-map-timeout\n", - filename); + machine->root_dir, pid, pid); truncation = true; goto out; } - /* ensure null termination since stack will be reused. */ - strcpy(execname, ""); - - /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = sscanf(bf, "%"PRI_lx64"-%"PRI_lx64" %s %"PRI_lx64" %x:%x %u %[^\n]\n", - &event->mmap2.start, &event->mmap2.len, prot, - &event->mmap2.pgoff, &event->mmap2.maj, - &event->mmap2.min, - &ino, execname); - - /* - * Anon maps don't have the execname. 
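/*
 * The timeout test above compares rdclock() deltas in nanoseconds against
 * proc_map_timeout, which is configured in milliseconds -- hence the
 * 1000000ULL multiplier. perf's rdclock() is essentially this monotonic
 * clock read:
 */
#include <time.h>

static unsigned long long now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}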
- */ - if (n < 7) - continue; - - event->mmap2.ino = (u64)ino; event->mmap2.ino_generation = 0; /* @@ -356,23 +422,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, else event->header.misc = PERF_RECORD_MISC_GUEST_USER; - /* map protection and flags bits */ - event->mmap2.prot = 0; - event->mmap2.flags = 0; - if (prot[0] == 'r') - event->mmap2.prot |= PROT_READ; - if (prot[1] == 'w') - event->mmap2.prot |= PROT_WRITE; - if (prot[2] == 'x') - event->mmap2.prot |= PROT_EXEC; - - if (prot[3] == 's') - event->mmap2.flags |= MAP_SHARED; - else - event->mmap2.flags |= MAP_PRIVATE; - - if (prot[2] != 'x') { - if (!mmap_data || prot[0] != 'r') + if ((event->mmap2.prot & PROT_EXEC) == 0) { + if (!mmap_data || (event->mmap2.prot & PROT_READ) == 0) continue; event->header.misc |= PERF_RECORD_MISC_MMAP_DATA; @@ -382,17 +433,17 @@ out: if (truncation) event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT; - if (!strcmp(execname, "")) - strcpy(execname, anonstr); + if (!strcmp(event->mmap2.filename, "")) + strcpy(event->mmap2.filename, anonstr); if (hugetlbfs_mnt_len && - !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) { - strcpy(execname, anonstr); + !strncmp(event->mmap2.filename, hugetlbfs_mnt, + hugetlbfs_mnt_len)) { + strcpy(event->mmap2.filename, anonstr); event->mmap2.flags |= MAP_HUGETLB; } - size = strlen(execname) + 1; - memcpy(event->mmap2.filename, execname, size); + size = strlen(event->mmap2.filename) + 1; size = PERF_ALIGN(size, sizeof(u64)); event->mmap2.len -= event->mmap.start; event->mmap2.header.size = (sizeof(event->mmap2) - @@ -411,7 +462,7 @@ out: break; } - fclose(fp); + close(io.fd); return rc; } @@ -1130,7 +1181,7 @@ void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, struct perf synthesize_mask((struct perf_record_record_cpu_map *)data->data, map, max); default: break; - }; + } } static struct perf_record_cpu_map *cpu_map_event__new(struct perf_cpu_map *map) diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 0885967d5bc3..1b992bbba4e8 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -80,6 +80,10 @@ struct thread_stack_entry { * @comm: current comm * @arr_sz: size of array if this is the first element of an array * @rstate: used to detect retpolines + * @br_stack_rb: branch stack (ring buffer) + * @br_stack_sz: maximum branch stack size + * @br_stack_pos: current position in @br_stack_rb + * @mispred_all: mark all branches as mispredicted */ struct thread_stack { struct thread_stack_entry *stack; @@ -95,6 +99,10 @@ struct thread_stack { struct comm *comm; unsigned int arr_sz; enum retpoline_state_t rstate; + struct branch_stack *br_stack_rb; + unsigned int br_stack_sz; + unsigned int br_stack_pos; + bool mispred_all; }; /* @@ -126,13 +134,26 @@ static int thread_stack__grow(struct thread_stack *ts) } static int thread_stack__init(struct thread_stack *ts, struct thread *thread, - struct call_return_processor *crp) + struct call_return_processor *crp, + bool callstack, unsigned int br_stack_sz) { int err; - err = thread_stack__grow(ts); - if (err) - return err; + if (callstack) { + err = thread_stack__grow(ts); + if (err) + return err; + } + + if (br_stack_sz) { + size_t sz = sizeof(struct branch_stack); + + sz += br_stack_sz * sizeof(struct branch_entry); + ts->br_stack_rb = zalloc(sz); + if (!ts->br_stack_rb) + return -ENOMEM; + ts->br_stack_sz = br_stack_sz; + } if (thread->maps && thread->maps->machine) { struct machine *machine = thread->maps->machine; @@ 
-150,7 +171,9 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread, } static struct thread_stack *thread_stack__new(struct thread *thread, int cpu, - struct call_return_processor *crp) + struct call_return_processor *crp, + bool callstack, + unsigned int br_stack_sz) { struct thread_stack *ts = thread->ts, *new_ts; unsigned int old_sz = ts ? ts->arr_sz : 0; @@ -176,7 +199,7 @@ static struct thread_stack *thread_stack__new(struct thread *thread, int cpu, ts += cpu; if (!ts->stack && - thread_stack__init(ts, thread, crp)) + thread_stack__init(ts, thread, crp, callstack, br_stack_sz)) return NULL; return ts; @@ -319,6 +342,9 @@ static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts) if (!crp) { ts->cnt = 0; + ts->br_stack_pos = 0; + if (ts->br_stack_rb) + ts->br_stack_rb->nr = 0; return 0; } @@ -353,8 +379,33 @@ int thread_stack__flush(struct thread *thread) return err; } +static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags, + u64 from_ip, u64 to_ip) +{ + struct branch_stack *bs = ts->br_stack_rb; + struct branch_entry *be; + + if (!ts->br_stack_pos) + ts->br_stack_pos = ts->br_stack_sz; + + ts->br_stack_pos -= 1; + + be = &bs->entries[ts->br_stack_pos]; + be->from = from_ip; + be->to = to_ip; + be->flags.value = 0; + be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT); + be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX); + /* No support for mispredict */ + be->flags.mispred = ts->mispred_all; + + if (bs->nr < ts->br_stack_sz) + bs->nr += 1; +} + int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, - u64 to_ip, u16 insn_len, u64 trace_nr) + u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack, + unsigned int br_stack_sz, bool mispred_all) { struct thread_stack *ts = thread__stack(thread, cpu); @@ -362,12 +413,13 @@ int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, return -EINVAL; if (!ts) { - ts = thread_stack__new(thread, cpu, NULL); + ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz); if (!ts) { pr_warning("Out of memory: no thread stack\n"); return -ENOMEM; } ts->trace_nr = trace_nr; + ts->mispred_all = mispred_all; } /* @@ -381,8 +433,14 @@ int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, ts->trace_nr = trace_nr; } - /* Stop here if thread_stack__process() is in use */ - if (ts->crp) + if (br_stack_sz) + thread_stack__update_br_stack(ts, flags, from_ip, to_ip); + + /* + * Stop here if thread_stack__process() is in use, or not recording call + * stack. + */ + if (ts->crp || !callstack) return 0; if (flags & PERF_IP_FLAG_CALL) { @@ -430,6 +488,7 @@ static void __thread_stack__free(struct thread *thread, struct thread_stack *ts) { __thread_stack__flush(thread, ts); zfree(&ts->stack); + zfree(&ts->br_stack_rb); } static void thread_stack__reset(struct thread *thread, struct thread_stack *ts) @@ -497,6 +556,199 @@ void thread_stack__sample(struct thread *thread, int cpu, chain->nr = i; } +/* + * Hardware sample records, created some time after the event occurred, need to + * have subsequent addresses removed from the call chain. 
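/*
 * A compact model (illustrative types, not the patch itself) of the ring
 * buffer discipline thread_stack__update_br_stack() implements above: the
 * newest entry is written at a descending position so the buffer stays
 * ordered newest-first, and nr saturates once the ring is full.
 */
struct br_ring {
	unsigned int sz;	/* capacity */
	unsigned int pos;	/* index of newest entry */
	unsigned int nr;	/* valid entries, saturates at sz */
	unsigned long long *ent;
};

static void br_ring__add(struct br_ring *r, unsigned long long v)
{
	if (!r->pos)
		r->pos = r->sz;	/* wrap backwards */
	r->ent[--r->pos] = v;
	if (r->nr < r->sz)
		r->nr += 1;
}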
+ */ +void thread_stack__sample_late(struct thread *thread, int cpu, + struct ip_callchain *chain, size_t sz, + u64 sample_ip, u64 kernel_start) +{ + struct thread_stack *ts = thread__stack(thread, cpu); + u64 sample_context = callchain_context(sample_ip, kernel_start); + u64 last_context, context, ip; + size_t nr = 0, j; + + if (sz < 2) { + chain->nr = 0; + return; + } + + if (!ts) + goto out; + + /* + * When tracing kernel space, kernel addresses occur at the top of the + * call chain after the event occurred but before tracing stopped. + * Skip them. + */ + for (j = 1; j <= ts->cnt; j++) { + ip = ts->stack[ts->cnt - j].ret_addr; + context = callchain_context(ip, kernel_start); + if (context == PERF_CONTEXT_USER || + (context == sample_context && ip == sample_ip)) + break; + } + + last_context = sample_ip; /* Use sample_ip as an invalid context */ + + for (; nr < sz && j <= ts->cnt; nr++, j++) { + ip = ts->stack[ts->cnt - j].ret_addr; + context = callchain_context(ip, kernel_start); + if (context != last_context) { + if (nr >= sz - 1) + break; + chain->ips[nr++] = context; + last_context = context; + } + chain->ips[nr] = ip; + } +out: + if (nr) { + chain->nr = nr; + } else { + chain->ips[0] = sample_context; + chain->ips[1] = sample_ip; + chain->nr = 2; + } +} + +void thread_stack__br_sample(struct thread *thread, int cpu, + struct branch_stack *dst, unsigned int sz) +{ + struct thread_stack *ts = thread__stack(thread, cpu); + const size_t bsz = sizeof(struct branch_entry); + struct branch_stack *src; + struct branch_entry *be; + unsigned int nr; + + dst->nr = 0; + + if (!ts) + return; + + src = ts->br_stack_rb; + if (!src->nr) + return; + + dst->nr = min((unsigned int)src->nr, sz); + + be = &dst->entries[0]; + nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr); + memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr); + + if (src->nr >= ts->br_stack_sz) { + sz -= nr; + be = &dst->entries[nr]; + nr = min(ts->br_stack_pos, sz); + memcpy(be, &src->entries[0], bsz * ts->br_stack_pos); + } +} + +/* Start of user space branch entries */ +static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start) +{ + if (!*start) + *start = be->to && be->to < kernel_start; + + return *start; +} + +/* + * Start of branch entries after the ip fell in between 2 branches, or user + * space branch entries. + */ +static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start, + bool *start, struct branch_entry *nb) +{ + if (!*start) { + *start = (nb && sample_ip >= be->to && sample_ip <= nb->from) || + be->from < kernel_start || + (be->to && be->to < kernel_start); + } + + return *start; +} + +/* + * Hardware sample records, created some time after the event occurred, need to + * have subsequent addresses removed from the branch stack. + */ +void thread_stack__br_sample_late(struct thread *thread, int cpu, + struct branch_stack *dst, unsigned int sz, + u64 ip, u64 kernel_start) +{ + struct thread_stack *ts = thread__stack(thread, cpu); + struct branch_entry *d, *s, *spos, *ssz; + struct branch_stack *src; + unsigned int nr = 0; + bool start = false; + + dst->nr = 0; + + if (!ts) + return; + + src = ts->br_stack_rb; + if (!src->nr) + return; + + spos = &src->entries[ts->br_stack_pos]; + ssz = &src->entries[ts->br_stack_sz]; + + d = &dst->entries[0]; + s = spos; + + if (ip < kernel_start) { + /* + * User space sample: start copying branch entries when the + * branch is in user space. 
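/*
 * The copy-out counterpart, as in thread_stack__br_sample() above: because
 * entries are stored newest-first starting at pos, draining the ring needs at
 * most two copies -- pos..sz-1, then (only once the ring has wrapped)
 * 0..pos-1. This sketch bounds both copies by the destination size out of
 * caution; types are the stand-ins from the previous sketch.
 */
#include <string.h>

static unsigned int br_ring__read(const struct br_ring *r,
				  unsigned long long *dst, unsigned int sz)
{
	unsigned int n, n1;

	n = r->nr < sz ? r->nr : sz;			/* entries to emit */
	n1 = r->sz - r->pos < n ? r->sz - r->pos : n;	/* first segment */
	memcpy(dst, &r->ent[r->pos], n1 * sizeof(*dst));
	if (r->nr >= r->sz && n > n1)			/* wrapped segment */
		memcpy(dst + n1, &r->ent[0], (n - n1) * sizeof(*dst));
	return n;
}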
+ */ + for (s = spos; s < ssz && nr < sz; s++) { + if (us_start(s, kernel_start, &start)) { + *d++ = *s; + nr += 1; + } + } + + if (src->nr >= ts->br_stack_sz) { + for (s = &src->entries[0]; s < spos && nr < sz; s++) { + if (us_start(s, kernel_start, &start)) { + *d++ = *s; + nr += 1; + } + } + } + } else { + struct branch_entry *nb = NULL; + + /* + * Kernel space sample: start copying branch entries when the ip + * falls in between 2 branches (or the branch is in user space + * because then the start must have been missed). + */ + for (s = spos; s < ssz && nr < sz; s++) { + if (ks_start(s, ip, kernel_start, &start, nb)) { + *d++ = *s; + nr += 1; + } + nb = s; + } + + if (src->nr >= ts->br_stack_sz) { + for (s = &src->entries[0]; s < spos && nr < sz; s++) { + if (ks_start(s, ip, kernel_start, &start, nb)) { + *d++ = *s; + nr += 1; + } + nb = s; + } + } + } + + dst->nr = nr; +} + struct call_return_processor * call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data), void *data) @@ -864,7 +1116,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm, } if (!ts) { - ts = thread_stack__new(thread, sample->cpu, crp); + ts = thread_stack__new(thread, sample->cpu, crp, true, 0); if (!ts) return -ENOMEM; ts->comm = comm; diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index e1ec5a58f1b2..3bc47a42af8e 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -81,10 +81,19 @@ struct call_return_processor { }; int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, - u64 to_ip, u16 insn_len, u64 trace_nr); + u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack, + unsigned int br_stack_sz, bool mispred_all); void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr); void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain, size_t sz, u64 ip, u64 kernel_start); +void thread_stack__sample_late(struct thread *thread, int cpu, + struct ip_callchain *chain, size_t sz, u64 ip, + u64 kernel_start); +void thread_stack__br_sample(struct thread *thread, int cpu, + struct branch_stack *dst, unsigned int sz); +void thread_stack__br_sample_late(struct thread *thread, int cpu, + struct branch_stack *dst, unsigned int sz, + u64 sample_ip, u64 kernel_start); int thread_stack__flush(struct thread *thread); void thread_stack__free(struct thread *thread); size_t thread_stack__depth(struct thread *thread, int cpu); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 28b719388028..665e5c0618ed 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -47,6 +47,7 @@ struct thread *thread__new(pid_t pid, pid_t tid) thread->tid = tid; thread->ppid = -1; thread->cpu = -1; + thread->lbr_stitch_enable = false; INIT_LIST_HEAD(&thread->namespaces_list); INIT_LIST_HEAD(&thread->comm_list); init_rwsem(&thread->namespaces_lock); @@ -110,6 +111,7 @@ void thread__delete(struct thread *thread) exit_rwsem(&thread->namespaces_lock); exit_rwsem(&thread->comm_lock); + thread__free_stitch_list(thread); free(thread); } @@ -452,3 +454,25 @@ int thread__memcpy(struct thread *thread, struct machine *machine, return dso__data_read_offset(al.map->dso, machine, offset, buf, len); } + +void thread__free_stitch_list(struct thread *thread) +{ + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + struct stitch_list *pos, *tmp; + + if (!lbr_stitch) + return; + + list_for_each_entry_safe(pos, tmp, &lbr_stitch->lists, node) { + 
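/*
 * thread__free_stitch_list() below walks both stitch lists with the _safe
 * iterator because each node is freed mid-walk. The same pattern on a plain
 * singly-linked list (stand-in types, for illustration):
 */
#include <stdlib.h>

struct snode {
	struct snode *next;
};

static void snode__free_all(struct snode **head)
{
	struct snode *pos = *head, *tmp;

	while (pos) {
		tmp = pos->next;	/* grab next before freeing pos */
		free(pos);
		pos = tmp;
	}
	*head = NULL;
}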
list_del_init(&pos->node); + free(pos); + } + + list_for_each_entry_safe(pos, tmp, &lbr_stitch->free_lists, node) { + list_del_init(&pos->node); + free(pos); + } + + zfree(&lbr_stitch->prev_lbr_cursor); + zfree(&thread->lbr_stitch); +} diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 20b96b5d1f15..b066fb30d203 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -13,6 +13,8 @@ #include <strlist.h> #include <intlist.h> #include "rwsem.h" +#include "event.h" +#include "callchain.h" struct addr_location; struct map; @@ -20,6 +22,13 @@ struct perf_record_namespaces; struct thread_stack; struct unwind_libunwind_ops; +struct lbr_stitch { + struct list_head lists; + struct list_head free_lists; + struct perf_sample prev_sample; + struct callchain_cursor_node *prev_lbr_cursor; +}; + struct thread { union { struct rb_node rb_node; @@ -46,6 +55,10 @@ struct thread { struct srccode_state srccode_state; bool filter; int filter_entry_depth; + + /* LBR call stack stitch */ + bool lbr_stitch_enable; + struct lbr_stitch *lbr_stitch; }; struct machine; @@ -142,4 +155,6 @@ static inline bool thread__is_filtered(struct thread *thread) return false; } +void thread__free_stitch_list(struct thread *thread); + #endif /* __PERF_THREAD_H */ diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 3dce2de9d005..27945eeb0cb5 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -77,7 +77,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) opts->freq ? "Hz" : ""); } - ret += SNPRINTF(bf + ret, size - ret, "%s", perf_evsel__name(top->sym_evsel)); + ret += SNPRINTF(bf + ret, size - ret, "%s", evsel__name(top->sym_evsel)); ret += SNPRINTF(bf + ret, size - ret, "], "); diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index f117d4f4821e..ff8391208ecd 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -18,7 +18,7 @@ struct perf_session; struct perf_top { struct perf_tool tool; - struct evlist *evlist; + struct evlist *evlist, *sb_evlist; struct record_opts record_opts; struct annotation_options annotation_opts; struct evswitch evswitch; @@ -36,6 +36,7 @@ struct perf_top { bool use_tui, use_stdio; bool vmlinux_warned; bool dump_symtab; + bool stitch_lbr; struct hist_entry *sym_filter_entry; struct evsel *sym_evsel; struct perf_session *session; diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 8593d3c200c6..f507dff713c9 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -75,7 +75,7 @@ static void skip(int size) r = size > BUFSIZ ? BUFSIZ : size; do_read(buf, r); size -= r; - }; + } } static unsigned int read4(struct tep_handle *pevent) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index d707c9624dd9..37a9492edb3e 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -290,6 +290,7 @@ int perf_event_paranoid(void) bool perf_event_paranoid_check(int max_level) { return perf_cap__capable(CAP_SYS_ADMIN) || + perf_cap__capable(CAP_PERFMON) || perf_event_paranoid() <= max_level; }
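/*
 * With the util.c hunk above, a perf_event_paranoid check passes when the
 * process holds either CAP_SYS_ADMIN or the newer, narrower CAP_PERFMON
 * capability, or when the sysctl level is low enough. The decision, factored
 * out as a sketch (helper name and parameters are illustrative):
 */
#include <stdbool.h>

static bool paranoid_ok(bool has_cap_sys_admin, bool has_cap_perfmon,
			int paranoid_level, int max_level)
{
	return has_cap_sys_admin || has_cap_perfmon ||
	       paranoid_level <= max_level;
}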