Diffstat (limited to 'tools/perf/util')
-rw-r--r-- tools/perf/util/Build | 39
-rw-r--r-- tools/perf/util/addr_location.c | 1
-rw-r--r-- tools/perf/util/addr_location.h | 6
-rw-r--r-- tools/perf/util/annotate-data.c | 49
-rw-r--r-- tools/perf/util/annotate-data.h | 13
-rw-r--r-- tools/perf/util/annotate.c | 299
-rw-r--r-- tools/perf/util/annotate.h | 30
-rw-r--r-- tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 23
-rw-r--r-- tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 20
-rw-r--r-- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 14
-rw-r--r-- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 12
-rw-r--r-- tools/perf/util/arm-spe.c | 253
-rw-r--r-- tools/perf/util/arm64-frame-pointer-unwind-support.c | 29
-rw-r--r-- tools/perf/util/auxtrace.c | 82
-rw-r--r-- tools/perf/util/auxtrace.h | 6
-rw-r--r-- tools/perf/util/bpf-filter.l | 2
-rw-r--r-- tools/perf/util/bpf_ftrace.c | 29
-rw-r--r-- tools/perf/util/bpf_kwork.c | 2
-rw-r--r-- tools/perf/util/bpf_kwork_top.c | 2
-rw-r--r-- tools/perf/util/bpf_lock_contention.c | 227
-rw-r--r-- tools/perf/util/bpf_off_cpu.c | 5
-rw-r--r-- tools/perf/util/bpf_skel/func_latency.bpf.c | 46
-rw-r--r-- tools/perf/util/bpf_skel/kwork_top.bpf.c | 4
-rw-r--r-- tools/perf/util/bpf_skel/kwork_trace.bpf.c | 2
-rw-r--r-- tools/perf/util/bpf_skel/lock_contention.bpf.c | 340
-rw-r--r-- tools/perf/util/bpf_skel/lock_data.h | 22
-rw-r--r-- tools/perf/util/bpf_skel/vmlinux/vmlinux.h | 8
-rw-r--r-- tools/perf/util/branch.h | 3
-rw-r--r-- tools/perf/util/btf.c | 27
-rw-r--r-- tools/perf/util/btf.h | 10
-rw-r--r-- tools/perf/util/callchain.c | 10
-rw-r--r-- tools/perf/util/cgroup.c | 2
-rw-r--r-- tools/perf/util/color.h | 5
-rw-r--r-- tools/perf/util/color_config.c | 11
-rw-r--r-- tools/perf/util/comm.c | 2
-rw-r--r-- tools/perf/util/compress.h | 20
-rw-r--r-- tools/perf/util/config.c | 33
-rw-r--r-- tools/perf/util/config.h | 2
-rw-r--r-- tools/perf/util/cpumap.c | 74
-rw-r--r-- tools/perf/util/cs-etm.c | 31
-rw-r--r-- tools/perf/util/data-convert-bt.c | 10
-rw-r--r-- tools/perf/util/data-convert-json.c | 8
-rw-r--r-- tools/perf/util/data.c | 20
-rw-r--r-- tools/perf/util/data.h | 1
-rw-r--r-- tools/perf/util/debug.c | 2
-rw-r--r-- tools/perf/util/debuginfo.c | 6
-rw-r--r-- tools/perf/util/disasm.c | 32
-rw-r--r-- tools/perf/util/dlfilter.c | 3
-rw-r--r-- tools/perf/util/dso.c | 166
-rw-r--r-- tools/perf/util/dso.h | 82
-rw-r--r-- tools/perf/util/env.c | 19
-rw-r--r-- tools/perf/util/env.h | 2
-rw-r--r-- tools/perf/util/event.c | 11
-rw-r--r-- tools/perf/util/event.h | 12
-rw-r--r-- tools/perf/util/events_stats.h | 2
-rw-r--r-- tools/perf/util/evlist.c | 32
-rw-r--r-- tools/perf/util/evlist.h | 1
-rw-r--r-- tools/perf/util/evsel.c | 381
-rw-r--r-- tools/perf/util/evsel.h | 15
-rw-r--r-- tools/perf/util/evsel_config.h | 1
-rw-r--r-- tools/perf/util/evsel_fprintf.c | 4
-rw-r--r-- tools/perf/util/expr.c | 2
-rw-r--r-- tools/perf/util/ftrace.h | 11
-rwxr-xr-x tools/perf/util/generate-cmdlist.sh | 4
-rw-r--r-- tools/perf/util/header.c | 33
-rw-r--r-- tools/perf/util/hist.c | 222
-rw-r--r-- tools/perf/util/hist.h | 46
-rw-r--r-- tools/perf/util/hwmon_pmu.c | 56
-rw-r--r-- tools/perf/util/hwmon_pmu.h | 16
-rw-r--r-- tools/perf/util/intel-bts.c | 4
-rw-r--r-- tools/perf/util/intel-pt-decoder/Build | 18
-rw-r--r-- tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c | 3
-rw-r--r-- tools/perf/util/intel-pt.c | 341
-rw-r--r-- tools/perf/util/intel-tpebs.c | 2
-rw-r--r-- tools/perf/util/jitdump.c | 25
-rw-r--r-- tools/perf/util/kvm-stat.c | 70
-rw-r--r-- tools/perf/util/kvm-stat.h | 3
-rw-r--r-- tools/perf/util/kwork.h | 7
-rw-r--r-- tools/perf/util/llvm-c-helpers.cpp | 1
-rw-r--r-- tools/perf/util/lock-contention.c | 143
-rw-r--r-- tools/perf/util/lock-contention.h | 27
-rw-r--r-- tools/perf/util/lzma.c | 29
-rw-r--r-- tools/perf/util/machine.c | 96
-rw-r--r-- tools/perf/util/machine.h | 8
-rw-r--r-- tools/perf/util/maps.c | 58
-rw-r--r-- tools/perf/util/mem-events.c | 77
-rw-r--r-- tools/perf/util/mem-events.h | 3
-rw-r--r-- tools/perf/util/mmap.c | 15
-rw-r--r-- tools/perf/util/mmap.h | 3
-rw-r--r-- tools/perf/util/mutex.h | 8
-rw-r--r-- tools/perf/util/parse-events.c | 205
-rw-r--r-- tools/perf/util/parse-events.h | 1
-rw-r--r-- tools/perf/util/parse-events.l | 52
-rw-r--r-- tools/perf/util/path.c | 8
-rw-r--r-- tools/perf/util/path.h | 2
-rw-r--r-- tools/perf/util/perf_event_attr_fprintf.c | 131
-rw-r--r-- tools/perf/util/pmu.c | 349
-rw-r--r-- tools/perf/util/pmu.h | 12
-rw-r--r-- tools/perf/util/pmus.c | 202
-rw-r--r-- tools/perf/util/pmus.h | 1
-rw-r--r-- tools/perf/util/print-events.c | 1
-rw-r--r-- tools/perf/util/probe-event.c | 50
-rw-r--r-- tools/perf/util/probe-event.h | 1
-rw-r--r-- tools/perf/util/probe-finder.c | 36
-rw-r--r-- tools/perf/util/probe-finder.h | 6
-rw-r--r-- tools/perf/util/pstack.c | 14
-rw-r--r-- tools/perf/util/pstack.h | 1
-rw-r--r-- tools/perf/util/python.c | 497
-rw-r--r-- tools/perf/util/rb_resort.h | 146
-rw-r--r-- tools/perf/util/s390-cpumsf.c | 6
-rw-r--r-- tools/perf/util/sample.c | 43
-rw-r--r-- tools/perf/util/sample.h | 11
-rw-r--r-- tools/perf/util/scripting-engines/trace-event-perl.c | 3
-rw-r--r-- tools/perf/util/scripting-engines/trace-event-python.c | 95
-rw-r--r-- tools/perf/util/session.c | 107
-rw-r--r-- tools/perf/util/session.h | 1
-rw-r--r-- tools/perf/util/setup.py | 10
-rw-r--r-- tools/perf/util/sort.c | 185
-rw-r--r-- tools/perf/util/sort.h | 3
-rw-r--r-- tools/perf/util/stat-display.c | 202
-rw-r--r-- tools/perf/util/stat-shadow.c | 8
-rw-r--r-- tools/perf/util/stat.c | 13
-rw-r--r-- tools/perf/util/stat.h | 3
-rw-r--r-- tools/perf/util/stream.c | 7
-rw-r--r-- tools/perf/util/stream.h | 10
-rw-r--r-- tools/perf/util/string.c | 15
-rw-r--r-- tools/perf/util/svghelper.c | 1
-rw-r--r-- tools/perf/util/symbol-elf.c | 139
-rw-r--r-- tools/perf/util/symbol-minimal.c | 160
-rw-r--r-- tools/perf/util/symbol.c | 36
-rw-r--r-- tools/perf/util/symbol_conf.h | 8
-rw-r--r-- tools/perf/util/synthetic-events.c | 60
-rw-r--r-- tools/perf/util/syscalltbl.c | 226
-rw-r--r-- tools/perf/util/syscalltbl.h | 23
-rw-r--r-- tools/perf/util/thread.c | 88
-rw-r--r-- tools/perf/util/thread.h | 16
-rw-r--r-- tools/perf/util/tool_pmu.c | 43
-rw-r--r-- tools/perf/util/tool_pmu.h | 2
-rw-r--r-- tools/perf/util/trace-event-parse.c | 2
-rw-r--r-- tools/perf/util/trace-event-scripting.c | 241
-rw-r--r-- tools/perf/util/trace-event.h | 9
-rw-r--r-- tools/perf/util/units.c | 2
-rw-r--r-- tools/perf/util/unwind-libdw.c | 9
-rw-r--r-- tools/perf/util/unwind-libunwind-local.c | 28
-rw-r--r-- tools/perf/util/values.c | 106
-rw-r--r-- tools/perf/util/values.h | 9
146 files changed, 5323 insertions(+), 2258 deletions(-)
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index c06d2ee9024c..946bce6628f3 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -67,6 +67,7 @@ perf-util-y += maps.o
perf-util-y += pstack.o
perf-util-y += session.o
perf-util-y += tool.o
+perf-util-y += sample.o
perf-util-y += sample-raw.o
perf-util-y += s390-sample-raw.o
perf-util-y += amd-sample-raw.o
@@ -86,7 +87,7 @@ perf-util-y += pmu-bison.o
perf-util-y += hwmon_pmu.o
perf-util-y += tool_pmu.o
perf-util-y += svghelper.o
-perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-info.o
+perf-util-y += trace-event-info.o
perf-util-y += trace-event-scripting.o
perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event.o
perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-parse.o
@@ -121,8 +122,10 @@ perf-util-y += spark.o
perf-util-y += topdown.o
perf-util-y += iostat.o
perf-util-y += stream.o
+perf-util-y += kvm-stat.o
+perf-util-y += lock-contention.o
perf-util-$(CONFIG_AUXTRACE) += auxtrace.o
-perf-util-$(CONFIG_AUXTRACE) += intel-pt-decoder/
+perf-util-y += intel-pt-decoder/
perf-util-$(CONFIG_AUXTRACE) += intel-pt.o
perf-util-$(CONFIG_AUXTRACE) += intel-bts.o
perf-util-$(CONFIG_AUXTRACE) += arm-spe.o
@@ -168,6 +171,7 @@ perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter.o
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-flex.o
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-bison.o
+perf-util-$(CONFIG_PERF_BPF_SKEL) += btf.o
ifeq ($(CONFIG_LIBTRACEEVENT),y)
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o
@@ -402,14 +406,39 @@ $(OUTPUT)util/list_sort.o: ../lib/list_sort.c FORCE
ifdef SHELLCHECK
SHELL_TESTS := generate-cmdlist.sh
- TEST_LOGS := $(SHELL_TESTS:%=%.shellcheck_log)
+ SHELL_TEST_LOGS := $(SHELL_TESTS:%=%.shellcheck_log)
else
SHELL_TESTS :=
- TEST_LOGS :=
+ SHELL_TEST_LOGS :=
endif
$(OUTPUT)%.shellcheck_log: %
$(call rule_mkdir)
$(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false)
-perf-util-y += $(TEST_LOGS)
+perf-util-y += $(SHELL_TEST_LOGS)
+
+PY_TESTS := setup.py
+ifdef MYPY
+ MYPY_TEST_LOGS := $(PY_TESTS:%=%.mypy_log)
+else
+ MYPY_TEST_LOGS :=
+endif
+
+$(OUTPUT)%.mypy_log: %
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,test)mypy "$<" > $@ || (cat $@ && rm $@ && false)
+
+perf-util-y += $(MYPY_TEST_LOGS)
+
+ifdef PYLINT
+ PYLINT_TEST_LOGS := $(PY_TESTS:%=%.pylint_log)
+else
+ PYLINT_TEST_LOGS :=
+endif
+
+$(OUTPUT)%.pylint_log: %
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,test)pylint "$<" > $@ || (cat $@ && rm $@ && false)
+
+perf-util-y += $(PYLINT_TEST_LOGS)
diff --git a/tools/perf/util/addr_location.c b/tools/perf/util/addr_location.c
index 51825ef8c0ab..007a2f5df9a6 100644
--- a/tools/perf/util/addr_location.c
+++ b/tools/perf/util/addr_location.c
@@ -17,6 +17,7 @@ void addr_location__init(struct addr_location *al)
al->cpumode = 0;
al->cpu = 0;
al->socket = 0;
+ al->parallelism = 1;
}
/*
diff --git a/tools/perf/util/addr_location.h b/tools/perf/util/addr_location.h
index d8ac0428dff2..64b551025216 100644
--- a/tools/perf/util/addr_location.h
+++ b/tools/perf/util/addr_location.h
@@ -17,10 +17,14 @@ struct addr_location {
const char *srcline;
u64 addr;
char level;
- u8 filtered;
u8 cpumode;
+ u16 filtered;
s32 cpu;
s32 socket;
+ /* Same as machine.parallelism but within [1, nr_cpus]. */
+ int parallelism;
+ /* See he_stat.latency. */
+ u64 latency;
};
void addr_location__init(struct addr_location *al);
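[Editor's note] The new parallelism field defaults to 1 in addr_location__init() and, per the comment above, mirrors machine.parallelism clamped to [1, nr_cpus]. A minimal sketch of that clamp; the struct and helper names below are illustrative, not the actual perf code that fills the field:

#include <stdio.h>

/* Hypothetical stand-in for struct machine; field names illustrative. */
struct machine_like {
	int parallelism;	/* may be 0 or > nr_cpus transiently */
	int nr_cpus;
};

/* Clamp machine-wide parallelism into [1, nr_cpus] for one sample. */
static int sample_parallelism(const struct machine_like *m)
{
	int p = m->parallelism;

	if (p < 1)
		p = 1;
	if (p > m->nr_cpus)
		p = m->nr_cpus;
	return p;
}

int main(void)
{
	struct machine_like m = { .parallelism = 0, .nr_cpus = 8 };

	printf("parallelism = %d\n", sample_parallelism(&m));	/* prints 1 */
	return 0;
}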
diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index 976abedca09e..1ef2edbc71d9 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -314,6 +314,40 @@ static void delete_members(struct annotated_member *member)
}
}
+static int fill_member_name(char *buf, size_t sz, struct annotated_member *m,
+ int offset, bool first)
+{
+ struct annotated_member *child;
+
+ if (list_empty(&m->children))
+ return 0;
+
+ list_for_each_entry(child, &m->children, node) {
+ int len;
+
+ if (offset < child->offset || offset >= child->offset + child->size)
+ continue;
+
+ /* It can have anonymous struct/union members */
+ if (child->var_name) {
+ len = scnprintf(buf, sz, "%s%s",
+ first ? "" : ".", child->var_name);
+ first = false;
+ } else {
+ len = 0;
+ }
+
+ return fill_member_name(buf + len, sz - len, child, offset, first) + len;
+ }
+ return 0;
+}
+
+int annotated_data_type__get_member_name(struct annotated_data_type *adt,
+ char *buf, size_t sz, int member_offset)
+{
+ return fill_member_name(buf, sz, &adt->self, member_offset, /*first=*/true);
+}
+
static struct annotated_data_type *dso__findnew_data_type(struct dso *dso,
Dwarf_Die *type_die)
{
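[Editor's note] fill_member_name() above walks the member tree recursively, appending '.'-separated names and passing through anonymous struct/union levels without emitting text. A self-contained sketch of the same traversal over a simplified tree; the types and test data are illustrative, not the perf structures:

#include <stdio.h>
#include <string.h>

struct member {
	const char *name;	/* NULL for anonymous struct/union */
	int offset, size;	/* offset is absolute within the top type */
	struct member *child, *next;
};

static int fill_name(char *buf, size_t sz, const struct member *m,
		     int offset, int first)
{
	for (const struct member *c = m->child; c; c = c->next) {
		int len = 0;

		if (offset < c->offset || offset >= c->offset + c->size)
			continue;
		if (c->name) {
			len = snprintf(buf, sz, "%s%s", first ? "" : ".", c->name);
			first = 0;	/* anonymous levels leave 'first' untouched */
		}
		return fill_name(buf + len, sz - len, c, offset, first) + len;
	}
	return 0;
}

int main(void)
{
	struct member leaf = { "lock", 8, 4, NULL, NULL };
	struct member anon = { NULL, 0, 16, &leaf, NULL };	/* anonymous union */
	struct member top  = { "task", 0, 16, &anon, NULL };
	char buf[64];

	fill_name(buf, sizeof(buf), &top, 8, 1);
	printf("%s\n", buf);	/* "lock", reached through the anonymous level */
	return 0;
}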
@@ -830,7 +864,7 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo
if (!dwarf_offdie(dloc->di->dbg, var->die_off, &mem_die))
continue;
- if (var->reg == DWARF_REG_FB || var->reg == fbreg) {
+ if (var->reg == DWARF_REG_FB || var->reg == fbreg || var->reg == state->stack_reg) {
int offset = var->offset;
struct type_state_stack *stack;
@@ -845,8 +879,13 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo
findnew_stack_state(state, offset, TSR_KIND_TYPE,
&mem_die);
- pr_debug_dtp("var [%"PRIx64"] -%#x(stack)",
- insn_offset, -offset);
+ if (var->reg == state->stack_reg) {
+ pr_debug_dtp("var [%"PRIx64"] %#x(reg%d)",
+ insn_offset, offset, state->stack_reg);
+ } else {
+ pr_debug_dtp("var [%"PRIx64"] -%#x(stack)",
+ insn_offset, -offset);
+ }
pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
} else if (has_reg_type(state, var->reg) && var->offset == 0) {
struct type_state_reg *reg;
@@ -1127,10 +1166,10 @@ again:
}
check_non_register:
- if (reg == dloc->fbreg) {
+ if (reg == dloc->fbreg || reg == state->stack_reg) {
struct type_state_stack *stack;
- pr_debug_dtp("fbreg");
+ pr_debug_dtp("%s", reg == dloc->fbreg ? "fbreg" : "stack");
stack = find_stack_state(state, dloc->type_offset);
if (stack == NULL) {
diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h
index 98c80b2268dd..541fee1a5f0a 100644
--- a/tools/perf/util/annotate-data.h
+++ b/tools/perf/util/annotate-data.h
@@ -227,8 +227,13 @@ void annotated_data_type__tree_delete(struct rb_root *root);
/* Release all global variable information in the tree */
void global_var_type__tree_delete(struct rb_root *root);
+/* Print data type annotation (including members) on stdout */
int hist_entry__annotate_data_tty(struct hist_entry *he, struct evsel *evsel);
+/* Get name of member field at the given offset in the data type */
+int annotated_data_type__get_member_name(struct annotated_data_type *adt,
+ char *buf, size_t sz, int member_offset);
+
bool has_reg_type(struct type_state *state, int reg);
struct type_state_stack *findnew_stack_state(struct type_state *state,
int offset, u8 kind,
@@ -276,6 +281,14 @@ static inline int hist_entry__annotate_data_tty(struct hist_entry *he __maybe_un
return -1;
}
+static inline int annotated_data_type__get_member_name(struct annotated_data_type *adt __maybe_unused,
+ char *buf __maybe_unused,
+ size_t sz __maybe_unused,
+ int member_offset __maybe_unused)
+{
+ return -1;
+}
+
#endif /* HAVE_LIBDW_SUPPORT */
#ifdef HAVE_SLANG_SUPPORT
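[Editor's note] The !HAVE_LIBDW_SUPPORT branch above adds the usual perf fallback: a static inline stub with __maybe_unused parameters returning -1, so callers compile unchanged when libdw is absent. The pattern in isolation; names here are generic, not the real API:

#include <stddef.h>

#define __maybe_unused __attribute__((unused))

#ifdef HAVE_FOO_SUPPORT
int foo_member_name(char *buf, size_t sz, int offset);
#else
static inline int foo_member_name(char *buf __maybe_unused,
				  size_t sz __maybe_unused,
				  int offset __maybe_unused)
{
	return -1;	/* feature compiled out; callers treat as "no name" */
}
#endif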
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 31dce9b87bff..1e59b9e5339d 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -87,6 +87,8 @@ struct annotated_data_type canary_type = {
},
};
+#define NO_TYPE ((struct annotated_data_type *)-1UL)
+
/* symbol histogram: key = offset << 16 | evsel->core.idx */
static size_t sym_hist_hash(long key, void *ctx __maybe_unused)
{
@@ -209,7 +211,7 @@ static int __symbol__account_cycles(struct cyc_hist *ch,
}
static int __symbol__inc_addr_samples(struct map_symbol *ms,
- struct annotated_source *src, int evidx, u64 addr,
+ struct annotated_source *src, struct evsel *evsel, u64 addr,
struct perf_sample *sample)
{
struct symbol *sym = ms->sym;
@@ -228,14 +230,14 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms,
}
offset = addr - sym->start;
- h = annotated_source__histogram(src, evidx);
+ h = annotated_source__histogram(src, evsel);
if (h == NULL) {
pr_debug("%s(%d): ENOMEM! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 ", func: %d\n",
__func__, __LINE__, sym->name, sym->start, addr, sym->end, sym->type == STT_FUNC);
return -ENOMEM;
}
- hash_key = offset << 16 | evidx;
+ hash_key = offset << 16 | evsel->core.idx;
if (!hashmap__find(src->samples, hash_key, &entry)) {
entry = zalloc(sizeof(*entry));
if (entry == NULL)
@@ -252,7 +254,7 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms,
pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64
", evidx=%d] => nr_samples: %" PRIu64 ", period: %" PRIu64 "\n",
- sym->start, sym->name, addr, addr - sym->start, evidx,
+ sym->start, sym->name, addr, addr - sym->start, evsel->core.idx,
entry->nr_samples, entry->period);
return 0;
}
@@ -323,7 +325,7 @@ static int symbol__inc_addr_samples(struct map_symbol *ms,
if (sym == NULL)
return 0;
src = symbol__hists(sym, evsel->evlist->core.nr_entries);
- return src ? __symbol__inc_addr_samples(ms, src, evsel->core.idx, addr, sample) : 0;
+ return src ? __symbol__inc_addr_samples(ms, src, evsel, addr, sample) : 0;
}
static int symbol__account_br_cntr(struct annotated_branch *branch,
@@ -758,15 +760,31 @@ static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_wi
return 0;
}
+static struct annotated_data_type *
+__hist_entry__get_data_type(struct hist_entry *he, struct arch *arch,
+ struct debuginfo *dbg, struct disasm_line *dl,
+ int *type_offset);
+
+struct annotation_print_data {
+ struct hist_entry *he;
+ struct evsel *evsel;
+ struct arch *arch;
+ struct debuginfo *dbg;
+ u64 start;
+ int addr_fmt_width;
+};
+
static int
-annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start,
- struct evsel *evsel, u64 len, int min_pcnt, int printed,
- int max_lines, struct annotation_line *queue, int addr_fmt_width,
- int percent_type)
+annotation_line__print(struct annotation_line *al, struct annotation_print_data *apd,
+ struct annotation_options *opts, int printed,
+ struct annotation_line *queue)
{
+ struct symbol *sym = apd->he->ms.sym;
struct disasm_line *dl = container_of(al, struct disasm_line, al);
struct annotation *notes = symbol__annotation(sym);
static const char *prev_line;
+ int max_lines = opts->max_lines;
+ int percent_type = opts->percent_type;
if (al->offset != -1) {
double max_percent = 0.0;
@@ -786,19 +804,23 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
if (al->data_nr > nr_percent)
nr_percent = al->data_nr;
- if (max_percent < min_pcnt)
+ if (max_percent < opts->min_pcnt)
return -1;
if (max_lines && printed >= max_lines)
return 1;
if (queue != NULL) {
+ struct annotation_options queue_opts = {
+ .max_lines = 1,
+ .percent_type = percent_type,
+ };
+
list_for_each_entry_from(queue, &notes->src->source, node) {
if (queue == al)
break;
- annotation_line__print(queue, sym, start, evsel, len,
- 0, 0, 1, NULL, addr_fmt_width,
- percent_type);
+ annotation_line__print(queue, apd, &queue_opts,
+ /*printed=*/0, /*queue=*/NULL);
}
}
@@ -823,7 +845,31 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
printf(" : ");
- disasm_line__print(dl, start, addr_fmt_width);
+ disasm_line__print(dl, apd->start, apd->addr_fmt_width);
+
+ if (opts->code_with_type && apd->dbg) {
+ struct annotated_data_type *data_type;
+ int offset = 0;
+
+ data_type = __hist_entry__get_data_type(apd->he, apd->arch,
+ apd->dbg, dl, &offset);
+ if (data_type && data_type != NO_TYPE) {
+ char buf[4096];
+
+ printf("\t\t# data-type: %s",
+ data_type->self.type_name);
+
+ if (data_type != &stackop_type &&
+ data_type != &canary_type)
+ printf(" +%#x", offset);
+
+ if (annotated_data_type__get_member_name(data_type,
+ buf,
+ sizeof(buf),
+ offset))
+ printf(" (%s)", buf);
+ }
+ }
/*
* Also color the filename and line if needed, with
@@ -849,7 +895,8 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
if (!*al->line)
printf(" %*s:\n", width, " ");
else
- printf(" %*s: %-*d %s\n", width, " ", addr_fmt_width, al->line_nr, al->line);
+ printf(" %*s: %-*d %s\n", width, " ", apd->addr_fmt_width,
+ al->line_nr, al->line);
}
return 0;
@@ -861,15 +908,14 @@ static void calc_percent(struct annotation *notes,
s64 offset, s64 end)
{
struct hists *hists = evsel__hists(evsel);
- int evidx = evsel->core.idx;
- struct sym_hist *sym_hist = annotation__histogram(notes, evidx);
+ struct sym_hist *sym_hist = annotation__histogram(notes, evsel);
unsigned int hits = 0;
u64 period = 0;
while (offset < end) {
struct sym_hist_entry *entry;
- entry = annotated_source__hist_entry(notes->src, evidx, offset);
+ entry = annotated_source__hist_entry(notes->src, evsel, offset);
if (entry) {
hits += entry->nr_samples;
period += entry->period;
@@ -1140,15 +1186,14 @@ static void print_summary(struct rb_root *root, const char *filename)
static void symbol__annotate_hits(struct symbol *sym, struct evsel *evsel)
{
- int evidx = evsel->core.idx;
struct annotation *notes = symbol__annotation(sym);
- struct sym_hist *h = annotation__histogram(notes, evidx);
+ struct sym_hist *h = annotation__histogram(notes, evsel);
u64 len = symbol__size(sym), offset;
for (offset = 0; offset < len; ++offset) {
struct sym_hist_entry *entry;
- entry = annotated_source__hist_entry(notes->src, evidx, offset);
+ entry = annotated_source__hist_entry(notes->src, evsel, offset);
if (entry && entry->nr_samples != 0)
printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2,
sym->start + offset, entry->nr_samples);
@@ -1169,8 +1214,9 @@ static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start)
return 0;
}
-int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
+int hist_entry__annotate_printf(struct hist_entry *he, struct evsel *evsel)
{
+ struct map_symbol *ms = &he->ms;
struct map *map = ms->map;
struct symbol *sym = ms->sym;
struct dso *dso = map__dso(map);
@@ -1178,14 +1224,17 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
const char *d_filename;
const char *evsel_name = evsel__name(evsel);
struct annotation *notes = symbol__annotation(sym);
- struct sym_hist *h = annotation__histogram(notes, evsel->core.idx);
+ struct sym_hist *h = annotation__histogram(notes, evsel);
struct annotation_line *pos, *queue = NULL;
struct annotation_options *opts = &annotate_opts;
- u64 start = map__rip_2objdump(map, sym->start);
- int printed = 2, queue_len = 0, addr_fmt_width;
+ struct annotation_print_data apd = {
+ .he = he,
+ .evsel = evsel,
+ .start = map__rip_2objdump(map, sym->start),
+ };
+ int printed = 2, queue_len = 0;
int more = 0;
bool context = opts->context;
- u64 len;
int width = annotation__pcnt_width(notes);
int graph_dotted_len;
char buf[512];
@@ -1199,8 +1248,6 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
else
d_filename = basename(filename);
- len = symbol__size(sym);
-
if (evsel__is_group_event(evsel)) {
evsel__group_desc(evsel, buf, sizeof(buf));
evsel_name = buf;
@@ -1219,7 +1266,10 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
if (verbose > 0)
symbol__annotate_hits(sym, evsel);
- addr_fmt_width = annotated_source__addr_fmt_width(&notes->src->source, start);
+ apd.addr_fmt_width = annotated_source__addr_fmt_width(&notes->src->source,
+ apd.start);
+ evsel__get_arch(evsel, &apd.arch);
+ apd.dbg = debuginfo__new(filename);
list_for_each_entry(pos, &notes->src->source, node) {
int err;
@@ -1229,9 +1279,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
queue_len = 0;
}
- err = annotation_line__print(pos, sym, start, evsel, len,
- opts->min_pcnt, printed, opts->max_lines,
- queue, addr_fmt_width, opts->percent_type);
+ err = annotation_line__print(pos, &apd, opts, printed, queue);
switch (err) {
case 0:
@@ -1262,6 +1310,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
}
}
+ debuginfo__delete(apd.dbg);
free(filename);
return more;
@@ -1364,18 +1413,18 @@ out_free_filename:
return err;
}
-void symbol__annotate_zero_histogram(struct symbol *sym, int evidx)
+void symbol__annotate_zero_histogram(struct symbol *sym, struct evsel *evsel)
{
struct annotation *notes = symbol__annotation(sym);
- struct sym_hist *h = annotation__histogram(notes, evidx);
+ struct sym_hist *h = annotation__histogram(notes, evsel);
memset(h, 0, sizeof(*notes->src->histograms) * notes->src->nr_histograms);
}
-void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)
+void symbol__annotate_decay_histogram(struct symbol *sym, struct evsel *evsel)
{
struct annotation *notes = symbol__annotation(sym);
- struct sym_hist *h = annotation__histogram(notes, evidx);
+ struct sym_hist *h = annotation__histogram(notes, evsel);
struct annotation_line *al;
h->nr_samples = 0;
@@ -1385,7 +1434,7 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)
if (al->offset == -1)
continue;
- entry = annotated_source__hist_entry(notes->src, evidx, al->offset);
+ entry = annotated_source__hist_entry(notes->src, evsel, al->offset);
if (entry == NULL)
continue;
@@ -1599,8 +1648,9 @@ static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root)
annotation__calc_lines(notes, ms, root);
}
-int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel)
+int hist_entry__tty_annotate2(struct hist_entry *he, struct evsel *evsel)
{
+ struct map_symbol *ms = &he->ms;
struct dso *dso = map__dso(ms->map);
struct symbol *sym = ms->sym;
struct rb_root source_line = RB_ROOT;
@@ -1634,8 +1684,9 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel)
return 0;
}
-int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel)
+int hist_entry__tty_annotate(struct hist_entry *he, struct evsel *evsel)
{
+ struct map_symbol *ms = &he->ms;
struct dso *dso = map__dso(ms->map);
struct symbol *sym = ms->sym;
struct rb_root source_line = RB_ROOT;
@@ -1659,7 +1710,7 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel)
print_summary(&source_line, dso__long_name(dso));
}
- symbol__annotate_printf(ms, evsel);
+ hist_entry__annotate_printf(he, evsel);
annotated_source__purge(symbol__annotation(sym)->src);
@@ -2645,6 +2696,92 @@ void debuginfo_cache__delete(void)
di_cache.dbg = NULL;
}
+static struct annotated_data_type *
+__hist_entry__get_data_type(struct hist_entry *he, struct arch *arch,
+ struct debuginfo *dbg, struct disasm_line *dl,
+ int *type_offset)
+{
+ struct map_symbol *ms = &he->ms;
+ struct annotated_insn_loc loc;
+ struct annotated_op_loc *op_loc;
+ struct annotated_data_type *mem_type;
+ struct annotated_item_stat *istat;
+ int i;
+
+ istat = annotate_data_stat(&ann_insn_stat, dl->ins.name);
+ if (istat == NULL) {
+ ann_data_stat.no_insn++;
+ return NO_TYPE;
+ }
+
+ if (annotate_get_insn_location(arch, dl, &loc) < 0) {
+ ann_data_stat.no_insn_ops++;
+ istat->bad++;
+ return NO_TYPE;
+ }
+
+ if (is_stack_operation(arch, dl)) {
+ istat->good++;
+ *type_offset = 0;
+ return &stackop_type;
+ }
+
+ for_each_insn_op_loc(&loc, i, op_loc) {
+ struct data_loc_info dloc = {
+ .arch = arch,
+ .thread = he->thread,
+ .ms = ms,
+ .ip = ms->sym->start + dl->al.offset,
+ .cpumode = he->cpumode,
+ .op = op_loc,
+ .di = dbg,
+ };
+
+ if (!op_loc->mem_ref && op_loc->segment == INSN_SEG_NONE)
+ continue;
+
+ /* PC-relative addressing */
+ if (op_loc->reg1 == DWARF_REG_PC) {
+ dloc.var_addr = annotate_calc_pcrel(ms, dloc.ip,
+ op_loc->offset, dl);
+ }
+
+ /* This CPU access in kernel - pretend PC-relative addressing */
+ if (dso__kernel(map__dso(ms->map)) && arch__is(arch, "x86") &&
+ op_loc->segment == INSN_SEG_X86_GS && op_loc->imm) {
+ dloc.var_addr = op_loc->offset;
+ op_loc->reg1 = DWARF_REG_PC;
+ }
+
+ mem_type = find_data_type(&dloc);
+
+ if (mem_type == NULL && is_stack_canary(arch, op_loc)) {
+ istat->good++;
+ *type_offset = 0;
+ return &canary_type;
+ }
+
+ if (mem_type)
+ istat->good++;
+ else
+ istat->bad++;
+
+ if (symbol_conf.annotate_data_sample) {
+ struct evsel *evsel = hists_to_evsel(he->hists);
+
+ annotated_data_type__update_samples(mem_type, evsel,
+ dloc.type_offset,
+ he->stat.nr_events,
+ he->stat.period);
+ }
+ *type_offset = dloc.type_offset;
+ return mem_type ?: NO_TYPE;
+ }
+
+ /* retry with a fused instruction */
+ return NULL;
+}
+
/**
* hist_entry__get_data_type - find data type for given hist entry
* @he: hist entry
@@ -2660,12 +2797,9 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
struct evsel *evsel = hists_to_evsel(he->hists);
struct arch *arch;
struct disasm_line *dl;
- struct annotated_insn_loc loc;
- struct annotated_op_loc *op_loc;
struct annotated_data_type *mem_type;
struct annotated_item_stat *istat;
u64 ip = he->ip;
- int i;
ann_data_stat.total++;
@@ -2717,77 +2851,10 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
}
retry:
- istat = annotate_data_stat(&ann_insn_stat, dl->ins.name);
- if (istat == NULL) {
- ann_data_stat.no_insn++;
- return NULL;
- }
-
- if (annotate_get_insn_location(arch, dl, &loc) < 0) {
- ann_data_stat.no_insn_ops++;
- istat->bad++;
- return NULL;
- }
-
- if (is_stack_operation(arch, dl)) {
- istat->good++;
- he->mem_type_off = 0;
- return &stackop_type;
- }
-
- for_each_insn_op_loc(&loc, i, op_loc) {
- struct data_loc_info dloc = {
- .arch = arch,
- .thread = he->thread,
- .ms = ms,
- /* Recalculate IP for LOCK prefix or insn fusion */
- .ip = ms->sym->start + dl->al.offset,
- .cpumode = he->cpumode,
- .op = op_loc,
- .di = di_cache.dbg,
- };
-
- if (!op_loc->mem_ref && op_loc->segment == INSN_SEG_NONE)
- continue;
-
- /* Recalculate IP because of LOCK prefix or insn fusion */
- ip = ms->sym->start + dl->al.offset;
-
- /* PC-relative addressing */
- if (op_loc->reg1 == DWARF_REG_PC) {
- dloc.var_addr = annotate_calc_pcrel(ms, dloc.ip,
- op_loc->offset, dl);
- }
-
- /* This CPU access in kernel - pretend PC-relative addressing */
- if (dso__kernel(map__dso(ms->map)) && arch__is(arch, "x86") &&
- op_loc->segment == INSN_SEG_X86_GS && op_loc->imm) {
- dloc.var_addr = op_loc->offset;
- op_loc->reg1 = DWARF_REG_PC;
- }
-
- mem_type = find_data_type(&dloc);
-
- if (mem_type == NULL && is_stack_canary(arch, op_loc)) {
- istat->good++;
- he->mem_type_off = 0;
- return &canary_type;
- }
-
- if (mem_type)
- istat->good++;
- else
- istat->bad++;
-
- if (symbol_conf.annotate_data_sample) {
- annotated_data_type__update_samples(mem_type, evsel,
- dloc.type_offset,
- he->stat.nr_events,
- he->stat.period);
- }
- he->mem_type_off = dloc.type_offset;
- return mem_type;
- }
+ mem_type = __hist_entry__get_data_type(he, arch, di_cache.dbg, dl,
+ &he->mem_type_off);
+ if (mem_type)
+ return mem_type == NO_TYPE ? NULL : mem_type;
/*
* Some instructions can be fused and the actual memory access came
@@ -2807,7 +2874,9 @@ retry:
}
ann_data_stat.no_mem_ops++;
- istat->bad++;
+ istat = annotate_data_stat(&ann_insn_stat, dl->ins.name);
+ if (istat)
+ istat->bad++;
return NULL;
}
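[Editor's note] The refactor above gives __hist_entry__get_data_type() a three-way result: a valid type, NO_TYPE ((void *)-1, a definitive miss), or NULL (retry with the fused instruction). A reduced model of the sentinel-pointer idiom, with illustrative names:

#include <stdio.h>

struct dtype { const char *name; };

#define NO_TYPE ((struct dtype *)-1UL)	/* distinct from NULL and any real pointer */

static struct dtype *resolve(int insn)
{
	static struct dtype t = { "struct foo" };

	if (insn == 0)
		return NULL;	/* caller retries with the fused instruction */
	if (insn < 0)
		return NO_TYPE;	/* definitive miss: stop searching */
	return &t;
}

int main(void)
{
	struct dtype *d = resolve(1);

	if (d == NULL)
		printf("retry\n");
	else if (d == NO_TYPE)
		printf("no type\n");
	else
		printf("%s\n", d->name);
	return 0;
}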
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 858912157e01..0e6e3f60a897 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -15,6 +15,7 @@
#include "hashmap.h"
#include "disasm.h"
#include "branch.h"
+#include "evsel.h"
struct hist_browser_timer;
struct hist_entry;
@@ -23,7 +24,6 @@ struct map_symbol;
struct addr_map_symbol;
struct option;
struct perf_sample;
-struct evsel;
struct symbol;
struct annotated_data_type;
@@ -55,9 +55,11 @@ struct annotation_options {
show_asm_raw,
show_br_cntr,
annotate_src,
+ code_with_type,
full_addr;
u8 offset_level;
u8 disassemblers[MAX_DISASSEMBLERS];
+ u8 disassembler_used;
int min_pcnt;
int max_lines;
int context;
@@ -378,21 +380,23 @@ static inline u8 annotation__br_cntr_width(void)
void annotation__update_column_widths(struct annotation *notes);
void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms);
-static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx)
+static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src,
+ const struct evsel *evsel)
{
- return &src->histograms[idx];
+ return &src->histograms[evsel->core.idx];
}
-static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx)
+static inline struct sym_hist *annotation__histogram(struct annotation *notes,
+ const struct evsel *evsel)
{
- return annotated_source__histogram(notes->src, idx);
+ return annotated_source__histogram(notes->src, evsel);
}
static inline struct sym_hist_entry *
-annotated_source__hist_entry(struct annotated_source *src, int idx, u64 offset)
+annotated_source__hist_entry(struct annotated_source *src, const struct evsel *evsel, u64 offset)
{
struct sym_hist_entry *entry;
- long key = offset << 16 | idx;
+ long key = offset << 16 | evsel->core.idx;
if (!hashmap__find(src->samples, key, &entry))
return NULL;
@@ -446,24 +450,24 @@ enum symbol_disassemble_errno {
SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP,
SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE,
SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF,
+ SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE,
__SYMBOL_ANNOTATE_ERRNO__END,
};
int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen);
-int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel);
-void symbol__annotate_zero_histogram(struct symbol *sym, int evidx);
-void symbol__annotate_decay_histogram(struct symbol *sym, int evidx);
+void symbol__annotate_zero_histogram(struct symbol *sym, struct evsel *evsel);
+void symbol__annotate_decay_histogram(struct symbol *sym, struct evsel *evsel);
void annotated_source__purge(struct annotated_source *as);
int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel);
bool ui__has_annotation(void);
-int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel);
-
-int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel);
+int hist_entry__annotate_printf(struct hist_entry *he, struct evsel *evsel);
+int hist_entry__tty_annotate(struct hist_entry *he, struct evsel *evsel);
+int hist_entry__tty_annotate2(struct hist_entry *he, struct evsel *evsel);
#ifdef HAVE_SLANG_SUPPORT
int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
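[Editor's note] annotated_source__hist_entry() above packs the instruction offset and the evsel index into one hashmap key, offset << 16 | idx, so one map serves all events of a symbol. A standalone demo of the packing; the 16-bit split is from the code above, while the unpack helper and the mask are mine:

#include <stdio.h>

static long make_key(unsigned long offset, int evsel_idx)
{
	return (long)(offset << 16 | (unsigned int)evsel_idx);
}

static void unpack_key(long key, unsigned long *offset, int *evsel_idx)
{
	*evsel_idx = (int)(key & 0xffff);
	*offset = (unsigned long)key >> 16;
}

int main(void)
{
	unsigned long off;
	int idx;

	unpack_key(make_key(0x1a4, 3), &off, &idx);
	printf("offset=%#lx idx=%d\n", off, idx);	/* offset=0x1a4 idx=3 */
	return 0;
}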
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index ba807071d3c1..688fe6d75244 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -28,7 +28,8 @@ static u64 arm_spe_calc_ip(int index, u64 payload)
/* Instruction virtual address or Branch target address */
if (index == SPE_ADDR_PKT_HDR_INDEX_INS ||
- index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) {
+ index == SPE_ADDR_PKT_HDR_INDEX_BRANCH ||
+ index == SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH) {
ns = SPE_ADDR_PKT_GET_NS(payload);
el = SPE_ADDR_PKT_GET_EL(payload);
@@ -181,6 +182,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.virt_addr = ip;
else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS)
decoder->record.phys_addr = ip;
+ else if (idx == SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH)
+ decoder->record.prev_br_tgt = ip;
break;
case ARM_SPE_COUNTER:
if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT)
@@ -207,6 +210,18 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
break;
case SPE_OP_PKT_HDR_CLASS_BR_ERET:
decoder->record.op |= ARM_SPE_OP_BRANCH_ERET;
+ if (payload & SPE_OP_PKT_COND)
+ decoder->record.op |= ARM_SPE_OP_BR_COND;
+ if (payload & SPE_OP_PKT_INDIRECT_BRANCH)
+ decoder->record.op |= ARM_SPE_OP_BR_INDIRECT;
+ if (payload & SPE_OP_PKT_GCS)
+ decoder->record.op |= ARM_SPE_OP_BR_GCS;
+ if (SPE_OP_PKT_CR_BL(payload))
+ decoder->record.op |= ARM_SPE_OP_BR_CR_BL;
+ if (SPE_OP_PKT_CR_RET(payload))
+ decoder->record.op |= ARM_SPE_OP_BR_CR_RET;
+ if (SPE_OP_PKT_CR_NON_BL_RET(payload))
+ decoder->record.op |= ARM_SPE_OP_BR_CR_NON_BL_RET;
break;
default:
pr_err("Get packet error!\n");
@@ -238,6 +253,12 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
if (payload & BIT(EV_MISPRED))
decoder->record.type |= ARM_SPE_BRANCH_MISS;
+ if (payload & BIT(EV_NOT_TAKEN))
+ decoder->record.type |= ARM_SPE_BRANCH_NOT_TAKEN;
+
+ if (payload & BIT(EV_TRANSACTIONAL))
+ decoder->record.type |= ARM_SPE_IN_TXN;
+
if (payload & BIT(EV_PARTIAL_PREDICATE))
decoder->record.type |= ARM_SPE_SVE_PARTIAL_PRED;
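[Editor's note] The decoder keeps translating SPE event payload bits into arm_spe_sample_type flags; the hunk above adds EV_NOT_TAKEN and EV_TRANSACTIONAL. The translation step in isolation; BIT() matches the kernel macro, but the enum bit positions below are placeholders (see arm-spe-pkt-decoder.h for the real values):

#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1ULL << (n))

enum { EV_NOT_TAKEN_DEMO = 6, EV_TRANSACTIONAL_DEMO = 16 };	/* placeholders */
enum {
	SAMPLE_BRANCH_NOT_TAKEN = 1 << 10,
	SAMPLE_IN_TXN           = 1 << 11,
};

static uint32_t decode_events(uint64_t payload)
{
	uint32_t type = 0;

	if (payload & BIT(EV_NOT_TAKEN_DEMO))
		type |= SAMPLE_BRANCH_NOT_TAKEN;
	if (payload & BIT(EV_TRANSACTIONAL_DEMO))
		type |= SAMPLE_IN_TXN;
	return type;
}

int main(void)
{
	printf("%#x\n", decode_events(BIT(EV_TRANSACTIONAL_DEMO)));	/* 0x800 */
	return 0;
}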
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 358c611eeddb..5d232188643b 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -24,6 +24,8 @@ enum arm_spe_sample_type {
ARM_SPE_REMOTE_ACCESS = 1 << 7,
ARM_SPE_SVE_PARTIAL_PRED = 1 << 8,
ARM_SPE_SVE_EMPTY_PRED = 1 << 9,
+ ARM_SPE_BRANCH_NOT_TAKEN = 1 << 10,
+ ARM_SPE_IN_TXN = 1 << 11,
};
enum arm_spe_op_type {
@@ -52,8 +54,12 @@ enum arm_spe_op_type {
ARM_SPE_OP_SVE_SG = 1 << 27,
/* Second level operation type for BRANCH_ERET */
- ARM_SPE_OP_BR_COND = 1 << 16,
- ARM_SPE_OP_BR_INDIRECT = 1 << 17,
+ ARM_SPE_OP_BR_COND = 1 << 16,
+ ARM_SPE_OP_BR_INDIRECT = 1 << 17,
+ ARM_SPE_OP_BR_GCS = 1 << 18,
+ ARM_SPE_OP_BR_CR_BL = 1 << 19,
+ ARM_SPE_OP_BR_CR_RET = 1 << 20,
+ ARM_SPE_OP_BR_CR_NON_BL_RET = 1 << 21,
};
enum arm_spe_common_data_source {
@@ -67,6 +73,15 @@ enum arm_spe_common_data_source {
ARM_SPE_COMMON_DS_DRAM = 0xe,
};
+enum arm_spe_ampereone_data_source {
+ ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0,
+ ARM_SPE_AMPEREONE_SLC = 0x3,
+ ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5,
+ ARM_SPE_AMPEREONE_DDR = 0x7,
+ ARM_SPE_AMPEREONE_L1D = 0x8,
+ ARM_SPE_AMPEREONE_L2D = 0x9,
+};
+
struct arm_spe_record {
enum arm_spe_sample_type type;
int err;
@@ -74,6 +89,7 @@ struct arm_spe_record {
u32 latency;
u64 from_ip;
u64 to_ip;
+ u64 prev_br_tgt;
u64 timestamp;
u64 virt_addr;
u64 phys_addr;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 4cef10a83962..13cadb2f1cea 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -308,6 +308,8 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet,
arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS");
if (payload & BIT(EV_ALIGNMENT))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " ALIGNMENT");
+ if (payload & BIT(EV_TRANSACTIONAL))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " TXN");
if (payload & BIT(EV_PARTIAL_PREDICATE))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-PARTIAL-PRED");
if (payload & BIT(EV_EMPTY_PREDICATE))
@@ -397,10 +399,16 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
if (payload & SPE_OP_PKT_COND)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND");
-
- if (SPE_OP_PKT_IS_INDIRECT_BRANCH(payload))
+ if (payload & SPE_OP_PKT_INDIRECT_BRANCH)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND");
-
+ if (payload & SPE_OP_PKT_GCS)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " GCS");
+ if (SPE_OP_PKT_CR_BL(payload))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-BL");
+ if (SPE_OP_PKT_CR_RET(payload))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-RET");
+ if (SPE_OP_PKT_CR_NON_BL_RET(payload))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-NON-BL-RET");
break;
default:
/* Unknown index */
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 464a912b221c..2cdf9f6da268 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -7,6 +7,7 @@
#ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__
#define INCLUDE__ARM_SPE_PKT_DECODER_H__
+#include <linux/bitfield.h>
#include <stddef.h>
#include <stdint.h>
@@ -104,6 +105,7 @@ enum arm_spe_events {
EV_LLC_MISS = 9,
EV_REMOTE_ACCESS = 10,
EV_ALIGNMENT = 11,
+ EV_TRANSACTIONAL = 16,
EV_PARTIAL_PREDICATE = 17,
EV_EMPTY_PREDICATE = 18,
};
@@ -116,8 +118,6 @@ enum arm_spe_events {
#define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
-#define SPE_OP_PKT_COND BIT(0)
-
#define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1))
#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG 0x0
#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP 0x4
@@ -148,7 +148,13 @@ enum arm_spe_events {
#define SPE_OP_PKT_SVE_PRED BIT(2)
#define SPE_OP_PKT_SVE_FP BIT(1)
-#define SPE_OP_PKT_IS_INDIRECT_BRANCH(v) (((v) & GENMASK_ULL(7, 1)) == 0x2)
+#define SPE_OP_PKT_CR_MASK GENMASK_ULL(4, 3)
+#define SPE_OP_PKT_CR_BL(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 1)
+#define SPE_OP_PKT_CR_RET(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 2)
+#define SPE_OP_PKT_CR_NON_BL_RET(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 3)
+#define SPE_OP_PKT_GCS BIT(2)
+#define SPE_OP_PKT_INDIRECT_BRANCH BIT(1)
+#define SPE_OP_PKT_COND BIT(0)
const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
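[Editor's note] The SPE_OP_PKT_CR_* macros above extract a two-bit call-return field with FIELD_GET and compare its value, instead of pattern-matching whole-byte masks. A portable re-implementation of the two macros for illustration; the kernel's versions live in <linux/bitfield.h> and <linux/bits.h>:

#include <stdint.h>
#include <stdio.h>

#define GENMASK_ULL(h, l) \
	(((~0ULL) >> (63 - (h))) & ((~0ULL) << (l)))
/* Divide by the mask's lowest set bit to shift the field down. */
#define FIELD_GET(mask, val) (((val) & (mask)) / ((mask) & -(mask)))

#define SPE_OP_PKT_CR_MASK	GENMASK_ULL(4, 3)

int main(void)
{
	uint64_t payload = 2ULL << 3;	/* CR = 2: a return, per the header above */

	printf("CR = %llu\n",
	       (unsigned long long)FIELD_GET(SPE_OP_PKT_CR_MASK, payload));
	return 0;
}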
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index dbf13f47879c..2a9775649cc2 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -37,6 +37,8 @@
#include "../../arch/arm64/include/asm/cputype.h"
#define MAX_TIMESTAMP (~0ULL)
+#define is_ldst_op(op) (!!((op) & ARM_SPE_OP_LDST))
+
struct arm_spe {
struct auxtrace auxtrace;
struct auxtrace_queues queues;
@@ -101,8 +103,21 @@ struct arm_spe_queue {
struct thread *thread;
u64 period_instructions;
u32 flags;
+ struct branch_stack *last_branch;
+};
+
+struct data_source_handle {
+ const struct midr_range *midr_ranges;
+ void (*ds_synth)(const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src);
};
+#define DS(range, func) \
+ { \
+ .midr_ranges = range, \
+ .ds_synth = arm_spe__synth_##func, \
+ }
+
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
unsigned char *buf, size_t len)
{
@@ -219,6 +234,17 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
params.get_trace = arm_spe_get_trace;
params.data = speq;
+ if (spe->synth_opts.last_branch) {
+ size_t sz = sizeof(struct branch_stack);
+
+ /* Allocate up to two entries for PBT + TGT */
+ sz += sizeof(struct branch_entry) *
+ min(spe->synth_opts.last_branch_sz, 2U);
+ speq->last_branch = zalloc(sz);
+ if (!speq->last_branch)
+ goto out_free;
+ }
+
/* create new decoder */
speq->decoder = arm_spe_decoder_new(&params);
if (!speq->decoder)
@@ -228,6 +254,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
out_free:
zfree(&speq->event_buf);
+ zfree(&speq->last_branch);
free(speq);
return NULL;
@@ -334,6 +361,88 @@ static void arm_spe_prep_sample(struct arm_spe *spe,
event->sample.header.size = sizeof(struct perf_event_header);
}
+static void arm_spe__prep_branch_stack(struct arm_spe_queue *speq)
+{
+ struct arm_spe *spe = speq->spe;
+ struct arm_spe_record *record = &speq->decoder->record;
+ struct branch_stack *bstack = speq->last_branch;
+ struct branch_flags *bs_flags;
+ unsigned int last_branch_sz = spe->synth_opts.last_branch_sz;
+ bool have_tgt = !!(speq->flags & PERF_IP_FLAG_BRANCH);
+ bool have_pbt = last_branch_sz >= (have_tgt + 1U) && record->prev_br_tgt;
+ size_t sz = sizeof(struct branch_stack) +
+ sizeof(struct branch_entry) * min(last_branch_sz, 2U) /* PBT + TGT */;
+ int i = 0;
+
+ /* Clean up branch stack */
+ memset(bstack, 0x0, sz);
+
+ if (!have_tgt && !have_pbt)
+ return;
+
+ if (have_tgt) {
+ bstack->entries[i].from = record->from_ip;
+ bstack->entries[i].to = record->to_ip;
+
+ bs_flags = &bstack->entries[i].flags;
+ bs_flags->value = 0;
+
+ if (record->op & ARM_SPE_OP_BR_CR_BL) {
+ if (record->op & ARM_SPE_OP_BR_COND)
+ bs_flags->type |= PERF_BR_COND_CALL;
+ else
+ bs_flags->type |= PERF_BR_CALL;
+ /*
+ * Indirect branch instruction without link (e.g. BR),
+ * take this case as function return.
+ */
+ } else if (record->op & ARM_SPE_OP_BR_CR_RET ||
+ record->op & ARM_SPE_OP_BR_INDIRECT) {
+ if (record->op & ARM_SPE_OP_BR_COND)
+ bs_flags->type |= PERF_BR_COND_RET;
+ else
+ bs_flags->type |= PERF_BR_RET;
+ } else if (record->op & ARM_SPE_OP_BR_CR_NON_BL_RET) {
+ if (record->op & ARM_SPE_OP_BR_COND)
+ bs_flags->type |= PERF_BR_COND;
+ else
+ bs_flags->type |= PERF_BR_UNCOND;
+ } else {
+ if (record->op & ARM_SPE_OP_BR_COND)
+ bs_flags->type |= PERF_BR_COND;
+ else
+ bs_flags->type |= PERF_BR_UNKNOWN;
+ }
+
+ if (record->type & ARM_SPE_BRANCH_MISS) {
+ bs_flags->mispred = 1;
+ bs_flags->predicted = 0;
+ } else {
+ bs_flags->mispred = 0;
+ bs_flags->predicted = 1;
+ }
+
+ if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
+ bs_flags->not_taken = 1;
+
+ if (record->type & ARM_SPE_IN_TXN)
+ bs_flags->in_tx = 1;
+
+ bs_flags->cycles = min(record->latency, 0xFFFFU);
+ i++;
+ }
+
+ if (have_pbt) {
+ bs_flags = &bstack->entries[i].flags;
+ bs_flags->type |= PERF_BR_UNKNOWN;
+ bstack->entries[i].to = record->prev_br_tgt;
+ i++;
+ }
+
+ bstack->nr = i;
+ bstack->hw_idx = -1ULL;
+}
+
static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
{
event->header.size = perf_event__sample_event_size(sample, type, 0);
@@ -367,8 +476,10 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
struct arm_spe *spe = speq->spe;
struct arm_spe_record *record = &speq->decoder->record;
union perf_event *event = speq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
+ int ret;
+ perf_sample__init(&sample, /*all=*/true);
arm_spe_prep_sample(spe, speq, event, &sample);
sample.id = spe_events_id;
@@ -378,7 +489,9 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
sample.data_src = data_src;
sample.weight = record->latency;
- return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ perf_sample__exit(&sample);
+ return ret;
}
static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
@@ -387,8 +500,10 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
struct arm_spe *spe = speq->spe;
struct arm_spe_record *record = &speq->decoder->record;
union perf_event *event = speq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
+ int ret;
+ perf_sample__init(&sample, /*all=*/true);
arm_spe_prep_sample(spe, speq, event, &sample);
sample.id = spe_events_id;
@@ -396,8 +511,11 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
sample.addr = record->to_ip;
sample.weight = record->latency;
sample.flags = speq->flags;
+ sample.branch_stack = speq->last_branch;
- return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ perf_sample__exit(&sample);
+ return ret;
}
static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
@@ -406,7 +524,8 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
struct arm_spe *spe = speq->spe;
struct arm_spe_record *record = &speq->decoder->record;
union perf_event *event = speq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
+ int ret;
/*
* Handles perf instruction sampling period.
@@ -416,6 +535,7 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
return 0;
speq->period_instructions = 0;
+ perf_sample__init(&sample, /*all=*/true);
arm_spe_prep_sample(spe, speq, event, &sample);
sample.id = spe_events_id;
@@ -426,8 +546,11 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
sample.period = spe->instructions_sample_period;
sample.weight = record->latency;
sample.flags = speq->flags;
+ sample.branch_stack = speq->last_branch;
- return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ perf_sample__exit(&sample);
+ return ret;
}
static const struct midr_range common_ds_encoding_cpus[] = {
@@ -443,6 +566,11 @@ static const struct midr_range common_ds_encoding_cpus[] = {
{},
};
+static const struct midr_range ampereone_ds_encoding_cpus[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+ {},
+};
+
static void arm_spe__sample_flags(struct arm_spe_queue *speq)
{
const struct arm_spe_record *record = &speq->decoder->record;
@@ -453,6 +581,26 @@ static void arm_spe__sample_flags(struct arm_spe_queue *speq)
if (record->type & ARM_SPE_BRANCH_MISS)
speq->flags |= PERF_IP_FLAG_BRANCH_MISS;
+
+ if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
+ speq->flags |= PERF_IP_FLAG_NOT_TAKEN;
+
+ if (record->type & ARM_SPE_IN_TXN)
+ speq->flags |= PERF_IP_FLAG_IN_TX;
+
+ if (record->op & ARM_SPE_OP_BR_COND)
+ speq->flags |= PERF_IP_FLAG_CONDITIONAL;
+
+ if (record->op & ARM_SPE_OP_BR_CR_BL)
+ speq->flags |= PERF_IP_FLAG_CALL;
+ else if (record->op & ARM_SPE_OP_BR_CR_RET)
+ speq->flags |= PERF_IP_FLAG_RETURN;
+ /*
+ * Indirect branch instruction without link (e.g. BR),
+ * take it as a function return.
+ */
+ else if (record->op & ARM_SPE_OP_BR_INDIRECT)
+ speq->flags |= PERF_IP_FLAG_RETURN;
}
}
@@ -532,6 +680,49 @@ static void arm_spe__synth_data_source_common(const struct arm_spe_record *recor
}
}
+/*
+ * Source is IMPDEF. Here we convert the source code used on AmpereOne cores
+ * to the common (Neoverse, Cortex) to avoid duplicating the decoding code.
+ */
+static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src)
+{
+ struct arm_spe_record common_record;
+
+ switch (record->source) {
+ case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
+ common_record.source = ARM_SPE_COMMON_DS_PEER_CORE;
+ break;
+ case ARM_SPE_AMPEREONE_SLC:
+ common_record.source = ARM_SPE_COMMON_DS_SYS_CACHE;
+ break;
+ case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
+ common_record.source = ARM_SPE_COMMON_DS_REMOTE;
+ break;
+ case ARM_SPE_AMPEREONE_DDR:
+ common_record.source = ARM_SPE_COMMON_DS_DRAM;
+ break;
+ case ARM_SPE_AMPEREONE_L1D:
+ common_record.source = ARM_SPE_COMMON_DS_L1D;
+ break;
+ case ARM_SPE_AMPEREONE_L2D:
+ common_record.source = ARM_SPE_COMMON_DS_L2;
+ break;
+ default:
+ pr_warning_once("AmpereOne: Unknown data source (0x%x)\n",
+ record->source);
+ return;
+ }
+
+ common_record.op = record->op;
+ arm_spe__synth_data_source_common(&common_record, data_src);
+}
+
+static const struct data_source_handle data_source_handles[] = {
+ DS(common_ds_encoding_cpus, data_source_common),
+ DS(ampereone_ds_encoding_cpus, data_source_ampereone),
+};
+
static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
union perf_mem_data_src *data_src)
{
@@ -555,12 +746,14 @@ static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
}
-static bool arm_spe__is_common_ds_encoding(struct arm_spe_queue *speq)
+static bool arm_spe__synth_ds(struct arm_spe_queue *speq,
+ const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src)
{
struct arm_spe *spe = speq->spe;
- bool is_in_cpu_list;
u64 *metadata = NULL;
- u64 midr = 0;
+ u64 midr;
+ unsigned int i;
/* Metadata version 1 assumes all CPUs are the same (old behavior) */
if (spe->metadata_ver == 1) {
@@ -592,18 +785,24 @@ static bool arm_spe__is_common_ds_encoding(struct arm_spe_queue *speq)
midr = metadata[ARM_SPE_CPU_MIDR];
}
- is_in_cpu_list = is_midr_in_range_list(midr, common_ds_encoding_cpus);
- if (is_in_cpu_list)
- return true;
- else
- return false;
+ for (i = 0; i < ARRAY_SIZE(data_source_handles); i++) {
+ if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges)) {
+ data_source_handles[i].ds_synth(record, data_src);
+ return true;
+ }
+ }
+
+ return false;
}
static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
const struct arm_spe_record *record)
{
union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
- bool is_common = arm_spe__is_common_ds_encoding(speq);
+
+ /* Only synthesize data source for LDST operations */
+ if (!is_ldst_op(record->op))
+ return 0;
if (record->op & ARM_SPE_OP_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;
@@ -612,9 +811,7 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
else
return 0;
- if (is_common)
- arm_spe__synth_data_source_common(record, &data_src);
- else
+ if (!arm_spe__synth_ds(speq, record, &data_src))
arm_spe__synth_memory_level(record, &data_src);
if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
@@ -687,6 +884,10 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
}
}
+ if (spe->synth_opts.last_branch &&
+ (spe->sample_branch || spe->sample_instructions))
+ arm_spe__prep_branch_stack(speq);
+
if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) {
err = arm_spe__synth_branch_sample(speq, spe->branch_id);
if (err)
@@ -705,7 +906,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
* When data_src is zero it means the record is not a memory operation,
* skip to synthesize memory sample for this case.
*/
- if (spe->sample_memory && data_src) {
+ if (spe->sample_memory && is_ldst_op(record->op)) {
err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
if (err)
return err;
@@ -1178,6 +1379,7 @@ static void arm_spe_free_queue(void *priv)
thread__zput(speq->thread);
arm_spe_decoder_free(speq->decoder);
zfree(&speq->event_buf);
+ zfree(&speq->last_branch);
free(speq);
}
@@ -1397,6 +1599,19 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
id += 1;
}
+ if (spe->synth_opts.last_branch) {
+ if (spe->synth_opts.last_branch_sz > 2)
+ pr_debug("Arm SPE supports only two bstack entries (PBT+TGT).\n");
+
+ attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ /*
+ * We don't use the hardware index, but the sample generation
+ * code uses the new format branch_stack with this field,
+ * so the event attributes must indicate that it's present.
+ */
+ attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+ }
+
if (spe->synth_opts.branches) {
spe->sample_branch = true;
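[Editor's note] arm_spe__synth_ds() above replaces the single common-encoding check with a table of {MIDR range list, handler} pairs, so supporting a new implementation-defined encoding (AmpereOne here) is one new DS() row. The dispatch shape, reduced; the range type and match helper are simplified stand-ins for midr_range/is_midr_in_range_list():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct midr_range_demo { uint64_t lo, hi; };	/* {0, 0} terminates */

struct ds_handle {
	const struct midr_range_demo *ranges;
	void (*synth)(uint64_t source);
};

static void synth_common(uint64_t s) { printf("common ds %#llx\n", (unsigned long long)s); }
static void synth_ampere(uint64_t s) { printf("ampere ds %#llx\n", (unsigned long long)s); }

static const struct midr_range_demo common_cpus[] = { { 0x100, 0x1ff }, { 0, 0 } };
static const struct midr_range_demo ampere_cpus[] = { { 0x200, 0x2ff }, { 0, 0 } };

static const struct ds_handle handles[] = {
	{ common_cpus, synth_common },
	{ ampere_cpus, synth_ampere },
};

static bool in_ranges(uint64_t midr, const struct midr_range_demo *r)
{
	for (; r->hi; r++)
		if (midr >= r->lo && midr <= r->hi)
			return true;
	return false;
}

static bool synth_ds(uint64_t midr, uint64_t source)
{
	for (size_t i = 0; i < sizeof(handles) / sizeof(handles[0]); i++) {
		if (in_ranges(midr, handles[i].ranges)) {
			handles[i].synth(source);
			return true;	/* else caller falls back to memory-level synth */
		}
	}
	return false;
}

int main(void)
{
	synth_ds(0x234, 0x9);	/* matches the "ampere" row */
	return 0;
}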
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c
index 4940be4a0569..958afe8b821e 100644
--- a/tools/perf/util/arm64-frame-pointer-unwind-support.c
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c
@@ -4,6 +4,7 @@
#include "event.h"
#include "perf_regs.h" // SMPL_REG_MASK
#include "unwind.h"
+#include <string.h>
#define perf_event_arm_regs perf_event_arm64_regs
#include "../../arch/arm64/include/uapi/asm/perf_regs.h"
@@ -16,8 +17,13 @@ struct entries {
static bool get_leaf_frame_caller_enabled(struct perf_sample *sample)
{
- return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs
- && sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_LR);
+ struct regs_dump *regs;
+
+ if (callchain_param.record_mode != CALLCHAIN_FP)
+ return false;
+
+ regs = perf_sample__user_regs(sample);
+ return regs->regs && regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_LR);
}
static int add_entry(struct unwind_entry *entry, void *arg)
@@ -32,7 +38,7 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr
{
int ret;
struct entries entries = {};
- struct regs_dump old_regs = sample->user_regs;
+ struct regs_dump old_regs, *regs;
if (!get_leaf_frame_caller_enabled(sample))
return 0;
@@ -42,19 +48,20 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr
* and set its mask. SP is not used when doing the unwinding but it
* still needs to be set to prevent failures.
*/
-
- if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) {
- sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC);
- sample->user_regs.cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1];
+ regs = perf_sample__user_regs(sample);
+ memcpy(&old_regs, regs, sizeof(*regs));
+ if (!(regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) {
+ regs->cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC);
+ regs->cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1];
}
- if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) {
- sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP);
- sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0;
+ if (!(regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) {
+ regs->cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP);
+ regs->cache_regs[PERF_REG_ARM64_SP] = 0;
}
ret = unwind__get_entries(add_entry, &entries, thread, sample, 2, true);
- sample->user_regs = old_regs;
+ memcpy(regs, &old_regs, sizeof(*regs));
if (ret || entries.length != 2)
return ret;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index ca8682966fae..03211c2623de 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -810,19 +810,76 @@ no_opt:
return auxtrace_validate_aux_sample_size(evlist, opts);
}
-void auxtrace_regroup_aux_output(struct evlist *evlist)
+static struct aux_action_opt {
+ const char *str;
+ u32 aux_action;
+ bool aux_event_opt;
+} aux_action_opts[] = {
+ {"start-paused", BIT(0), true},
+ {"pause", BIT(1), false},
+ {"resume", BIT(2), false},
+ {.str = NULL},
+};
+
+static const struct aux_action_opt *auxtrace_parse_aux_action_str(const char *str)
+{
+ const struct aux_action_opt *opt;
+
+ if (!str)
+ return NULL;
+
+ for (opt = aux_action_opts; opt->str; opt++)
+ if (!strcmp(str, opt->str))
+ return opt;
+
+ return NULL;
+}
+
+int auxtrace_parse_aux_action(struct evlist *evlist)
{
- struct evsel *evsel, *aux_evsel = NULL;
struct evsel_config_term *term;
+ struct evsel *aux_evsel = NULL;
+ struct evsel *evsel;
evlist__for_each_entry(evlist, evsel) {
- if (evsel__is_aux_event(evsel))
+ bool is_aux_event = evsel__is_aux_event(evsel);
+ const struct aux_action_opt *opt;
+
+ if (is_aux_event)
aux_evsel = evsel;
- term = evsel__get_config_term(evsel, AUX_OUTPUT);
+ term = evsel__get_config_term(evsel, AUX_ACTION);
+ if (!term) {
+ if (evsel__get_config_term(evsel, AUX_OUTPUT))
+ goto regroup;
+ continue;
+ }
+ opt = auxtrace_parse_aux_action_str(term->val.str);
+ if (!opt) {
+ pr_err("Bad aux-action '%s'\n", term->val.str);
+ return -EINVAL;
+ }
+ if (opt->aux_event_opt && !is_aux_event) {
+ pr_err("aux-action '%s' can only be used with AUX area event\n",
+ term->val.str);
+ return -EINVAL;
+ }
+ if (!opt->aux_event_opt && is_aux_event) {
+ pr_err("aux-action '%s' cannot be used for AUX area event itself\n",
+ term->val.str);
+ return -EINVAL;
+ }
+ evsel->core.attr.aux_action = opt->aux_action;
+regroup:
/* If possible, group with the AUX event */
- if (term && aux_evsel)
+ if (aux_evsel)
evlist__regroup(evlist, aux_evsel, evsel);
+ if (!evsel__is_aux_event(evsel__leader(evsel))) {
+		pr_err("Events with aux-action must have an AUX area event group leader\n");
+ return -EINVAL;
+ }
}
+
+ return 0;
}
struct auxtrace_record *__weak
@@ -1116,16 +1173,19 @@ static int auxtrace_queue_data_cb(struct perf_session *session,
if (!qd->samples || event->header.type != PERF_RECORD_SAMPLE)
return 0;
+ perf_sample__init(&sample, /*all=*/false);
err = evlist__parse_sample(session->evlist, event, &sample);
if (err)
- return err;
-
- if (!sample.aux_sample.size)
- return 0;
+ goto out;
- offset += sample.aux_sample.data - (void *)event;
+ if (sample.aux_sample.size) {
+ offset += sample.aux_sample.data - (void *)event;
- return session->auxtrace->queue_data(session, &sample, NULL, offset);
+ err = session->auxtrace->queue_data(session, &sample, NULL, offset);
+ }
+out:
+ perf_sample__exit(&sample);
+ return err;
}
int auxtrace_queue_data(struct perf_session *session, bool samples, bool events)
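
As a standalone illustration of the string-table lookup that auxtrace_parse_aux_action() performs above, here is a hedged reduction in plain C; the table contents mirror the patch, but none of this is the perf API itself:

    #include <stdio.h>
    #include <string.h>

    struct action_opt {
            const char *str;
            unsigned int bit;       /* value for attr.aux_action */
            int aux_event_only;     /* only valid on the AUX event itself */
    };

    static const struct action_opt opts[] = {
            { "start-paused", 1u << 0, 1 },
            { "pause",        1u << 1, 0 },
            { "resume",       1u << 2, 0 },
            { NULL, 0, 0 },
    };

    static const struct action_opt *find_opt(const char *str)
    {
            for (const struct action_opt *o = opts; o->str; o++)
                    if (!strcmp(str, o->str))
                            return o;
            return NULL;            /* unknown string -> "Bad aux-action" */
    }

    int main(void)
    {
            const struct action_opt *o = find_opt("resume");

            if (o)
                    printf("%s -> aux_action %#x\n", o->str, o->bit);
            return 0;
    }
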
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index dddaf4f3ffed..b0db84d27b25 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -578,7 +578,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
int auxtrace_parse_sample_options(struct auxtrace_record *itr,
struct evlist *evlist,
struct record_opts *opts, const char *str);
-void auxtrace_regroup_aux_output(struct evlist *evlist);
+int auxtrace_parse_aux_action(struct evlist *evlist);
int auxtrace_record__options(struct auxtrace_record *itr,
struct evlist *evlist,
struct record_opts *opts);
@@ -799,8 +799,10 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr __maybe_unused,
}
static inline
-void auxtrace_regroup_aux_output(struct evlist *evlist __maybe_unused)
+int auxtrace_parse_aux_action(struct evlist *evlist __maybe_unused)
{
+ pr_err("AUX area tracing not supported\n");
+ return -EINVAL;
}
static inline
diff --git a/tools/perf/util/bpf-filter.l b/tools/perf/util/bpf-filter.l
index f313404f95a9..6aa65ade3385 100644
--- a/tools/perf/util/bpf-filter.l
+++ b/tools/perf/util/bpf-filter.l
@@ -76,7 +76,7 @@ static int path_or_error(void)
num_dec [0-9]+
num_hex 0[Xx][0-9a-fA-F]+
space [ \t]+
-path [^ \t\n]+
+path [^ \t\n,]+
ident [_a-zA-Z][_a-zA-Z0-9]+
%%
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index 06d1c4018407..7324668cc83e 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -11,6 +11,7 @@
#include "util/debug.h"
#include "util/evlist.h"
#include "util/bpf_counter.h"
+#include "util/stat.h"
#include "util/bpf_skel/func_latency.skel.h"
@@ -36,6 +37,13 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
return -1;
}
+ skel->rodata->bucket_range = ftrace->bucket_range;
+ skel->rodata->min_latency = ftrace->min_latency;
+ skel->rodata->bucket_num = ftrace->bucket_num;
+ if (ftrace->bucket_range && ftrace->bucket_num) {
+ bpf_map__set_max_entries(skel->maps.latency, ftrace->bucket_num);
+ }
+
/* don't need to set cpu filter for system-wide mode */
if (ftrace->target.cpu_list) {
ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
@@ -83,6 +91,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
}
}
+ skel->bss->min = INT64_MAX;
+
skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
false, func->name);
if (IS_ERR(skel->links.func_begin)) {
@@ -118,8 +128,8 @@ int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
return 0;
}
-int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
- int buckets[])
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
+ int buckets[], struct stats *stats)
{
int i, fd, err;
u32 idx;
@@ -132,7 +142,7 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
if (hist == NULL)
return -ENOMEM;
- for (idx = 0; idx < NUM_BUCKET; idx++) {
+ for (idx = 0; idx < skel->rodata->bucket_num; idx++) {
err = bpf_map_lookup_elem(fd, &idx, hist);
if (err) {
buckets[idx] = 0;
@@ -143,6 +153,19 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
buckets[idx] += hist[i];
}
+ if (skel->bss->count) {
+ stats->mean = skel->bss->total / skel->bss->count;
+ stats->n = skel->bss->count;
+ stats->max = skel->bss->max;
+ stats->min = skel->bss->min;
+
+ if (!ftrace->use_nsec) {
+ stats->mean /= 1000;
+ stats->max /= 1000;
+ stats->min /= 1000;
+ }
+ }
+
free(hist);
return 0;
}
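
The read path above copies the BPF-side counters into a struct stats and converts to microseconds unless nanoseconds were requested. A sketch of that conversion, assuming a simplified stand-in for perf's struct stats (the real one lives in util/stat.h):

    struct stats_sketch {                   /* hypothetical stand-in */
            double mean;
            unsigned long long n, max, min;
    };

    static void fill_stats(struct stats_sketch *st, long long total,
                           long long count, long long max, long long min,
                           int use_nsec)
    {
            if (!count)
                    return;                   /* nothing measured */
            st->mean = (double)total / count; /* BPF side counts in nsec */
            st->n = count;
            st->max = max;
            st->min = min;
            if (!use_nsec) {                  /* report usec by default */
                    st->mean /= 1000;
                    st->max /= 1000;
                    st->min /= 1000;
            }
    }
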
diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c
index 6c7126b7670d..5cff755c71fa 100644
--- a/tools/perf/util/bpf_kwork.c
+++ b/tools/perf/util/bpf_kwork.c
@@ -285,7 +285,7 @@ static int add_work(struct perf_kwork *kwork,
(bpf_trace->get_work_name(key, &tmp.name)))
return -1;
- work = perf_kwork_add_work(kwork, tmp.class, &tmp);
+ work = kwork->add_work(kwork, tmp.class, &tmp);
if (work == NULL)
return -1;
diff --git a/tools/perf/util/bpf_kwork_top.c b/tools/perf/util/bpf_kwork_top.c
index 7261cad43468..b6f187dd9136 100644
--- a/tools/perf/util/bpf_kwork_top.c
+++ b/tools/perf/util/bpf_kwork_top.c
@@ -255,7 +255,7 @@ static int add_work(struct perf_kwork *kwork, struct work_key *key,
bpf_trace = kwork_class_bpf_supported_list[type];
tmp.class = bpf_trace->class;
- work = perf_kwork_add_work(kwork, tmp.class, &tmp);
+ work = kwork->add_work(kwork, tmp.class, &tmp);
if (!work)
return -1;
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index 41a1ad087895..5af8f6d1bc95 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -2,6 +2,7 @@
#include "util/cgroup.h"
#include "util/debug.h"
#include "util/evlist.h"
+#include "util/hashmap.h"
#include "util/machine.h"
#include "util/map.h"
#include "util/symbol.h"
@@ -12,17 +13,106 @@
#include <linux/zalloc.h>
#include <linux/string.h>
#include <bpf/bpf.h>
+#include <bpf/btf.h>
#include <inttypes.h>
#include "bpf_skel/lock_contention.skel.h"
#include "bpf_skel/lock_data.h"
static struct lock_contention_bpf *skel;
+static bool has_slab_iter;
+static struct hashmap slab_hash;
+
+static size_t slab_cache_hash(long key, void *ctx __maybe_unused)
+{
+ return key;
+}
+
+static bool slab_cache_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+ return key1 == key2;
+}
+
+static void check_slab_cache_iter(struct lock_contention *con)
+{
+ struct btf *btf = btf__load_vmlinux_btf();
+ s32 ret;
+
+ hashmap__init(&slab_hash, slab_cache_hash, slab_cache_equal, /*ctx=*/NULL);
+
+ if (btf == NULL) {
+ pr_debug("BTF loading failed: %s\n", strerror(errno));
+ return;
+ }
+
+ ret = btf__find_by_name_kind(btf, "bpf_iter__kmem_cache", BTF_KIND_STRUCT);
+ if (ret < 0) {
+ bpf_program__set_autoload(skel->progs.slab_cache_iter, false);
+ pr_debug("slab cache iterator is not available: %d\n", ret);
+ goto out;
+ }
+
+ has_slab_iter = true;
+
+ bpf_map__set_max_entries(skel->maps.slab_caches, con->map_nr_entries);
+out:
+ btf__free(btf);
+}
+
+static void run_slab_cache_iter(void)
+{
+ int fd;
+ char buf[256];
+ long key, *prev_key;
+
+ if (!has_slab_iter)
+ return;
+
+ fd = bpf_iter_create(bpf_link__fd(skel->links.slab_cache_iter));
+ if (fd < 0) {
+ pr_debug("cannot create slab cache iter: %d\n", fd);
+ return;
+ }
+
+	/* Reading from the iterator fd runs the BPF program */
+ while (read(fd, buf, sizeof(buf)) > 0)
+ continue;
+
+ close(fd);
+
+ /* Read the slab cache map and build a hash with IDs */
+ fd = bpf_map__fd(skel->maps.slab_caches);
+ prev_key = NULL;
+ while (!bpf_map_get_next_key(fd, prev_key, &key)) {
+ struct slab_cache_data *data;
+
+ data = malloc(sizeof(*data));
+ if (data == NULL)
+ break;
+
+ if (bpf_map_lookup_elem(fd, &key, data) < 0)
+ break;
+
+ hashmap__add(&slab_hash, data->id, data);
+ prev_key = &key;
+ }
+}
+
+static void exit_slab_cache_iter(void)
+{
+ struct hashmap_entry *cur;
+ unsigned bkt;
+
+ hashmap__for_each_entry(&slab_hash, cur, bkt)
+ free(cur->pvalue);
+
+ hashmap__clear(&slab_hash);
+}
int lock_contention_prepare(struct lock_contention *con)
{
int i, fd;
- int ncpus = 1, ntasks = 1, ntypes = 1, naddrs = 1, ncgrps = 1;
+ int ncpus = 1, ntasks = 1, ntypes = 1, naddrs = 1, ncgrps = 1, nslabs = 1;
struct evlist *evlist = con->evlist;
struct target *target = con->target;
@@ -41,10 +131,20 @@ int lock_contention_prepare(struct lock_contention *con)
else
bpf_map__set_max_entries(skel->maps.task_data, 1);
- if (con->save_callstack)
+ if (con->save_callstack) {
bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
- else
+ if (con->owner) {
+ bpf_map__set_value_size(skel->maps.stack_buf, con->max_stack * sizeof(u64));
+ bpf_map__set_key_size(skel->maps.owner_stacks,
+ con->max_stack * sizeof(u64));
+ bpf_map__set_max_entries(skel->maps.owner_stacks, con->map_nr_entries);
+ bpf_map__set_max_entries(skel->maps.owner_data, con->map_nr_entries);
+ bpf_map__set_max_entries(skel->maps.owner_stat, con->map_nr_entries);
+ skel->rodata->max_stack = con->max_stack;
+ }
+ } else {
bpf_map__set_max_entries(skel->maps.stacks, 1);
+ }
if (target__has_cpu(target)) {
skel->rodata->has_cpu = 1;
@@ -109,6 +209,15 @@ int lock_contention_prepare(struct lock_contention *con)
skel->rodata->use_cgroup_v2 = 1;
}
+ check_slab_cache_iter(con);
+
+ if (con->filters->nr_slabs && has_slab_iter) {
+ skel->rodata->has_slab = 1;
+ nslabs = con->filters->nr_slabs;
+ }
+
+ bpf_map__set_max_entries(skel->maps.slab_filter, nslabs);
+
if (lock_contention_bpf__load(skel) < 0) {
pr_err("Failed to load lock-contention BPF skeleton\n");
return -1;
@@ -179,6 +288,36 @@ int lock_contention_prepare(struct lock_contention *con)
bpf_program__set_autoload(skel->progs.collect_lock_syms, false);
lock_contention_bpf__attach(skel);
+
+ /* run the slab iterator after attaching */
+ run_slab_cache_iter();
+
+ if (con->filters->nr_slabs) {
+ u8 val = 1;
+ int cache_fd;
+ long key, *prev_key;
+
+ fd = bpf_map__fd(skel->maps.slab_filter);
+
+		/* Read the slab cache map and set the filter for matching cache addresses */
+ cache_fd = bpf_map__fd(skel->maps.slab_caches);
+ prev_key = NULL;
+ while (!bpf_map_get_next_key(cache_fd, prev_key, &key)) {
+ struct slab_cache_data data;
+
+ if (bpf_map_lookup_elem(cache_fd, &key, &data) < 0)
+ break;
+
+ for (i = 0; i < con->filters->nr_slabs; i++) {
+ if (!strcmp(con->filters->slabs[i], data.name)) {
+ bpf_map_update_elem(fd, &key, &val, BPF_ANY);
+ break;
+ }
+ }
+ prev_key = &key;
+ }
+ }
+
return 0;
}
@@ -321,7 +460,6 @@ static const char *lock_contention_get_name(struct lock_contention *con,
{
int idx = 0;
u64 addr;
- const char *name = "";
static char name_buf[KSYM_NAME_LEN];
struct symbol *sym;
struct map *kmap;
@@ -336,17 +474,19 @@ static const char *lock_contention_get_name(struct lock_contention *con,
if (pid) {
struct thread *t = machine__findnew_thread(machine, /*pid=*/-1, pid);
- if (t == NULL)
- return name;
- if (!bpf_map_lookup_elem(task_fd, &pid, &task) &&
- thread__set_comm(t, task.comm, /*timestamp=*/0))
- name = task.comm;
+ if (t != NULL &&
+ !bpf_map_lookup_elem(task_fd, &pid, &task) &&
+ thread__set_comm(t, task.comm, /*timestamp=*/0)) {
+ snprintf(name_buf, sizeof(name_buf), "%s", task.comm);
+ return name_buf;
+ }
}
- return name;
+ return "";
}
if (con->aggr_mode == LOCK_AGGR_ADDR) {
int lock_fd = bpf_map__fd(skel->maps.lock_syms);
+ struct slab_cache_data *slab_data;
/* per-process locks set upper bits of the flags */
if (flags & LCD_F_MMAP_LOCK)
@@ -365,6 +505,12 @@ static const char *lock_contention_get_name(struct lock_contention *con,
return "rq_lock";
}
+	/* look up slab_hash for dynamic locks in a slab object */
+ if (hashmap__find(&slab_hash, flags & LCB_F_SLAB_ID_MASK, &slab_data)) {
+ snprintf(name_buf, sizeof(name_buf), "&%s", slab_data->name);
+ return name_buf;
+ }
+
return "";
}
@@ -403,6 +549,63 @@ static const char *lock_contention_get_name(struct lock_contention *con,
return name_buf;
}
+struct lock_stat *pop_owner_stack_trace(struct lock_contention *con)
+{
+ int stacks_fd, stat_fd;
+ u64 *stack_trace = NULL;
+ s32 stack_id;
+ struct contention_key ckey = {};
+ struct contention_data cdata = {};
+ size_t stack_size = con->max_stack * sizeof(*stack_trace);
+ struct lock_stat *st = NULL;
+
+ stacks_fd = bpf_map__fd(skel->maps.owner_stacks);
+ stat_fd = bpf_map__fd(skel->maps.owner_stat);
+ if (!stacks_fd || !stat_fd)
+ goto out_err;
+
+ stack_trace = zalloc(stack_size);
+ if (stack_trace == NULL)
+ goto out_err;
+
+ if (bpf_map_get_next_key(stacks_fd, NULL, stack_trace))
+ goto out_err;
+
+ bpf_map_lookup_elem(stacks_fd, stack_trace, &stack_id);
+ ckey.stack_id = stack_id;
+ bpf_map_lookup_elem(stat_fd, &ckey, &cdata);
+
+ st = zalloc(sizeof(struct lock_stat));
+ if (!st)
+ goto out_err;
+
+ st->name = strdup(stack_trace[0] ? lock_contention_get_name(con, NULL, stack_trace, 0) :
+ "unknown");
+ if (!st->name)
+ goto out_err;
+
+ st->flags = cdata.flags;
+ st->nr_contended = cdata.count;
+ st->wait_time_total = cdata.total_time;
+ st->wait_time_max = cdata.max_time;
+ st->wait_time_min = cdata.min_time;
+ st->callstack = stack_trace;
+
+ if (cdata.count)
+ st->avg_wait_time = cdata.total_time / cdata.count;
+
+ bpf_map_delete_elem(stacks_fd, stack_trace);
+ bpf_map_delete_elem(stat_fd, &ckey);
+
+ return st;
+
+out_err:
+ free(stack_trace);
+ free(st);
+
+ return NULL;
+}
+
int lock_contention_read(struct lock_contention *con)
{
int fd, stack, err = 0;
@@ -458,7 +661,7 @@ int lock_contention_read(struct lock_contention *con)
if (con->save_callstack) {
bpf_map_lookup_elem(stack, &key.stack_id, stack_trace);
- if (!match_callstack_filter(machine, stack_trace)) {
+ if (!match_callstack_filter(machine, stack_trace, con->max_stack)) {
con->nr_filtered += data.count;
goto next;
}
@@ -539,5 +742,7 @@ int lock_contention_finish(struct lock_contention *con)
cgroup__put(cgrp);
}
+ exit_slab_cache_iter();
+
return 0;
}
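
The slab-cache bookkeeping above leans on libbpf's generic hashmap. A hedged sketch of the same init/add/find/clear cycle, assuming tools/lib/bpf/hashmap.h with its long-typed keys and values:

    #include <stdbool.h>
    #include <stdlib.h>
    #include "hashmap.h"            /* tools/lib/bpf/hashmap.h */

    struct slab_cache_data { unsigned int id; char name[28]; };

    static size_t hash_id(long key, void *ctx) { (void)ctx; return key; }
    static bool equal_id(long k1, long k2, void *ctx) { (void)ctx; return k1 == k2; }

    static struct hashmap slab_hash;

    int main(void)
    {
            struct slab_cache_data *d = calloc(1, sizeof(*d));
            struct slab_cache_data *found;

            hashmap__init(&slab_hash, hash_id, equal_id, /*ctx=*/NULL);

            d->id = 0x50000;                        /* key: pre-shifted slab id */
            hashmap__add(&slab_hash, d->id, d);     /* value: owned pointer */

            if (hashmap__find(&slab_hash, 0x50000, &found))
                    found->id++;                    /* hit */

            free(d);
            hashmap__clear(&slab_hash);             /* entries only, not values */
            return 0;
    }
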
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
index a590a8ac1f9d..4269b41d1771 100644
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -100,6 +100,11 @@ static void check_sched_switch_args(void)
const struct btf_type *t1, *t2, *t3;
u32 type_id;
+ if (!btf) {
+ pr_debug("Missing btf, check if CONFIG_DEBUG_INFO_BTF is enabled\n");
+ goto cleanup;
+ }
+
type_id = btf__find_by_name_kind(btf, "btf_trace_sched_switch",
BTF_KIND_TYPEDEF);
if ((s32)type_id < 0)
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index f613dc9cb123..e731a79a753a 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -38,9 +38,19 @@ struct {
int enabled = 0;
+// stats
+__s64 total;
+__s64 count;
+__s64 max;
+__s64 min;
+
const volatile int has_cpu = 0;
const volatile int has_task = 0;
const volatile int use_nsec = 0;
+const volatile unsigned int bucket_range;
+const volatile unsigned int min_latency;
+const volatile unsigned int max_latency;
+const volatile unsigned int bucket_num = NUM_BUCKET;
SEC("kprobe/func")
int BPF_PROG(func_begin)
@@ -92,7 +102,8 @@ int BPF_PROG(func_end)
start = bpf_map_lookup_elem(&functime, &tid);
if (start) {
__s64 delta = bpf_ktime_get_ns() - *start;
- __u32 key;
+ __u64 val = delta;
+ __u32 key = 0;
__u64 *hist;
bpf_map_delete_elem(&functime, &tid);
@@ -100,17 +111,46 @@ int BPF_PROG(func_end)
if (delta < 0)
return 0;
+ if (bucket_range != 0) {
+ val = delta / cmp_base;
+
+ if (min_latency > 0) {
+ if (val > min_latency)
+ val -= min_latency;
+ else
+ goto do_lookup;
+ }
+
+		// A val of 0 means the delta was less than one unit (ms or ns),
+		// or below the requested min latency; it lands in bucket 0.
+ if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units )
+ key = val / bucket_range + 1;
+ if (key >= bucket_num)
+ key = bucket_num - 1;
+ }
+
+ goto do_lookup;
+ }
// calculate index using delta
- for (key = 0; key < (NUM_BUCKET - 1); key++) {
+ for (key = 0; key < (bucket_num - 1); key++) {
if (delta < (cmp_base << key))
break;
}
+do_lookup:
hist = bpf_map_lookup_elem(&latency, &key);
if (!hist)
return 0;
- *hist += 1;
+ __sync_fetch_and_add(hist, 1);
+
+ __sync_fetch_and_add(&total, delta); // always in nsec
+ __sync_fetch_and_add(&count, 1);
+
+ if (delta > max)
+ max = delta;
+ if (delta < min)
+ min = delta;
}
return 0;
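
A user-space model of the linear bucketing introduced above may help; val is the latency already divided by cmp_base (so in ms or ns units), and the parameters mirror the BPF globals:

    static unsigned int bucket_key(unsigned long long val,
                                   unsigned int bucket_range,
                                   unsigned int min_latency,
                                   unsigned int bucket_num)
    {
            unsigned int key = 0;

            if (min_latency > 0) {
                    if (val <= min_latency)
                            return 0;       /* below range: first bucket */
                    val -= min_latency;
            }
            if (val > 0) {                  /* 1st entry: [1 unit .. bucket_range) */
                    key = val / bucket_range + 1;
                    if (key >= bucket_num)
                            key = bucket_num - 1;   /* clamp to overflow bucket */
            }
            return key;
    }
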
diff --git a/tools/perf/util/bpf_skel/kwork_top.bpf.c b/tools/perf/util/bpf_skel/kwork_top.bpf.c
index 594da91965a2..73e32e063030 100644
--- a/tools/perf/util/bpf_skel/kwork_top.bpf.c
+++ b/tools/perf/util/bpf_skel/kwork_top.bpf.c
@@ -18,7 +18,9 @@ enum kwork_class_type {
};
#define MAX_ENTRIES 102400
-#define MAX_NR_CPUS 2048
+#ifndef MAX_NR_CPUS
+#define MAX_NR_CPUS 4096
+#endif
#define PF_KTHREAD 0x00200000
#define MAX_COMMAND_LEN 16
diff --git a/tools/perf/util/bpf_skel/kwork_trace.bpf.c b/tools/perf/util/bpf_skel/kwork_trace.bpf.c
index cbd79bc4b330..9ce9c8dddc4b 100644
--- a/tools/perf/util/bpf_skel/kwork_trace.bpf.c
+++ b/tools/perf/util/bpf_skel/kwork_trace.bpf.c
@@ -80,7 +80,7 @@ static __always_inline int local_strncmp(const char *s1,
for (i = 0; i < sz; i++) {
ret = (unsigned char)s1[i] - (unsigned char)s2[i];
- if (ret || !s1[i] || !s2[i])
+ if (ret || !s1[i])
break;
}
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 1069bda5d733..69be7a4234e0 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -27,6 +27,38 @@ struct {
__uint(max_entries, MAX_ENTRIES);
} stacks SEC(".maps");
+/* buffer for owner stacktrace */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, 1);
+} stack_buf SEC(".maps");
+
+/* a map translating an owner stacktrace to an owner stack id */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64)); // owner stacktrace
+ __uint(value_size, sizeof(__s32)); // owner stack id
+ __uint(max_entries, 1);
+} owner_stacks SEC(".maps");
+
+/* a map from lock address to owner data */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64)); // lock address
+ __uint(value_size, sizeof(struct owner_tracing_data));
+ __uint(max_entries, 1);
+} owner_data SEC(".maps");
+
+/* a map for contention_key (stores owner stack id) to contention data */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(struct contention_key));
+ __uint(value_size, sizeof(struct contention_data));
+ __uint(max_entries, 1);
+} owner_stat SEC(".maps");
+
/* maintain timestamp at the beginning of contention */
struct {
__uint(type, BPF_MAP_TYPE_HASH);
@@ -100,6 +132,20 @@ struct {
__uint(max_entries, 1);
} cgroup_filter SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(long));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} slab_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(long));
+ __uint(value_size, sizeof(struct slab_cache_data));
+ __uint(max_entries, 1);
+} slab_caches SEC(".maps");
+
struct rw_semaphore___old {
struct task_struct *owner;
} __attribute__((preserve_access_index));
@@ -116,16 +162,20 @@ struct mm_struct___new {
struct rw_semaphore mmap_lock;
} __attribute__((preserve_access_index));
+extern struct kmem_cache *bpf_get_kmem_cache(u64 addr) __ksym __weak;
+
/* control flags */
const volatile int has_cpu;
const volatile int has_task;
const volatile int has_type;
const volatile int has_addr;
const volatile int has_cgroup;
+const volatile int has_slab;
const volatile int needs_callstack;
const volatile int stack_skip;
const volatile int lock_owner;
const volatile int use_cgroup_v2;
+const volatile int max_stack;
/* determine the key of lock stat */
const volatile int aggr_mode;
@@ -136,6 +186,8 @@ int perf_subsys_id = -1;
__u64 end_ts;
+__u32 slab_cache_id;
+
/* error stat */
int task_fail;
int stack_fail;
@@ -145,6 +197,9 @@ int data_fail;
int task_map_full;
int data_map_full;
+struct task_struct *bpf_task_from_pid(s32 pid) __ksym __weak;
+void bpf_task_release(struct task_struct *p) __ksym __weak;
+
static inline __u64 get_current_cgroup_id(void)
{
struct task_struct *task;
@@ -202,7 +257,7 @@ static inline int can_record(u64 *ctx)
__u64 addr = ctx[0];
ok = bpf_map_lookup_elem(&addr_filter, &addr);
- if (!ok)
+ if (!ok && !has_slab)
return 0;
}
@@ -215,6 +270,17 @@ static inline int can_record(u64 *ctx)
return 0;
}
+ if (has_slab && bpf_get_kmem_cache) {
+ __u8 *ok;
+ __u64 addr = ctx[0];
+ long kmem_cache_addr;
+
+ kmem_cache_addr = (long)bpf_get_kmem_cache(addr);
+ ok = bpf_map_lookup_elem(&slab_filter, &kmem_cache_addr);
+ if (!ok)
+ return 0;
+ }
+
return 1;
}
@@ -357,6 +423,61 @@ static inline struct tstamp_data *get_tstamp_elem(__u32 flags)
return pelem;
}
+static inline s32 get_owner_stack_id(u64 *stacktrace)
+{
+ s32 *id, new_id;
+ static s64 id_gen = 1;
+
+ id = bpf_map_lookup_elem(&owner_stacks, stacktrace);
+ if (id)
+ return *id;
+
+ new_id = (s32)__sync_fetch_and_add(&id_gen, 1);
+
+ bpf_map_update_elem(&owner_stacks, stacktrace, &new_id, BPF_NOEXIST);
+
+ id = bpf_map_lookup_elem(&owner_stacks, stacktrace);
+ if (id)
+ return *id;
+
+ return -1;
+}
+
+static inline void update_contention_data(struct contention_data *data, u64 duration, u32 count)
+{
+ __sync_fetch_and_add(&data->total_time, duration);
+ __sync_fetch_and_add(&data->count, count);
+
+ /* FIXME: need atomic operations */
+ if (data->max_time < duration)
+ data->max_time = duration;
+ if (data->min_time > duration)
+ data->min_time = duration;
+}
+
+static inline void update_owner_stat(u32 id, u64 duration, u32 flags)
+{
+ struct contention_key key = {
+ .stack_id = id,
+ .pid = 0,
+ .lock_addr_or_cgroup = 0,
+ };
+ struct contention_data *data = bpf_map_lookup_elem(&owner_stat, &key);
+
+ if (!data) {
+ struct contention_data first = {
+ .total_time = duration,
+ .max_time = duration,
+ .min_time = duration,
+ .count = 1,
+ .flags = flags,
+ };
+ bpf_map_update_elem(&owner_stat, &key, &first, BPF_NOEXIST);
+ } else {
+ update_contention_data(data, duration, 1);
+ }
+}
+
SEC("tp_btf/contention_begin")
int contention_begin(u64 *ctx)
{
@@ -374,6 +495,72 @@ int contention_begin(u64 *ctx)
pelem->flags = (__u32)ctx[1];
if (needs_callstack) {
+ u32 i = 0;
+ u32 id = 0;
+ int owner_pid;
+ u64 *buf;
+ struct task_struct *task;
+ struct owner_tracing_data *otdata;
+
+ if (!lock_owner)
+ goto skip_owner;
+
+ task = get_lock_owner(pelem->lock, pelem->flags);
+ if (!task)
+ goto skip_owner;
+
+ owner_pid = BPF_CORE_READ(task, pid);
+
+ buf = bpf_map_lookup_elem(&stack_buf, &i);
+ if (!buf)
+ goto skip_owner;
+ for (i = 0; i < max_stack; i++)
+ buf[i] = 0x0;
+
+ if (!bpf_task_from_pid)
+ goto skip_owner;
+
+ task = bpf_task_from_pid(owner_pid);
+ if (!task)
+ goto skip_owner;
+
+ bpf_get_task_stack(task, buf, max_stack * sizeof(unsigned long), 0);
+ bpf_task_release(task);
+
+ otdata = bpf_map_lookup_elem(&owner_data, &pelem->lock);
+ id = get_owner_stack_id(buf);
+
+ /*
+	 * Contention has just started, or (corner case) `lock` is owned by a
+	 * process other than `owner_pid`. Treat the corner case as an
+	 * unexpected internal error and just ignore the previous tracing
+	 * record.
+ */
+ if (!otdata || otdata->pid != owner_pid) {
+ struct owner_tracing_data first = {
+ .pid = owner_pid,
+ .timestamp = pelem->timestamp,
+ .count = 1,
+ .stack_id = id,
+ };
+ bpf_map_update_elem(&owner_data, &pelem->lock, &first, BPF_ANY);
+ }
+	/* Contention is ongoing and a new waiter joins */
+ else {
+ __sync_fetch_and_add(&otdata->count, 1);
+
+ /*
+		 * The owner is the same, but its stacktrace might have changed.
+		 * In that case, store/update `owner_stat` based on the current
+		 * owner stack id.
+ */
+ if (id != otdata->stack_id) {
+ update_owner_stat(id, pelem->timestamp - otdata->timestamp,
+ pelem->flags);
+
+ otdata->timestamp = pelem->timestamp;
+ otdata->stack_id = id;
+ }
+ }
+skip_owner:
pelem->stack_id = bpf_get_stackid(ctx, &stacks,
BPF_F_FAST_STACK_CMP | stack_skip);
if (pelem->stack_id < 0)
@@ -410,6 +597,7 @@ int contention_end(u64 *ctx)
struct tstamp_data *pelem;
struct contention_key key = {};
struct contention_data *data;
+ __u64 timestamp;
__u64 duration;
bool need_delete = false;
@@ -437,12 +625,88 @@ int contention_end(u64 *ctx)
need_delete = true;
}
- duration = bpf_ktime_get_ns() - pelem->timestamp;
+ timestamp = bpf_ktime_get_ns();
+ duration = timestamp - pelem->timestamp;
if ((__s64)duration < 0) {
__sync_fetch_and_add(&time_fail, 1);
goto out;
}
+ if (needs_callstack && lock_owner) {
+ struct owner_tracing_data *otdata = bpf_map_lookup_elem(&owner_data, &pelem->lock);
+
+ if (!otdata)
+ goto skip_owner;
+
+ /* Update `owner_stat` */
+ update_owner_stat(otdata->stack_id, timestamp - otdata->timestamp, pelem->flags);
+
+ /* No contention is occurring, delete `lock` entry in `owner_data` */
+ if (otdata->count <= 1)
+ bpf_map_delete_elem(&owner_data, &pelem->lock);
+ /*
+ * Contention is still ongoing, with a new owner (current task). `owner_data`
+ * should be updated accordingly.
+ */
+ else {
+ u32 i = 0;
+ s32 ret = (s32)ctx[1];
+ u64 *buf;
+
+ otdata->timestamp = timestamp;
+ __sync_fetch_and_add(&otdata->count, -1);
+
+ buf = bpf_map_lookup_elem(&stack_buf, &i);
+ if (!buf)
+ goto skip_owner;
+ for (i = 0; i < (u32)max_stack; i++)
+ buf[i] = 0x0;
+
+ /*
+ * `ret` has the return code of the lock function.
+			 * If `ret` is negative, the current task gave up waiting
+			 * without acquiring the lock. The owner is unchanged, but we
+			 * still need to update the owner stack.
+ */
+ if (ret < 0) {
+ s32 id = 0;
+ struct task_struct *task;
+
+ if (!bpf_task_from_pid)
+ goto skip_owner;
+
+ task = bpf_task_from_pid(otdata->pid);
+ if (!task)
+ goto skip_owner;
+
+ bpf_get_task_stack(task, buf,
+ max_stack * sizeof(unsigned long), 0);
+ bpf_task_release(task);
+
+ id = get_owner_stack_id(buf);
+
+ /*
+				 * If the owner stack has changed, update the owner stack id for this lock.
+ */
+ if (id != otdata->stack_id)
+ otdata->stack_id = id;
+ }
+ /*
+ * Otherwise, update tracing data with the current task, which is the new
+ * owner.
+ */
+ else {
+ otdata->pid = pid;
+ /*
+				 * Don't retrieve the callstack here: it would only show
+				 * where the current task acquired the lock, which adds no
+				 * useful information. Simply assign -1 to invalidate it.
+ */
+ otdata->stack_id = -1;
+ }
+ }
+ }
+skip_owner:
switch (aggr_mode) {
case LOCK_AGGR_CALLER:
key.stack_id = pelem->stack_id;
@@ -487,8 +751,28 @@ int contention_end(u64 *ctx)
};
int err;
- if (aggr_mode == LOCK_AGGR_ADDR)
- first.flags |= check_lock_type(pelem->lock, pelem->flags);
+ if (aggr_mode == LOCK_AGGR_ADDR) {
+ first.flags |= check_lock_type(pelem->lock,
+ pelem->flags & LCB_F_TYPE_MASK);
+
+ /* Check if it's from a slab object */
+ if (bpf_get_kmem_cache) {
+ struct kmem_cache *s;
+ struct slab_cache_data *d;
+
+ s = bpf_get_kmem_cache(pelem->lock);
+ if (s != NULL) {
+ /*
+ * Save the ID of the slab cache in the flags
+					 * (instead of the full address) to save
+					 * space in the contention_data.
+ */
+ d = bpf_map_lookup_elem(&slab_caches, &s);
+ if (d != NULL)
+ first.flags |= d->id;
+ }
+ }
+ }
err = bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
if (err < 0) {
@@ -506,14 +790,7 @@ int contention_end(u64 *ctx)
}
found:
- __sync_fetch_and_add(&data->total_time, duration);
- __sync_fetch_and_add(&data->count, 1);
-
- /* FIXME: need atomic operations */
- if (data->max_time < duration)
- data->max_time = duration;
- if (data->min_time > duration)
- data->min_time = duration;
+ update_contention_data(data, duration, 1);
out:
pelem->lock = 0;
@@ -563,4 +840,43 @@ int BPF_PROG(end_timestamp)
return 0;
}
+/*
+ * bpf_iter__kmem_cache was added recently, so old kernels don't have it in
+ * their vmlinux.h. But we cannot simply define it here either, since that
+ * would cause a redefinition error when building against newer kernels.
+ *
+ * So it uses a CO-RE trick to access the member only if it has the type.
+ * This will support both old and new kernels without compiler errors.
+ */
+struct bpf_iter__kmem_cache___new {
+ struct kmem_cache *s;
+} __attribute__((preserve_access_index));
+
+SEC("iter/kmem_cache")
+int slab_cache_iter(void *ctx)
+{
+ struct kmem_cache *s = NULL;
+ struct slab_cache_data d;
+ const char *nameptr;
+
+ if (bpf_core_type_exists(struct bpf_iter__kmem_cache)) {
+ struct bpf_iter__kmem_cache___new *iter = ctx;
+
+ s = iter->s;
+ }
+
+ if (s == NULL)
+ return 0;
+
+ nameptr = s->name;
+ bpf_probe_read_kernel_str(d.name, sizeof(d.name), nameptr);
+
+ d.id = ++slab_cache_id << LCB_F_SLAB_ID_SHIFT;
+ if (d.id >= LCB_F_SLAB_ID_END)
+ return 0;
+
+ bpf_map_update_elem(&slab_caches, &s, &d, BPF_NOEXIST);
+ return 0;
+}
+
char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
index de12892f992f..15f5743bd409 100644
--- a/tools/perf/util/bpf_skel/lock_data.h
+++ b/tools/perf/util/bpf_skel/lock_data.h
@@ -3,6 +3,13 @@
#ifndef UTIL_BPF_SKEL_LOCK_DATA_H
#define UTIL_BPF_SKEL_LOCK_DATA_H
+struct owner_tracing_data {
+ u32 pid; // Who has the lock.
+ u32 count; // How many waiters for this lock.
+	u64 timestamp; // When the current owner acquired the lock while contention was ongoing.
+	s32 stack_id; // Key into `owner_stat`; stored as the value in `owner_stacks`.
+};
+
struct tstamp_data {
u64 timestamp;
u64 lock;
@@ -32,7 +39,15 @@ struct contention_task_data {
#define LCD_F_MMAP_LOCK (1U << 31)
#define LCD_F_SIGHAND_LOCK (1U << 30)
-#define LCB_F_MAX_FLAGS (1U << 7)
+#define LCB_F_SLAB_ID_SHIFT 16
+#define LCB_F_SLAB_ID_START (1U << 16)
+#define LCB_F_SLAB_ID_END (1U << 26)
+#define LCB_F_SLAB_ID_MASK 0x03FF0000U
+
+#define LCB_F_TYPE_MAX (1U << 7)
+#define LCB_F_TYPE_MASK 0x0000007FU
+
+#define SLAB_NAME_MAX 28
struct contention_data {
u64 total_time;
@@ -54,4 +69,9 @@ enum lock_class_sym {
LOCK_CLASS_RQLOCK,
};
+struct slab_cache_data {
+ u32 id;
+ char name[SLAB_NAME_MAX];
+};
+
#endif /* UTIL_BPF_SKEL_LOCK_DATA_H */
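
Since the flags word now multiplexes the lock type and the slab cache id, a short worked example of the bit layout may help. Note the patch stores the id pre-shifted, so this sketch only illustrates how the two fields coexist:

    #include <stdio.h>

    #define LCB_F_TYPE_MASK     0x0000007FU
    #define LCB_F_SLAB_ID_SHIFT 16
    #define LCB_F_SLAB_ID_MASK  0x03FF0000U

    int main(void)
    {
            unsigned int flags = 0x2;                  /* some lock type */

            flags |= 5u << LCB_F_SLAB_ID_SHIFT;        /* slab cache #5 */

            printf("type=%#x slab=%u\n",
                   flags & LCB_F_TYPE_MASK,
                   (flags & LCB_F_SLAB_ID_MASK) >> LCB_F_SLAB_ID_SHIFT);
            /* prints: type=0x2 slab=5 */
            return 0;
    }
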
diff --git a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
index 4dcad7b682bd..7b81d3173917 100644
--- a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
+++ b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
@@ -195,4 +195,12 @@ struct bpf_perf_event_data_kern {
*/
struct rq {};
+struct kmem_cache {
+ const char *name;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__kmem_cache {
+ struct kmem_cache *s;
+} __attribute__((preserve_access_index));
+
#endif // __VMLINUX_H
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index b80c12c74bbb..7429530fa774 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -25,7 +25,8 @@ struct branch_flags {
u64 spec:2;
u64 new_type:4;
u64 priv:3;
- u64 reserved:31;
+ u64 not_taken:1;
+ u64 reserved:30;
};
};
};
diff --git a/tools/perf/util/btf.c b/tools/perf/util/btf.c
new file mode 100644
index 000000000000..bb163fe87767
--- /dev/null
+++ b/tools/perf/util/btf.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Copyright (C) 2024, Red Hat, Inc
+ */
+
+#include <bpf/btf.h>
+#include <util/btf.h>
+#include <string.h>
+
+const struct btf_member *__btf_type__find_member_by_name(struct btf *btf,
+ int type_id, const char *member_name)
+{
+ const struct btf_type *t = btf__type_by_id(btf, type_id);
+ const struct btf_member *m;
+ int i;
+
+ for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
+ const char *current_member_name = btf__name_by_offset(btf, m->name_off);
+
+ if (!strcmp(current_member_name, member_name))
+ return m;
+ }
+
+ return NULL;
+}
diff --git a/tools/perf/util/btf.h b/tools/perf/util/btf.h
new file mode 100644
index 000000000000..05e6e5bf23d6
--- /dev/null
+++ b/tools/perf/util/btf.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_UTIL_BTF
+#define __PERF_UTIL_BTF 1
+
+struct btf;
+struct btf_member;
+
+const struct btf_member *__btf_type__find_member_by_name(struct btf *btf,
+ int type_id, const char *member_name);
+#endif // __PERF_UTIL_BTF
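
A hypothetical caller of the new helper, assuming libbpf's btf.h API and perf's include paths, might resolve a member's byte offset like this (bitfield members would need the kflag handling this sketch omits):

    #include <bpf/btf.h>
    #include "util/btf.h"

    /* Return the byte offset of @member in struct @type, or -1. */
    static int member_offset(struct btf *btf, const char *type,
                             const char *member)
    {
            int id = btf__find_by_name_kind(btf, type, BTF_KIND_STRUCT);
            const struct btf_member *m;

            if (id < 0)
                    return -1;
            m = __btf_type__find_member_by_name(btf, id, member);
            return m ? (int)(m->offset / 8) : -1;   /* offset is in bits */
    }
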
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 0c7564747a14..d7b7eef740b9 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -589,9 +589,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
return -ENOMEM;
}
call->ip = cursor_node->ip;
- call->ms = cursor_node->ms;
- call->ms.map = map__get(call->ms.map);
- call->ms.maps = maps__get(call->ms.maps);
+ map_symbol__copy(&call->ms, &cursor_node->ms);
call->srcline = cursor_node->srcline;
if (cursor_node->branch) {
@@ -1094,9 +1092,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
node->ip = ip;
map_symbol__exit(&node->ms);
- node->ms = *ms;
- node->ms.maps = maps__get(ms->maps);
- node->ms.map = map__get(ms->map);
+ map_symbol__copy(&node->ms, ms);
node->branch = branch;
node->nr_loop_iter = nr_loop_iter;
node->iter_cycles = iter_cycles;
@@ -1564,7 +1560,7 @@ int callchain_node__make_parent_list(struct callchain_node *node)
goto out;
*new = *chain;
new->has_children = false;
- new->ms.map = map__get(new->ms.map);
+ map_symbol__copy(&new->ms, &chain->ms);
list_add_tail(&new->list, &head);
}
parent = parent->parent;
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 0f759dd96db7..fbcc0626f9ce 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -473,7 +473,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
leader = NULL;
evlist__for_each_entry(orig_list, pos) {
- evsel = evsel__clone(pos);
+ evsel = evsel__clone(/*dest=*/NULL, pos);
if (evsel == NULL)
goto out_err;
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
index 9a7248dbe2d7..0319546decca 100644
--- a/tools/perf/util/color.h
+++ b/tools/perf/util/color.h
@@ -30,11 +30,6 @@
extern int perf_use_color_default;
-/*
- * Use this instead of perf_default_config if you need the value of color.ui.
- */
-int perf_color_default_config(const char *var, const char *value, void *cb);
-
int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty);
int color_vsnprintf(char *bf, size_t size, const char *color,
const char *fmt, va_list args);
diff --git a/tools/perf/util/color_config.c b/tools/perf/util/color_config.c
index dc09ba7cb31e..301031ddc025 100644
--- a/tools/perf/util/color_config.c
+++ b/tools/perf/util/color_config.c
@@ -35,14 +35,3 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
}
return 0;
}
-
-int perf_color_default_config(const char *var, const char *value,
- void *cb __maybe_unused)
-{
- if (!strcmp(var, "color.ui")) {
- perf_use_color_default = perf_config_colorbool(var, value, -1);
- return 0;
- }
-
- return 0;
-}
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index 49b79cf0c5cc..8aa456d7c2cd 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -5,6 +5,8 @@
#include <internal/rc_check.h>
#include <linux/refcount.h>
#include <linux/zalloc.h>
+#include <tools/libc_compat.h> // reallocarray
+
#include "rwsem.h"
DECLARE_RC_STRUCT(comm_str) {
diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
index b29109cd3609..6cfecfca16f2 100644
--- a/tools/perf/util/compress.h
+++ b/tools/perf/util/compress.h
@@ -4,7 +4,9 @@
#include <stdbool.h>
#include <stddef.h>
+#include <stdio.h>
#include <sys/types.h>
+#include <linux/compiler.h>
#ifdef HAVE_ZSTD_SUPPORT
#include <zstd.h>
#endif
@@ -15,8 +17,26 @@ bool gzip_is_compressed(const char *input);
#endif
#ifdef HAVE_LZMA_SUPPORT
+int lzma_decompress_stream_to_file(FILE *input, int output_fd);
int lzma_decompress_to_file(const char *input, int output_fd);
bool lzma_is_compressed(const char *input);
+#else
+static inline
+int lzma_decompress_stream_to_file(FILE *input __maybe_unused,
+ int output_fd __maybe_unused)
+{
+ return -1;
+}
+static inline
+int lzma_decompress_to_file(const char *input __maybe_unused,
+ int output_fd __maybe_unused)
+{
+ return -1;
+}
+static inline int lzma_is_compressed(const char *input __maybe_unused)
+{
+ return false;
+}
#endif
struct zstd_data {
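
The new #else branch follows perf's usual convention for optional libraries: ship static inline stubs so call sites never need their own #ifdefs. The shape, with a made-up feature name (__maybe_unused comes from the linux/compiler.h include added above):

    #ifdef HAVE_FOO_SUPPORT
    int foo_process(const char *path);
    #else
    static inline int foo_process(const char *path __maybe_unused)
    {
            return -1;      /* feature compiled out */
    }
    #endif
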
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 68f9407ca74b..ae72b66b6ded 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -13,6 +13,7 @@
#include <sys/param.h>
#include "cache.h"
#include "callchain.h"
+#include "header.h"
#include <subcmd/exec-cmd.h>
#include "util/event.h" /* proc_map_timeout */
#include "util/hist.h" /* perf_hist_config */
@@ -34,6 +35,22 @@
#define DEBUG_CACHE_DIR ".debug"
+#define METRIC_ONLY_LEN 20
+
+struct perf_stat_config stat_config = {
+ .aggr_mode = AGGR_GLOBAL,
+ .aggr_level = MAX_CACHE_LVL + 1,
+ .scale = true,
+ .unit_width = 4, /* strlen("unit") */
+ .run_count = 1,
+ .metric_only_len = METRIC_ONLY_LEN,
+ .walltime_nsecs_stats = &walltime_nsecs_stats,
+ .ru_stats = &ru_stats,
+ .big_num = true,
+ .ctl_fd = -1,
+ .ctl_fd_ack = -1,
+ .iostat_run = false,
+};
char buildid_dir[MAXPATHLEN]; /* root dir for buildid, binary cache */
@@ -455,6 +472,16 @@ static int perf_ui_config(const char *var, const char *value)
return 0;
}
+void perf_stat__set_big_num(int set)
+{
+ stat_config.big_num = (set != 0);
+}
+
+static void perf_stat__set_no_csv_summary(int set)
+{
+ stat_config.no_csv_summary = (set != 0);
+}
+
static int perf_stat_config(const char *var, const char *value)
{
if (!strcmp(var, "stat.big-num"))
@@ -829,12 +856,6 @@ void perf_config__exit(void)
config_set = NULL;
}
-void perf_config__refresh(void)
-{
- perf_config__exit();
- perf_config__init();
-}
-
static void perf_config_item__delete(struct perf_config_item *item)
{
zfree(&item->name);
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index 9971313d61c1..987b47cf54c3 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -49,7 +49,7 @@ void perf_config_set__delete(struct perf_config_set *set);
int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
const char *var, const char *value);
void perf_config__exit(void);
-void perf_config__refresh(void);
+int perf_config__set_variable(const char *var, const char *value);
/**
* perf_config_sections__for_each - iterate thru all the sections
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 27094211edd8..89570397a4b3 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -67,19 +67,23 @@ static struct perf_cpu_map *cpu_map__from_entries(const struct perf_record_cpu_m
struct perf_cpu_map *map;
map = perf_cpu_map__empty_new(data->cpus_data.nr);
- if (map) {
- unsigned i;
-
- for (i = 0; i < data->cpus_data.nr; i++) {
- /*
- * Special treatment for -1, which is not real cpu number,
- * and we need to use (int) -1 to initialize map[i],
- * otherwise it would become 65535.
- */
- if (data->cpus_data.cpu[i] == (u16) -1)
- RC_CHK_ACCESS(map)->map[i].cpu = -1;
- else
- RC_CHK_ACCESS(map)->map[i].cpu = (int) data->cpus_data.cpu[i];
+ if (!map)
+ return NULL;
+
+ for (unsigned int i = 0; i < data->cpus_data.nr; i++) {
+ /*
+		 * Special treatment for -1, which is not a real cpu number,
+ * and we need to use (int) -1 to initialize map[i],
+ * otherwise it would become 65535.
+ */
+ if (data->cpus_data.cpu[i] == (u16) -1) {
+ RC_CHK_ACCESS(map)->map[i].cpu = -1;
+ } else if (data->cpus_data.cpu[i] < INT16_MAX) {
+ RC_CHK_ACCESS(map)->map[i].cpu = (int16_t) data->cpus_data.cpu[i];
+ } else {
+ pr_err("Invalid cpumap entry %u\n", data->cpus_data.cpu[i]);
+ perf_cpu_map__put(map);
+ return NULL;
}
}
@@ -106,8 +110,15 @@ static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_
int cpu;
perf_record_cpu_map_data__read_one_mask(data, i, local_copy);
- for_each_set_bit(cpu, local_copy, 64)
- RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i;
+ for_each_set_bit(cpu, local_copy, 64) {
+ if (cpu + cpus_per_i < INT16_MAX) {
+ RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i;
+ } else {
+ pr_err("Invalid cpumap entry %d\n", cpu + cpus_per_i);
+ perf_cpu_map__put(map);
+ return NULL;
+ }
+ }
}
return map;
@@ -127,8 +138,15 @@ static struct perf_cpu_map *cpu_map__from_range(const struct perf_record_cpu_map
RC_CHK_ACCESS(map)->map[i++].cpu = -1;
for (int cpu = data->range_cpu_data.start_cpu; cpu <= data->range_cpu_data.end_cpu;
- i++, cpu++)
- RC_CHK_ACCESS(map)->map[i].cpu = cpu;
+ i++, cpu++) {
+ if (cpu < INT16_MAX) {
+ RC_CHK_ACCESS(map)->map[i].cpu = cpu;
+ } else {
+ pr_err("Invalid cpumap entry %d\n", cpu);
+ perf_cpu_map__put(map);
+ return NULL;
+ }
+ }
return map;
}
@@ -293,7 +311,7 @@ struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data)
die = cpu__get_die_id(cpu);
/* There is no die_id on legacy system. */
- if (die == -1)
+ if (die < 0)
die = 0;
/*
@@ -322,7 +340,7 @@ struct aggr_cpu_id aggr_cpu_id__cluster(struct perf_cpu cpu, void *data)
struct aggr_cpu_id id;
/* There is no cluster_id on legacy system. */
- if (cluster == -1)
+ if (cluster < 0)
cluster = 0;
id = aggr_cpu_id__die(cpu, data);
@@ -427,7 +445,7 @@ static void set_max_cpu_num(void)
{
const char *mnt;
char path[PATH_MAX];
- int ret = -1;
+ int max, ret = -1;
/* set up default */
max_cpu_num.cpu = 4096;
@@ -444,10 +462,12 @@ static void set_max_cpu_num(void)
goto out;
}
- ret = get_max_num(path, &max_cpu_num.cpu);
+ ret = get_max_num(path, &max);
if (ret)
goto out;
+ max_cpu_num.cpu = max;
+
/* get the highest present cpu number for a sparse allocation */
ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
if (ret >= PATH_MAX) {
@@ -455,8 +475,14 @@ static void set_max_cpu_num(void)
goto out;
}
- ret = get_max_num(path, &max_present_cpu_num.cpu);
+ ret = get_max_num(path, &max);
+ if (!ret && max > INT16_MAX) {
+ pr_err("Read out of bounds max cpus of %d\n", max);
+ ret = -1;
+ }
+ if (!ret)
+ max_present_cpu_num.cpu = (int16_t)max;
out:
if (ret)
pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
@@ -606,7 +632,7 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
#define COMMA first ? "" : ","
for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) {
- struct perf_cpu cpu = { .cpu = INT_MAX };
+ struct perf_cpu cpu = { .cpu = INT16_MAX };
bool last = i == perf_cpu_map__nr(map);
if (!last)
@@ -696,7 +722,7 @@ struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
if (!online)
online = perf_cpu_map__new_online_cpus(); /* from /sys/devices/system/cpu/online */
- return online;
+ return perf_cpu_map__get(online);
}
bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b)
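
Since CPU numbers are now held in 16 bits, the conversion sites above all share the same validate-or-fail shape; a hedged model of that check:

    #include <stdint.h>
    #include <stdio.h>

    /* Store @cpu into a 16-bit slot; -1 stays the "any cpu" sentinel. */
    static int store_cpu(int16_t *slot, int cpu)
    {
            if (cpu == -1) {
                    *slot = -1;
                    return 0;
            }
            if (cpu >= INT16_MAX) {
                    fprintf(stderr, "Invalid cpumap entry %d\n", cpu);
                    return -1;      /* reject rather than truncate */
            }
            *slot = (int16_t)cpu;
            return 0;
    }
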
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 0bf9e5c27b59..30f4bb3e7fa3 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -506,20 +506,27 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
evsel = evlist__event2evsel(session->evlist, event);
if (!evsel)
return -EINVAL;
+ perf_sample__init(&sample, /*all=*/false);
err = evsel__parse_sample(evsel, event, &sample);
if (err)
- return err;
+ goto out;
cpu = sample.cpu;
if (cpu == -1) {
/* no CPU in the sample - possibly recorded with an old version of perf */
pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
- if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0)
- return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
+ if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) {
+ err = cs_etm__process_trace_id_v0(etm, cpu, hw_id);
+ goto out;
+ }
- return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
+ err = cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
+out:
+ perf_sample__exit(&sample);
+ return err;
}
void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
@@ -1560,8 +1567,9 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
int ret = 0;
struct cs_etm_auxtrace *etm = etmq->etm;
union perf_event *event = tidq->event_buf;
- struct perf_sample sample = {.ip = 0,};
+ struct perf_sample sample;
+ perf_sample__init(&sample, /*all=*/true);
event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
event->sample.header.size = sizeof(struct perf_event_header);
@@ -1598,6 +1606,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
"CS ETM Trace: failed to deliver instruction event, error %d\n",
ret);
+ perf_sample__exit(&sample);
return ret;
}
@@ -3151,9 +3160,10 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf
evsel = evlist__event2evsel(session->evlist, event);
if (!evsel)
return -EINVAL;
+ perf_sample__init(&sample, /*all=*/false);
ret = evsel__parse_sample(evsel, event, &sample);
if (ret)
- return ret;
+ goto out;
/*
* Loop through the auxtrace index to find the buffer that matches up with this aux event.
@@ -3168,7 +3178,7 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf
* 1 ('not found')
*/
if (ret != 1)
- return ret;
+ goto out;
}
}
@@ -3178,7 +3188,10 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf
*/
pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
" tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
- return 0;
+ ret = 0;
+out:
+ perf_sample__exit(&sample);
+ return ret;
}
static int cs_etm__queue_aux_records(struct perf_session *session)
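
These cs-etm hunks all adopt the same perf_sample__init()/perf_sample__exit() bracketing so that early returns cannot leak parse-time allocations. The shape, sketched with perf's internal helpers (signatures assumed from the hunks above; not buildable outside the perf tree):

    static int handle_event(struct evlist *evlist, union perf_event *event)
    {
            struct perf_sample sample;
            int err;

            perf_sample__init(&sample, /*all=*/false);
            err = evlist__parse_sample(evlist, event, &sample);
            if (err)
                    goto out;
            /* ... use sample.cpu, sample.tid, ... */
    out:
            perf_sample__exit(&sample);     /* frees what parsing allocated */
            return err;
    }
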
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index f0599c61fab4..5e7ff09fbc95 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -426,8 +426,9 @@ static int add_tracepoint_values(struct ctf_writer *cw,
struct evsel *evsel,
struct perf_sample *sample)
{
- struct tep_format_field *common_fields = evsel->tp_format->format.common_fields;
- struct tep_format_field *fields = evsel->tp_format->format.fields;
+ const struct tep_event *tp_format = evsel__tp_format(evsel);
+ struct tep_format_field *common_fields = tp_format->format.common_fields;
+ struct tep_format_field *fields = tp_format->format.fields;
int ret;
ret = add_tracepoint_fields_values(cw, event_class, event,
@@ -1064,8 +1065,9 @@ static int add_tracepoint_types(struct ctf_writer *cw,
struct evsel *evsel,
struct bt_ctf_event_class *class)
{
- struct tep_format_field *common_fields = evsel->tp_format->format.common_fields;
- struct tep_format_field *fields = evsel->tp_format->format.fields;
+ const struct tep_event *tp_format = evsel__tp_format(evsel);
+ struct tep_format_field *common_fields = tp_format ? tp_format->format.common_fields : NULL;
+ struct tep_format_field *fields = tp_format ? tp_format->format.fields : NULL;
int ret;
ret = add_tracepoint_fields_types(cw, common_fields, class);
diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c
index 8304cd2d4a9c..d9f805bf6fb0 100644
--- a/tools/perf/util/data-convert-json.c
+++ b/tools/perf/util/data-convert-json.c
@@ -230,12 +230,12 @@ static int process_sample_event(const struct perf_tool *tool,
#ifdef HAVE_LIBTRACEEVENT
if (sample->raw_data) {
- int i;
- struct tep_format_field **fields;
+ struct tep_event *tp_format = evsel__tp_format(evsel);
+ struct tep_format_field **fields = tp_format ? tep_event_fields(tp_format) : NULL;
- fields = tep_event_fields(evsel->tp_format);
if (fields) {
- i = 0;
+ int i = 0;
+
while (fields[i]) {
struct trace_seq s;
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index 98661ede2a73..164eb45a0b36 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -158,26 +158,6 @@ out_err:
return ret;
}
-int perf_data__update_dir(struct perf_data *data)
-{
- int i;
-
- if (WARN_ON(!data->is_dir))
- return -EINVAL;
-
- for (i = 0; i < data->dir.nr; i++) {
- struct perf_data_file *file = &data->dir.files[i];
- struct stat st;
-
- if (fstat(file->fd, &st))
- return -1;
-
- file->size = st.st_size;
- }
-
- return 0;
-}
-
static bool check_pipe(struct perf_data *data)
{
struct stat st;
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
index 110f3ebde30f..1438e32e0451 100644
--- a/tools/perf/util/data.h
+++ b/tools/perf/util/data.h
@@ -97,7 +97,6 @@ int perf_data__switch(struct perf_data *data,
int perf_data__create_dir(struct perf_data *data, int nr);
int perf_data__open_dir(struct perf_data *data);
void perf_data__close_dir(struct perf_data *data);
-int perf_data__update_dir(struct perf_data *data);
unsigned long perf_data__size(struct perf_data *data);
int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz);
bool has_kcore_dir(const char *path);
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 995f6bb05b5f..f9ef7d045c92 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -46,8 +46,8 @@ int debug_type_profile;
FILE *debug_file(void)
{
if (!_debug_file) {
- pr_warning_once("debug_file not set");
debug_set_file(stderr);
+ pr_warning_once("debug_file not set");
}
return _debug_file;
}
diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c
index 19acf4775d35..b5deea7cbdf2 100644
--- a/tools/perf/util/debuginfo.c
+++ b/tools/perf/util/debuginfo.c
@@ -125,8 +125,12 @@ struct debuginfo *debuginfo__new(const char *path)
dso__put(dso);
out:
+ if (dinfo)
+ return dinfo;
+
/* if failed to open all distro debuginfo, open given binary */
- return dinfo ? : __debuginfo__new(path);
+ symbol__join_symfs(buf, path);
+ return __debuginfo__new(buf);
}
void debuginfo__delete(struct debuginfo *dbg)
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
index 28ceb76e465b..8f0eb56c6fc6 100644
--- a/tools/perf/util/disasm.c
+++ b/tools/perf/util/disasm.c
@@ -48,7 +48,7 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size,
static void ins__sort(struct arch *arch);
static int disasm_line__parse(char *line, const char **namep, char **rawp);
-static int disasm_line__parse_powerpc(struct disasm_line *dl);
+static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args);
static char *expand_tabs(char *line, char **storage, size_t *storage_len);
static __attribute__((constructor)) void symbol__init_regexpr(void)
@@ -968,24 +968,25 @@ out:
#define PPC_OP(op) (((op) >> 26) & 0x3F)
#define RAW_BYTES 11
-static int disasm_line__parse_powerpc(struct disasm_line *dl)
+static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args)
{
char *line = dl->al.line;
const char **namep = &dl->ins.name;
char **rawp = &dl->ops.raw;
char *tmp_raw_insn, *name_raw_insn = skip_spaces(line);
char *name = skip_spaces(name_raw_insn + RAW_BYTES);
- int objdump = 0;
+ int disasm = 0;
+ int ret = 0;
- if (strlen(line) > RAW_BYTES)
- objdump = 1;
+ if (args->options->disassembler_used)
+ disasm = 1;
if (name_raw_insn[0] == '\0')
return -1;
- if (objdump) {
- disasm_line__parse(name, namep, rawp);
- } else
+ if (disasm)
+ ret = disasm_line__parse(name, namep, rawp);
+ else
*namep = "";
tmp_raw_insn = strndup(name_raw_insn, 11);
@@ -995,10 +996,10 @@ static int disasm_line__parse_powerpc(struct disasm_line *dl)
remove_spaces(tmp_raw_insn);
sscanf(tmp_raw_insn, "%x", &dl->raw.raw_insn);
- if (objdump)
+ if (disasm)
dl->raw.raw_insn = be32_to_cpu(dl->raw.raw_insn);
- return 0;
+ return ret;
}
static void annotation_line__init(struct annotation_line *al,
@@ -1054,7 +1055,7 @@ struct disasm_line *disasm_line__new(struct annotate_args *args)
if (args->offset != -1) {
if (arch__is(args->arch, "powerpc")) {
- if (disasm_line__parse_powerpc(dl) < 0)
+ if (disasm_line__parse_powerpc(dl, args) < 0)
goto out_free_line;
} else if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
goto out_free_line;
@@ -1245,6 +1246,9 @@ int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, s
scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.",
dso__long_name(dso));
break;
+ case SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE:
+ scnprintf(buf, buflen, "Couldn't determine the file %s type.", dso__long_name(dso));
+ break;
default:
scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum);
break;
@@ -2238,7 +2242,7 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
} else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) {
return symbol__disassemble_bpf_image(sym, args);
} else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
- return -1;
+ return SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE;
} else if (dso__is_kcore(dso)) {
kce.addr = map__rip_2objdump(map, sym->start);
kce.kcore_filename = symfs_filename;
@@ -2286,16 +2290,20 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
switch (dis) {
case PERF_DISASM_LLVM:
+ args->options->disassembler_used = PERF_DISASM_LLVM;
err = symbol__disassemble_llvm(symfs_filename, sym, args);
break;
case PERF_DISASM_CAPSTONE:
+ args->options->disassembler_used = PERF_DISASM_CAPSTONE;
err = symbol__disassemble_capstone(symfs_filename, sym, args);
break;
case PERF_DISASM_OBJDUMP:
+ args->options->disassembler_used = PERF_DISASM_OBJDUMP;
err = symbol__disassemble_objdump(symfs_filename, sym, args);
break;
case PERF_DISASM_UNKNOWN: /* End of disassemblers. */
default:
+ args->options->disassembler_used = PERF_DISASM_UNKNOWN;
goto out_remove_tmp;
}
if (err == 0)
diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c
index 7d180bdaedbc..ddacef881af2 100644
--- a/tools/perf/util/dlfilter.c
+++ b/tools/perf/util/dlfilter.c
@@ -234,7 +234,8 @@ static const __u8 *dlfilter__insn(void *ctx, __u32 *len)
struct machine *machine = maps__machine(thread__maps(al->thread));
if (machine)
- script_fetch_insn(d->sample, al->thread, machine);
+ script_fetch_insn(d->sample, al->thread, machine,
+ /*native_arch=*/true);
}
}
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 5c6e85fdae0d..8619b6eea62d 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -67,6 +67,7 @@ char dso__symtab_origin(const struct dso *dso)
[DSO_BINARY_TYPE__GUEST_KMODULE] = 'G',
[DSO_BINARY_TYPE__GUEST_KMODULE_COMP] = 'M',
[DSO_BINARY_TYPE__GUEST_VMLINUX] = 'V',
+ [DSO_BINARY_TYPE__GNU_DEBUGDATA] = 'n',
};
if (dso == NULL || dso__symtab_type(dso) == DSO_BINARY_TYPE__NOT_FOUND)
@@ -93,6 +94,7 @@ bool dso__is_object_file(const struct dso *dso)
case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
+ case DSO_BINARY_TYPE__GNU_DEBUGDATA:
case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
case DSO_BINARY_TYPE__GUEST_KMODULE:
case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
@@ -224,6 +226,7 @@ int dso__read_binary_type_filename(const struct dso *dso,
case DSO_BINARY_TYPE__VMLINUX:
case DSO_BINARY_TYPE__GUEST_VMLINUX:
case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
+ case DSO_BINARY_TYPE__GNU_DEBUGDATA:
__symbol__join_symfs(filename, size, dso__long_name(dso));
break;
@@ -490,11 +493,25 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m,
/*
* Global list of open DSOs and the counter.
*/
+struct mutex _dso__data_open_lock;
static LIST_HEAD(dso__data_open);
-static long dso__data_open_cnt;
-static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER;
+static long dso__data_open_cnt GUARDED_BY(_dso__data_open_lock);
-static void dso__list_add(struct dso *dso)
+static void dso__data_open_lock_init(void)
+{
+ mutex_init(&_dso__data_open_lock);
+}
+
+static struct mutex *dso__data_open_lock(void) LOCK_RETURNED(_dso__data_open_lock)
+{
+ static pthread_once_t data_open_lock_once = PTHREAD_ONCE_INIT;
+
+ pthread_once(&data_open_lock_once, dso__data_open_lock_init);
+
+ return &_dso__data_open_lock;
+}
+
+static void dso__list_add(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
list_add_tail(&dso__data(dso)->open_entry, &dso__data_open);
#ifdef REFCNT_CHECKING
@@ -505,11 +522,13 @@ static void dso__list_add(struct dso *dso)
dso__data_open_cnt++;
}
-static void dso__list_del(struct dso *dso)
+static void dso__list_del(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
list_del_init(&dso__data(dso)->open_entry);
#ifdef REFCNT_CHECKING
+ mutex_unlock(dso__data_open_lock());
dso__put(dso__data(dso)->dso);
+ mutex_lock(dso__data_open_lock());
#endif
WARN_ONCE(dso__data_open_cnt <= 0,
"DSO data fd counter out of bounds.");
@@ -518,7 +537,7 @@ static void dso__list_del(struct dso *dso)
static void close_first_dso(void);
-static int do_open(char *name)
+static int do_open(char *name) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
int fd;
char sbuf[STRERR_BUFSIZE];
@@ -545,6 +564,7 @@ char *dso__filename_with_chroot(const struct dso *dso, const char *filename)
}
static int __open_dso(struct dso *dso, struct machine *machine)
+ EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
int fd = -EINVAL;
char *root_dir = (char *)"";
@@ -610,6 +630,7 @@ static void check_data_close(void);
* list/count of open DSO objects.
*/
static int open_dso(struct dso *dso, struct machine *machine)
+ EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
int fd;
struct nscookie nsc;
@@ -635,7 +656,7 @@ static int open_dso(struct dso *dso, struct machine *machine)
return fd;
}
-static void close_data_fd(struct dso *dso)
+static void close_data_fd(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
if (dso__data(dso)->fd >= 0) {
close(dso__data(dso)->fd);
@@ -652,12 +673,12 @@ static void close_data_fd(struct dso *dso)
* Close @dso's data file descriptor and update the
* list/count of open DSO objects.
*/
-static void close_dso(struct dso *dso)
+static void close_dso(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
close_data_fd(dso);
}
-static void close_first_dso(void)
+static void close_first_dso(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
struct dso_data *dso_data;
struct dso *dso;
@@ -702,7 +723,7 @@ void reset_fd_limit(void)
fd_limit = 0;
}
-static bool may_cache_fd(void)
+static bool may_cache_fd(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
if (!fd_limit)
fd_limit = get_fd_limit();
@@ -718,7 +739,7 @@ static bool may_cache_fd(void)
* for opened dso file descriptors. The limit is half
* of the RLIMIT_NOFILE files opened.
*/
-static void check_data_close(void)
+static void check_data_close(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
bool cache_fd = may_cache_fd();
@@ -734,12 +755,13 @@ static void check_data_close(void)
*/
void dso__data_close(struct dso *dso)
{
- pthread_mutex_lock(&dso__data_open_lock);
+ mutex_lock(dso__data_open_lock());
close_dso(dso);
- pthread_mutex_unlock(&dso__data_open_lock);
+ mutex_unlock(dso__data_open_lock());
}
static void try_to_open_dso(struct dso *dso, struct machine *machine)
+ EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
enum dso_binary_type binary_type_data[] = {
DSO_BINARY_TYPE__BUILD_ID_CACHE,
@@ -781,25 +803,27 @@ out:
* returns whether a file descriptor was obtained via the *fd out
* parameter. It should be paired with dso__data_put_fd() whenever
* it returns true.
*/
-int dso__data_get_fd(struct dso *dso, struct machine *machine)
+bool dso__data_get_fd(struct dso *dso, struct machine *machine, int *fd)
{
+ *fd = -1;
if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR)
- return -1;
+ return false;
- if (pthread_mutex_lock(&dso__data_open_lock) < 0)
- return -1;
+ mutex_lock(dso__data_open_lock());
try_to_open_dso(dso, machine);
- if (dso__data(dso)->fd < 0)
- pthread_mutex_unlock(&dso__data_open_lock);
+ *fd = dso__data(dso)->fd;
+ if (*fd >= 0)
+ return true;
- return dso__data(dso)->fd;
+ mutex_unlock(dso__data_open_lock());
+ return false;
}
void dso__data_put_fd(struct dso *dso __maybe_unused)
{
- pthread_mutex_unlock(&dso__data_open_lock);
+ mutex_unlock(dso__data_open_lock());
}
bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
@@ -951,7 +975,7 @@ static ssize_t file_read(struct dso *dso, struct machine *machine,
{
ssize_t ret;
- pthread_mutex_lock(&dso__data_open_lock);
+ mutex_lock(dso__data_open_lock());
/*
* dso__data(dso)->fd might be closed if other thread opened another
@@ -967,7 +991,7 @@ static ssize_t file_read(struct dso *dso, struct machine *machine,
ret = pread(dso__data(dso)->fd, data, DSO__DATA_CACHE_SIZE, offset);
out:
- pthread_mutex_unlock(&dso__data_open_lock);
+ mutex_unlock(dso__data_open_lock());
return ret;
}
@@ -1075,7 +1099,7 @@ static int file_size(struct dso *dso, struct machine *machine)
struct stat st;
char sbuf[STRERR_BUFSIZE];
- pthread_mutex_lock(&dso__data_open_lock);
+ mutex_lock(dso__data_open_lock());
/*
* dso__data(dso)->fd might be closed if other thread opened another
@@ -1099,7 +1123,7 @@ static int file_size(struct dso *dso, struct machine *machine)
dso__data(dso)->file_size = st.st_size;
out:
- pthread_mutex_unlock(&dso__data_open_lock);
+ mutex_unlock(dso__data_open_lock());
return ret;
}
@@ -1170,6 +1194,68 @@ ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
return data_read_write_offset(dso, machine, offset, data, size, true);
}
+uint16_t dso__e_machine(struct dso *dso, struct machine *machine)
+{
+ uint16_t e_machine = EM_NONE;
+ int fd;
+
+ switch (dso__binary_type(dso)) {
+ case DSO_BINARY_TYPE__KALLSYMS:
+ case DSO_BINARY_TYPE__GUEST_KALLSYMS:
+ case DSO_BINARY_TYPE__VMLINUX:
+ case DSO_BINARY_TYPE__GUEST_VMLINUX:
+ case DSO_BINARY_TYPE__GUEST_KMODULE:
+ case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
+ case DSO_BINARY_TYPE__KCORE:
+ case DSO_BINARY_TYPE__GUEST_KCORE:
+ case DSO_BINARY_TYPE__BPF_PROG_INFO:
+ case DSO_BINARY_TYPE__BPF_IMAGE:
+ case DSO_BINARY_TYPE__OOL:
+ case DSO_BINARY_TYPE__JAVA_JIT:
+ return EM_HOST;
+ case DSO_BINARY_TYPE__DEBUGLINK:
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE:
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO:
+ case DSO_BINARY_TYPE__GNU_DEBUGDATA:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
+ case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+ case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
+ case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
+ case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
+ case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
+ break;
+ case DSO_BINARY_TYPE__NOT_FOUND:
+ default:
+ return EM_NONE;
+ }
+
+ mutex_lock(dso__data_open_lock());
+
+ /*
+ * dso__data(dso)->fd might be closed if other thread opened another
+ * file (dso) due to open file limit (RLIMIT_NOFILE).
+ */
+ try_to_open_dso(dso, machine);
+ fd = dso__data(dso)->fd;
+ if (fd >= 0) {
+ _Static_assert(offsetof(Elf32_Ehdr, e_machine) == 18, "Unexpected offset");
+ _Static_assert(offsetof(Elf64_Ehdr, e_machine) == 18, "Unexpected offset");
+ if (dso__needs_swap(dso) == DSO_SWAP__UNSET) {
+ unsigned char eidata;
+
+ if (pread(fd, &eidata, sizeof(eidata), EI_DATA) == sizeof(eidata))
+ dso__swap_init(dso, eidata);
+ }
+ if (dso__needs_swap(dso) != DSO_SWAP__UNSET &&
+ pread(fd, &e_machine, sizeof(e_machine), 18) == sizeof(e_machine))
+ e_machine = DSO__SWAP(dso, uint16_t, e_machine);
+ }
+ mutex_unlock(dso__data_open_lock());
+ return e_machine;
+}
+
/**
* dso__data_read_addr - Read data from dso address
* @dso: dso object
@@ -1525,6 +1611,33 @@ void dso__put(struct dso *dso)
RC_CHK_PUT(dso);
}
+int dso__swap_init(struct dso *dso, unsigned char eidata)
+{
+ static unsigned int const endian = 1;
+
+ dso__set_needs_swap(dso, DSO_SWAP__NO);
+
+ switch (eidata) {
+ case ELFDATA2LSB:
+ /* We are big endian, DSO is little endian. */
+ if (*(unsigned char const *)&endian != 1)
+ dso__set_needs_swap(dso, DSO_SWAP__YES);
+ break;
+
+ case ELFDATA2MSB:
+ /* We are little endian, DSO is big endian. */
+ if (*(unsigned char const *)&endian != 0)
+ dso__set_needs_swap(dso, DSO_SWAP__YES);
+ break;
+
+ default:
+ pr_err("unrecognized DSO data encoding %d\n", eidata);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
void dso__set_build_id(struct dso *dso, struct build_id *bid)
{
RC_CHK_ACCESS(dso)->bid = *bid;
@@ -1608,11 +1721,10 @@ size_t dso__fprintf(struct dso *dso, FILE *fp)
enum dso_type dso__type(struct dso *dso, struct machine *machine)
{
- int fd;
+ int fd = -1;
enum dso_type type = DSO__TYPE_UNKNOWN;
- fd = dso__data_get_fd(dso, machine);
- if (fd >= 0) {
+ if (dso__data_get_fd(dso, machine, &fd)) {
type = dso__type_fd(fd);
dso__data_put_fd(dso);
}
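
A note on the two dso.c additions above: dso__e_machine() reads the ELF e_machine field at a fixed byte offset, and dso__swap_init() decides whether those bytes need swapping by probing host endianness with the first byte of a known integer. The probe reduces to a few lines of standalone C; the sketch below mirrors that logic and is illustrative only, not part of the patch:

	#include <elf.h>
	#include <stdio.h>

	/* Mirror of the dso__swap_init() decision: returns 1 if the DSO's
	 * data encoding differs from the host's, 0 if not, -1 if the
	 * encoding is unrecognized. */
	static int needs_swap(unsigned char ei_data)
	{
		static const unsigned int probe = 1;
		/* First byte of 'probe' is 1 on little endian hosts, 0 on big. */
		int host_is_le = *(const unsigned char *)&probe == 1;

		if (ei_data == ELFDATA2LSB)
			return !host_is_le;
		if (ei_data == ELFDATA2MSB)
			return host_is_le;
		return -1;
	}

	int main(void)
	{
		printf("LSB: %d MSB: %d\n",
		       needs_swap(ELFDATA2LSB), needs_swap(ELFDATA2MSB));
		return 0;
	}
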
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index bb8e8f444054..c87564471f9b 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -20,30 +20,88 @@ struct perf_env;
#define DSO__NAME_KALLSYMS "[kernel.kallsyms]"
#define DSO__NAME_KCORE "[kernel.kcore]"
+/**
+ * enum dso_binary_type - The kind of DSO generally associated with a memory
+ * region (struct map).
+ */
enum dso_binary_type {
+ /** @DSO_BINARY_TYPE__KALLSYMS: Symbols from /proc/kallsyms file. */
DSO_BINARY_TYPE__KALLSYMS = 0,
+ /** @DSO_BINARY_TYPE__GUEST_KALLSYMS: Guest /proc/kallsyms file. */
DSO_BINARY_TYPE__GUEST_KALLSYMS,
+ /** @DSO_BINARY_TYPE__VMLINUX: Path to kernel /boot/vmlinux file. */
DSO_BINARY_TYPE__VMLINUX,
+ /** @DSO_BINARY_TYPE__GUEST_VMLINUX: Path to guest kernel /boot/vmlinux file. */
DSO_BINARY_TYPE__GUEST_VMLINUX,
+ /** @DSO_BINARY_TYPE__JAVA_JIT: Symbols from /tmp/perf.map file. */
DSO_BINARY_TYPE__JAVA_JIT,
+ /**
+ * @DSO_BINARY_TYPE__DEBUGLINK: Debug file readable from the file path
+ * in the .gnu_debuglink ELF section of the dso.
+ */
DSO_BINARY_TYPE__DEBUGLINK,
+ /**
+ * @DSO_BINARY_TYPE__BUILD_ID_CACHE: File named after buildid located in
+ * the buildid cache with an elf filename.
+ */
DSO_BINARY_TYPE__BUILD_ID_CACHE,
+ /**
+ * @DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO: File named after buildid
+ * located in the buildid cache with a debug filename.
+ */
DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO,
+ /**
+ * @DSO_BINARY_TYPE__FEDORA_DEBUGINFO: Debug file in /usr/lib/debug
+ * with .debug suffix.
+ */
DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
+ /** @DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: Debug file in /usr/lib/debug. */
DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+ /**
+ * @DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: dso__long_name debuginfo
+ * file in /usr/lib/debug/lib rather than the expected
+ * /usr/lib/debug/usr/lib.
+ */
DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
+ /**
+ * @DSO_BINARY_TYPE__BUILDID_DEBUGINFO: File named after buildid located
+ * in /usr/lib/debug/.build-id/.
+ */
DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+ /**
+ * @DSO_BINARY_TYPE__GNU_DEBUGDATA: MiniDebuginfo where a compressed
+ * ELF file is placed in a .gnu_debugdata section.
+ */
+ DSO_BINARY_TYPE__GNU_DEBUGDATA,
+ /** @DSO_BINARY_TYPE__SYSTEM_PATH_DSO: A regular executable/shared-object file. */
DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
+ /** @DSO_BINARY_TYPE__GUEST_KMODULE: Guest kernel module .ko file. */
DSO_BINARY_TYPE__GUEST_KMODULE,
+ /** @DSO_BINARY_TYPE__GUEST_KMODULE_COMP: Guest kernel module .ko.gz file. */
DSO_BINARY_TYPE__GUEST_KMODULE_COMP,
+ /** @DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: Kernel module .ko file. */
DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
+ /** @DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: Kernel module .ko.gz file. */
DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP,
+ /** @DSO_BINARY_TYPE__KCORE: /proc/kcore file. */
DSO_BINARY_TYPE__KCORE,
+ /** @DSO_BINARY_TYPE__GUEST_KCORE: Guest /proc/kcore file. */
DSO_BINARY_TYPE__GUEST_KCORE,
+ /**
+ * @DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: Openembedded/Yocto -dbg
+ * package debug info.
+ */
DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+ /** @DSO_BINARY_TYPE__BPF_PROG_INFO: jitted BPF code. */
DSO_BINARY_TYPE__BPF_PROG_INFO,
+ /** @DSO_BINARY_TYPE__BPF_IMAGE: jitted BPF trampoline or dispatcher code. */
DSO_BINARY_TYPE__BPF_IMAGE,
+ /**
+ * @DSO_BINARY_TYPE__OOL: out of line code such as kprobe-replaced
+ * instructions or optimized kprobes or ftrace trampolines.
+ */
DSO_BINARY_TYPE__OOL,
+ /** @DSO_BINARY_TYPE__NOT_FOUND: Unknown DSO kind. */
DSO_BINARY_TYPE__NOT_FOUND,
};
@@ -154,10 +212,12 @@ struct dso_data {
int status;
u32 status_seen;
u64 file_size;
+#ifdef HAVE_LIBUNWIND_SUPPORT
u64 elf_base_addr;
u64 debug_frame_offset;
u64 eh_frame_hdr_addr;
u64 eh_frame_hdr_offset;
+#endif
};
struct dso_bpf_prog {
@@ -231,6 +291,8 @@ DECLARE_RC_STRUCT(dso) {
char name[];
};
+extern struct mutex _dso__data_open_lock;
+
/* dso__for_each_symbol - iterate over the symbols of given type
*
* @dso: the 'struct dso *' in which symbols are iterated
@@ -652,7 +714,7 @@ void __dso__inject_id(struct dso *dso, const struct dso_id *id);
int dso__name_len(const struct dso *dso);
struct dso *dso__get(struct dso *dso);
-void dso__put(struct dso *dso);
+void dso__put(struct dso *dso) LOCKS_EXCLUDED(_dso__data_open_lock);
static inline void __dso__zput(struct dso **dso)
{
@@ -675,6 +737,8 @@ bool dso__sorted_by_name(const struct dso *dso);
void dso__set_sorted_by_name(struct dso *dso);
void dso__sort_by_name(struct dso *dso);
+int dso__swap_init(struct dso *dso, unsigned char eidata);
+
void dso__set_build_id(struct dso *dso, struct build_id *bid);
bool dso__build_id_equal(const struct dso *dso, struct build_id *bid);
void dso__read_running_kernel_build_id(struct dso *dso,
@@ -732,8 +796,8 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m,
* The current usage of the dso__data_* interface is as follows:
*
* Get DSO's fd:
- * int fd = dso__data_get_fd(dso, machine);
- * if (fd >= 0) {
+ * int fd;
+ * if (dso__data_get_fd(dso, machine, &fd)) {
* USE 'fd' SOMEHOW
* dso__data_put_fd(dso);
* }
@@ -755,14 +819,16 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m,
*
* TODO
*/
-int dso__data_get_fd(struct dso *dso, struct machine *machine);
-void dso__data_put_fd(struct dso *dso);
-void dso__data_close(struct dso *dso);
+bool dso__data_get_fd(struct dso *dso, struct machine *machine, int *fd)
+ EXCLUSIVE_TRYLOCK_FUNCTION(true, _dso__data_open_lock);
+void dso__data_put_fd(struct dso *dso) UNLOCK_FUNCTION(_dso__data_open_lock);
+void dso__data_close(struct dso *dso) LOCKS_EXCLUDED(_dso__data_open_lock);
int dso__data_file_size(struct dso *dso, struct machine *machine);
off_t dso__data_size(struct dso *dso, struct machine *machine);
ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
u64 offset, u8 *data, ssize_t size);
+uint16_t dso__e_machine(struct dso *dso, struct machine *machine);
ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
struct machine *machine, u64 addr,
u8 *data, ssize_t size);
@@ -808,7 +874,9 @@ static inline bool dso__is_kcore(const struct dso *dso)
static inline bool dso__is_kallsyms(const struct dso *dso)
{
- return RC_CHK_ACCESS(dso)->kernel && RC_CHK_ACCESS(dso)->long_name[0] != '/';
+ enum dso_binary_type bt = dso__binary_type(dso);
+
+ return bt == DSO_BINARY_TYPE__KALLSYMS || bt == DSO_BINARY_TYPE__GUEST_KALLSYMS;
}
bool dso__is_object_file(const struct dso *dso);
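
The locking annotations threaded through dso.c and dso.h above (GUARDED_BY, EXCLUSIVE_LOCKS_REQUIRED, LOCK_RETURNED, LOCKS_EXCLUDED, EXCLUSIVE_TRYLOCK_FUNCTION, UNLOCK_FUNCTION) feed clang's -Wthread-safety analysis, which is what lets the compiler prove dso__data_get_fd()/dso__data_put_fd() pairing. A minimal standalone sketch of how the analysis flags an unlocked access; the macro definitions here are illustrative stand-ins for perf's real wrappers in tools/perf/util/mutex.h:

	/* Compile with: clang -Wthread-safety -c sketch.c */
	struct __attribute__((capability("mutex"))) lock { int unused; };

	#define GUARDED_BY(x)			__attribute__((guarded_by(x)))
	#define EXCLUSIVE_LOCKS_REQUIRED(...)	__attribute__((exclusive_locks_required(__VA_ARGS__)))
	#define ACQUIRE(...)			__attribute__((acquire_capability(__VA_ARGS__)))
	#define RELEASE(...)			__attribute__((release_capability(__VA_ARGS__)))

	struct lock data_open_lock;
	long open_cnt GUARDED_BY(data_open_lock);

	void lock_take(struct lock *l) ACQUIRE(*l);
	void lock_drop(struct lock *l) RELEASE(*l);

	static void list_add(void) EXCLUSIVE_LOCKS_REQUIRED(data_open_lock)
	{
		open_cnt++;	/* fine: the contract says the lock is held */
	}

	void good(void)
	{
		lock_take(&data_open_lock);
		list_add();
		lock_drop(&data_open_lock);
	}

	void bad(void)
	{
		list_add();	/* warning: requires holding 'data_open_lock' */
	}
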
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index a6321e7f0633..36411749e007 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -331,10 +331,13 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
for (idx = 0; idx < nr_cpus; ++idx) {
struct perf_cpu cpu = { .cpu = idx };
+ int core_id = cpu__get_core_id(cpu);
+ int socket_id = cpu__get_socket_id(cpu);
+ int die_id = cpu__get_die_id(cpu);
- env->cpu[idx].core_id = cpu__get_core_id(cpu);
- env->cpu[idx].socket_id = cpu__get_socket_id(cpu);
- env->cpu[idx].die_id = cpu__get_die_id(cpu);
+ env->cpu[idx].core_id = core_id >= 0 ? core_id : -1;
+ env->cpu[idx].socket_id = socket_id >= 0 ? socket_id : -1;
+ env->cpu[idx].die_id = die_id >= 0 ? die_id : -1;
}
env->nr_cpus_avail = nr_cpus;
@@ -477,15 +480,19 @@ const char *perf_env__arch(struct perf_env *env)
return normalize_arch(arch_name);
}
+#if defined(HAVE_LIBTRACEEVENT)
+#include "trace/beauty/arch_errno_names.c"
+#endif
+
const char *perf_env__arch_strerrno(struct perf_env *env __maybe_unused, int err __maybe_unused)
{
-#if defined(HAVE_SYSCALL_TABLE_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
+#if defined(HAVE_LIBTRACEEVENT)
if (env->arch_strerrno == NULL)
env->arch_strerrno = arch_syscalls__strerrno_function(perf_env__arch(env));
return env->arch_strerrno ? env->arch_strerrno(err) : "no arch specific strerrno function";
#else
- return "!(HAVE_SYSCALL_TABLE_SUPPORT && HAVE_LIBTRACEEVENT)";
+ return "!HAVE_LIBTRACEEVENT";
#endif
}
@@ -536,7 +543,7 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
for (i = 0; i < env->nr_numa_nodes; i++) {
nn = &env->numa_nodes[i];
- nr = max(nr, perf_cpu_map__max(nn->map).cpu);
+ nr = max(nr, (int)perf_cpu_map__max(nn->map).cpu);
}
nr++;
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index da11add761d0..d90e343cf1fa 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -56,8 +56,6 @@ struct pmu_caps {
typedef const char *(arch_syscalls__strerrno_t)(int err);
-arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch);
-
struct perf_env {
char *hostname;
char *os_release;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index aac96d5d1917..c23b77f8f854 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -767,6 +767,17 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
al->socket = env->cpu[al->cpu].socket_id;
}
+ /* Account for possible out-of-order switch events. */
+ al->parallelism = max(1, min(machine->parallelism, machine__nr_cpus_avail(machine)));
+ if (test_bit(al->parallelism, symbol_conf.parallelism_filter))
+ al->filtered |= (1 << HIST_FILTER__PARALLELISM);
+ /*
+ * Scale by a constant to avoid precision loss and floating point
+ * arithmetic. The exact multiplier does not matter otherwise since
+ * the value is only ever printed as a percentage.
+ */
+ al->latency = sample->period * 1000 / al->parallelism;
+
if (al->map) {
if (symbol_conf.dso_list &&
(!dso || !(strlist__has_entry(symbol_conf.dso_list,
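
To see why the fixed-point scaling in machine__resolve() above is safe, take two samples with the same period, one collected while the machine ran serially and one at parallelism 4 (numbers invented for illustration):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long period = 2000000;
		unsigned long long lat_p1 = period * 1000 / 1;	/* parallelism 1 */
		unsigned long long lat_p4 = period * 1000 / 4;	/* parallelism 4 */
		unsigned long long total = lat_p1 + lat_p4;

		/* Only the ratio is ever reported, so the x1000 factor
		 * cancels: 80% of the latency lands on the serial sample. */
		printf("%.0f%% / %.0f%%\n",
		       100.0 * lat_p1 / total, 100.0 * lat_p4 / total);
		return 0;
	}
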
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 2744c54f404e..664bf39567ce 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -67,9 +67,15 @@ enum {
PERF_IP_FLAG_INTR_DISABLE = 1ULL << 13,
PERF_IP_FLAG_INTR_TOGGLE = 1ULL << 14,
PERF_IP_FLAG_BRANCH_MISS = 1ULL << 15,
+ PERF_IP_FLAG_NOT_TAKEN = 1ULL << 16,
};
-#define PERF_IP_FLAG_CHARS "bcrosyiABExghDt"
+#define PERF_IP_FLAG_CHARS "bcrosyiABExghDtmn"
+
+#define PERF_ADDITIONAL_STATE_MASK \
+ (PERF_IP_FLAG_IN_TX | \
+ PERF_IP_FLAG_INTR_DISABLE | \
+ PERF_IP_FLAG_INTR_TOGGLE)
#define PERF_BRANCH_MASK (\
PERF_IP_FLAG_BRANCH |\
@@ -85,6 +91,10 @@ enum {
PERF_IP_FLAG_VMENTRY |\
PERF_IP_FLAG_VMEXIT)
+#define PERF_IP_FLAG_BRANCH_EVENT_MASK \
+ (PERF_IP_FLAG_BRANCH_MISS | \
+ PERF_IP_FLAG_NOT_TAKEN)
+
#define PERF_MEM_DATA_SRC_NONE \
(PERF_MEM_S(OP, NA) |\
PERF_MEM_S(LVL, NA) |\
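
The two added flag characters keep PERF_IP_FLAG_CHARS positionally aligned with the enum: index 15 ('m') is BRANCH_MISS and index 16 ('n') is the new NOT_TAKEN bit. A toy decoder showing the one-character-per-bit convention (hypothetical helper, mirroring how consumers such as perf script walk the string):

	#include <stdio.h>

	#define PERF_IP_FLAG_CHARS "bcrosyiABExghDtmn"

	static void print_flags(unsigned long long flags)
	{
		const char *chars = PERF_IP_FLAG_CHARS;

		for (int bit = 0; chars[bit] != '\0'; bit++) {
			if (flags & (1ULL << bit))
				putchar(chars[bit]);
		}
		putchar('\n');
	}

	int main(void)
	{
		/* PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_NOT_TAKEN prints "bn". */
		print_flags((1ULL << 0) | (1ULL << 16));
		return 0;
	}
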
diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h
index eabd7913c309..dcff697ed252 100644
--- a/tools/perf/util/events_stats.h
+++ b/tools/perf/util/events_stats.h
@@ -57,6 +57,8 @@ struct events_stats {
struct hists_stats {
u64 total_period;
u64 total_non_filtered_period;
+ u64 total_latency;
+ u64 total_non_filtered_latency;
u32 nr_samples;
u32 nr_non_filtered_samples;
u32 nr_lost_samples;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index f0dd174e2deb..c1a04141aed0 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1373,19 +1373,18 @@ static int evlist__create_syswide_maps(struct evlist *evlist)
*/
cpus = perf_cpu_map__new_online_cpus();
if (!cpus)
- goto out;
+ return -ENOMEM;
threads = perf_thread_map__new_dummy();
- if (!threads)
- goto out_put;
+ if (!threads) {
+ perf_cpu_map__put(cpus);
+ return -ENOMEM;
+ }
perf_evlist__set_maps(&evlist->core, cpus, threads);
-
perf_thread_map__put(threads);
-out_put:
perf_cpu_map__put(cpus);
-out:
- return -ENOMEM;
+ return 0;
}
int evlist__open(struct evlist *evlist)
@@ -2535,10 +2534,10 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis
return;
evlist__for_each_entry(evlist, pos) {
- struct perf_cpu_map *intersect, *to_test;
+ struct perf_cpu_map *intersect, *to_test, *online = cpu_map__online();
const struct perf_pmu *pmu = evsel__find_pmu(pos);
- to_test = pmu && pmu->is_core ? pmu->cpus : cpu_map__online();
+ to_test = pmu && pmu->is_core ? pmu->cpus : online;
intersect = perf_cpu_map__intersect(to_test, user_requested_cpus);
if (!perf_cpu_map__equal(intersect, user_requested_cpus)) {
char buf[128];
@@ -2548,6 +2547,7 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis
cpu_list, pmu ? pmu->name : "cpu", buf, evsel__name(pos));
}
perf_cpu_map__put(intersect);
+ perf_cpu_map__put(online);
}
perf_cpu_map__put(user_requested_cpus);
}
@@ -2594,3 +2594,17 @@ bool evlist__has_bpf_output(struct evlist *evlist)
return false;
}
+
+bool evlist__needs_bpf_sb_event(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__is_dummy_event(evsel))
+ continue;
+ if (!evsel->core.attr.exclude_kernel)
+ return true;
+ }
+
+ return false;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index adddb1db1ad2..edcbf1c10e92 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -435,5 +435,6 @@ void evlist__check_mem_load_aux(struct evlist *evlist);
void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list);
void evlist__uniquify_name(struct evlist *evlist);
bool evlist__has_bpf_output(struct evlist *evlist);
+bool evlist__needs_bpf_sb_event(struct evlist *evlist);
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d22c5df1701e..3c030da2e477 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -237,6 +237,16 @@ set_methods:
return 0;
}
+const char *evsel__pmu_name(const struct evsel *evsel)
+{
+ struct perf_pmu *pmu = evsel__find_pmu(evsel);
+
+ if (pmu)
+ return pmu->name;
+
+ return event_type(evsel->core.attr.type);
+}
+
#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
int __evsel__sample_size(u64 sample_type)
@@ -395,6 +405,7 @@ void evsel__init(struct evsel *evsel,
evsel->group_pmu_name = NULL;
evsel->skippable = false;
evsel->alternate_hw_config = PERF_COUNT_HW_MAX;
+ evsel->script_output_type = -1; // FIXME: OUTPUT_TYPE_UNSET, see builtin-script.c
}
struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
@@ -454,7 +465,7 @@ static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
* The assumption is that @orig is not configured nor opened yet.
* So we only care about the attributes that can be set while it's parsed.
*/
-struct evsel *evsel__clone(struct evsel *orig)
+struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig)
{
struct evsel *evsel;
@@ -467,7 +478,11 @@ struct evsel *evsel__clone(struct evsel *orig)
if (orig->bpf_obj)
return NULL;
- evsel = evsel__new(&orig->core.attr);
+ if (dest)
+ evsel = dest;
+ else
+ evsel = evsel__new(&orig->core.attr);
+
if (evsel == NULL)
return NULL;
@@ -506,17 +521,28 @@ struct evsel *evsel__clone(struct evsel *orig)
}
evsel->cgrp = cgroup__get(orig->cgrp);
#ifdef HAVE_LIBTRACEEVENT
+ if (orig->tp_sys) {
+ evsel->tp_sys = strdup(orig->tp_sys);
+ if (evsel->tp_sys == NULL)
+ goto out_err;
+ }
+ if (orig->tp_name) {
+ evsel->tp_name = strdup(orig->tp_name);
+ if (evsel->tp_name == NULL)
+ goto out_err;
+ }
evsel->tp_format = orig->tp_format;
#endif
evsel->handler = orig->handler;
evsel->core.leader = orig->core.leader;
evsel->max_events = orig->max_events;
- free((char *)evsel->unit);
- evsel->unit = strdup(orig->unit);
- if (evsel->unit == NULL)
- goto out_err;
-
+ zfree(&evsel->unit);
+ if (orig->unit) {
+ evsel->unit = strdup(orig->unit);
+ if (evsel->unit == NULL)
+ goto out_err;
+ }
evsel->scale = orig->scale;
evsel->snapshot = orig->snapshot;
evsel->per_pkg = orig->per_pkg;
@@ -544,53 +570,105 @@ out_err:
return NULL;
}
+static int trace_event__id(const char *sys, const char *name)
+{
+ char *tp_dir = get_events_file(sys);
+ char path[PATH_MAX];
+ int id, err;
+
+ if (!tp_dir)
+ return -1;
+
+ scnprintf(path, PATH_MAX, "%s/%s/id", tp_dir, name);
+ put_events_file(tp_dir);
+ err = filename__read_int(path, &id);
+ if (err)
+ return err;
+
+ return id;
+}
+
/*
* Returns pointer with encoded error via <linux/err.h> interface.
*/
-#ifdef HAVE_LIBTRACEEVENT
struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool format)
{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_TRACEPOINT,
+ .sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
+ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
+ };
struct evsel *evsel = zalloc(perf_evsel__object.size);
- int err = -ENOMEM;
+ int err = -ENOMEM, id = -1;
- if (evsel == NULL) {
+ if (evsel == NULL)
goto out_err;
- } else {
- struct perf_event_attr attr = {
- .type = PERF_TYPE_TRACEPOINT,
- .sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
- PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
- };
- if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
- goto out_free;
- event_attr_init(&attr);
+ if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
+ goto out_free;
- if (format) {
- evsel->tp_format = trace_event__tp_format(sys, name);
- if (IS_ERR(evsel->tp_format)) {
- err = PTR_ERR(evsel->tp_format);
- goto out_free;
- }
- attr.config = evsel->tp_format->id;
- } else {
- attr.config = (__u64) -1;
- }
+#ifdef HAVE_LIBTRACEEVENT
+ evsel->tp_sys = strdup(sys);
+ if (!evsel->tp_sys)
+ goto out_free;
+ evsel->tp_name = strdup(name);
+ if (!evsel->tp_name)
+ goto out_free;
+#endif
- attr.sample_period = 1;
- evsel__init(evsel, &attr, idx);
- }
+ event_attr_init(&attr);
+ if (format) {
+ id = trace_event__id(sys, name);
+ if (id < 0) {
+ err = id;
+ goto out_free;
+ }
+ }
+ attr.config = (__u64)id;
+ attr.sample_period = 1;
+ evsel__init(evsel, &attr, idx);
return evsel;
out_free:
zfree(&evsel->name);
+#ifdef HAVE_LIBTRACEEVENT
+ zfree(&evsel->tp_sys);
+ zfree(&evsel->tp_name);
+#endif
free(evsel);
out_err:
return ERR_PTR(err);
}
+
+#ifdef HAVE_LIBTRACEEVENT
+struct tep_event *evsel__tp_format(struct evsel *evsel)
+{
+ struct tep_event *tp_format = evsel->tp_format;
+
+ if (tp_format)
+ return tp_format;
+
+ if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
+ return NULL;
+
+ if (!evsel->tp_sys)
+ tp_format = trace_event__tp_format_id(evsel->core.attr.config);
+ else
+ tp_format = trace_event__tp_format(evsel->tp_sys, evsel->tp_name);
+
+ if (IS_ERR(tp_format)) {
+ int err = -PTR_ERR(tp_format);
+
+ pr_err("Error getting tracepoint format '%s' '%s'(%d)\n",
+ evsel__name(evsel), strerror(err), err);
+ return NULL;
+ }
+ evsel->tp_format = tp_format;
+ return evsel->tp_format;
+}
#endif
const char *const evsel__hw_names[PERF_COUNT_HW_MAX] = {
@@ -1103,6 +1181,9 @@ static void evsel__apply_config_terms(struct evsel *evsel,
case EVSEL__CONFIG_TERM_AUX_OUTPUT:
attr->aux_output = term->val.aux_output ? 1 : 0;
break;
+ case EVSEL__CONFIG_TERM_AUX_ACTION:
+ /* Already applied by auxtrace */
+ break;
case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE:
/* Already applied by auxtrace */
break;
@@ -1587,6 +1668,10 @@ void evsel__exit(struct evsel *evsel)
perf_thread_map__put(evsel->core.threads);
zfree(&evsel->group_name);
zfree(&evsel->name);
+#ifdef HAVE_LIBTRACEEVENT
+ zfree(&evsel->tp_sys);
+ zfree(&evsel->tp_name);
+#endif
zfree(&evsel->filter);
zfree(&evsel->group_pmu_name);
zfree(&evsel->unit);
@@ -2090,16 +2175,17 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
return err;
}
-static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
+static bool __has_attr_feature(struct perf_event_attr *attr,
+ struct perf_cpu cpu, unsigned long flags)
{
- int fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
+ int fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu,
/*group_fd=*/-1, flags);
close(fd);
if (fd < 0) {
attr->exclude_kernel = 1;
- fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
+ fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu,
/*group_fd=*/-1, flags);
close(fd);
}
@@ -2107,7 +2193,7 @@ static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
if (fd < 0) {
attr->exclude_hv = 1;
- fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
+ fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu,
/*group_fd=*/-1, flags);
close(fd);
}
@@ -2115,7 +2201,7 @@ static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
if (fd < 0) {
attr->exclude_guest = 1;
- fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
+ fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu,
/*group_fd=*/-1, flags);
close(fd);
}
@@ -2127,6 +2213,13 @@ static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
return fd >= 0;
}
+static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
+{
+ struct perf_cpu cpu = {.cpu = -1};
+
+ return __has_attr_feature(attr, cpu, flags);
+}
+
static void evsel__detect_missing_pmu_features(struct evsel *evsel)
{
struct perf_event_attr attr = {
@@ -2215,7 +2308,65 @@ found:
errno = old_errno;
}
-static bool evsel__detect_missing_features(struct evsel *evsel)
+static bool evsel__probe_aux_action(struct evsel *evsel, struct perf_cpu cpu)
+{
+ struct perf_event_attr attr = evsel->core.attr;
+ int old_errno = errno;
+
+ attr.disabled = 1;
+ attr.aux_start_paused = 1;
+
+ if (__has_attr_feature(&attr, cpu, /*flags=*/0)) {
+ errno = old_errno;
+ return true;
+ }
+
+ /*
+ * EOPNOTSUPP means the kernel supports the feature but the PMU does
+ * not, so keep that distinction if possible.
+ */
+ if (errno != EOPNOTSUPP)
+ errno = old_errno;
+
+ return false;
+}
+
+static void evsel__detect_missing_aux_action_feature(struct evsel *evsel, struct perf_cpu cpu)
+{
+ static bool detection_done;
+ struct evsel *leader;
+
+ /*
+ * Don't bother probing aux_action if it is not being used or has been
+ * probed before.
+ */
+ if (!evsel->core.attr.aux_action || detection_done)
+ return;
+
+ detection_done = true;
+
+ /*
+ * The leader is an AUX area event. If it has failed, assume the feature
+ * is not supported.
+ */
+ leader = evsel__leader(evsel);
+ if (evsel == leader) {
+ perf_missing_features.aux_action = true;
+ return;
+ }
+
+ /*
+ * AUX area event with aux_action must have been opened successfully
+ * already, so feature is supported.
+ */
+ if (leader->core.attr.aux_action)
+ return;
+
+ if (!evsel__probe_aux_action(leader, cpu))
+ perf_missing_features.aux_action = true;
+}
+
+static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu cpu)
{
static bool detection_done = false;
struct perf_event_attr attr = {
@@ -2225,6 +2376,8 @@ static bool evsel__detect_missing_features(struct evsel *evsel)
};
int old_errno;
+ evsel__detect_missing_aux_action_feature(evsel, cpu);
+
evsel__detect_missing_pmu_features(evsel);
if (evsel__has_br_stack(evsel))
@@ -2413,25 +2566,6 @@ check:
return false;
}
-static bool evsel__handle_error_quirks(struct evsel *evsel, int error)
-{
- /*
- * AMD core PMU tries to forward events with precise_ip to IBS PMU
- * implicitly. But IBS PMU has more restrictions so it can fail with
- * supported event attributes. Let's forward it back to the core PMU
- * by clearing precise_ip only if it's from precise_max (:P).
- */
- if ((error == -EINVAL || error == -ENOENT) && x86__is_amd_cpu() &&
- evsel->core.attr.precise_ip && evsel->precise_max) {
- evsel->core.attr.precise_ip = 0;
- pr_debug2_peo("removing precise_ip on AMD\n");
- display_attr(&evsel->core.attr);
- return true;
- }
-
- return false;
-}
-
static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads,
int start_cpu_map_idx, int end_cpu_map_idx)
@@ -2439,6 +2573,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
int idx, thread, nthreads;
int pid = -1, err, old_errno;
enum rlimit_action set_rlimit = NO_CHANGE;
+ struct perf_cpu cpu;
if (evsel__is_retire_lat(evsel))
return tpebs_start(evsel->evlist);
@@ -2476,6 +2611,7 @@ fallback_missing_features:
}
for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
+ cpu = perf_cpu_map__cpu(cpus, idx);
for (thread = 0; thread < nthreads; thread++) {
int fd, group_fd;
@@ -2496,10 +2632,9 @@ retry_open:
/* Debug message used by test scripts */
pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
- pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags);
+ pid, cpu.cpu, group_fd, evsel->open_flags);
- fd = sys_perf_event_open(&evsel->core.attr, pid,
- perf_cpu_map__cpu(cpus, idx).cpu,
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu,
group_fd, evsel->open_flags);
FD(evsel, idx, thread) = fd;
@@ -2515,8 +2650,7 @@ retry_open:
bpf_counter__install_pe(evsel, idx, fd);
if (unlikely(test_attr__enabled())) {
- test_attr__open(&evsel->core.attr, pid,
- perf_cpu_map__cpu(cpus, idx),
+ test_attr__open(&evsel->core.attr, pid, cpu,
fd, group_fd, evsel->open_flags);
}
@@ -2571,15 +2705,12 @@ try_fallback:
if (err == -EMFILE && rlimit__increase_nofile(&set_rlimit))
goto retry_open;
- if (err == -EINVAL && evsel__detect_missing_features(evsel))
+ if (err == -EINVAL && evsel__detect_missing_features(evsel, cpu))
goto fallback_missing_features;
if (evsel__precise_ip_fallback(evsel))
goto retry_open;
- if (evsel__handle_error_quirks(evsel, err))
- goto retry_open;
-
out_close:
if (err)
threads->err_thread = thread;
@@ -3035,17 +3166,19 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
}
if (type & PERF_SAMPLE_REGS_USER) {
+ struct regs_dump *regs = perf_sample__user_regs(data);
+
OVERFLOW_CHECK_u64(array);
- data->user_regs.abi = *array;
+ regs->abi = *array;
array++;
- if (data->user_regs.abi) {
+ if (regs->abi) {
u64 mask = evsel->core.attr.sample_regs_user;
sz = hweight64(mask) * sizeof(u64);
OVERFLOW_CHECK(array, sz, max_size);
- data->user_regs.mask = mask;
- data->user_regs.regs = (u64 *)array;
+ regs->mask = mask;
+ regs->regs = (u64 *)array;
array = (void *)array + sz;
}
}
@@ -3089,19 +3222,20 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
array++;
}
- data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
if (type & PERF_SAMPLE_REGS_INTR) {
+ struct regs_dump *regs = perf_sample__intr_regs(data);
+
OVERFLOW_CHECK_u64(array);
- data->intr_regs.abi = *array;
+ regs->abi = *array;
array++;
- if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
+ if (regs->abi != PERF_SAMPLE_REGS_ABI_NONE) {
u64 mask = evsel->core.attr.sample_regs_intr;
sz = hweight64(mask) * sizeof(u64);
OVERFLOW_CHECK(array, sz, max_size);
- data->intr_regs.mask = mask;
- data->intr_regs.regs = (u64 *)array;
+ regs->mask = mask;
+ regs->regs = (u64 *)array;
array = (void *)array + sz;
}
}
@@ -3218,12 +3352,16 @@ u16 evsel__id_hdr_size(const struct evsel *evsel)
#ifdef HAVE_LIBTRACEEVENT
struct tep_format_field *evsel__field(struct evsel *evsel, const char *name)
{
- return tep_find_field(evsel->tp_format, name);
+ struct tep_event *tp_format = evsel__tp_format(evsel);
+
+ return tp_format ? tep_find_field(tp_format, name) : NULL;
}
struct tep_format_field *evsel__common_field(struct evsel *evsel, const char *name)
{
- return tep_find_common_field(evsel->tp_format, name);
+ struct tep_event *tp_format = evsel__tp_format(evsel);
+
+ return tp_format ? tep_find_common_field(tp_format, name) : NULL;
}
void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name)
@@ -3448,6 +3586,78 @@ static bool find_process(const char *name)
return ret ? false : true;
}
+static int dump_perf_event_processes(char *msg, size_t size)
+{
+ DIR *proc_dir;
+ struct dirent *proc_entry;
+ int printed = 0;
+
+ proc_dir = opendir(procfs__mountpoint());
+ if (!proc_dir)
+ return 0;
+
+ /* Walk through the /proc directory. */
+ while ((proc_entry = readdir(proc_dir)) != NULL) {
+ char buf[256];
+ DIR *fd_dir;
+ struct dirent *fd_entry;
+ int fd_dir_fd;
+
+ if (proc_entry->d_type != DT_DIR ||
+ !isdigit(proc_entry->d_name[0]) ||
+ strlen(proc_entry->d_name) > sizeof(buf) - 4)
+ continue;
+
+ scnprintf(buf, sizeof(buf), "%s/fd", proc_entry->d_name);
+ fd_dir_fd = openat(dirfd(proc_dir), buf, O_DIRECTORY);
+ if (fd_dir_fd == -1)
+ continue;
+ fd_dir = fdopendir(fd_dir_fd);
+ if (!fd_dir) {
+ close(fd_dir_fd);
+ continue;
+ }
+ while ((fd_entry = readdir(fd_dir)) != NULL) {
+ ssize_t link_size;
+
+ if (fd_entry->d_type != DT_LNK)
+ continue;
+ link_size = readlinkat(fd_dir_fd, fd_entry->d_name, buf, sizeof(buf));
+ if (link_size < 0)
+ continue;
+ /* Take care as readlink doesn't null terminate the string. */
+ if (!strncmp(buf, "anon_inode:[perf_event]", link_size)) {
+ int cmdline_fd;
+ ssize_t cmdline_size;
+
+ scnprintf(buf, sizeof(buf), "%s/cmdline", proc_entry->d_name);
+ cmdline_fd = openat(dirfd(proc_dir), buf, O_RDONLY);
+ if (cmdline_fd == -1)
+ continue;
+ cmdline_size = read(cmdline_fd, buf, sizeof(buf) - 1);
+ close(cmdline_fd);
+ if (cmdline_size < 0)
+ continue;
+ buf[cmdline_size] = '\0';
+ for (ssize_t i = 0; i < cmdline_size; i++) {
+ if (buf[i] == '\0')
+ buf[i] = ' ';
+ }
+
+ if (printed == 0)
+ printed += scnprintf(msg, size, "Possible processes:\n");
+
+ printed += scnprintf(msg + printed, size - printed,
+ "%s %s\n", proc_entry->d_name, buf);
+ break;
+ }
+ }
+ closedir(fd_dir);
+ }
+ closedir(proc_dir);
+ return printed;
+}
+
int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused,
char *msg __maybe_unused,
size_t size __maybe_unused)
@@ -3481,7 +3691,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
printed += scnprintf(msg, size,
"No permission to enable %s event.\n\n", evsel__name(evsel));
- return scnprintf(msg + printed, size - printed,
+ return printed + scnprintf(msg + printed, size - printed,
"Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
"access to performance monitoring and observability operations for processes\n"
"without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n"
@@ -3526,6 +3736,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg, size,
"%s: PMU Hardware doesn't support 'aux_output' feature",
evsel__name(evsel));
+ if (evsel->core.attr.aux_action)
+ return scnprintf(msg, size,
+ "%s: PMU Hardware doesn't support 'aux_action' feature",
+ evsel__name(evsel));
if (evsel->core.attr.sample_period != 0)
return scnprintf(msg, size,
"%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
@@ -3544,6 +3758,11 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg, size,
"The PMU counters are busy/taken by another profiler.\n"
"We found oprofile daemon running, please stop it and try again.");
+ printed += scnprintf(
+ msg, size,
+ "The PMU %s counters are busy and in use by another process.\n",
+ evsel->pmu ? evsel->pmu->name : "");
+ return printed + dump_perf_event_processes(msg + printed, size - printed);
break;
case EINVAL:
if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)
@@ -3556,6 +3775,8 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg, size, "clockid feature not supported.");
if (perf_missing_features.clockid_wrong)
return scnprintf(msg, size, "wrong clockid (%d).", clockid);
+ if (perf_missing_features.aux_action)
+ return scnprintf(msg, size, "The 'aux_action' feature is not supported, update the kernel.");
if (perf_missing_features.aux_output)
return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel.");
if (!target__has_cpu(target))
@@ -3640,10 +3861,10 @@ void evsel__zero_per_pkg(struct evsel *evsel)
*/
bool evsel__is_hybrid(const struct evsel *evsel)
{
- if (perf_pmus__num_core_pmus() == 1)
+ if (!evsel->core.is_pmu_core)
return false;
- return evsel->core.is_pmu_core;
+ return perf_pmus__num_core_pmus() > 1;
}
struct evsel *evsel__leader(const struct evsel *evsel)
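
The new trace_event__id() above lets evsel__newtp_idx() resolve a tracepoint's config value without libtraceevent, by reading the id file under the tracing events directory. A rough standalone equivalent; the tracefs mount point is hardcoded here as an assumption, whereas perf resolves it through get_events_file():

	#include <stdio.h>

	static int tracepoint_id(const char *sys, const char *name)
	{
		char path[256];
		FILE *f;
		int id = -1;

		snprintf(path, sizeof(path),
			 "/sys/kernel/tracing/events/%s/%s/id", sys, name);
		f = fopen(path, "r");
		if (!f)
			return -1;
		if (fscanf(f, "%d", &id) != 1)
			id = -1;
		fclose(f);
		return id;
	}

	int main(void)
	{
		/* Typically needs root or read access to tracefs. */
		printf("sched:sched_switch id = %d\n",
		       tracepoint_id("sched", "sched_switch"));
		return 0;
	}
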
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 04934a7af174..aae431d63d64 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -59,6 +59,8 @@ struct evsel {
char *group_name;
const char *group_pmu_name;
#ifdef HAVE_LIBTRACEEVENT
+ char *tp_sys;
+ char *tp_name;
struct tep_event *tp_format;
#endif
char *filter;
@@ -117,8 +119,10 @@ struct evsel {
bool errored;
bool needs_auxtrace_mmap;
bool default_metricgroup; /* A member of the Default metricgroup */
+ bool needs_uniquify;
struct hashmap *per_pkg_mask;
int err;
+ int script_output_type;
struct {
evsel__sb_cb_t *cb;
void *data;
@@ -205,6 +209,7 @@ struct perf_missing_features {
bool weight_struct;
bool read_lost;
bool branch_counters;
+ bool aux_action;
bool inherit_sample_read;
};
@@ -232,6 +237,7 @@ int evsel__object_config(size_t object_size,
void (*fini)(struct evsel *evsel));
struct perf_pmu *evsel__find_pmu(const struct evsel *evsel);
+const char *evsel__pmu_name(const struct evsel *evsel);
bool evsel__is_aux_event(const struct evsel *evsel);
struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx);
@@ -241,26 +247,23 @@ static inline struct evsel *evsel__new(struct perf_event_attr *attr)
return evsel__new_idx(attr, 0);
}
-struct evsel *evsel__clone(struct evsel *orig);
+struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig);
int copy_config_terms(struct list_head *dst, struct list_head *src);
void free_config_terms(struct list_head *config_terms);
-#ifdef HAVE_LIBTRACEEVENT
-struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool format);
-
/*
* Returns pointer with encoded error via <linux/err.h> interface.
*/
+struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool format);
static inline struct evsel *evsel__newtp(const char *sys, const char *name)
{
return evsel__newtp_idx(sys, name, 0, true);
}
-#endif
#ifdef HAVE_LIBTRACEEVENT
-struct tep_event *event_format__new(const char *sys, const char *name);
+struct tep_event *evsel__tp_format(struct evsel *evsel);
#endif
void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx);
diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h
index aee6f808b512..af52a1516d0b 100644
--- a/tools/perf/util/evsel_config.h
+++ b/tools/perf/util/evsel_config.h
@@ -25,6 +25,7 @@ enum evsel_term_type {
EVSEL__CONFIG_TERM_BRANCH,
EVSEL__CONFIG_TERM_PERCORE,
EVSEL__CONFIG_TERM_AUX_OUTPUT,
+ EVSEL__CONFIG_TERM_AUX_ACTION,
EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE,
EVSEL__CONFIG_TERM_CFG_CHG,
};
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 86b7f46f9e2a..103984b29b1e 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -81,13 +81,15 @@ int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE
#ifdef HAVE_LIBTRACEEVENT
if (details->trace_fields) {
struct tep_format_field *field;
+ const struct tep_event *tp_format;
if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) {
printed += comma_fprintf(fp, &first, " (not a tracepoint)");
goto out;
}
- field = evsel->tp_format->format.fields;
+ tp_format = evsel__tp_format(evsel);
+ field = tp_format ? tp_format->format.fields : NULL;
if (field == NULL) {
printed += comma_fprintf(fp, &first, " (no trace field)");
goto out;
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index c221dcce6666..6413537442aa 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -215,6 +215,8 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
struct expr_id_data **data)
{
+ if (!ctx || !id)
+ return -1;
return hashmap__find(ctx->ids, id, data) ? 0 : -1;
}
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index bae649ef50e8..a9bc47da83a5 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -7,6 +7,7 @@
struct evlist;
struct hashmap;
+struct stats;
struct perf_ftrace {
struct evlist *evlist;
@@ -20,6 +21,11 @@ struct perf_ftrace {
unsigned long percpu_buffer_size;
bool inherit;
bool use_nsec;
+ unsigned int bucket_range;
+ unsigned int min_latency;
+ unsigned int max_latency;
+ unsigned int bucket_num;
+ bool hide_empty;
int graph_depth;
int func_stack_trace;
int func_irq_info;
@@ -43,7 +49,7 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace);
int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace);
int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace);
int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
- int buckets[]);
+ int buckets[], struct stats *stats);
int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace);
#else /* !HAVE_BPF_SKEL */
@@ -68,7 +74,8 @@ perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
static inline int
perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
- int buckets[] __maybe_unused)
+ int buckets[] __maybe_unused,
+ struct stats *stats __maybe_unused)
{
return -1;
}
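
The four new histogram knobs in struct perf_ftrace describe a linear bucketing of latencies. One plausible mapping from a measured latency to a slot, with slot 0 for values below min_latency and the last slot as overflow (an illustrative sketch only; the authoritative mapping lives in the func_latency BPF skeleton, which is not part of this hunk):

	#include <stdio.h>

	static unsigned int bucket_idx(unsigned long lat, unsigned int min_latency,
				       unsigned int bucket_range,
				       unsigned int bucket_num)
	{
		unsigned int idx;

		if (lat < min_latency)
			return 0;
		idx = (unsigned int)((lat - min_latency) / bucket_range) + 1;
		if (idx >= bucket_num)
			idx = bucket_num - 1;	/* overflow bucket */
		return idx;
	}

	int main(void)
	{
		/* 10 buckets of 100us starting at 500us: 650us -> slot 2. */
		printf("%u\n", bucket_idx(650, 500, 100, 10));
		return 0;
	}
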
diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh
index 1b5140e5ce99..6a73c903d690 100755
--- a/tools/perf/util/generate-cmdlist.sh
+++ b/tools/perf/util/generate-cmdlist.sh
@@ -38,7 +38,7 @@ do
done
echo "#endif /* HAVE_LIBELF_SUPPORT */"
-echo "#if defined(HAVE_LIBTRACEEVENT) && (defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT))"
+echo "#if defined(HAVE_LIBTRACEEVENT)"
sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt |
sort |
while read cmd
@@ -51,7 +51,7 @@ do
p
}' "Documentation/perf-$cmd.txt"
done
-echo "#endif /* HAVE_LIBTRACEEVENT && (HAVE_LIBAUDIT_SUPPORT || HAVE_SYSCALL_TABLE_SUPPORT) */"
+echo "#endif /* HAVE_LIBTRACEEVENT */"
echo "#ifdef HAVE_LIBTRACEEVENT"
sed -n -e 's/^perf-\([^ ]*\)[ ].* traceevent.*/\1/p' command-list.txt |
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index d06aa86352d3..e3cdc3b7b4ab 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -44,6 +44,7 @@
#include "build-id.h"
#include "data.h"
#include <api/fs/fs.h>
+#include <api/io_dir.h>
#include "asm/bug.h"
#include "tool.h"
#include "time-utils.h"
@@ -1311,11 +1312,11 @@ static int memory_node__read(struct memory_node *n, unsigned long idx)
{
unsigned int phys, size = 0;
char path[PATH_MAX];
- struct dirent *ent;
- DIR *dir;
+ struct io_dirent64 *ent;
+ struct io_dir dir;
#define for_each_memory(mem, dir) \
- while ((ent = readdir(dir))) \
+ while ((ent = io_dir__readdir(&dir)) != NULL) \
if (strcmp(ent->d_name, ".") && \
strcmp(ent->d_name, "..") && \
sscanf(ent->d_name, "memory%u", &mem) == 1)
@@ -1324,9 +1325,9 @@ static int memory_node__read(struct memory_node *n, unsigned long idx)
"%s/devices/system/node/node%lu",
sysfs__mountpoint(), idx);
- dir = opendir(path);
- if (!dir) {
- pr_warning("failed: can't open memory sysfs data\n");
+ io_dir__init(&dir, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ if (dir.dirfd < 0) {
+ pr_warning("failed: can't open memory sysfs data '%s'\n", path);
return -1;
}
@@ -1338,20 +1339,20 @@ static int memory_node__read(struct memory_node *n, unsigned long idx)
n->set = bitmap_zalloc(size);
if (!n->set) {
- closedir(dir);
+ close(dir.dirfd);
return -ENOMEM;
}
n->node = idx;
n->size = size;
- rewinddir(dir);
+ io_dir__rewinddir(&dir);
for_each_memory(phys, dir) {
__set_bit(phys, n->set);
}
- closedir(dir);
+ close(dir.dirfd);
return 0;
}
@@ -1374,8 +1375,8 @@ static int memory_node__sort(const void *a, const void *b)
static int build_mem_topology(struct memory_node **nodesp, u64 *cntp)
{
char path[PATH_MAX];
- struct dirent *ent;
- DIR *dir;
+ struct io_dirent64 *ent;
+ struct io_dir dir;
int ret = 0;
size_t cnt = 0, size = 0;
struct memory_node *nodes = NULL;
@@ -1383,14 +1384,14 @@ static int build_mem_topology(struct memory_node **nodesp, u64 *cntp)
scnprintf(path, PATH_MAX, "%s/devices/system/node/",
sysfs__mountpoint());
- dir = opendir(path);
- if (!dir) {
+ io_dir__init(&dir, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ if (dir.dirfd < 0) {
pr_debug2("%s: couldn't read %s, does this arch have topology information?\n",
__func__, path);
return -1;
}
- while (!ret && (ent = readdir(dir))) {
+ while (!ret && (ent = io_dir__readdir(&dir))) {
unsigned int idx;
int r;
@@ -1419,7 +1420,7 @@ static int build_mem_topology(struct memory_node **nodesp, u64 *cntp)
cnt += 1;
}
out:
- closedir(dir);
+ close(dir.dirfd);
if (!ret) {
*cntp = cnt;
*nodesp = nodes;
@@ -2769,6 +2770,8 @@ static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused)
free(name);
pmu_num--;
}
+ /* AMD may set it by evlist__has_amd_ibs() from perf_session__new() */
+ free(ff->ph->env.pmu_mappings);
ff->ph->env.pmu_mappings = strbuf_detach(&sb, NULL);
return 0;
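
The header.c hunks above replace DIR*/readdir() with perf's io_dir helpers, which operate on a plain O_DIRECTORY file descriptor and avoid opendir()'s heap allocation. The pattern, reduced to a minimal sketch that uses only the calls visible in the diff (io_dir__init(), io_dir__readdir(), close() on dir.dirfd) and assumes it is built inside the perf tree so that <api/io_dir.h> resolves:

	#include <api/io_dir.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	static int list_dir(const char *path)
	{
		struct io_dir dir;
		struct io_dirent64 *ent;

		io_dir__init(&dir, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
		if (dir.dirfd < 0)
			return -1;

		while ((ent = io_dir__readdir(&dir)) != NULL)
			puts(ent->d_name);

		close(dir.dirfd);
		return 0;
	}

	int main(void)
	{
		return list_dir("/sys/devices/system/node");
	}
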
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index fff134565801..d65228c11412 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -32,6 +32,9 @@
#include <linux/time64.h>
#include <linux/zalloc.h>
+static int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
+static int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
+
static bool hists__filter_entry_by_dso(struct hists *hists,
struct hist_entry *he);
static bool hists__filter_entry_by_thread(struct hists *hists,
@@ -40,6 +43,8 @@ static bool hists__filter_entry_by_symbol(struct hists *hists,
struct hist_entry *he);
static bool hists__filter_entry_by_socket(struct hists *hists,
struct hist_entry *he);
+static bool hists__filter_entry_by_parallelism(struct hists *hists,
+ struct hist_entry *he);
u16 hists__col_len(struct hists *hists, enum hist_column col)
{
@@ -204,6 +209,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__new_col_len(hists, HISTC_CGROUP, 6);
hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
+ hists__new_col_len(hists, HISTC_PARALLELISM, 11);
hists__new_col_len(hists, HISTC_CPU, 3);
hists__new_col_len(hists, HISTC_SOCKET, 6);
hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
@@ -299,9 +305,10 @@ static long hist_time(unsigned long htime)
return htime;
}
-static void he_stat__add_period(struct he_stat *he_stat, u64 period)
+static void he_stat__add_period(struct he_stat *he_stat, u64 period, u64 latency)
{
he_stat->period += period;
+ he_stat->latency += latency;
he_stat->nr_events += 1;
}
@@ -316,6 +323,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
dest->weight2 += src->weight2;
dest->weight3 += src->weight3;
dest->nr_events += src->nr_events;
+ dest->latency += src->latency;
}
static void he_stat__decay(struct he_stat *he_stat)
@@ -325,6 +333,7 @@ static void he_stat__decay(struct he_stat *he_stat)
he_stat->weight1 = (he_stat->weight1 * 7) / 8;
he_stat->weight2 = (he_stat->weight2 * 7) / 8;
he_stat->weight3 = (he_stat->weight3 * 7) / 8;
+ he_stat->latency = (he_stat->latency * 7) / 8;
}
static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
@@ -332,7 +341,7 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
{
u64 prev_period = he->stat.period;
- u64 diff;
+ u64 prev_latency = he->stat.latency;
if (prev_period == 0)
return true;
@@ -342,12 +351,16 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
he_stat__decay(he->stat_acc);
decay_callchain(he->callchain);
- diff = prev_period - he->stat.period;
-
if (!he->depth) {
- hists->stats.total_period -= diff;
- if (!he->filtered)
- hists->stats.total_non_filtered_period -= diff;
+ u64 period_diff = prev_period - he->stat.period;
+ u64 latency_diff = prev_latency - he->stat.latency;
+
+ hists->stats.total_period -= period_diff;
+ hists->stats.total_latency -= latency_diff;
+ if (!he->filtered) {
+ hists->stats.total_non_filtered_period -= period_diff;
+ hists->stats.total_non_filtered_latency -= latency_diff;
+ }
}
if (!he->leaf) {
@@ -362,7 +375,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
}
}
- return he->stat.period == 0;
+ return he->stat.period == 0 && he->stat.latency == 0;
}
static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
@@ -581,21 +594,24 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
return he;
}
-static u8 symbol__parent_filter(const struct symbol *parent)
+static filter_mask_t symbol__parent_filter(const struct symbol *parent)
{
if (symbol_conf.exclude_other && parent == NULL)
return 1 << HIST_FILTER__PARENT;
return 0;
}
-static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
+static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period, u64 latency)
{
if (!hist_entry__has_callchains(he) || !symbol_conf.use_callchain)
return;
he->hists->callchain_period += period;
- if (!he->filtered)
+ he->hists->callchain_latency += latency;
+ if (!he->filtered) {
he->hists->callchain_non_filtered_period += period;
+ he->hists->callchain_non_filtered_latency += latency;
+ }
}
static struct hist_entry *hists__findnew_entry(struct hists *hists,
@@ -608,6 +624,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
struct hist_entry *he;
int64_t cmp;
u64 period = entry->stat.period;
+ u64 latency = entry->stat.latency;
bool leftmost = true;
p = &hists->entries_in->rb_root.rb_node;
@@ -626,10 +643,10 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
if (!cmp) {
if (sample_self) {
he_stat__add_stat(&he->stat, &entry->stat);
- hist_entry__add_callchain_period(he, period);
+ hist_entry__add_callchain_period(he, period, latency);
}
if (symbol_conf.cumulate_callchain)
- he_stat__add_period(he->stat_acc, period);
+ he_stat__add_period(he->stat_acc, period, latency);
block_info__delete(entry->block_info);
@@ -666,7 +683,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
return NULL;
if (sample_self)
- hist_entry__add_callchain_period(he, period);
+ hist_entry__add_callchain_period(he, period, latency);
hists->nr_entries++;
rb_link_node(&he->rb_node_in, parent, p);
@@ -738,12 +755,14 @@ __hists__add_entry(struct hists *hists,
.ip = al->addr,
.level = al->level,
.code_page_size = sample->code_page_size,
+ .parallelism = al->parallelism,
.stat = {
.nr_events = 1,
.period = sample->period,
.weight1 = sample->weight,
.weight2 = sample->ins_lat,
.weight3 = sample->p_stage_cyc,
+ .latency = al->latency,
},
.parent = sym_parent,
.filtered = symbol__parent_filter(sym_parent) | al->filtered,
@@ -972,8 +991,6 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
if (he == NULL)
return -ENOMEM;
- hists__inc_nr_samples(hists, he->filtered);
-
out:
iter->he = he;
iter->curr++;
@@ -992,9 +1009,15 @@ static int
iter_finish_branch_entry(struct hist_entry_iter *iter,
struct addr_location *al __maybe_unused)
{
+ struct evsel *evsel = iter->evsel;
+ struct hists *hists = evsel__hists(evsel);
+
for (int i = 0; i < iter->total; i++)
branch_info__exit(&iter->bi[i]);
+ if (iter->he)
+ hists__inc_nr_samples(hists, iter->he->filtered);
+
zfree(&iter->bi);
iter->he = NULL;
@@ -1292,19 +1315,35 @@ out:
return err;
}
-int64_t
-hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
+static int64_t
+hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left,
+ struct hist_entry *right, unsigned long fn_offset,
+ bool ignore_dynamic, bool ignore_skipped)
{
struct hists *hists = left->hists;
struct perf_hpp_fmt *fmt;
- int64_t cmp = 0;
+ perf_hpp_fmt_cmp_t *fn;
+ int64_t cmp;
- hists__for_each_sort_list(hists, fmt) {
- if (perf_hpp__is_dynamic_entry(fmt) &&
+ /*
+ * Never collapse filtered and non-filtered entries.
+ * Note this is not the same as having an extra (invisible) fmt
+ * that corresponds to the filtered status.
+ */
+ cmp = (int64_t)!!left->filtered - (int64_t)!!right->filtered;
+ if (cmp)
+ return cmp;
+
+ perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+ if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) &&
!perf_hpp__defined_dynamic_entry(fmt, hists))
continue;
- cmp = fmt->cmp(fmt, left, right);
+ if (ignore_skipped && perf_hpp__should_skip(fmt, hists))
+ continue;
+
+ fn = (void *)fmt + fn_offset;
+ cmp = (*fn)(fmt, left, right);
if (cmp)
break;
}
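
hist_entry__cmp_impl() above collapses what used to be three near-identical loops by selecting the cmp/collapse/sort member of struct perf_hpp_fmt through a byte offset. The dispatch trick in isolation (simplified types, made-up comparators):

	#include <stdio.h>
	#include <stddef.h>

	struct fmt {
		int (*cmp)(int, int);
		int (*sort)(int, int);
	};

	static int by_value(int a, int b) { return a - b; }
	static int by_abs(int a, int b)
	{
		return (a < 0 ? -a : a) - (b < 0 ? -b : b);
	}

	/* Call whichever function pointer lives at 'fn_offset' inside *f. */
	static int compare(struct fmt *f, unsigned long fn_offset, int a, int b)
	{
		int (**fn)(int, int) = (void *)((char *)f + fn_offset);

		return (*fn)(a, b);
	}

	int main(void)
	{
		struct fmt f = { .cmp = by_value, .sort = by_abs };

		printf("%d %d\n",
		       compare(&f, offsetof(struct fmt, cmp), -3, 2),	/* -5 */
		       compare(&f, offsetof(struct fmt, sort), -3, 2));	/*  1 */
		return 0;
	}
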
@@ -1313,29 +1352,49 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
}
int64_t
-hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
{
- struct hists *hists = left->hists;
- struct perf_hpp_fmt *fmt;
- int64_t cmp = 0;
+ return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
+ offsetof(struct perf_hpp_fmt, cmp), true, false);
+}
- hists__for_each_sort_list(hists, fmt) {
- if (perf_hpp__is_dynamic_entry(fmt) &&
- !perf_hpp__defined_dynamic_entry(fmt, hists))
- continue;
+static int64_t
+hist_entry__sort(struct hist_entry *left, struct hist_entry *right)
+{
+ return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
+ offsetof(struct perf_hpp_fmt, sort), false, true);
+}
- cmp = fmt->collapse(fmt, left, right);
- if (cmp)
- break;
- }
+int64_t
+hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+{
+ return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
+ offsetof(struct perf_hpp_fmt, collapse), true, false);
+}
- return cmp;
+static int64_t
+hist_entry__collapse_hierarchy(struct perf_hpp_list *hpp_list,
+ struct hist_entry *left,
+ struct hist_entry *right)
+{
+ return hist_entry__cmp_impl(hpp_list, left, right,
+ offsetof(struct perf_hpp_fmt, collapse), false, false);
}
void hist_entry__delete(struct hist_entry *he)
{
struct hist_entry_ops *ops = he->ops;
+ if (symbol_conf.report_hierarchy) {
+ struct rb_root *root = &he->hroot_out.rb_root;
+ struct hist_entry *child, *tmp;
+
+ rbtree_postorder_for_each_entry_safe(child, tmp, root, rb_node)
+ hist_entry__delete(child);
+
+ *root = RB_ROOT;
+ }
+
thread__zput(he->thread);
map_symbol__exit(&he->ms);
@@ -1426,6 +1485,10 @@ static void hist_entry__check_and_remove_filter(struct hist_entry *he,
if (symbol_conf.sym_list == NULL)
return;
break;
+ case HIST_FILTER__PARALLELISM:
+ if (__bitmap_weight(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1) == 0)
+ return;
+ break;
case HIST_FILTER__PARENT:
case HIST_FILTER__GUEST:
case HIST_FILTER__HOST:
@@ -1484,6 +1547,9 @@ static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL,
perf_hpp__is_sym_entry);
+ hist_entry__check_and_remove_filter(he, HIST_FILTER__PARALLELISM,
+ perf_hpp__is_parallelism_entry);
+
hists__apply_filters(he->hists, he);
}
@@ -1503,14 +1569,7 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
while (*p != NULL) {
parent = *p;
iter = rb_entry(parent, struct hist_entry, rb_node_in);
-
- cmp = 0;
- perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
- cmp = fmt->collapse(fmt, iter, he);
- if (cmp)
- break;
- }
-
+ cmp = hist_entry__collapse_hierarchy(hpp_list, iter, he);
if (!cmp) {
he_stat__add_stat(&iter->stat, &he->stat);
return iter;
@@ -1687,6 +1746,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
hists__filter_entry_by_thread(hists, he);
hists__filter_entry_by_symbol(hists, he);
hists__filter_entry_by_socket(hists, he);
+ hists__filter_entry_by_parallelism(hists, he);
}
int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
@@ -1730,34 +1790,18 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
return 0;
}
-static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
-{
- struct hists *hists = a->hists;
- struct perf_hpp_fmt *fmt;
- int64_t cmp = 0;
-
- hists__for_each_sort_list(hists, fmt) {
- if (perf_hpp__should_skip(fmt, a->hists))
- continue;
-
- cmp = fmt->sort(fmt, a, b);
- if (cmp)
- break;
- }
-
- return cmp;
-}
-
static void hists__reset_filter_stats(struct hists *hists)
{
hists->nr_non_filtered_entries = 0;
hists->stats.total_non_filtered_period = 0;
+ hists->stats.total_non_filtered_latency = 0;
}
void hists__reset_stats(struct hists *hists)
{
hists->nr_entries = 0;
hists->stats.total_period = 0;
+ hists->stats.total_latency = 0;
hists__reset_filter_stats(hists);
}
@@ -1766,6 +1810,7 @@ static void hists__inc_filter_stats(struct hists *hists, struct hist_entry *h)
{
hists->nr_non_filtered_entries++;
hists->stats.total_non_filtered_period += h->stat.period;
+ hists->stats.total_non_filtered_latency += h->stat.latency;
}
void hists__inc_stats(struct hists *hists, struct hist_entry *h)
@@ -1775,6 +1820,7 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h)
hists->nr_entries++;
hists->stats.total_period += h->stat.period;
+ hists->stats.total_latency += h->stat.latency;
}
static void hierarchy_recalc_total_periods(struct hists *hists)
@@ -1786,6 +1832,8 @@ static void hierarchy_recalc_total_periods(struct hists *hists)
hists->stats.total_period = 0;
hists->stats.total_non_filtered_period = 0;
+ hists->stats.total_latency = 0;
+ hists->stats.total_non_filtered_latency = 0;
/*
* recalculate total period using top-level entries only
@@ -1797,8 +1845,11 @@ static void hierarchy_recalc_total_periods(struct hists *hists)
node = rb_next(node);
hists->stats.total_period += he->stat.period;
- if (!he->filtered)
+ hists->stats.total_latency += he->stat.latency;
+ if (!he->filtered) {
hists->stats.total_non_filtered_period += he->stat.period;
+ hists->stats.total_non_filtered_latency += he->stat.latency;
+ }
}
}
@@ -2191,6 +2242,16 @@ static bool hists__filter_entry_by_socket(struct hists *hists,
return false;
}
+static bool hists__filter_entry_by_parallelism(struct hists *hists,
+ struct hist_entry *he)
+{
+ if (test_bit(he->parallelism, hists->parallelism_filter)) {
+ he->filtered |= (1 << HIST_FILTER__PARALLELISM);
+ return true;
+ }
+ return false;
+}
+
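
A sketch of how the parallelism bitmap is meant to be driven; per the check above, a set bit hides entries sampled at that parallelism level. The bitmap_zalloc()/__set_bit() calls illustrate the tools/include bitmap helpers, and the cut-off of 4 is a hypothetical policy:

    unsigned long *filter = bitmap_zalloc(MAX_NR_CPUS + 1);

    /* hypothetical policy: hide samples taken while more than 4
     * threads were running */
    for (int p = 5; p <= MAX_NR_CPUS; p++)
            __set_bit(p, filter);

    /* per entry, exactly as hists__filter_entry_by_parallelism() does */
    if (test_bit(he->parallelism, filter))
            he->filtered |= (1 << HIST_FILTER__PARALLELISM);
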
typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he);
static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter)
@@ -2360,6 +2421,16 @@ void hists__filter_by_socket(struct hists *hists)
hists__filter_entry_by_socket);
}
+void hists__filter_by_parallelism(struct hists *hists)
+{
+ if (symbol_conf.report_hierarchy)
+ hists__filter_hierarchy(hists, HIST_FILTER__PARALLELISM,
+ hists->parallelism_filter);
+ else
+ hists__filter_by_type(hists, HIST_FILTER__PARALLELISM,
+ hists__filter_entry_by_parallelism);
+}
+
void events_stats__inc(struct events_stats *stats, u32 type)
{
++stats->nr_events[0];
@@ -2449,21 +2520,15 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
struct rb_node **p;
struct rb_node *parent = NULL;
struct hist_entry *he;
- struct perf_hpp_fmt *fmt;
bool leftmost = true;
p = &root->rb_root.rb_node;
while (*p != NULL) {
- int64_t cmp = 0;
+ int64_t cmp;
parent = *p;
he = rb_entry(parent, struct hist_entry, rb_node_in);
-
- perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
- cmp = fmt->collapse(fmt, he, pair);
- if (cmp)
- break;
- }
+ cmp = hist_entry__collapse_hierarchy(he->hpp_list, he, pair);
if (!cmp)
goto out;
@@ -2521,16 +2586,10 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *roo
while (n) {
struct hist_entry *iter;
- struct perf_hpp_fmt *fmt;
- int64_t cmp = 0;
+ int64_t cmp;
iter = rb_entry(n, struct hist_entry, rb_node_in);
- perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
- cmp = fmt->collapse(fmt, iter, he);
- if (cmp)
- break;
- }
-
+ cmp = hist_entry__collapse_hierarchy(he->hpp_list, iter, he);
if (cmp < 0)
n = n->rb_left;
else if (cmp > 0)
@@ -2767,6 +2826,12 @@ u64 hists__total_period(struct hists *hists)
hists->stats.total_period;
}
+u64 hists__total_latency(struct hists *hists)
+{
+ return symbol_conf.filter_relative ? hists->stats.total_non_filtered_latency :
+ hists->stats.total_latency;
+}
+
int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq)
{
char unit;
@@ -2878,6 +2943,7 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
hists->entries = RB_ROOT_CACHED;
mutex_init(&hists->lock);
hists->socket_filter = -1;
+ hists->parallelism_filter = symbol_conf.parallelism_filter;
hists->hpp_list = hpp_list;
INIT_LIST_HEAD(&hists->hpp_formats);
return 0;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 1131056924d9..317d06cca8b8 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -31,8 +31,11 @@ enum hist_filter {
HIST_FILTER__HOST,
HIST_FILTER__SOCKET,
HIST_FILTER__C2C,
+ HIST_FILTER__PARALLELISM,
};
+typedef u16 filter_mask_t;
+
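
Why the typedef: PARALLELISM above is the ninth hist_filter bit (the earlier enumerators sit above this hunk's context), which no longer fits the u8 `filtered` field hist_entry used until now; filter_mask_t widens it to u16 and is applied further down. A compile-time sketch of the invariant, assuming a count of nine:

    #include <assert.h>
    #include <stdint.h>

    typedef uint16_t filter_mask_t;

    #define NR_HIST_FILTERS 9	/* HIST_FILTER__PARALLELISM made it nine */

    static_assert(NR_HIST_FILTERS <= 8 * sizeof(filter_mask_t),
                  "every hist_filter bit must fit in filter_mask_t");
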
enum hist_column {
HISTC_SYMBOL,
HISTC_TIME,
@@ -42,6 +45,7 @@ enum hist_column {
HISTC_CGROUP_ID,
HISTC_CGROUP,
HISTC_PARENT,
+ HISTC_PARALLELISM,
HISTC_CPU,
HISTC_SOCKET,
HISTC_SRCLINE,
@@ -105,10 +109,13 @@ struct hists {
u64 nr_non_filtered_entries;
u64 callchain_period;
u64 callchain_non_filtered_period;
+ u64 callchain_latency;
+ u64 callchain_non_filtered_latency;
struct thread *thread_filter;
const struct dso *dso_filter;
const char *uid_filter_str;
const char *symbol_filter_str;
+ unsigned long *parallelism_filter;
struct mutex lock;
struct hists_stats stats;
u64 event_stream;
@@ -165,6 +172,12 @@ struct res_sample {
struct he_stat {
u64 period;
+ /*
+ * Period re-scaled from CPU time to wall-clock time (divided by the
+ * parallelism at the time of the sample). This represents the effect
+ * of the event on latency rather than CPU consumption.
+ */
+ u64 latency;
u64 period_sys;
u64 period_us;
u64 period_guest_sys;
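
The arithmetic described by the comment, as a sketch; the real accounting happens where samples are added to hists, and `parallelism` is the machine-level counter introduced in the machine.c/machine.h hunks below (clamped here because, as noted there, poor event ordering can drive it below 1):

    /* CPU-time period -> wall-clock ("latency") weight of one sample */
    static u64 sample_latency(u64 period, int parallelism)
    {
            if (parallelism < 1)
                    parallelism = 1;
            return period / parallelism;
    }

E.g. a sample worth 4ms of CPU time taken while four threads were running contributes only 1ms of latency.
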
@@ -226,15 +239,16 @@ struct hist_entry {
u64 cgroup;
u64 ip;
u64 transaction;
- s32 socket;
- s32 cpu;
u64 code_page_size;
u64 weight;
u64 ins_lat;
u64 p_stage_cyc;
+ s32 socket;
+ s32 cpu;
+ int parallelism;
+ int mem_type_off;
u8 cpumode;
u8 depth;
- int mem_type_off;
struct simd_flags simd_flags;
/* We are added by hists__add_dummy_entry. */
@@ -242,7 +256,7 @@ struct hist_entry {
bool leaf;
char level;
- u8 filtered;
+ filter_mask_t filtered;
u16 callchain_size;
union {
@@ -342,8 +356,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
struct perf_hpp;
struct perf_hpp_fmt;
-int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
-int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
int hist_entry__transaction_len(void);
int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
struct hists *hists);
@@ -370,6 +382,7 @@ void hists__output_recalc_col_len(struct hists *hists, int max_rows);
struct hist_entry *hists__get_entry(struct hists *hists, int idx);
u64 hists__total_period(struct hists *hists);
+u64 hists__total_latency(struct hists *hists);
void hists__reset_stats(struct hists *hists);
void hists__inc_stats(struct hists *hists, struct hist_entry *h);
void hists__inc_nr_events(struct hists *hists);
@@ -386,11 +399,13 @@ void hists__filter_by_dso(struct hists *hists);
void hists__filter_by_thread(struct hists *hists);
void hists__filter_by_symbol(struct hists *hists);
void hists__filter_by_socket(struct hists *hists);
+void hists__filter_by_parallelism(struct hists *hists);
static inline bool hists__has_filter(struct hists *hists)
{
return hists->thread_filter || hists->dso_filter ||
- hists->symbol_filter_str || (hists->socket_filter > -1);
+ hists->symbol_filter_str || (hists->socket_filter > -1) ||
+ hists->parallelism_filter;
}
u16 hists__col_len(struct hists *hists, enum hist_column col);
@@ -452,6 +467,9 @@ struct perf_hpp {
bool skip;
};
+typedef int64_t (*perf_hpp_fmt_cmp_t)(
+ struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *);
+
struct perf_hpp_fmt {
const char *name;
int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
@@ -463,12 +481,9 @@ struct perf_hpp_fmt {
struct hist_entry *he);
int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he);
- int64_t (*cmp)(struct perf_hpp_fmt *fmt,
- struct hist_entry *a, struct hist_entry *b);
- int64_t (*collapse)(struct perf_hpp_fmt *fmt,
- struct hist_entry *a, struct hist_entry *b);
- int64_t (*sort)(struct perf_hpp_fmt *fmt,
- struct hist_entry *a, struct hist_entry *b);
+ perf_hpp_fmt_cmp_t cmp;
+ perf_hpp_fmt_cmp_t collapse;
+ perf_hpp_fmt_cmp_t sort;
bool (*equal)(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
void (*free)(struct perf_hpp_fmt *fmt);
@@ -549,11 +564,13 @@ extern struct perf_hpp_fmt perf_hpp__format[];
enum {
/* Matches perf_hpp__format array. */
PERF_HPP__OVERHEAD,
+ PERF_HPP__LATENCY,
PERF_HPP__OVERHEAD_SYS,
PERF_HPP__OVERHEAD_US,
PERF_HPP__OVERHEAD_GUEST_SYS,
PERF_HPP__OVERHEAD_GUEST_US,
PERF_HPP__OVERHEAD_ACC,
+ PERF_HPP__LATENCY_ACC,
PERF_HPP__SAMPLES,
PERF_HPP__PERIOD,
PERF_HPP__WEIGHT1,
@@ -565,6 +582,7 @@ enum {
void perf_hpp__init(void);
void perf_hpp__cancel_cumulate(void);
+void perf_hpp__cancel_latency(void);
void perf_hpp__setup_output_field(struct perf_hpp_list *list);
void perf_hpp__reset_output_field(struct perf_hpp_list *list);
void perf_hpp__append_sort_keys(struct perf_hpp_list *list);
@@ -582,6 +600,7 @@ bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_parallelism_entry(struct perf_hpp_fmt *fmt);
struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
@@ -608,6 +627,7 @@ void hists__reset_column_width(struct hists *hists);
enum perf_hpp_fmt_type {
PERF_HPP_FMT_TYPE__RAW,
PERF_HPP_FMT_TYPE__PERCENT,
+ PERF_HPP_FMT_TYPE__LATENCY,
PERF_HPP_FMT_TYPE__AVERAGE,
};
diff --git a/tools/perf/util/hwmon_pmu.c b/tools/perf/util/hwmon_pmu.c
index 4acb9bb19b84..3cce77fc8004 100644
--- a/tools/perf/util/hwmon_pmu.c
+++ b/tools/perf/util/hwmon_pmu.c
@@ -11,13 +11,13 @@
#include <sys/types.h>
#include <assert.h>
#include <ctype.h>
-#include <dirent.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <api/fs/fs.h>
#include <api/io.h>
+#include <api/io_dir.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/zalloc.h>
@@ -108,20 +108,6 @@ struct hwmon_pmu {
};
/**
- * union hwmon_pmu_event_key: Key for hwmon_pmu->events as such each key
- * represents an event.
- *
- * Related hwmon files start <type><number> that this key represents.
- */
-union hwmon_pmu_event_key {
- long type_and_num;
- struct {
- int num :16;
- enum hwmon_type type :8;
- };
-};
-
-/**
* struct hwmon_pmu_event_value: Value in hwmon_pmu->events.
*
* Hwmon files are of the form <type><number>_<item> and may have a suffix
@@ -249,31 +235,22 @@ static void fix_name(char *p)
static int hwmon_pmu__read_events(struct hwmon_pmu *pmu)
{
- DIR *dir;
- struct dirent *ent;
- int dup_fd, err = 0;
+ int err = 0;
struct hashmap_entry *cur, *tmp;
size_t bkt;
+ struct io_dirent64 *ent;
+ struct io_dir dir;
if (pmu->pmu.sysfs_aliases_loaded)
return 0;
- /*
- * Use a dup-ed fd as closedir will close it. Use openat so that the
- * directory contents are refreshed.
- */
- dup_fd = openat(pmu->hwmon_dir_fd, ".", O_DIRECTORY);
-
- if (dup_fd == -1)
- return -ENOMEM;
+ /* Use openat so that the directory contents are refreshed. */
+ io_dir__init(&dir, openat(pmu->hwmon_dir_fd, ".", O_CLOEXEC | O_DIRECTORY | O_RDONLY));
- dir = fdopendir(dup_fd);
- if (!dir) {
- close(dup_fd);
- return -ENOMEM;
- }
+ if (dir.dirfd < 0)
+ return -ENOENT;
- while ((ent = readdir(dir)) != NULL) {
+ while ((ent = io_dir__readdir(&dir)) != NULL) {
enum hwmon_type type;
int number;
enum hwmon_item item;
@@ -361,7 +338,7 @@ static int hwmon_pmu__read_events(struct hwmon_pmu *pmu)
pmu->pmu.sysfs_aliases_loaded = true;
err_out:
- closedir(dir);
+ close(dir.dirfd);
return err;
}
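
The io_dir API adopted here (api/io_dir.h) iterates a raw directory fd, getdents-style, which removes the DIR* allocation and the dup-ed fd dance the old code needed for closedir(). The recurring shape, as a sketch with a hypothetical for_each_entry():

    #include <api/io_dir.h>
    #include <errno.h>
    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    static int for_each_entry(const char *path)
    {
            struct io_dir dir;
            struct io_dirent64 *ent;

            io_dir__init(&dir, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
            if (dir.dirfd < 0)
                    return -ENOENT;

            while ((ent = io_dir__readdir(&dir)) != NULL) {
                    /* '.' and '..' still show up and must be skipped */
                    if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
                            continue;
                    /* ... consume ent->d_name ... */
            }
            close(dir.dirfd);	/* no closedir(): the fd is owned directly */
            return 0;
    }
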
@@ -716,8 +693,8 @@ int hwmon_pmu__check_alias(struct parse_events_terms *terms, struct perf_pmu_inf
int perf_pmus__read_hwmon_pmus(struct list_head *pmus)
{
char *line = NULL;
- DIR *class_hwmon_dir;
- struct dirent *class_hwmon_ent;
+ struct io_dirent64 *class_hwmon_ent;
+ struct io_dir class_hwmon_dir;
char buf[PATH_MAX];
const char *sysfs = sysfs__mountpoint();
@@ -725,11 +702,12 @@ int perf_pmus__read_hwmon_pmus(struct list_head *pmus)
return 0;
scnprintf(buf, sizeof(buf), "%s/class/hwmon/", sysfs);
- class_hwmon_dir = opendir(buf);
- if (!class_hwmon_dir)
+ io_dir__init(&class_hwmon_dir, open(buf, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+
+ if (class_hwmon_dir.dirfd < 0)
return 0;
- while ((class_hwmon_ent = readdir(class_hwmon_dir)) != NULL) {
+ while ((class_hwmon_ent = io_dir__readdir(&class_hwmon_dir)) != NULL) {
size_t line_len;
int hwmon_dir, name_fd;
struct io io;
@@ -759,7 +737,7 @@ int perf_pmus__read_hwmon_pmus(struct list_head *pmus)
close(name_fd);
}
free(line);
- closedir(class_hwmon_dir);
+ close(class_hwmon_dir.dirfd);
return 0;
}
diff --git a/tools/perf/util/hwmon_pmu.h b/tools/perf/util/hwmon_pmu.h
index 882566846df4..b3329774d2b2 100644
--- a/tools/perf/util/hwmon_pmu.h
+++ b/tools/perf/util/hwmon_pmu.h
@@ -91,6 +91,22 @@ enum hwmon_item {
HWMON_ITEM__MAX,
};
+/**
+ * union hwmon_pmu_event_key: Key for hwmon_pmu->events where each key
+ * represents an event.
+ * The union is exposed for testing, to ensure layout problems are
+ * avoided on big-endian machines.
+ *
+ * Related hwmon files start with the <type><number> this key represents.
+ */
+union hwmon_pmu_event_key {
+ long type_and_num;
+ struct {
+ int num :16;
+ enum hwmon_type type :8;
+ };
+};
+
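
A sketch of why the layout matters: producers fill the bitfields while the hashmap keys on the scalar type_and_num view of the same bytes, so the two views must agree on every endianness. HWMON_TYPE_TEMP below is assumed from enum hwmon_type earlier in this header:

    static long example_key(void)
    {
            union hwmon_pmu_event_key key = { .type_and_num = 0 };

            key.num = 1;			/* the key stands for temp1_* files */
            key.type = HWMON_TYPE_TEMP;

            /* scalar view of the same bytes, used as the hashmap key; a
             * mismatched layout on big-endian would scramble this value,
             * which is what exposing the union lets the tests catch */
            return key.type_and_num;
    }
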
bool perf_pmu__is_hwmon(const struct perf_pmu *pmu);
bool evsel__is_hwmon(const struct evsel *evsel);
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index a7c589fecb98..3625c6224750 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -275,12 +275,13 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
int ret;
struct intel_bts *bts = btsq->bts;
union perf_event event;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
if (bts->synth_opts.initial_skip &&
bts->num_events++ <= bts->synth_opts.initial_skip)
return 0;
+ perf_sample__init(&sample, /*all=*/true);
sample.ip = le64_to_cpu(branch->from);
sample.cpumode = intel_bts_cpumode(bts, sample.ip);
sample.pid = btsq->pid;
@@ -312,6 +313,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
pr_err("Intel BTS: failed to deliver branch event, error %d\n",
ret);
+ perf_sample__exit(&sample);
return ret;
}
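
This init/exit pairing repeats through the intel-pt conversions below: perf_sample now owns dynamically allocated members (intr_regs becomes a pointer, see the PEBS hunk in intel-pt.c), so a stack sample can no longer be zero-initialized and forgotten. The shape every conversion follows, as a sketch with a hypothetical deliver_event():

    struct perf_sample sample;
    int ret;

    perf_sample__init(&sample, /*all=*/true);	/* zero every field */
    sample.ip = ip;				/* fill what is needed */
    ret = deliver_event(&sample);
    perf_sample__exit(&sample);			/* release owned members */
    return ret;
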
diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build
index 30793d08c6d4..5b8f0149167d 100644
--- a/tools/perf/util/intel-pt-decoder/Build
+++ b/tools/perf/util/intel-pt-decoder/Build
@@ -7,16 +7,24 @@ $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_table
$(call rule_mkdir)
@$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
-# Busybox's diff doesn't have -I, avoid warning in the case
+ifeq ($(SRCARCH),x86)
+ perf-util-y += inat.o insn.o
+else
+ perf-util-$(CONFIG_AUXTRACE) += inat.o insn.o
+endif
-$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
+$(OUTPUT)util/intel-pt-decoder/inat.o: $(srctree)/tools/arch/x86/lib/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
-CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder
+CFLAGS_inat.o += -I$(OUTPUT)util/intel-pt-decoder
+
+$(OUTPUT)util/intel-pt-decoder/insn.o: $(srctree)/tools/arch/x86/lib/insn.c
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
ifeq ($(CC_NO_CLANG), 1)
- CFLAGS_intel-pt-insn-decoder.o += -Wno-override-init
+ CFLAGS_insn.o += -Wno-override-init
endif
-CFLAGS_intel-pt-insn-decoder.o += -Wno-packed
+CFLAGS_insn.o += -Wno-packed
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 47cf35799a4d..8fabddc1c0da 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -11,9 +11,6 @@
#include <byteswap.h>
#include "../../../arch/x86/include/asm/insn.h"
-#include "../../../arch/x86/lib/inat.c"
-#include "../../../arch/x86/lib/insn.c"
-
#include "event.h"
#include "intel-pt-insn-decoder.h"
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 30be6dfe09eb..9b1011fe4826 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -127,6 +127,7 @@ struct intel_pt {
bool single_pebs;
bool sample_pebs;
+ int pebs_data_src_fmt;
struct evsel *pebs_evsel;
u64 evt_sample_type;
@@ -175,6 +176,7 @@ enum switch_state {
struct intel_pt_pebs_event {
struct evsel *evsel;
u64 id;
+ int data_src_fmt;
};
struct intel_pt_queue {
@@ -1764,12 +1766,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct dummy_branch_stack {
u64 nr;
u64 hw_idx;
struct branch_entry entries;
} dummy_bs;
+ int ret;
if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
return 0;
@@ -1777,6 +1780,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_b_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->branches_id;
@@ -1806,8 +1810,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
}
-	return intel_pt_deliver_synth_event(pt, event, &sample,
+	ret = intel_pt_deliver_synth_event(pt, event, &sample,
 		pt->branches_sample_type);
+	perf_sample__exit(&sample);
+	return ret;
}
static void intel_pt_prep_sample(struct intel_pt *pt,
@@ -1835,11 +1841,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->instructions_id;
@@ -1859,16 +1867,19 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->instructions_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->instructions_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
u64 period = 0;
+ int ret;
if (ptq->sample_ipc)
period = ptq->ipc_cyc_cnt - ptq->last_cy_cyc_cnt;
@@ -1876,6 +1887,7 @@ static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq)
if (!period || intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->cycles_id;
@@ -1887,25 +1899,31 @@ static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq)
ptq->last_cy_insn_cnt = ptq->ipc_insn_cnt;
ptq->last_cy_cyc_cnt = ptq->ipc_cyc_cnt;
- return intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->transactions_id;
sample.stream_id = ptq->pt->transactions_id;
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->transactions_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->transactions_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static void intel_pt_prep_p_sample(struct intel_pt *pt,
@@ -1953,15 +1971,17 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_cbr raw;
u32 flags;
+ int ret;
if (intel_pt_skip_cbr_event(pt))
return 0;
ptq->cbr_seen = ptq->state->cbr;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->cbr_id;
@@ -1975,20 +1995,24 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_psb raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->psb_id;
@@ -2001,20 +2025,24 @@ static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_mwait raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->mwait_id;
@@ -2026,20 +2054,24 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_pwre raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->pwre_id;
@@ -2051,20 +2083,24 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_exstop raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->exstop_id;
@@ -2076,20 +2112,24 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_pwrx raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->pwrx_id;
@@ -2101,8 +2141,10 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
/*
@@ -2232,19 +2274,160 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack,
}
}
-static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id)
+#define P(a, b) PERF_MEM_S(a, b)
+#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
+#define LEVEL(x) P(LVLNUM, x)
+#define REM P(REMOTE, REMOTE)
+#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
+
+#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10
+#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)
+
+/* Based on kernel __intel_pmu_pebs_data_source_grt() and pebs_data_source */
+static const u64 pebs_data_source_grt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = {
+ P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */
+ OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */
+ OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */
+ OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP Hit */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP Fwd */
+ OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */
+ OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* RAM hit|SNP Hit */
+ OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* Remote L3 hit|SNP Hit */
+ OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* RAM hit|SNP None or Miss */
+ OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* Remote RAM hit|SNP None or Miss */
+ OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */
+ OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */
+};
+
+/* Based on kernel __intel_pmu_pebs_data_source_cmt() and pebs_data_source */
+static const u64 pebs_data_source_cmt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = {
+ P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */
+ OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */
+ OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */
+ OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */
	OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* L3 hit|SNP Miss */
	OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP Hit */
	OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP Fwd */
	OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */
	OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */
	OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* RAM hit|SNP None */
	OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE), /* Remote RAM hit|SNP None */
	OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD), /* Remote RAM hit|SNP Fwd */
	OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM), /* Remote RAM hit|SNP HitM */
+ OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */
+ OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */
+};
+
+/* Based on kernel pebs_set_tlb_lock() */
+static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
+{
+ /*
+ * TLB access
+ * 0 = did not miss 2nd level TLB
+ * 1 = missed 2nd level TLB
+ */
+ if (tlb)
+ *val |= P(TLB, MISS) | P(TLB, L2);
+ else
+ *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+ /* locked prefix */
+ if (lock)
+ *val |= P(LOCK, LOCKED);
+}
+
+/* Based on kernel __grt_latency_data() */
+static u64 intel_pt_grt_latency_data(u8 dse, bool tlb, bool lock, bool blk,
+ const u64 *pebs_data_source)
+{
+ u64 val;
+
+ dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK;
+ val = pebs_data_source[dse];
+
+ pebs_set_tlb_lock(&val, tlb, lock);
+
+ if (blk)
+ val |= P(BLK, DATA);
+ else
+ val |= P(BLK, NA);
+
+ return val;
+}
+
+/* Default value for data source */
+#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\
+ PERF_MEM_S(LVL, NA) |\
+ PERF_MEM_S(SNOOP, NA) |\
+ PERF_MEM_S(LOCK, NA) |\
+ PERF_MEM_S(TLB, NA) |\
+ PERF_MEM_S(LVLNUM, NA))
+
+enum DATA_SRC_FORMAT {
+ DATA_SRC_FORMAT_ERR = -1,
+ DATA_SRC_FORMAT_NA = 0,
+ DATA_SRC_FORMAT_GRT = 1,
+ DATA_SRC_FORMAT_CMT = 2,
+};
+
+/* Based on kernel grt_latency_data() and cmt_latency_data() */
+static u64 intel_pt_get_data_src(u64 mem_aux_info, int data_src_fmt)
+{
+ switch (data_src_fmt) {
+ case DATA_SRC_FORMAT_GRT: {
+ union {
+ u64 val;
+ struct {
+ unsigned int dse:4;
+ unsigned int locked:1;
+ unsigned int stlb_miss:1;
+ unsigned int fwd_blk:1;
+ unsigned int reserved:25;
+ };
+ } x = {.val = mem_aux_info};
+ return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk,
+ pebs_data_source_grt);
+ }
+ case DATA_SRC_FORMAT_CMT: {
+ union {
+ u64 val;
+ struct {
+ unsigned int dse:5;
+ unsigned int locked:1;
+ unsigned int stlb_miss:1;
+ unsigned int fwd_blk:1;
+ unsigned int reserved:24;
+ };
+ } x = {.val = mem_aux_info};
+ return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk,
+ pebs_data_source_cmt);
+ }
+ default:
+ return PERF_MEM_NA;
+ }
+}
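
On the consumer side the returned value is a perf_mem_data_src as defined by the uapi (include/uapi/linux/perf_event.h); a sketch of decoding a few of the fields set above:

    #include <linux/perf_event.h>
    #include <stdio.h>

    static void print_data_src(__u64 val)
    {
            union perf_mem_data_src src = { .val = val };

            if (src.mem_op & PERF_MEM_OP_LOAD)
                    printf("load ");
            if (src.mem_lvl & PERF_MEM_LVL_L1)
                    printf("L1-hit ");
            if (src.mem_dtlb & PERF_MEM_TLB_MISS)
                    printf("dTLB-miss ");
            if (src.mem_lock & PERF_MEM_LOCK_LOCKED)
                    printf("locked");
            printf("\n");
    }

    /* e.g.: print_data_src(intel_pt_get_data_src(items->mem_aux_info,
     *                                            DATA_SRC_FORMAT_GRT)); */
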
+
+static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel,
+ u64 id, int data_src_fmt)
{
const struct intel_pt_blk_items *items = &ptq->state->items;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
union perf_event *event = ptq->event_buf;
struct intel_pt *pt = ptq->pt;
u64 sample_type = evsel->core.attr.sample_type;
u8 cpumode;
- u64 regs[8 * sizeof(sample.intr_regs.mask)];
+ u64 regs[8 * sizeof(sample.intr_regs->mask)];
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_a_sample(ptq, event, &sample);
sample.id = id;
@@ -2291,15 +2474,16 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse
items->mask[INTEL_PT_XMM_POS])) {
u64 regs_mask = evsel->core.attr.sample_regs_intr;
u64 *pos;
+ struct regs_dump *intr_regs = perf_sample__intr_regs(&sample);
- sample.intr_regs.abi = items->is_32_bit ?
+ intr_regs->abi = items->is_32_bit ?
PERF_SAMPLE_REGS_ABI_32 :
PERF_SAMPLE_REGS_ABI_64;
- sample.intr_regs.regs = regs;
+ intr_regs->regs = regs;
- pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);
+ pos = intel_pt_add_gp_regs(intr_regs, regs, items, regs_mask);
- intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);
+ intel_pt_add_xmm(intr_regs, pos, items, regs_mask);
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
@@ -2350,6 +2534,18 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse
}
}
+ if (sample_type & PERF_SAMPLE_DATA_SRC) {
+ if (items->has_mem_aux_info && data_src_fmt) {
+ if (data_src_fmt < 0) {
+ pr_err("Intel PT missing data_src info\n");
+ return -1;
+ }
+ sample.data_src = intel_pt_get_data_src(items->mem_aux_info, data_src_fmt);
+ } else {
+ sample.data_src = PERF_MEM_NA;
+ }
+ }
+
if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
u64 ax = items->has_rax ? items->rax : 0;
/* Refer kernel's intel_hsw_transaction() */
@@ -2361,16 +2557,19 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse
sample.transaction = txn;
}
- return intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
struct evsel *evsel = pt->pebs_evsel;
+ int data_src_fmt = pt->pebs_data_src_fmt;
u64 id = evsel->core.id[0];
- return intel_pt_do_synth_pebs_sample(ptq, evsel, id);
+ return intel_pt_do_synth_pebs_sample(ptq, evsel, id, data_src_fmt);
}
static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
@@ -2395,7 +2594,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
hw_id);
return intel_pt_synth_single_pebs_sample(ptq);
}
- err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id);
+ err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id, pe->data_src_fmt);
if (err)
return err;
}
@@ -2407,16 +2606,17 @@ static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct {
struct perf_synth_intel_evt cfe;
struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS];
} raw;
- int i;
+ int i, ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->evt_id;
@@ -2438,20 +2638,24 @@ static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq)
ptq->state->evd_cnt * sizeof(struct perf_synth_intel_evd);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->evt_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->evt_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_iflag_chg raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->iflag_chg_id;
@@ -2471,8 +2675,10 @@ static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->iflag_chg_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->iflag_chg_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
@@ -3355,6 +3561,49 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt,
event->itrace_start.tid);
}
+/*
+ * Events with data_src are identified by the L1_Hit_Indication field;
+ * refer to https://github.com/intel/perfmon
+ */
+static int intel_pt_data_src_fmt(struct intel_pt *pt, struct evsel *evsel)
+{
+ struct perf_env *env = pt->machine->env;
+ int fmt = DATA_SRC_FORMAT_NA;
+
+ if (!env->cpuid)
+ return DATA_SRC_FORMAT_ERR;
+
+ /*
+	 * PEBS-via-PT is only supported on non-hybrid E-cores. Of those,
+	 * only Gracemont and Crestmont have data_src. Check for:
+ * Alderlake N (Gracemont)
+ * Sierra Forest (Crestmont)
+ * Grand Ridge (Crestmont)
+ */
+
+ if (!strncmp(env->cpuid, "GenuineIntel,6,190,", 19))
+ fmt = DATA_SRC_FORMAT_GRT;
+
+ if (!strncmp(env->cpuid, "GenuineIntel,6,175,", 19) ||
+ !strncmp(env->cpuid, "GenuineIntel,6,182,", 19))
+ fmt = DATA_SRC_FORMAT_CMT;
+
+ if (fmt == DATA_SRC_FORMAT_NA)
+ return fmt;
+
+ /*
+	 * The only events with data_src are:
+ * mem-loads event=0xd0,umask=0x5
+ * mem-stores event=0xd0,umask=0x6
+ */
+ if (evsel->core.attr.type == PERF_TYPE_RAW &&
+ ((evsel->core.attr.config & 0xffff) == 0x5d0 ||
+ (evsel->core.attr.config & 0xffff) == 0x6d0))
+ return fmt;
+
+ return DATA_SRC_FORMAT_NA;
+}
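
For reference, perf_env's cpuid string on x86 is "vendor,family,model,stepping", so the prefixes above select family 6, model 190 (0xBE, Alderlake N) and models 175/182 (0xAF Sierra Forest, 0xB6 Grand Ridge). A sketch, with a hypothetical stepping:

    const char *cpuid = "GenuineIntel,6,190,0";	/* an Alderlake N box */

    if (!strncmp(cpuid, "GenuineIntel,6,190,", 19))
            fmt = DATA_SRC_FORMAT_GRT;
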
+
static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt,
union perf_event *event,
struct perf_sample *sample)
@@ -3375,6 +3624,7 @@ static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt,
ptq->pebs[hw_id].evsel = evsel;
ptq->pebs[hw_id].id = sample->id;
+ ptq->pebs[hw_id].data_src_fmt = intel_pt_data_src_fmt(pt, evsel);
return 0;
}
@@ -3924,6 +4174,7 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt)
}
pt->single_pebs = true;
pt->sample_pebs = true;
+ pt->pebs_data_src_fmt = intel_pt_data_src_fmt(pt, evsel);
pt->pebs_evsel = evsel;
}
}
diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c
index 50a3c3e07160..2c421b475b3b 100644
--- a/tools/perf/util/intel-tpebs.c
+++ b/tools/perf/util/intel-tpebs.c
@@ -254,7 +254,7 @@ int tpebs_start(struct evlist *evsel_list)
new = zalloc(sizeof(*new));
if (!new) {
ret = -1;
- zfree(name);
+ zfree(&name);
goto err;
}
new->name = name;
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 346513e5e9b7..624964f01b5f 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -516,7 +516,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
* create pseudo sample to induce dso hit increment
* use first address as sample address
*/
- memset(&sample, 0, sizeof(sample));
+ perf_sample__init(&sample, /*all=*/true);
sample.cpumode = PERF_RECORD_MISC_USER;
sample.pid = pid;
sample.tid = tid;
@@ -535,6 +535,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
out:
+ perf_sample__exit(&sample);
free(event);
return ret;
}
@@ -611,7 +612,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
* create pseudo sample to induce dso hit increment
* use first address as sample address
*/
- memset(&sample, 0, sizeof(sample));
+ perf_sample__init(&sample, /*all=*/true);
sample.cpumode = PERF_RECORD_MISC_USER;
sample.pid = pid;
sample.tid = tid;
@@ -620,12 +621,13 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
if (ret)
- return ret;
+ goto out;
ret = jit_inject_event(jd, event);
if (!ret)
build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
-
+out:
+ perf_sample__exit(&sample);
return ret;
}
@@ -737,7 +739,7 @@ jit_inject(struct jit_buf_desc *jd, const char *path)
* as captured in the RECORD_MMAP record
*/
static int
-jit_detect(const char *mmap_name, pid_t pid, struct nsinfo *nsi)
+jit_detect(const char *mmap_name, pid_t pid, struct nsinfo *nsi, bool *in_pidns)
{
char *p;
char *end = NULL;
@@ -773,11 +775,16 @@ jit_detect(const char *mmap_name, pid_t pid, struct nsinfo *nsi)
if (!end)
return -1;
+ *in_pidns = pid == nsinfo__nstgid(nsi);
/*
* pid does not match mmap pid
* pid==0 in system-wide mode (synthesized)
+ *
+ * If the pid in the file name is equal to the nstgid, then
+ * the agent ran inside a container and perf outside the
+ * container, so record it for further use in jit_inject().
*/
- if (pid && pid2 != nsinfo__nstgid(nsi))
+ if (pid && !(pid2 == pid || *in_pidns))
return -1;
/*
* validate suffix
@@ -830,6 +837,7 @@ jit_process(struct perf_session *session,
struct nsinfo *nsi;
struct evsel *first;
struct jit_buf_desc jd;
+ bool in_pidns = false;
int ret;
thread = machine__findnew_thread(machine, pid, tid);
@@ -844,7 +852,7 @@ jit_process(struct perf_session *session,
/*
* first, detect marker mmap (i.e., the jitdump mmap)
*/
- if (jit_detect(filename, pid, nsi)) {
+ if (jit_detect(filename, pid, nsi, &in_pidns)) {
nsinfo__put(nsi);
/*
@@ -866,6 +874,9 @@ jit_process(struct perf_session *session,
jd.machine = machine;
jd.nsi = nsi;
+ if (in_pidns)
+ nsinfo__set_in_pidns(nsi);
+
/*
* track sample_type to compute id_all layout
* perf sets the same sample type to all events as of now
diff --git a/tools/perf/util/kvm-stat.c b/tools/perf/util/kvm-stat.c
new file mode 100644
index 000000000000..38ace736db5c
--- /dev/null
+++ b/tools/perf/util/kvm-stat.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "debug.h"
+#include "evsel.h"
+#include "kvm-stat.h"
+
+#if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
+
+bool kvm_exit_event(struct evsel *evsel)
+{
+ return evsel__name_is(evsel, kvm_exit_trace);
+}
+
+void exit_event_get_key(struct evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key)
+{
+ key->info = 0;
+ key->key = evsel__intval(evsel, sample, kvm_exit_reason);
+}
+
+bool exit_event_begin(struct evsel *evsel,
+ struct perf_sample *sample, struct event_key *key)
+{
+ if (kvm_exit_event(evsel)) {
+ exit_event_get_key(evsel, sample, key);
+ return true;
+ }
+
+ return false;
+}
+
+bool kvm_entry_event(struct evsel *evsel)
+{
+ return evsel__name_is(evsel, kvm_entry_trace);
+}
+
+bool exit_event_end(struct evsel *evsel,
+ struct perf_sample *sample __maybe_unused,
+ struct event_key *key __maybe_unused)
+{
+ return kvm_entry_event(evsel);
+}
+
+static const char *get_exit_reason(struct perf_kvm_stat *kvm,
+ struct exit_reasons_table *tbl,
+ u64 exit_code)
+{
+ while (tbl->reason != NULL) {
+ if (tbl->exit_code == exit_code)
+ return tbl->reason;
+ tbl++;
+ }
+
+ pr_err("unknown kvm exit code:%lld on %s\n",
+ (unsigned long long)exit_code, kvm->exit_reasons_isa);
+ return "UNKNOWN";
+}
+
+void exit_event_decode_key(struct perf_kvm_stat *kvm,
+ struct event_key *key,
+ char *decode)
+{
+ const char *exit_reason = get_exit_reason(kvm, key->exit_reasons,
+ key->key);
+
+ scnprintf(decode, KVM_EVENT_NAME_LEN, "%s", exit_reason);
+}
+
+#endif
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index 3e9ac754c3d1..4249542544bb 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -115,6 +115,8 @@ struct kvm_reg_events_ops {
struct kvm_events_ops *ops;
};
+#if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
+
void exit_event_get_key(struct evsel *evsel,
struct perf_sample *sample,
struct event_key *key);
@@ -127,6 +129,7 @@ bool exit_event_end(struct evsel *evsel,
void exit_event_decode_key(struct perf_kvm_stat *kvm,
struct event_key *key,
char *decode);
+#endif
bool kvm_exit_event(struct evsel *evsel);
bool kvm_entry_event(struct evsel *evsel);
diff --git a/tools/perf/util/kwork.h b/tools/perf/util/kwork.h
index 76fe2a821bcf..db00269b73f2 100644
--- a/tools/perf/util/kwork.h
+++ b/tools/perf/util/kwork.h
@@ -1,6 +1,7 @@
#ifndef PERF_UTIL_KWORK_H
#define PERF_UTIL_KWORK_H
+#include "perf.h"
#include "util/tool.h"
#include "util/time-utils.h"
@@ -251,12 +252,14 @@ struct perf_kwork {
* perf kwork top data
*/
struct kwork_top_stat top_stat;
-};
-struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork,
+ /* Add work callback. */
+ struct kwork_work *(*add_work)(struct perf_kwork *kwork,
struct kwork_class *class,
struct kwork_work *key);
+};
+
#ifdef HAVE_BPF_SKEL
int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork);
diff --git a/tools/perf/util/llvm-c-helpers.cpp b/tools/perf/util/llvm-c-helpers.cpp
index 663bcaba2041..004081bd12c9 100644
--- a/tools/perf/util/llvm-c-helpers.cpp
+++ b/tools/perf/util/llvm-c-helpers.cpp
@@ -18,7 +18,6 @@
extern "C" {
#include <linux/zalloc.h>
}
-#include "symbol_conf.h"
#include "llvm-c-helpers.h"
extern "C"
diff --git a/tools/perf/util/lock-contention.c b/tools/perf/util/lock-contention.c
new file mode 100644
index 000000000000..92e7b7b572a2
--- /dev/null
+++ b/tools/perf/util/lock-contention.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "debug.h"
+#include "env.h"
+#include "lock-contention.h"
+#include "machine.h"
+#include "symbol.h"
+
+#include <limits.h>
+#include <string.h>
+
+#include <linux/hash.h>
+#include <linux/zalloc.h>
+
+#define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS)
+#define lockhashentry(key) (lockhash_table + __lockhashfn((key)))
+
+struct callstack_filter {
+ struct list_head list;
+ char name[];
+};
+
+static LIST_HEAD(callstack_filters);
+struct hlist_head *lockhash_table;
+
+int parse_call_stack(const struct option *opt __maybe_unused, const char *str,
+ int unset __maybe_unused)
+{
+ char *s, *tmp, *tok;
+ int ret = 0;
+
+ s = strdup(str);
+ if (s == NULL)
+ return -1;
+
+ for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
+ struct callstack_filter *entry;
+
+ entry = malloc(sizeof(*entry) + strlen(tok) + 1);
+ if (entry == NULL) {
+ pr_err("Memory allocation failure\n");
+ free(s);
+ return -1;
+ }
+
+ strcpy(entry->name, tok);
+ list_add_tail(&entry->list, &callstack_filters);
+ }
+
+ free(s);
+ return ret;
+}
+
+bool needs_callstack(void)
+{
+ return !list_empty(&callstack_filters);
+}
+
+struct lock_stat *lock_stat_find(u64 addr)
+{
+ struct hlist_head *entry = lockhashentry(addr);
+ struct lock_stat *ret;
+
+ hlist_for_each_entry(ret, entry, hash_entry) {
+ if (ret->addr == addr)
+ return ret;
+ }
+ return NULL;
+}
+
+struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
+{
+ struct hlist_head *entry = lockhashentry(addr);
+ struct lock_stat *ret, *new;
+
+ hlist_for_each_entry(ret, entry, hash_entry) {
+ if (ret->addr == addr)
+ return ret;
+ }
+
+ new = zalloc(sizeof(struct lock_stat));
+ if (!new)
+ goto alloc_failed;
+
+ new->addr = addr;
+ new->name = strdup(name);
+ if (!new->name) {
+ free(new);
+ goto alloc_failed;
+ }
+
+ new->flags = flags;
+ new->wait_time_min = ULLONG_MAX;
+
+ hlist_add_head(&new->hash_entry, entry);
+ return new;
+
+alloc_failed:
+ pr_err("memory allocation failed\n");
+ return NULL;
+}
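
A usage sketch: the extern in lock-contention.h leaves allocation of lockhash_table to the caller (builtin-lock does this), and calloc()'d hlist heads start out empty. The lock address and name below are hypothetical:

    #include <errno.h>
    #include <stdlib.h>

    static int lockhash_example(void)
    {
            struct lock_stat *st;

            lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
            if (!lockhash_table)
                    return -ENOMEM;

            /* first call inserts, a later call returns the same entry */
            st = lock_stat_findnew(0xffff888000001000ULL, "rq->lock", 0);
            if (!st)
                    return -ENOMEM;

            st->nr_contended++;	/* per-lock counters live in lock_stat */
            return 0;
    }
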
+
+bool match_callstack_filter(struct machine *machine, u64 *callstack, int max_stack_depth)
+{
+ struct map *kmap;
+ struct symbol *sym;
+ u64 ip;
+ const char *arch = perf_env__arch(machine->env);
+
+ if (list_empty(&callstack_filters))
+ return true;
+
+ for (int i = 0; i < max_stack_depth; i++) {
+ struct callstack_filter *filter;
+
+ /*
+		 * On powerpc, the callchain saved by the kernel always includes
+		 * as its first three entries the NIP (next instruction pointer),
+		 * the LR (link register), and the contents of the LR save area
+		 * in the second stack frame. In certain scenarios it is possible
+		 * to have invalid kernel instruction addresses in either the LR
+		 * or the second stack frame's LR; in that case the kernel stores
+		 * that address as zero.
+		 *
+		 * The check below keeps walking the callstack in case the first
+		 * or second callstack entry has a zero address on powerpc.
+ */
+ if (!callstack || (!callstack[i] && (strcmp(arch, "powerpc") ||
+ (i != 1 && i != 2))))
+ break;
+
+ ip = callstack[i];
+ sym = machine__find_kernel_symbol(machine, ip, &kmap);
+ if (sym == NULL)
+ continue;
+
+ list_for_each_entry(filter, &callstack_filters, list) {
+ if (strstr(sym->name, filter->name))
+ return true;
+ }
+ }
+ return false;
+}
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
index 1a7248ff3889..b5d916aa49df 100644
--- a/tools/perf/util/lock-contention.h
+++ b/tools/perf/util/lock-contention.h
@@ -10,10 +10,12 @@ struct lock_filter {
int nr_addrs;
int nr_syms;
int nr_cgrps;
+ int nr_slabs;
unsigned int *types;
unsigned long *addrs;
char **syms;
u64 *cgrps;
+ char **slabs;
};
struct lock_stat {
@@ -67,10 +69,11 @@ struct lock_stat {
*/
#define MAX_LOCK_DEPTH 48
-struct lock_stat *lock_stat_find(u64 addr);
-struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags);
+/* based on kernel/lockdep.c */
+#define LOCKHASH_BITS 12
+#define LOCKHASH_SIZE (1UL << LOCKHASH_BITS)
-bool match_callstack_filter(struct machine *machine, u64 *callstack);
+extern struct hlist_head *lockhash_table;
/*
* struct lock_seq_stat:
@@ -148,14 +151,25 @@ struct lock_contention {
bool save_callstack;
};
-#ifdef HAVE_BPF_SKEL
+struct option;
+int parse_call_stack(const struct option *opt, const char *str, int unset);
+bool needs_callstack(void);
+
+struct lock_stat *lock_stat_find(u64 addr);
+struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags);
+
+bool match_callstack_filter(struct machine *machine, u64 *callstack, int max_stack_depth);
+
+#ifdef HAVE_BPF_SKEL
int lock_contention_prepare(struct lock_contention *con);
int lock_contention_start(void);
int lock_contention_stop(void);
int lock_contention_read(struct lock_contention *con);
int lock_contention_finish(struct lock_contention *con);
+struct lock_stat *pop_owner_stack_trace(struct lock_contention *con);
+
#else /* !HAVE_BPF_SKEL */
static inline int lock_contention_prepare(struct lock_contention *con __maybe_unused)
@@ -175,6 +189,11 @@ static inline int lock_contention_read(struct lock_contention *con __maybe_unuse
return 0;
}
+static inline struct lock_stat *pop_owner_stack_trace(struct lock_contention *con __maybe_unused)
+{
+ return NULL;
+}
+
#endif /* HAVE_BPF_SKEL */
#endif /* PERF_LOCK_CONTENTION_H */
diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c
index af9a97612f9d..bbcd2ffcf4bd 100644
--- a/tools/perf/util/lzma.c
+++ b/tools/perf/util/lzma.c
@@ -32,7 +32,7 @@ static const char *lzma_strerror(lzma_ret ret)
}
}
-int lzma_decompress_to_file(const char *input, int output_fd)
+int lzma_decompress_stream_to_file(FILE *infile, int output_fd)
{
lzma_action action = LZMA_RUN;
lzma_stream strm = LZMA_STREAM_INIT;
@@ -41,18 +41,11 @@ int lzma_decompress_to_file(const char *input, int output_fd)
u8 buf_in[BUFSIZE];
u8 buf_out[BUFSIZE];
- FILE *infile;
-
- infile = fopen(input, "rb");
- if (!infile) {
- pr_debug("lzma: fopen failed on %s: '%s'\n", input, strerror(errno));
- return -1;
- }
ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED);
if (ret != LZMA_OK) {
pr_debug("lzma: lzma_stream_decoder failed %s (%d)\n", lzma_strerror(ret), ret);
- goto err_fclose;
+ return err;
}
strm.next_in = NULL;
@@ -100,11 +93,25 @@ int lzma_decompress_to_file(const char *input, int output_fd)
err = 0;
err_lzma_end:
lzma_end(&strm);
-err_fclose:
- fclose(infile);
return err;
}
+int lzma_decompress_to_file(const char *input, int output_fd)
+{
+ FILE *infile;
+ int ret;
+
+ infile = fopen(input, "rb");
+ if (!infile) {
+ pr_debug("lzma: fopen failed on %s: '%s'\n", input, strerror(errno));
+ return -1;
+ }
+
+ ret = lzma_decompress_stream_to_file(infile, output_fd);
+ fclose(infile);
+ return ret;
+}
+
bool lzma_is_compressed(const char *input)
{
int fd = open(input, O_RDONLY);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 9be2f4479f52..b048165b10c1 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -37,6 +37,7 @@
#include <internal/lib.h> // page_size
#include "cgroup.h"
#include "arm64-frame-pointer-unwind-support.h"
+#include <api/io_dir.h>
#include <linux/ctype.h>
#include <symbol/kallsyms.h>
@@ -94,6 +95,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
machine->comm_exec = false;
machine->kernel_start = 0;
machine->vmlinux_map = NULL;
+ /* There is no initial context switch in, so we start at 1. */
+ machine->parallelism = 1;
machine->root_dir = strdup(root_dir);
if (machine->root_dir == NULL)
@@ -677,8 +680,11 @@ int machine__process_aux_output_hw_id_event(struct machine *machine __maybe_unus
int machine__process_switch_event(struct machine *machine __maybe_unused,
union perf_event *event)
{
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+
if (dump_trace)
perf_event__fprintf_switch(event, stdout);
+ machine->parallelism += out ? -1 : 1;
return 0;
}
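
Combined with the exit hook below (no switch-out precedes PERF_RECORD_EXIT), the counter evolves per event; a sketch over a hypothetical stream:

    int parallelism = 1;	/* no switch-in precedes the first task */

    parallelism += 1;	/* switch-in  on cpu1 -> 2 */
    parallelism += 1;	/* switch-in  on cpu2 -> 3 */
    parallelism -= 1;	/* switch-out on cpu0 -> 2 */
    parallelism -= 1;	/* PERF_RECORD_EXIT     -> 1 */
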
@@ -712,7 +718,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
map__set_start(map, event->ksymbol.addr);
map__set_end(map, map__start(map) + event->ksymbol.len);
- err = maps__insert(machine__kernel_maps(machine), map);
+ err = maps__fixup_overlap_and_insert(machine__kernel_maps(machine), map);
if (err) {
err = -ENOMEM;
goto out;
@@ -773,6 +779,10 @@ int machine__process_ksymbol(struct machine *machine __maybe_unused,
if (dump_trace)
perf_event__fprintf_ksymbol(event, stdout);
+ /* no need to process non-JIT BPF as it cannot get samples */
+ if (event->ksymbol.len == 0)
+ return 0;
+
if (event->ksymbol.flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER)
return machine__process_ksymbol_unregister(machine, event,
sample);
@@ -886,26 +896,6 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
return ret;
}
-size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
-{
- int i;
- size_t printed = 0;
- struct dso *kdso = machine__kernel_dso(machine);
-
- if (dso__has_build_id(kdso)) {
- char filename[PATH_MAX];
-
- if (dso__build_id_filename(kdso, filename, sizeof(filename), false))
- printed += fprintf(fp, "[0] %s\n", filename);
- }
-
- for (i = 0; i < vmlinux_path__nr_entries; ++i) {
- printed += fprintf(fp, "[%d] %s\n", i + dso__has_build_id(kdso),
- vmlinux_path[i]);
- }
- return printed;
-}
-
struct machine_fprintf_cb_args {
FILE *fp;
size_t printed;
@@ -1352,27 +1342,24 @@ static int maps__set_module_path(struct maps *maps, const char *path, struct kmo
return 0;
}
-static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, int depth)
+static int maps__set_modules_path_dir(struct maps *maps, char *path, size_t path_size, int depth)
{
- struct dirent *dent;
- DIR *dir = opendir(dir_name);
+ struct io_dirent64 *dent;
+ struct io_dir iod;
+ size_t root_len = strlen(path);
int ret = 0;
- if (!dir) {
- pr_debug("%s: cannot open %s dir\n", __func__, dir_name);
+ io_dir__init(&iod, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ if (iod.dirfd < 0) {
+ pr_debug("%s: cannot open %s dir\n", __func__, path);
return -1;
}
-
- while ((dent = readdir(dir)) != NULL) {
- char path[PATH_MAX];
- struct stat st;
-
- /*sshfs might return bad dent->d_type, so we have to stat*/
- path__join(path, sizeof(path), dir_name, dent->d_name);
- if (stat(path, &st))
- continue;
-
- if (S_ISDIR(st.st_mode)) {
+ /* Bounds check, should never happen. */
+ if (root_len >= path_size)
+ return -1;
+ path[root_len++] = '/';
+ while ((dent = io_dir__readdir(&iod)) != NULL) {
+ if (io_dir__is_dir(&iod, dent)) {
if (!strcmp(dent->d_name, ".") ||
!strcmp(dent->d_name, ".."))
continue;
@@ -1384,7 +1371,12 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i
continue;
}
- ret = maps__set_modules_path_dir(maps, path, depth + 1);
+ /* Bounds check, should never happen. */
+ if (root_len + strlen(dent->d_name) >= path_size)
+ continue;
+
+ strcpy(path + root_len, dent->d_name);
+ ret = maps__set_modules_path_dir(maps, path, path_size, depth + 1);
if (ret < 0)
goto out;
} else {
@@ -1394,9 +1386,14 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i
if (ret)
goto out;
- if (m.kmod)
- ret = maps__set_module_path(maps, path, &m);
+ if (m.kmod) {
+ /* Bounds check, should never happen. */
+ if (root_len + strlen(dent->d_name) < path_size) {
+ strcpy(path + root_len, dent->d_name);
+ ret = maps__set_module_path(maps, path, &m);
+ }
+ }
zfree(&m.name);
if (ret)
@@ -1405,7 +1402,7 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i
}
out:
- closedir(dir);
+ close(iod.dirfd);
return ret;
}
@@ -1422,7 +1419,8 @@ static int machine__set_modules_path(struct machine *machine)
machine->root_dir, version);
free(version);
- return maps__set_modules_path_dir(machine__kernel_maps(machine), modules_path, 0);
+ return maps__set_modules_path_dir(machine__kernel_maps(machine),
+ modules_path, sizeof(modules_path), 0);
}
int __weak arch__fix_module_text_start(u64 *start __maybe_unused,
u64 *size __maybe_unused,
@@ -1561,6 +1559,8 @@ int machine__create_kernel_maps(struct machine *machine)
}
}
+ maps__fixup_end(machine__kernel_maps(machine));
+
out_put:
dso__put(kernel);
return ret;
@@ -1898,6 +1898,8 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event
if (dump_trace)
perf_event__fprintf_task(event, stdout);
+ /* There is no context switch out before exit, so we decrement here. */
+ machine->parallelism--;
if (thread != NULL) {
if (symbol_conf.keep_exited_threads)
thread__set_exited(thread, /*exited=*/true);
@@ -1974,7 +1976,7 @@ static void ip__resolve_ams(struct thread *thread,
* Thus, we have to try consecutively until we find a match
* or else, the symbol is unknown
*/
- thread__find_cpumode_addr_location(thread, ip, &al);
+ thread__find_cpumode_addr_location(thread, ip, /*symbols=*/true, &al);
ams->addr = ip;
ams->al_addr = al.addr;
@@ -2076,7 +2078,7 @@ static int add_callchain_ip(struct thread *thread,
al.sym = NULL;
al.srcline = NULL;
if (!cpumode) {
- thread__find_cpumode_addr_location(thread, ip, &al);
+ thread__find_cpumode_addr_location(thread, ip, symbols, &al);
} else {
if (ip >= PERF_CONTEXT_MAX) {
switch (ip) {
@@ -2104,6 +2106,8 @@ static int add_callchain_ip(struct thread *thread,
}
if (symbols)
thread__find_symbol(thread, *cpumode, ip, &al);
+ else
+ thread__find_map(thread, *cpumode, ip, &al);
}
if (al.sym != NULL) {
@@ -2927,8 +2931,8 @@ static int thread__resolve_callchain_unwind(struct thread *thread,
return 0;
/* Bail out if nothing was captured. */
- if ((!sample->user_regs.regs) ||
- (!sample->user_stack.size))
+ if (!sample->user_regs || !sample->user_regs->regs ||
+ !sample->user_stack.size)
return 0;
if (!symbols)
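
The parallelism counter above is driven purely by context-switch and exit
events, so its behaviour can be checked by replaying events in time order.
A minimal, self-contained sketch of that accounting (perf's structs are
stubbed out here; only the misc-flag logic mirrors the patch):

	#include <stdio.h>

	#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) /* value from the perf ABI */

	struct toy_machine { int parallelism; };

	/* Mirrors machine__process_switch_event(): switch in => +1, out => -1. */
	static void toy_process_switch(struct toy_machine *m, unsigned int misc)
	{
		m->parallelism += (misc & PERF_RECORD_MISC_SWITCH_OUT) ? -1 : 1;
	}

	/* Mirrors machine__process_exit_event(): no switch out precedes an exit. */
	static void toy_process_exit(struct toy_machine *m)
	{
		m->parallelism--;
	}

	int main(void)
	{
		struct toy_machine m = { .parallelism = 1 }; /* no initial switch in */

		toy_process_switch(&m, 0);                           /* a CPU switches in */
		toy_process_switch(&m, PERF_RECORD_MISC_SWITCH_OUT); /* a CPU switches out */
		toy_process_exit(&m);                                /* a running task exits */
		printf("parallelism = %d\n", m.parallelism);         /* prints 0 */
		return 0;
	}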
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 2e5a4cb342d8..b56abec84fed 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -50,6 +50,12 @@ struct machine {
u64 text_start;
u64 text_end;
} sched, lock, traceiter, trace;
+ /*
+ * The current parallelism level (number of threads that run on CPUs).
+ * This value can be less than 1, or larger than the total number
+ * of CPUs, if events are poorly ordered.
+ */
+ int parallelism;
pid_t *current_tid;
size_t current_tid_sz;
union { /* Tool specific area */
@@ -266,8 +272,6 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid);
int machines__create_guest_kernel_maps(struct machines *machines);
void machines__destroy_kernel_maps(struct machines *machines);
-size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
-
typedef int (*machine__dso_t)(struct dso *dso, struct machine *machine, void *priv);
int machine__for_each_dso(struct machine *machine, machine__dso_t fn,
diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c
index 09c9cc326c08..0b40d901675e 100644
--- a/tools/perf/util/maps.c
+++ b/tools/perf/util/maps.c
@@ -428,11 +428,29 @@ static unsigned int maps__by_name_index(const struct maps *maps, const struct ma
return -1;
}
+static void map__set_kmap_maps(struct map *map, struct maps *maps)
+{
+ struct dso *dso;
+
+ if (map == NULL)
+ return;
+
+ dso = map__dso(map);
+
+ if (dso && dso__kernel(dso)) {
+ struct kmap *kmap = map__kmap(map);
+
+ if (kmap)
+ kmap->kmaps = maps;
+ else
+ pr_err("Internal error: kernel dso with non kernel map\n");
+ }
+}
+
static int __maps__insert(struct maps *maps, struct map *new)
{
struct map **maps_by_address = maps__maps_by_address(maps);
struct map **maps_by_name = maps__maps_by_name(maps);
- const struct dso *dso = map__dso(new);
unsigned int nr_maps = maps__nr_maps(maps);
unsigned int nr_allocate = RC_CHK_ACCESS(maps)->nr_maps_allocated;
@@ -483,14 +501,9 @@ static int __maps__insert(struct maps *maps, struct map *new)
}
if (map__end(new) < map__start(new))
RC_CHK_ACCESS(maps)->ends_broken = true;
- if (dso && dso__kernel(dso)) {
- struct kmap *kmap = map__kmap(new);
- if (kmap)
- kmap->kmaps = maps;
- else
- pr_err("Internal error: kernel dso with non kernel map\n");
- }
+ map__set_kmap_maps(new, maps);
+
return 0;
}
@@ -785,6 +798,9 @@ static int __maps__insert_sorted(struct maps *maps, unsigned int first_after_ind
}
RC_CHK_ACCESS(maps)->nr_maps = nr_maps + to_add;
maps__set_maps_by_name_sorted(maps, false);
+ map__set_kmap_maps(new1, maps);
+ map__set_kmap_maps(new2, maps);
+
check_invariants(maps);
return 0;
}
@@ -797,7 +813,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
{
int err = 0;
FILE *fp = debug_file();
- unsigned int i;
+ unsigned int i, ni = INT_MAX; // Some GCC versions warn ni may be used uninitialized; it is only read when maps_by_name is set.
if (!maps__maps_by_address_sorted(maps))
__maps__sort_by_address(maps);
@@ -808,6 +824,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
*/
for (i = first_ending_after(maps, new); i < maps__nr_maps(maps); ) {
struct map **maps_by_address = maps__maps_by_address(maps);
+ struct map **maps_by_name = maps__maps_by_name(maps);
struct map *pos = maps_by_address[i];
struct map *before = NULL, *after = NULL;
@@ -827,6 +844,9 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
map__fprintf(pos, fp);
}
+ if (maps_by_name)
+ ni = maps__by_name_index(maps, pos);
+
/*
* Now check if we need to create new maps for areas not
* overlapped by the new map:
@@ -871,6 +891,12 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
if (before) {
map__put(maps_by_address[i]);
maps_by_address[i] = before;
+
+ if (maps_by_name) {
+ map__put(maps_by_name[ni]);
+ maps_by_name[ni] = map__get(before);
+ }
+
/* Maps are still ordered, go to next one. */
i++;
if (after) {
@@ -892,6 +918,12 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
*/
map__put(maps_by_address[i]);
maps_by_address[i] = map__get(new);
+
+ if (maps_by_name) {
+ map__put(maps_by_name[ni]);
+ maps_by_name[ni] = map__get(new);
+ }
+
err = __maps__insert_sorted(maps, i + 1, after, NULL);
map__put(after);
check_invariants(maps);
@@ -910,6 +942,14 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
*/
map__put(maps_by_address[i]);
maps_by_address[i] = map__get(new);
+
+ if (maps_by_name) {
+ map__put(maps_by_name[ni]);
+ maps_by_name[ni] = map__get(new);
+ }
+
+ map__set_kmap_maps(new, maps);
+
check_invariants(maps);
return err;
}
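
Note how the overlap-fixup path keeps maps_by_name[] in step with
maps_by_address[]: each array slot owns one reference, so replacing a slot is
always put-old-then-get-new. A sketch of that pattern with a generic
refcounted stand-in (the names here are illustrative, not perf's):

	#include <stdlib.h>

	/* Toy refcounted object standing in for struct map. */
	struct obj { int refcnt; };

	static struct obj *obj_get(struct obj *o)
	{
		if (o)
			o->refcnt++;
		return o;
	}

	static void obj_put(struct obj *o)
	{
		if (o && --o->refcnt == 0)
			free(o);
	}

	/*
	 * Replace the owning reference held in a slot, as done above for both
	 * maps_by_address[i] and maps_by_name[ni]: drop the old reference,
	 * then store a freshly taken one.
	 */
	static void slot_replace(struct obj **slot, struct obj *new)
	{
		obj_put(*slot);
		*slot = obj_get(new);
	}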
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index bf5090f5220b..884d9aebce91 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -31,9 +31,6 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
bool perf_mem_record[PERF_MEM_EVENTS__MAX] = { 0 };
-static char mem_loads_name[100];
-static char mem_stores_name[100];
-
struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i)
{
if (i >= PERF_MEM_EVENTS__MAX || !pmu)
@@ -81,7 +78,8 @@ int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu)
return num;
}
-static const char *perf_pmu__mem_events_name(int i, struct perf_pmu *pmu)
+static const char *perf_pmu__mem_events_name(struct perf_pmu *pmu, int i,
+ char *buf, size_t buf_size)
{
struct perf_mem_event *e;
@@ -96,31 +94,31 @@ static const char *perf_pmu__mem_events_name(int i, struct perf_pmu *pmu)
if (e->ldlat) {
if (!e->aux_event) {
/* ARM and Most of Intel */
- scnprintf(mem_loads_name, sizeof(mem_loads_name),
+ scnprintf(buf, buf_size,
e->name, pmu->name,
perf_mem_events__loads_ldlat);
} else {
/* Intel with mem-loads-aux event */
- scnprintf(mem_loads_name, sizeof(mem_loads_name),
+ scnprintf(buf, buf_size,
e->name, pmu->name, pmu->name,
perf_mem_events__loads_ldlat);
}
} else {
if (!e->aux_event) {
/* AMD and POWER */
- scnprintf(mem_loads_name, sizeof(mem_loads_name),
+ scnprintf(buf, buf_size,
e->name, pmu->name);
- } else
+ } else {
return NULL;
+ }
}
-
- return mem_loads_name;
+ return buf;
}
if (i == PERF_MEM_EVENTS__STORE) {
- scnprintf(mem_stores_name, sizeof(mem_stores_name),
+ scnprintf(buf, buf_size,
e->name, pmu->name);
- return mem_stores_name;
+ return buf;
}
return NULL;
@@ -189,7 +187,7 @@ static bool perf_pmu__mem_events_supported(const char *mnt, struct perf_pmu *pmu
if (!e->event_name)
return true;
- scnprintf(path, PATH_MAX, "%s/devices/%s/events/%s", mnt, pmu->name, e->event_name);
+ scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/events/%s", mnt, pmu->name, e->event_name);
return !stat(path, &st);
}
@@ -238,66 +236,87 @@ void perf_pmu__mem_events_list(struct perf_pmu *pmu)
int j;
for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+ char buf[128];
struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);
fprintf(stderr, "%-*s%-*s%s",
e->tag ? 13 : 0,
e->tag ? : "",
e->tag && verbose > 0 ? 25 : 0,
- e->tag && verbose > 0 ? perf_pmu__mem_events_name(j, pmu) : "",
+ e->tag && verbose > 0
+ ? perf_pmu__mem_events_name(pmu, j, buf, sizeof(buf))
+ : "",
e->supported ? ": available\n" : "");
}
}
-int perf_mem_events__record_args(const char **rec_argv, int *argv_nr)
+int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, char **event_name_storage_out)
{
const char *mnt = sysfs__mount();
struct perf_pmu *pmu = NULL;
- struct perf_mem_event *e;
int i = *argv_nr;
- const char *s;
- char *copy;
struct perf_cpu_map *cpu_map = NULL;
+ size_t event_name_storage_size =
+ perf_pmu__mem_events_num_mem_pmus(NULL) * PERF_MEM_EVENTS__MAX * 128;
+ size_t event_name_storage_remaining = event_name_storage_size;
+ char *event_name_storage = malloc(event_name_storage_size);
+ char *event_name_storage_ptr = event_name_storage;
+
+ if (!event_name_storage)
+ return -ENOMEM;
+ *event_name_storage_out = NULL;
while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) {
for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
- e = perf_pmu__mem_events_ptr(pmu, j);
+ const char *s;
+ struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);
+ int ret;
if (!perf_mem_record[j])
continue;
if (!e->supported) {
+ char buf[128];
+
pr_err("failed: event '%s' not supported\n",
- perf_pmu__mem_events_name(j, pmu));
+ perf_pmu__mem_events_name(pmu, j, buf, sizeof(buf)));
+ free(event_name_storage);
return -1;
}
- s = perf_pmu__mem_events_name(j, pmu);
+ s = perf_pmu__mem_events_name(pmu, j, event_name_storage_ptr,
+ event_name_storage_remaining);
if (!s || !perf_pmu__mem_events_supported(mnt, pmu, e))
continue;
- copy = strdup(s);
- if (!copy)
- return -1;
-
rec_argv[i++] = "-e";
- rec_argv[i++] = copy;
-
- cpu_map = perf_cpu_map__merge(cpu_map, pmu->cpus);
+ rec_argv[i++] = event_name_storage_ptr;
+ event_name_storage_remaining -= strlen(event_name_storage_ptr) + 1;
+ event_name_storage_ptr += strlen(event_name_storage_ptr) + 1;
+
+ ret = perf_cpu_map__merge(&cpu_map, pmu->cpus);
+ if (ret < 0) {
+ free(event_name_storage);
+ return ret;
+ }
}
}
if (cpu_map) {
- if (!perf_cpu_map__equal(cpu_map, cpu_map__online())) {
+ struct perf_cpu_map *online = cpu_map__online();
+
+ if (!perf_cpu_map__equal(cpu_map, online)) {
char buf[200];
cpu_map__snprint(cpu_map, buf, sizeof(buf));
pr_warning("Memory events are enabled on a subset of CPUs: %s\n", buf);
}
+ perf_cpu_map__put(online);
perf_cpu_map__put(cpu_map);
}
*argv_nr = i;
+ *event_name_storage_out = event_name_storage;
return 0;
}
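
With the static name buffers gone, perf_mem_events__record_args() now hands
ownership of one heap block back through event_name_storage_out; the rec_argv
entries point into that block, so it must outlive them and be freed exactly
once. A hypothetical caller, sketched (array sizing and session code elided):

	const char *rec_argv[256];
	int rec_argc = 0;
	char *event_name_storage = NULL;

	if (perf_mem_events__record_args(rec_argv, &rec_argc, &event_name_storage) < 0)
		return -1;
	/* ... run the record session; rec_argv[] strings live in the block ... */
	free(event_name_storage); /* one free releases every event name */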
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index 8dc27db9fd52..a5c19d39ee37 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -38,7 +38,8 @@ int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu);
bool is_mem_loads_aux_event(struct evsel *leader);
void perf_pmu__mem_events_list(struct perf_pmu *pmu);
-int perf_mem_events__record_args(const char **rec_argv, int *argv_nr);
+int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
+ char **event_name_storage_out);
int perf_mem__tlb_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 43b02293f1d2..a34726219af3 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -244,9 +244,8 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask)
{
int idx, nr_cpus;
struct perf_cpu cpu;
- const struct perf_cpu_map *cpu_map = NULL;
+ struct perf_cpu_map *cpu_map = cpu_map__online();
- cpu_map = cpu_map__online();
if (!cpu_map)
return;
@@ -256,6 +255,7 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask)
if (cpu__get_node(cpu) == node)
__set_bit(cpu.cpu, mask->bits);
}
+ perf_cpu_map__put(cpu_map);
}
static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp)
@@ -356,14 +356,3 @@ int perf_mmap__push(struct mmap *md, void *to,
out:
return rc;
}
-
-int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, struct mmap_cpu_mask *clone)
-{
- clone->nbits = original->nbits;
- clone->bits = bitmap_zalloc(original->nbits);
- if (!clone->bits)
- return -ENOMEM;
-
- memcpy(clone->bits, original->bits, MMAP_CPU_MASK_BYTES(original));
- return 0;
-}
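
cpu_map__online() returns a reference that the caller owns, so every use is
paired with perf_cpu_map__put(), as build_node_mask() now does. The general
pattern, sketched:

	struct perf_cpu_map *online = cpu_map__online();

	if (online) {
		/* ... iterate or compare the map ... */
		perf_cpu_map__put(online); /* drop the reference we were handed */
	}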
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 0df6e1621c7e..4d72c5fa5084 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -61,7 +61,4 @@ size_t mmap__mmap_len(struct mmap *map);
void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag);
-int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original,
- struct mmap_cpu_mask *clone);
-
#endif /*__PERF_MMAP_H */
diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h
index 40661120cacc..62d258c71ded 100644
--- a/tools/perf/util/mutex.h
+++ b/tools/perf/util/mutex.h
@@ -33,6 +33,12 @@
/* Documents if a type is a lockable type. */
#define LOCKABLE __attribute__((lockable))
+/* Documents a function that expects a lock not to be held prior to entry. */
+#define LOCKS_EXCLUDED(...) __attribute__((locks_excluded(__VA_ARGS__)))
+
+/* Documents a function that returns a lock. */
+#define LOCK_RETURNED(x) __attribute__((lock_returned(x)))
+
/* Documents functions that acquire a lock in the body of a function, and do not release it. */
#define EXCLUSIVE_LOCK_FUNCTION(...) __attribute__((exclusive_lock_function(__VA_ARGS__)))
@@ -57,6 +63,8 @@
#define GUARDED_BY(x)
#define PT_GUARDED_BY(x)
#define LOCKABLE
+#define LOCKS_EXCLUDED(...)
+#define LOCK_RETURNED(x)
#define EXCLUSIVE_LOCK_FUNCTION(...)
#define UNLOCK_FUNCTION(...)
#define EXCLUSIVE_TRYLOCK_FUNCTION(...)
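
The two new macros map onto clang's thread-safety attributes; building with
-Wthread-safety lets the analyzer flag callers that enter with the lock held
or that mislabel which lock an accessor returns. A small usage sketch (the
cache type and function names are illustrative):

	struct cache {
		struct mutex lock;
		int hits GUARDED_BY(lock);
	};

	/* Callers must NOT already hold c->lock when calling this. */
	void cache_bump(struct cache *c) LOCKS_EXCLUDED(c->lock);

	/* Documented as returning the very lock that guards the cache. */
	struct mutex *cache_lock(struct cache *c) LOCK_RETURNED(c->lock)
	{
		return &c->lock;
	}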
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index afeb8d815bbf..5152fd5a6ead 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -17,6 +17,7 @@
#include "strbuf.h"
#include "debug.h"
#include <api/fs/tracing_path.h>
+#include <api/io_dir.h>
#include <perf/cpumap.h>
#include <util/parse-events-bison.h>
#include <util/parse-events-flex.h>
@@ -489,7 +490,6 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
return found_supported ? 0 : -EINVAL;
}
-#ifdef HAVE_LIBTRACEEVENT
static void tracepoint_error(struct parse_events_error *e, int err,
const char *sys, const char *name, int column)
{
@@ -555,8 +555,8 @@ static int add_tracepoint_multi_event(struct parse_events_state *parse_state,
struct parse_events_terms *head_config, YYLTYPE *loc)
{
char *evt_path;
- struct dirent *evt_ent;
- DIR *evt_dir;
+ struct io_dirent64 *evt_ent;
+ struct io_dir evt_dir;
int ret = 0, found = 0;
evt_path = get_events_file(sys_name);
@@ -564,14 +564,14 @@ static int add_tracepoint_multi_event(struct parse_events_state *parse_state,
tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
return -1;
}
- evt_dir = opendir(evt_path);
- if (!evt_dir) {
+ io_dir__init(&evt_dir, open(evt_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ if (evt_dir.dirfd < 0) {
put_events_file(evt_path);
tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
return -1;
}
- while (!ret && (evt_ent = readdir(evt_dir))) {
+ while (!ret && (evt_ent = io_dir__readdir(&evt_dir))) {
if (!strcmp(evt_ent->d_name, ".")
|| !strcmp(evt_ent->d_name, "..")
|| !strcmp(evt_ent->d_name, "enable")
@@ -593,7 +593,7 @@ static int add_tracepoint_multi_event(struct parse_events_state *parse_state,
}
put_events_file(evt_path);
- closedir(evt_dir);
+ close(evt_dir.dirfd);
return ret;
}
@@ -616,17 +616,23 @@ static int add_tracepoint_multi_sys(struct parse_events_state *parse_state,
struct parse_events_error *err,
struct parse_events_terms *head_config, YYLTYPE *loc)
{
- struct dirent *events_ent;
- DIR *events_dir;
+ struct io_dirent64 *events_ent;
+ struct io_dir events_dir;
int ret = 0;
+ char *events_dir_path = get_tracing_file("events");
- events_dir = tracing_events__opendir();
- if (!events_dir) {
+ if (!events_dir_path) {
+ tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
+ return -1;
+ }
+ io_dir__init(&events_dir, open(events_dir_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ put_events_file(events_dir_path);
+ if (events_dir.dirfd < 0) {
tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
return -1;
}
- while (!ret && (events_ent = readdir(events_dir))) {
+ while (!ret && (events_ent = io_dir__readdir(&events_dir))) {
if (!strcmp(events_ent->d_name, ".")
|| !strcmp(events_ent->d_name, "..")
|| !strcmp(events_ent->d_name, "enable")
@@ -640,11 +646,9 @@ static int add_tracepoint_multi_sys(struct parse_events_state *parse_state,
ret = add_tracepoint_event(parse_state, list, events_ent->d_name,
evt_name, err, head_config, loc);
}
-
- closedir(events_dir);
+ close(events_dir.dirfd);
return ret;
}
-#endif /* HAVE_LIBTRACEEVENT */
size_t default_breakpoint_len(void)
{
@@ -795,6 +799,7 @@ const char *parse_events__term_type_str(enum parse_events__term_type term_type)
[PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config",
[PARSE_EVENTS__TERM_TYPE_PERCORE] = "percore",
[PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT] = "aux-output",
+ [PARSE_EVENTS__TERM_TYPE_AUX_ACTION] = "aux-action",
[PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE] = "aux-sample-size",
[PARSE_EVENTS__TERM_TYPE_METRIC_ID] = "metric-id",
[PARSE_EVENTS__TERM_TYPE_RAW] = "raw",
@@ -844,6 +849,7 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er
case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
+ case PARSE_EVENTS__TERM_TYPE_AUX_ACTION:
case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
case PARSE_EVENTS__TERM_TYPE_RAW:
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
@@ -963,6 +969,9 @@ do { \
case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
CHECK_TYPE_VAL(NUM);
break;
+ case PARSE_EVENTS__TERM_TYPE_AUX_ACTION:
+ CHECK_TYPE_VAL(STR);
+ break;
case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
CHECK_TYPE_VAL(NUM);
if (term->val.num > UINT_MAX) {
@@ -1066,7 +1075,6 @@ static int config_term_pmu(struct perf_event_attr *attr,
return config_term_common(attr, term, err);
}
-#ifdef HAVE_LIBTRACEEVENT
static int config_term_tracepoint(struct perf_event_attr *attr,
struct parse_events_term *term,
struct parse_events_error *err)
@@ -1081,6 +1089,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
+ case PARSE_EVENTS__TERM_TYPE_AUX_ACTION:
case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
return config_term_common(attr, term, err);
case PARSE_EVENTS__TERM_TYPE_USER:
@@ -1111,7 +1120,6 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
return 0;
}
-#endif
static int config_attr(struct perf_event_attr *attr,
const struct parse_events_terms *head,
@@ -1217,6 +1225,9 @@ do { \
ADD_CONFIG_TERM_VAL(AUX_OUTPUT, aux_output,
term->val.num ? 1 : 0, term->weak);
break;
+ case PARSE_EVENTS__TERM_TYPE_AUX_ACTION:
+ ADD_CONFIG_TERM_STR(AUX_ACTION, term->val.str, term->weak);
+ break;
case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size,
term->val.num, term->weak);
@@ -1279,6 +1290,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head
case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
case PARSE_EVENTS__TERM_TYPE_PERCORE:
case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
+ case PARSE_EVENTS__TERM_TYPE_AUX_ACTION:
case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
case PARSE_EVENTS__TERM_TYPE_RAW:
@@ -1303,7 +1315,7 @@ int parse_events_add_tracepoint(struct parse_events_state *parse_state,
struct parse_events_terms *head_config, void *loc_)
{
YYLTYPE *loc = loc_;
-#ifdef HAVE_LIBTRACEEVENT
+
if (head_config) {
struct perf_event_attr attr;
@@ -1318,16 +1330,6 @@ int parse_events_add_tracepoint(struct parse_events_state *parse_state,
else
return add_tracepoint_event(parse_state, list, sys, event,
err, head_config, loc);
-#else
- (void)parse_state;
- (void)list;
- (void)sys;
- (void)event;
- (void)head_config;
- parse_events_error__handle(err, loc->first_column, strdup("unsupported tracepoint"),
- strdup("libtraceevent is necessary for tracepoint support"));
- return -1;
-#endif
}
static int __parse_events_add_numeric(struct parse_events_state *parse_state,
@@ -1664,7 +1666,7 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state
/* Failed to add, try wildcard expansion of event_or_pmu as a PMU name. */
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
if (!parse_events__filter_pmu(parse_state, pmu) &&
- perf_pmu__match(pmu, event_or_pmu)) {
+ perf_pmu__wildcard_match(pmu, event_or_pmu)) {
bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
if (!parse_events_add_pmu(parse_state, *listp, pmu,
@@ -1978,48 +1980,55 @@ static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct li
int *force_grouped_idx = _fg_idx;
int lhs_sort_idx, rhs_sort_idx, ret;
const char *lhs_pmu_name, *rhs_pmu_name;
- bool lhs_has_group, rhs_has_group;
/*
- * First sort by grouping/leader. Read the leader idx only if the evsel
- * is part of a group, by default ungrouped events will be sorted
- * relative to grouped events based on where the first ungrouped event
- * occurs. If both events don't have a group we want to fall-through to
- * the arch specific sorting, that can reorder and fix things like
- * Intel's topdown events.
+ * Get the indexes of the 2 events to sort. If the events are
+ * in groups then the leader's index is used otherwise the
+ * event's index is used. An index may be forced for events that
+ * must be in the same group, namely Intel topdown events.
*/
- if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1) {
- lhs_has_group = true;
- lhs_sort_idx = lhs_core->leader->idx;
+ if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)) {
+ lhs_sort_idx = *force_grouped_idx;
} else {
- lhs_has_group = false;
- lhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)
- ? *force_grouped_idx
- : lhs_core->idx;
- }
- if (rhs_core->leader != rhs_core || rhs_core->nr_members > 1) {
- rhs_has_group = true;
- rhs_sort_idx = rhs_core->leader->idx;
+ bool lhs_has_group = lhs_core->leader != lhs_core || lhs_core->nr_members > 1;
+
+ lhs_sort_idx = lhs_has_group ? lhs_core->leader->idx : lhs_core->idx;
+ }
+ if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)) {
+ rhs_sort_idx = *force_grouped_idx;
} else {
- rhs_has_group = false;
- rhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)
- ? *force_grouped_idx
- : rhs_core->idx;
+ bool rhs_has_group = rhs_core->leader != rhs_core || rhs_core->nr_members > 1;
+
+ rhs_sort_idx = rhs_has_group ? rhs_core->leader->idx : rhs_core->idx;
}
+ /* If the indices differ then respect the insertion order. */
if (lhs_sort_idx != rhs_sort_idx)
return lhs_sort_idx - rhs_sort_idx;
- /* Group by PMU if there is a group. Groups can't span PMUs. */
- if (lhs_has_group && rhs_has_group) {
- lhs_pmu_name = lhs->group_pmu_name;
- rhs_pmu_name = rhs->group_pmu_name;
- ret = strcmp(lhs_pmu_name, rhs_pmu_name);
- if (ret)
- return ret;
- }
+ /*
+ * Ignoring forcing, lhs_sort_idx == rhs_sort_idx so lhs and rhs should
+ * be in the same group. Events in the same group need to be ordered by
+ * their grouping PMU name as the group will be broken to ensure only
+ * events on the same PMU are programmed together.
+ *
+ * With forcing, lhs_sort_idx == rhs_sort_idx shows that one or both
+ * events are being forced to be at force_grouped_idx. If only one event
+ * is being forced then the other event is the group leader of the group
+ * we're trying to force the event into. Ensure for the force grouped
+ * case that the PMU name ordering is also respected.
+ */
+ lhs_pmu_name = lhs->group_pmu_name;
+ rhs_pmu_name = rhs->group_pmu_name;
+ ret = strcmp(lhs_pmu_name, rhs_pmu_name);
+ if (ret)
+ return ret;
- /* Architecture specific sorting. */
+ /*
+ * Architecture specific sorting, by default sort events in the same
+ * group with the same PMU by their insertion index. On Intel topdown
+ * constraints must be adhered to - slots first, etc.
+ */
return arch_evlist__cmp(lhs, rhs);
}
@@ -2028,9 +2037,11 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
int idx = 0, force_grouped_idx = -1;
struct evsel *pos, *cur_leader = NULL;
struct perf_evsel *cur_leaders_grp = NULL;
- bool idx_changed = false, cur_leader_force_grouped = false;
+ bool idx_changed = false;
int orig_num_leaders = 0, num_leaders = 0;
int ret;
+ struct evsel *force_grouped_leader = NULL;
+ bool last_event_was_forced_leader = false;
/*
* Compute index to insert ungrouped events at. Place them where the
@@ -2053,10 +2064,13 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
*/
pos->core.idx = idx++;
- /* Remember an index to sort all forced grouped events together to. */
- if (force_grouped_idx == -1 && pos == pos_leader && pos->core.nr_members < 2 &&
- arch_evsel__must_be_in_group(pos))
- force_grouped_idx = pos->core.idx;
+ /*
+ * Remember an index to sort all forced grouped events
+ * together to. Use the group leader as some events
+ * must appear first within the group.
+ */
+ if (force_grouped_idx == -1 && arch_evsel__must_be_in_group(pos))
+ force_grouped_idx = pos_leader->core.idx;
}
/* Sort events. */
@@ -2084,31 +2098,66 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
* Set the group leader respecting the given groupings and that
* groups can't span PMUs.
*/
- if (!cur_leader)
+ if (!cur_leader) {
cur_leader = pos;
+ cur_leaders_grp = &pos->core;
+ if (pos_force_grouped)
+ force_grouped_leader = pos;
+ }
cur_leader_pmu_name = cur_leader->group_pmu_name;
- if ((cur_leaders_grp != pos->core.leader &&
- (!pos_force_grouped || !cur_leader_force_grouped)) ||
- strcmp(cur_leader_pmu_name, pos_pmu_name)) {
- /* Event is for a different group/PMU than last. */
+ if (strcmp(cur_leader_pmu_name, pos_pmu_name)) {
+ /* PMU changed so the group/leader must change. */
cur_leader = pos;
- /*
- * Remember the leader's group before it is overwritten,
- * so that later events match as being in the same
- * group.
- */
cur_leaders_grp = pos->core.leader;
+ if (pos_force_grouped && force_grouped_leader == NULL)
+ force_grouped_leader = pos;
+ } else if (cur_leaders_grp != pos->core.leader) {
+ bool split_even_if_last_leader_was_forced = true;
+
/*
- * Avoid forcing events into groups with events that
- * don't need to be in the group.
+ * Event is for a different group. If the last event was
+ * the forced group leader then subsequent group events
+ * and forced events should be in the same group. If
+ * there are no other forced group events then the
+ * forced group leader wasn't really being forced into a
+ * group, it just set arch_evsel__must_be_in_group, and
+ * we don't want the group to split here.
*/
- cur_leader_force_grouped = pos_force_grouped;
+ if (force_grouped_idx != -1 && last_event_was_forced_leader) {
+ struct evsel *pos2 = pos;
+ /*
+ * Search the whole list as the group leaders
+ * aren't currently valid.
+ */
+ list_for_each_entry_continue(pos2, list, core.node) {
+ if (pos->core.leader == pos2->core.leader &&
+ arch_evsel__must_be_in_group(pos2)) {
+ split_even_if_last_leader_was_forced = false;
+ break;
+ }
+ }
+ }
+ if (!last_event_was_forced_leader || split_even_if_last_leader_was_forced) {
+ if (pos_force_grouped) {
+ if (force_grouped_leader) {
+ cur_leader = force_grouped_leader;
+ cur_leaders_grp = force_grouped_leader->core.leader;
+ } else {
+ cur_leader = force_grouped_leader = pos;
+ cur_leaders_grp = &pos->core;
+ }
+ } else {
+ cur_leader = pos;
+ cur_leaders_grp = pos->core.leader;
+ }
+ }
}
if (pos_leader != cur_leader) {
/* The leader changed so update it. */
evsel__set_leader(pos, cur_leader);
}
+ last_event_was_forced_leader = (force_grouped_leader == pos);
}
list_for_each_entry(pos, list, core.node) {
struct evsel *pos_leader = evsel__leader(pos);
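
A recurring change in this patch is replacing opendir()/readdir() with the
tools/lib/api io_dir helpers, which work on a raw O_DIRECTORY fd and avoid
libc's DIR allocation. The pattern, sketched as a stand-alone walker:

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>
	#include <api/io_dir.h>

	static int for_each_entry(const char *path)
	{
		struct io_dirent64 *ent;
		struct io_dir dir;

		io_dir__init(&dir, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
		if (dir.dirfd < 0)
			return -1;

		while ((ent = io_dir__readdir(&dir)) != NULL) {
			if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
				continue;
			/* ... process ent->d_name ... */
		}
		close(dir.dirfd); /* the io_dir itself holds no other state */
		return 0;
	}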
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 3f4334ec6231..e176a34ab088 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -74,6 +74,7 @@ enum parse_events__term_type {
PARSE_EVENTS__TERM_TYPE_DRV_CFG,
PARSE_EVENTS__TERM_TYPE_PERCORE,
PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT,
+ PARSE_EVENTS__TERM_TYPE_AUX_ACTION,
PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE,
PARSE_EVENTS__TERM_TYPE_METRIC_ID,
PARSE_EVENTS__TERM_TYPE_RAW,
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 14e5bd856a18..7ed86e3e34e3 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -53,21 +53,25 @@ static int str(yyscan_t scanner, int token)
YYSTYPE *yylval = parse_events_get_lval(scanner);
char *text = parse_events_get_text(scanner);
- if (text[0] != '\'') {
- yylval->str = strdup(text);
- } else {
- /*
- * If a text tag specified on the command line
- * contains opening single quite ' then it is
- * expected that the tag ends with single quote
- * as well, like this:
- * name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\'
- * quotes need to be escaped to bypass shell
- * processing.
- */
- yylval->str = strndup(&text[1], strlen(text) - 2);
- }
+ yylval->str = strdup(text);
+ return token;
+}
+
+static int quoted_str(yyscan_t scanner, int token)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+ /*
+ * If a text tag specified on the command line
+ * contains an opening single quote ' then it is
+ * expected that the tag ends with a single quote
+ * as well, like this:
+ * name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\'
+ * quotes need to be escaped to bypass shell
+ * processing.
+ */
+ yylval->str = strndup(&text[1], strlen(text) - 2);
return token;
}
@@ -235,9 +239,16 @@ event [^,{}/]+
num_dec [0-9]+
num_hex 0x[a-fA-F0-9]{1,16}
num_raw_hex [a-fA-F0-9]{1,16}
-name [a-zA-Z0-9_*?\[\]][a-zA-Z0-9_*?.\[\]!\-]*
-name_tag [\'][a-zA-Z0-9_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\']
-name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
+/* Regular pattern to match the token PE_NAME. */
+name_start [a-zA-Z0-9_*?\[\]]
+name {name_start}[a-zA-Z0-9_*?.\[\]!\-]*
+/* PE_NAME token when inside a config term list, allows ':'. */
+term_name {name_start}[a-zA-Z0-9_*?.\[\]!\-:]*
+/*
+ * PE_NAME token when quoted, allows ':,.='.
+ * Matches the RHS of terms like: name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'.
+ */
+quoted_name [\']{name_start}[a-zA-Z0-9_*?.\[\]!\-:,\.=]*[\']
drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
/*
* If you add a modifier you need to update check_modifier().
@@ -321,6 +332,7 @@ overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); }
no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); }
percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); }
aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
+aux-action { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_ACTION); }
aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); }
cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
@@ -340,7 +352,9 @@ r0x{num_raw_hex} { return str(yyscanner, PE_RAW); }
{lc_type} { return lc_str(yyscanner, _parse_state); }
{lc_type}-{lc_op_result} { return lc_str(yyscanner, _parse_state); }
{lc_type}-{lc_op_result}-{lc_op_result} { return lc_str(yyscanner, _parse_state); }
-{name_minus} { return str(yyscanner, PE_NAME); }
+{num_dec} { return value(_parse_state, yyscanner, 10); }
+{num_hex} { return value(_parse_state, yyscanner, 16); }
+{term_name} { return str(yyscanner, PE_NAME); }
@{drv_cfg_term} { return drv_str(yyscanner, PE_DRV_CFG_TERM); }
}
@@ -409,7 +423,7 @@ r{num_raw_hex} { return str(yyscanner, PE_RAW); }
{modifier_event} { return modifiers(_parse_state, yyscanner); }
{name} { return str(yyscanner, PE_NAME); }
-{name_tag} { return str(yyscanner, PE_NAME); }
+{quoted_name} { return quoted_str(yyscanner, PE_NAME); }
"/" { BEGIN(config); return '/'; }
, { BEGIN(event); return ','; }
: { return ':'; }
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
index 00adf872bf00..2e62f272fda8 100644
--- a/tools/perf/util/path.c
+++ b/tools/perf/util/path.c
@@ -68,14 +68,12 @@ bool is_directory(const char *base_path, const struct dirent *dent)
return S_ISDIR(st.st_mode);
}
-bool is_executable_file(const char *base_path, const struct dirent *dent)
+bool is_directory_at(int dir_fd, const char *path)
{
- char path[PATH_MAX];
struct stat st;
- snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name);
- if (stat(path, &st))
+ if (fstatat(dir_fd, path, &st, /*flags=*/0))
return false;
- return !S_ISDIR(st.st_mode) && (st.st_mode & S_IXUSR);
+ return S_ISDIR(st.st_mode);
}
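
is_directory_at() is the fd-relative counterpart of is_directory(): built on
fstatat(), it lets callers that already hold a directory fd classify entries
without re-resolving a full path. Paired with the io_dir walker sketched
earlier (hypothetical caller):

	/* Skip anything that is not a subdirectory of the open dir fd. */
	while ((ent = io_dir__readdir(&dir)) != NULL) {
		if (!is_directory_at(dir.dirfd, ent->d_name))
			continue;
		/* ... recurse into the subdirectory ... */
	}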
diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h
index d94902c22222..fb850fb55c60 100644
--- a/tools/perf/util/path.h
+++ b/tools/perf/util/path.h
@@ -12,6 +12,6 @@ int path__join3(char *bf, size_t size, const char *path1, const char *path2, con
bool is_regular_file(const char *file);
bool is_directory(const char *base_path, const struct dirent *dent);
-bool is_executable_file(const char *base_path, const struct dirent *dent);
+bool is_directory_at(int dir_fd, const char *path);
#endif /* _PERF_PATH_H */
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 59fbbba79697..66b666d9ce64 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -79,24 +79,22 @@ static void __p_read_format(char *buf, size_t size, u64 value)
#define ENUM_ID_TO_STR_CASE(x) case x: return (#x);
static const char *stringify_perf_type_id(struct perf_pmu *pmu, u32 type)
{
- if (pmu)
- return pmu->name;
-
switch (type) {
ENUM_ID_TO_STR_CASE(PERF_TYPE_HARDWARE)
ENUM_ID_TO_STR_CASE(PERF_TYPE_SOFTWARE)
ENUM_ID_TO_STR_CASE(PERF_TYPE_TRACEPOINT)
ENUM_ID_TO_STR_CASE(PERF_TYPE_HW_CACHE)
- ENUM_ID_TO_STR_CASE(PERF_TYPE_RAW)
ENUM_ID_TO_STR_CASE(PERF_TYPE_BREAKPOINT)
+ case PERF_TYPE_RAW:
+ return pmu ? pmu->name : "PERF_TYPE_RAW";
default:
- return NULL;
+ return pmu ? pmu->name : NULL;
}
}
static const char *stringify_perf_hw_id(u64 value)
{
- switch (value) {
+ switch (value & PERF_HW_EVENT_MASK) {
ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CPU_CYCLES)
ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_INSTRUCTIONS)
ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_REFERENCES)
@@ -169,83 +167,100 @@ static const char *stringify_perf_sw_id(u64 value)
}
#undef ENUM_ID_TO_STR_CASE
-#define PRINT_ID(_s, _f) \
-do { \
- const char *__s = _s; \
- if (__s == NULL) \
- snprintf(buf, size, _f, value); \
- else \
- snprintf(buf, size, _f" (%s)", value, __s); \
-} while (0)
-#define print_id_unsigned(_s) PRINT_ID(_s, "%"PRIu64)
-#define print_id_hex(_s) PRINT_ID(_s, "%#"PRIx64)
+static void print_id_unsigned(char *buf, size_t size, u64 value, const char *s)
+{
+ if (s == NULL)
+ snprintf(buf, size, "%"PRIu64, value);
+ else
+ snprintf(buf, size, "%"PRIu64" (%s)", value, s);
+}
+
+static void print_id_hex(char *buf, size_t size, u64 value, const char *s)
+{
+ if (s == NULL)
+ snprintf(buf, size, "%#"PRIx64, value);
+ else
+ snprintf(buf, size, "%#"PRIx64" (%s)", value, s);
+}
-static void __p_type_id(struct perf_pmu *pmu, char *buf, size_t size, u64 value)
+static void __p_type_id(char *buf, size_t size, struct perf_pmu *pmu, u32 type)
{
- print_id_unsigned(stringify_perf_type_id(pmu, value));
+ print_id_unsigned(buf, size, type, stringify_perf_type_id(pmu, type));
}
-static void __p_config_hw_id(char *buf, size_t size, u64 value)
+static void __p_config_hw_id(char *buf, size_t size, struct perf_pmu *pmu, u64 config)
{
- print_id_hex(stringify_perf_hw_id(value));
+ const char *name = stringify_perf_hw_id(config);
+
+ if (name == NULL) {
+ if (pmu == NULL) {
+ snprintf(buf, size, "%#"PRIx64, config);
+ } else {
+ snprintf(buf, size, "%#"PRIx64" (%s/config=%#"PRIx64"/)", config, pmu->name,
+ config);
+ }
+ } else {
+ if (pmu == NULL)
+ snprintf(buf, size, "%#"PRIx64" (%s)", config, name);
+ else
+ snprintf(buf, size, "%#"PRIx64" (%s/%s/)", config, pmu->name, name);
+ }
}
-static void __p_config_sw_id(char *buf, size_t size, u64 value)
+static void __p_config_sw_id(char *buf, size_t size, u64 id)
{
- print_id_hex(stringify_perf_sw_id(value));
+ print_id_hex(buf, size, id, stringify_perf_sw_id(id));
}
-static void __p_config_hw_cache_id(char *buf, size_t size, u64 value)
+static void __p_config_hw_cache_id(char *buf, size_t size, struct perf_pmu *pmu, u64 config)
{
- const char *hw_cache_str = stringify_perf_hw_cache_id(value & 0xff);
+ const char *hw_cache_str = stringify_perf_hw_cache_id(config & 0xff);
const char *hw_cache_op_str =
- stringify_perf_hw_cache_op_id((value & 0xff00) >> 8);
+ stringify_perf_hw_cache_op_id((config & 0xff00) >> 8);
const char *hw_cache_op_result_str =
- stringify_perf_hw_cache_op_result_id((value & 0xff0000) >> 16);
-
- if (hw_cache_str == NULL || hw_cache_op_str == NULL ||
- hw_cache_op_result_str == NULL) {
- snprintf(buf, size, "%#"PRIx64, value);
+ stringify_perf_hw_cache_op_result_id((config & 0xff0000) >> 16);
+
+ if (hw_cache_str == NULL || hw_cache_op_str == NULL || hw_cache_op_result_str == NULL) {
+ if (pmu == NULL) {
+ snprintf(buf, size, "%#"PRIx64, config);
+ } else {
+ snprintf(buf, size, "%#"PRIx64" (%s/config=%#"PRIx64"/)", config, pmu->name,
+ config);
+ }
} else {
- snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)", value,
- hw_cache_op_result_str, hw_cache_op_str, hw_cache_str);
+ if (pmu == NULL) {
+ snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)", config,
+ hw_cache_op_result_str, hw_cache_op_str, hw_cache_str);
+ } else {
+ snprintf(buf, size, "%#"PRIx64" (%s/%s | %s | %s/)", config, pmu->name,
+ hw_cache_op_result_str, hw_cache_op_str, hw_cache_str);
+ }
}
}
-#ifdef HAVE_LIBTRACEEVENT
-static void __p_config_tracepoint_id(char *buf, size_t size, u64 value)
+static void __p_config_tracepoint_id(char *buf, size_t size, u64 id)
{
- char *str = tracepoint_id_to_name(value);
+ char *str = tracepoint_id_to_name(id);
- print_id_hex(str);
+ print_id_hex(buf, size, id, str);
free(str);
}
-#endif
-static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type, u64 value)
+static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type, u64 config)
{
- const char *name = perf_pmu__name_from_config(pmu, value);
-
- if (name) {
- print_id_hex(name);
- return;
- }
switch (type) {
case PERF_TYPE_HARDWARE:
- return __p_config_hw_id(buf, size, value);
+ return __p_config_hw_id(buf, size, pmu, config);
case PERF_TYPE_SOFTWARE:
- return __p_config_sw_id(buf, size, value);
+ return __p_config_sw_id(buf, size, config);
case PERF_TYPE_HW_CACHE:
- return __p_config_hw_cache_id(buf, size, value);
+ return __p_config_hw_cache_id(buf, size, pmu, config);
case PERF_TYPE_TRACEPOINT:
-#ifdef HAVE_LIBTRACEEVENT
- return __p_config_tracepoint_id(buf, size, value);
-#endif
+ return __p_config_tracepoint_id(buf, size, config);
case PERF_TYPE_RAW:
case PERF_TYPE_BREAKPOINT:
default:
- snprintf(buf, size, "%#"PRIx64, value);
- return;
+ return print_id_hex(buf, size, config, perf_pmu__name_from_config(pmu, config));
}
}
@@ -257,7 +272,7 @@ static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type
#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val)
#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
#define p_read_format(val) __p_read_format(buf, BUF_SIZE, val)
-#define p_type_id(val) __p_type_id(pmu, buf, BUF_SIZE, val)
+#define p_type_id(val) __p_type_id(buf, BUF_SIZE, pmu, val)
#define p_config_id(val) __p_config_id(pmu, buf, BUF_SIZE, attr->type, val)
#define PRINT_ATTRn(_n, _f, _p, _a) \
@@ -277,6 +292,13 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
char buf[BUF_SIZE];
int ret = 0;
+ if (!pmu && (attr->type == PERF_TYPE_HARDWARE || attr->type == PERF_TYPE_HW_CACHE)) {
+ u32 extended_type = attr->config >> PERF_PMU_TYPE_SHIFT;
+
+ if (extended_type)
+ pmu = perf_pmus__find_by_type(extended_type);
+ }
+
PRINT_ATTRn("type", type, p_type_id, true);
PRINT_ATTRf(size, p_unsigned);
PRINT_ATTRn("config", config, p_config_id, true);
@@ -335,6 +357,9 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(sample_max_stack, p_unsigned);
PRINT_ATTRf(aux_sample_size, p_unsigned);
PRINT_ATTRf(sig_data, p_unsigned);
+ PRINT_ATTRf(aux_start_paused, p_unsigned);
+ PRINT_ATTRf(aux_pause, p_unsigned);
+ PRINT_ATTRf(aux_resume, p_unsigned);
return ret;
}
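
Legacy hardware and hw-cache events can carry an extended PMU type in the
high config bits. When no PMU was passed in, the printer now recovers it so
hybrid attrs print with the right PMU name. The decoding, sketched with the
perf UAPI constants (PERF_PMU_TYPE_SHIFT is 32, PERF_HW_EVENT_MASK the low
32 bits):

	#include <stdint.h>

	#define PERF_PMU_TYPE_SHIFT 32
	#define PERF_HW_EVENT_MASK 0xffffffffULL

	/* Split an extended config into (PMU type, hardware event id). */
	static void split_extended_config(uint64_t config, uint32_t *pmu_type,
					  uint64_t *hw_id)
	{
		*pmu_type = (uint32_t)(config >> PERF_PMU_TYPE_SHIFT);
		*hw_id = config & PERF_HW_EVENT_MASK;
	}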
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 08a9d0bd9301..e2e3969e12d3 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -12,6 +12,8 @@
#include <stdbool.h>
#include <dirent.h>
#include <api/fs/fs.h>
+#include <api/io.h>
+#include <api/io_dir.h>
#include <locale.h>
#include <fnmatch.h>
#include <math.h>
@@ -35,12 +37,12 @@
#define UNIT_MAX_LEN 31 /* max length for event unit name */
enum event_source {
- /* An event loaded from /sys/devices/<pmu>/events. */
+ /* An event loaded from /sys/bus/event_source/devices/<pmu>/events. */
EVENT_SRC_SYSFS,
/* An event loaded from a CPUID matched json file. */
EVENT_SRC_CPU_JSON,
/*
- * An event loaded from a /sys/devices/<pmu>/identifier matched json
+ * An event loaded from a /sys/bus/event_source/devices/<pmu>/identifier matched json
* file.
*/
EVENT_SRC_SYS_JSON,
@@ -194,19 +196,17 @@ static void perf_pmu_format__load(const struct perf_pmu *pmu, struct perf_pmu_fo
*/
static int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load)
{
- struct dirent *evt_ent;
- DIR *format_dir;
+ struct io_dirent64 *evt_ent;
+ struct io_dir format_dir;
int ret = 0;
- format_dir = fdopendir(dirfd);
- if (!format_dir)
- return -EINVAL;
+ io_dir__init(&format_dir, dirfd);
- while ((evt_ent = readdir(format_dir)) != NULL) {
+ while ((evt_ent = io_dir__readdir(&format_dir)) != NULL) {
struct perf_pmu_format *format;
char *name = evt_ent->d_name;
- if (!strcmp(name, ".") || !strcmp(name, ".."))
+ if (io_dir__is_dir(&format_dir, evt_ent))
continue;
format = perf_pmu__new_format(&pmu->format, name);
@@ -233,7 +233,7 @@ static int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_lo
}
}
- closedir(format_dir);
+ close(format_dir.dirfd);
return ret;
}
@@ -595,7 +595,7 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
};
if (pmu_events_table__find_event(pmu->events_table, pmu, name,
update_alias, &data) == 0)
- pmu->cpu_json_aliases++;
+ pmu->cpu_common_json_aliases++;
}
pmu->sysfs_aliases++;
break;
@@ -634,14 +634,12 @@ static inline bool pmu_alias_info_file(const char *name)
*/
static int __pmu_aliases_parse(struct perf_pmu *pmu, int events_dir_fd)
{
- struct dirent *evt_ent;
- DIR *event_dir;
+ struct io_dirent64 *evt_ent;
+ struct io_dir event_dir;
- event_dir = fdopendir(events_dir_fd);
- if (!event_dir)
- return -EINVAL;
+ io_dir__init(&event_dir, events_dir_fd);
- while ((evt_ent = readdir(event_dir))) {
+ while ((evt_ent = io_dir__readdir(&event_dir))) {
char *name = evt_ent->d_name;
int fd;
FILE *file;
@@ -673,7 +671,6 @@ static int __pmu_aliases_parse(struct perf_pmu *pmu, int events_dir_fd)
fclose(file);
}
- closedir(event_dir);
pmu->sysfs_aliases_loaded = true;
return 0;
}
@@ -748,32 +745,41 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, int err_loc, struct lis
* Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64)
* may have a "cpus" file.
*/
-static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *name, bool is_core)
+static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *pmu_name, bool is_core)
{
- struct perf_cpu_map *cpus;
const char *templates[] = {
"cpumask",
"cpus",
NULL
};
const char **template;
- char pmu_name[PATH_MAX];
- struct perf_pmu pmu = {.name = pmu_name};
- FILE *file;
- strlcpy(pmu_name, name, sizeof(pmu_name));
for (template = templates; *template; template++) {
- file = perf_pmu__open_file_at(&pmu, dirfd, *template);
- if (!file)
+ struct io io;
+ char buf[128];
+ char *cpumask = NULL;
+ size_t cpumask_len;
+ ssize_t ret;
+ struct perf_cpu_map *cpus;
+
+ io.fd = perf_pmu__pathname_fd(dirfd, pmu_name, *template, O_RDONLY);
+ if (io.fd < 0)
continue;
- cpus = perf_cpu_map__read(file);
- fclose(file);
+
+ io__init(&io, io.fd, buf, sizeof(buf));
+ ret = io__getline(&io, &cpumask, &cpumask_len);
+ close(io.fd);
+ if (ret < 0)
+ continue;
+
+ cpus = perf_cpu_map__new(cpumask);
+ free(cpumask);
if (cpus)
return cpus;
}
/* Nothing found, for core PMUs assume this means all CPUs. */
- return is_core ? perf_cpu_map__get(cpu_map__online()) : NULL;
+ return is_core ? cpu_map__online() : NULL;
}
static bool pmu_is_uncore(int dirfd, const char *name)
@@ -837,21 +843,23 @@ static size_t pmu_deduped_name_len(const struct perf_pmu *pmu, const char *name,
}
/**
- * perf_pmu__match_ignoring_suffix - Does the pmu_name match tok ignoring any
- * trailing suffix? The Suffix must be in form
- * tok_{digits}, or tok{digits}.
+ * perf_pmu__match_wildcard - Does the pmu_name start with tok and is then only
+ * followed by nothing or a suffix? tok may contain
+ * part of a suffix.
* @pmu_name: The pmu_name with possible suffix.
- * @tok: The possible match to pmu_name without suffix.
+ * @tok: The wildcard argument to match.
*/
-static bool perf_pmu__match_ignoring_suffix(const char *pmu_name, const char *tok)
+static bool perf_pmu__match_wildcard(const char *pmu_name, const char *tok)
{
const char *p, *suffix;
bool has_hex = false;
+ size_t tok_len = strlen(tok);
- if (strncmp(pmu_name, tok, strlen(tok)))
+ /* Check start of pmu_name for equality. */
+ if (strncmp(pmu_name, tok, tok_len))
return false;
- suffix = p = pmu_name + strlen(tok);
+ suffix = p = pmu_name + tok_len;
if (*p == 0)
return true;
@@ -877,60 +885,84 @@ static bool perf_pmu__match_ignoring_suffix(const char *pmu_name, const char *to
}
/**
- * pmu_uncore_alias_match - does name match the PMU name?
- * @pmu_name: the json struct pmu_event name. This may lack a suffix (which
+ * perf_pmu__match_ignoring_suffix_uncore - Does the pmu_name match tok ignoring
+ * any trailing suffix on pmu_name and
+ * tok? The suffix must be in the form
+ * tok_{digits}, or tok{digits}.
+ * @pmu_name: The pmu_name with possible suffix.
+ * @tok: The possible match to pmu_name.
+ */
+static bool perf_pmu__match_ignoring_suffix_uncore(const char *pmu_name, const char *tok)
+{
+ size_t pmu_name_len, tok_len;
+
+ /* For robustness, check for NULL. */
+ if (pmu_name == NULL)
+ return tok == NULL;
+
+ /* uncore_ prefixes are ignored. */
+ if (!strncmp(pmu_name, "uncore_", 7))
+ pmu_name += 7;
+ if (!strncmp(tok, "uncore_", 7))
+ tok += 7;
+
+ pmu_name_len = pmu_name_len_no_suffix(pmu_name);
+ tok_len = pmu_name_len_no_suffix(tok);
+ if (pmu_name_len != tok_len)
+ return false;
+
+ return strncmp(pmu_name, tok, pmu_name_len) == 0;
+}
+
+/**
+ * perf_pmu__match_wildcard_uncore - does to_match match the PMU's name?
+ * @pmu_name: The pmu->name or pmu->alias to match against.
+ * @to_match: the json struct pmu_event name. This may lack a suffix (which
* matches) or be of the form "socket,pmuname" which will match
* "socketX_pmunameY".
- * @name: a real full PMU name as from sysfs.
*/
-static bool pmu_uncore_alias_match(const char *pmu_name, const char *name)
+static bool perf_pmu__match_wildcard_uncore(const char *pmu_name, const char *to_match)
{
- char *tmp = NULL, *tok, *str;
- bool res;
-
- if (strchr(pmu_name, ',') == NULL)
- return perf_pmu__match_ignoring_suffix(name, pmu_name);
+ char *mutable_to_match, *tok, *tmp;
- str = strdup(pmu_name);
- if (!str)
+ if (!pmu_name)
return false;
- /*
- * uncore alias may be from different PMU with common prefix
- */
- tok = strtok_r(str, ",", &tmp);
- if (strncmp(pmu_name, tok, strlen(tok))) {
- res = false;
- goto out;
- }
+ /* uncore_ prefixes are ignored. */
+ if (!strncmp(pmu_name, "uncore_", 7))
+ pmu_name += 7;
+ if (!strncmp(to_match, "uncore_", 7))
+ to_match += 7;
- /*
- * Match more complex aliases where the alias name is a comma-delimited
- * list of tokens, orderly contained in the matching PMU name.
- *
- * Example: For alias "socket,pmuname" and PMU "socketX_pmunameY", we
- * match "socket" in "socketX_pmunameY" and then "pmuname" in
- * "pmunameY".
- */
- while (1) {
- char *next_tok = strtok_r(NULL, ",", &tmp);
+ if (strchr(to_match, ',') == NULL)
+ return perf_pmu__match_wildcard(pmu_name, to_match);
- name = strstr(name, tok);
- if (!name ||
- (!next_tok && !perf_pmu__match_ignoring_suffix(name, tok))) {
- res = false;
- goto out;
+ /* Process comma separated list of PMU name components. */
+ mutable_to_match = strdup(to_match);
+ if (!mutable_to_match)
+ return false;
+
+ tok = strtok_r(mutable_to_match, ",", &tmp);
+ while (tok) {
+ size_t tok_len = strlen(tok);
+
+ if (strncmp(pmu_name, tok, tok_len)) {
+ /* Mismatch between part of pmu_name and tok. */
+ free(mutable_to_match);
+ return false;
}
- if (!next_tok)
- break;
- tok = next_tok;
- name += strlen(tok);
+ /* Move pmu_name forward over tok and suffix. */
+ pmu_name += tok_len;
+ while (*pmu_name != '\0' && isdigit(*pmu_name))
+ pmu_name++;
+ if (*pmu_name == '_')
+ pmu_name++;
+
+ tok = strtok_r(NULL, ",", &tmp);
}
-
- res = true;
-out:
- free(str);
- return res;
+ free(mutable_to_match);
+ return *pmu_name == '\0';
}
bool pmu_uncore_identifier_match(const char *compat, const char *id)
@@ -993,11 +1025,19 @@ static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe,
{
struct perf_pmu *pmu = vdata;
- if (!pe->compat || !pe->pmu)
+ if (!pe->compat || !pe->pmu) {
+ /* No data to match. */
return 0;
+ }
- if (pmu_uncore_alias_match(pe->pmu, pmu->name) &&
- pmu_uncore_identifier_match(pe->compat, pmu->id)) {
+ if (!perf_pmu__match_wildcard_uncore(pmu->name, pe->pmu) &&
+ !perf_pmu__match_wildcard_uncore(pmu->alias_name, pe->pmu)) {
+ /* PMU name/alias_name don't match. */
+ return 0;
+ }
+
+ if (pmu_uncore_identifier_match(pe->compat, pmu->id)) {
+ /* Id matched. */
perf_pmu__new_alias(pmu,
pe->name,
pe->desc,
@@ -1006,7 +1046,6 @@ static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe,
pe,
EVENT_SRC_SYS_JSON);
}
-
return 0;
}
@@ -1763,6 +1802,7 @@ int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_call
"no-overwrite",
"percore",
"aux-output",
+ "aux-action=(pause|resume|start-paused)",
"aux-sample-size=number",
};
struct perf_pmu_format *format;
@@ -1840,9 +1880,10 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu)
if (pmu->cpu_aliases_added)
nr += pmu->cpu_json_aliases;
else if (pmu->events_table)
- nr += pmu_events_table__num_events(pmu->events_table, pmu) - pmu->cpu_json_aliases;
+ nr += pmu_events_table__num_events(pmu->events_table, pmu) -
+ pmu->cpu_common_json_aliases;
else
- assert(pmu->cpu_json_aliases == 0);
+ assert(pmu->cpu_json_aliases == 0 && pmu->cpu_common_json_aliases == 0);
if (perf_pmu__is_tool(pmu))
nr -= tool_pmu__num_skip_events();
@@ -1963,15 +2004,85 @@ out:
return ret;
}
-bool pmu__name_match(const struct perf_pmu *pmu, const char *pmu_name)
+static bool perf_pmu___name_match(const struct perf_pmu *pmu, const char *to_match, bool wildcard)
{
- return !strcmp(pmu->name, pmu_name) ||
- (pmu->is_uncore && pmu_uncore_alias_match(pmu_name, pmu->name)) ||
+ const char *names[2] = {
+ pmu->name,
+ pmu->alias_name,
+ };
+ if (pmu->is_core) {
+ for (size_t i = 0; i < ARRAY_SIZE(names); i++) {
+ const char *name = names[i];
+
+ if (!name)
+ continue;
+
+ if (!strcmp(name, to_match)) {
+ /* Exact name match. */
+ return true;
+ }
+ }
+ if (!strcmp(to_match, "default_core")) {
+ /*
+ * jevents and tests use default_core as a marker for any core
+ * PMU as the PMU name varies across architectures.
+ */
+ return true;
+ }
+ return false;
+ }
+ if (!pmu->is_uncore) {
/*
- * jevents and tests use default_core as a marker for any core
- * PMU as the PMU name varies across architectures.
+ * PMU isn't core or uncore, some kind of broken CPU mask
+ * situation. Only match exact name.
*/
- (pmu->is_core && !strcmp(pmu_name, "default_core"));
+ for (size_t i = 0; i < ARRAY_SIZE(names); i++) {
+ const char *name = names[i];
+
+ if (!name)
+ continue;
+
+ if (!strcmp(name, to_match)) {
+ /* Exact name match. */
+ return true;
+ }
+ }
+ return false;
+ }
+ for (size_t i = 0; i < ARRAY_SIZE(names); i++) {
+ const char *name = names[i];
+
+ if (!name)
+ continue;
+
+ if (wildcard && perf_pmu__match_wildcard_uncore(name, to_match))
+ return true;
+ if (!wildcard && perf_pmu__match_ignoring_suffix_uncore(name, to_match))
+ return true;
+ }
+ return false;
+}
+
+/**
+ * perf_pmu__name_wildcard_match - Called by the jevents generated code to see
+ * if pmu matches the json to_match string.
+ * @pmu: The pmu whose name/alias to match.
+ * @to_match: The possible match to pmu_name.
+ */
+bool perf_pmu__name_wildcard_match(const struct perf_pmu *pmu, const char *to_match)
+{
+ return perf_pmu___name_match(pmu, to_match, /*wildcard=*/true);
+}
+
+/**
+ * perf_pmu__name_no_suffix_match - Does pmu's name match to_match ignoring any
+ * trailing suffix on the pmu_name and/or tok?
+ * @pmu: The pmu whose name/alias to match.
+ * @to_match: The possible match to pmu_name.
+ */
+bool perf_pmu__name_no_suffix_match(const struct perf_pmu *pmu, const char *to_match)
+{
+ return perf_pmu___name_match(pmu, to_match, /*wildcard=*/false);
}
bool perf_pmu__is_software(const struct perf_pmu *pmu)
@@ -2110,10 +2221,9 @@ static void perf_pmu__del_caps(struct perf_pmu *pmu)
*/
int perf_pmu__caps_parse(struct perf_pmu *pmu)
{
- struct stat st;
char caps_path[PATH_MAX];
- DIR *caps_dir;
- struct dirent *evt_ent;
+ struct io_dir caps_dir;
+ struct io_dirent64 *evt_ent;
int caps_fd;
if (pmu->caps_initialized)
@@ -2124,24 +2234,21 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu)
if (!perf_pmu__pathname_scnprintf(caps_path, sizeof(caps_path), pmu->name, "caps"))
return -1;
- if (stat(caps_path, &st) < 0) {
+ caps_fd = open(caps_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY);
+ if (caps_fd == -1) {
pmu->caps_initialized = true;
return 0; /* no error if caps does not exist */
}
- caps_dir = opendir(caps_path);
- if (!caps_dir)
- return -EINVAL;
-
- caps_fd = dirfd(caps_dir);
+ io_dir__init(&caps_dir, caps_fd);
- while ((evt_ent = readdir(caps_dir)) != NULL) {
+ while ((evt_ent = io_dir__readdir(&caps_dir)) != NULL) {
char *name = evt_ent->d_name;
char value[128];
FILE *file;
int fd;
- if (!strcmp(name, ".") || !strcmp(name, ".."))
+ if (io_dir__is_dir(&caps_dir, evt_ent))
continue;
fd = openat(caps_fd, name, O_RDONLY);
@@ -2163,7 +2270,7 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu)
fclose(file);
}
- closedir(caps_dir);
+ close(caps_fd);
pmu->caps_initialized = true;
return pmu->nr_caps;
@@ -2218,29 +2325,31 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
name ?: "N/A", buf, config_name, config);
}
-bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok)
+bool perf_pmu__wildcard_match(const struct perf_pmu *pmu, const char *wildcard_to_match)
{
- const char *name = pmu->name;
- bool need_fnmatch = strisglob(tok);
+ const char *names[2] = {
+ pmu->name,
+ pmu->alias_name,
+ };
+ bool need_fnmatch = strisglob(wildcard_to_match);
- if (!strncmp(tok, "uncore_", 7))
- tok += 7;
- if (!strncmp(name, "uncore_", 7))
- name += 7;
+ if (!strncmp(wildcard_to_match, "uncore_", 7))
+ wildcard_to_match += 7;
- if (perf_pmu__match_ignoring_suffix(name, tok) ||
- (need_fnmatch && !fnmatch(tok, name, 0)))
- return true;
+ for (size_t i = 0; i < ARRAY_SIZE(names); i++) {
+ const char *pmu_name = names[i];
- name = pmu->alias_name;
- if (!name)
- return false;
+ if (!pmu_name)
+ continue;
- if (!strncmp(name, "uncore_", 7))
- name += 7;
+ if (!strncmp(pmu_name, "uncore_", 7))
+ pmu_name += 7;
- return perf_pmu__match_ignoring_suffix(name, tok) ||
- (need_fnmatch && !fnmatch(tok, name, 0));
+ if (perf_pmu__match_wildcard(pmu_name, wildcard_to_match) ||
+ (need_fnmatch && !fnmatch(wildcard_to_match, pmu_name, 0)))
+ return true;
+ }
+ return false;
}
int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size)
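To make the matching order of the rewritten perf_pmu__wildcard_match() concrete, here is a self-contained sketch: strip an optional "uncore_" prefix from both sides, then accept a suffix-ignoring match or, when the token contains glob characters, an fnmatch(). Both helpers below are simplified stand-ins for perf's own routines, not copies of them:

    #include <ctype.h>
    #include <fnmatch.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* Accept "name" when it is "tok" plus a trailing suffix of digits and
     * underscores, e.g. "cbox_0" for "cbox" (simplified stand-in). */
    static bool match_ignoring_suffix(const char *name, const char *tok)
    {
        size_t n = strlen(tok);

        if (strncmp(name, tok, n))
            return false;
        for (name += n; *name; name++) {
            if (!isdigit((unsigned char)*name) && *name != '_')
                return false;
        }
        return true;
    }

    static bool wildcard_match(const char *pmu_name, const char *tok)
    {
        if (!strncmp(tok, "uncore_", 7))
            tok += 7;
        if (!strncmp(pmu_name, "uncore_", 7))
            pmu_name += 7;
        return match_ignoring_suffix(pmu_name, tok) ||
               (strpbrk(tok, "*?[") && !fnmatch(tok, pmu_name, 0));
    }

    int main(void)
    {
        printf("%d\n", wildcard_match("uncore_cbox_0", "cbox"));  /* 1 */
        printf("%d\n", wildcard_match("uncore_imc_1", "imc*"));   /* 1 */
        printf("%d\n", wildcard_match("cpu_atom", "cpu_core"));   /* 0 */
        return 0;
    }

The real function additionally walks both pmu->name and pmu->alias_name, as the loop in the hunk shows.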
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index dbed6c243a5e..b93014cc3670 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -37,6 +37,8 @@ struct perf_pmu_caps {
};
enum {
+ PERF_PMU_TYPE_PE_START = 0,
+ PERF_PMU_TYPE_PE_END = 0xFFFEFFFF,
PERF_PMU_TYPE_HWMON_START = 0xFFFF0000,
PERF_PMU_TYPE_HWMON_END = 0xFFFFFFFD,
PERF_PMU_TYPE_TOOL = 0xFFFFFFFE,
@@ -134,6 +136,11 @@ struct perf_pmu {
uint32_t cpu_json_aliases;
/** @sys_json_aliases: Number of json event aliases loaded matching the PMU's identifier. */
uint32_t sys_json_aliases;
+ /**
+ * @cpu_common_json_aliases: Number of json events that overlapped with sysfs when
+ * loading all sysfs events.
+ */
+ uint32_t cpu_common_json_aliases;
/** @sysfs_aliases_loaded: Are sysfs aliases loaded from disk? */
bool sysfs_aliases_loaded;
/**
@@ -238,7 +245,8 @@ bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name);
size_t perf_pmu__num_events(struct perf_pmu *pmu);
int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus,
void *state, pmu_event_callback cb);
-bool pmu__name_match(const struct perf_pmu *pmu, const char *pmu_name);
+bool perf_pmu__name_wildcard_match(const struct perf_pmu *pmu, const char *to_match);
+bool perf_pmu__name_no_suffix_match(const struct perf_pmu *pmu, const char *to_match);
/**
* perf_pmu_is_software - is the PMU a software PMU as in it uses the
@@ -273,7 +281,7 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
const char *config_name);
void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
-bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok);
+bool perf_pmu__wildcard_match(const struct perf_pmu *pmu, const char *wildcard_to_match);
int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size);
int perf_pmu__pathname_scnprintf(char *buf, size_t size,
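With the new PERF_PMU_TYPE_PE_START/PE_END range, a PMU's numeric type alone tells which backend owns it, with no sysfs lookup needed. A quick sketch of that bucketing; classify() is a hypothetical helper and the constants are copied from the hunk above:

    #include <stdint.h>
    #include <stdio.h>

    /* Ranges from the pmu.h hunk above. */
    #define PERF_PMU_TYPE_PE_END      0xFFFEFFFFu
    #define PERF_PMU_TYPE_HWMON_START 0xFFFF0000u
    #define PERF_PMU_TYPE_HWMON_END   0xFFFFFFFDu

    static const char *classify(uint32_t type)
    {
        if (type <= PERF_PMU_TYPE_PE_END)
            return "perf_event PMU";    /* core or uncore, backed by sysfs */
        if (type >= PERF_PMU_TYPE_HWMON_START && type <= PERF_PMU_TYPE_HWMON_END)
            return "hwmon PMU";         /* fake type, perf-internal */
        return "tool PMU";              /* PERF_PMU_TYPE_TOOL */
    }

    int main(void)
    {
        printf("%s\n", classify(4));           /* a typical dynamic PMU type */
        printf("%s\n", classify(0xFFFF0002));  /* a hwmon slot */
        printf("%s\n", classify(0xFFFFFFFE));  /* the tool PMU */
        return 0;
    }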
diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
index b493da0d22ef..b99292de7669 100644
--- a/tools/perf/util/pmus.c
+++ b/tools/perf/util/pmus.c
@@ -3,10 +3,10 @@
#include <linux/list_sort.h>
#include <linux/string.h>
#include <linux/zalloc.h>
+#include <api/io_dir.h>
#include <subcmd/pager.h>
#include <sys/types.h>
#include <ctype.h>
-#include <dirent.h>
#include <pthread.h>
#include <string.h>
#include <unistd.h>
@@ -37,10 +37,25 @@
*/
static LIST_HEAD(core_pmus);
static LIST_HEAD(other_pmus);
-static bool read_sysfs_core_pmus;
-static bool read_sysfs_all_pmus;
+enum perf_tool_pmu_type {
+ PERF_TOOL_PMU_TYPE_PE_CORE,
+ PERF_TOOL_PMU_TYPE_PE_OTHER,
+ PERF_TOOL_PMU_TYPE_TOOL,
+ PERF_TOOL_PMU_TYPE_HWMON,
+
+#define PERF_TOOL_PMU_TYPE_PE_CORE_MASK (1 << PERF_TOOL_PMU_TYPE_PE_CORE)
+#define PERF_TOOL_PMU_TYPE_PE_OTHER_MASK (1 << PERF_TOOL_PMU_TYPE_PE_OTHER)
+#define PERF_TOOL_PMU_TYPE_TOOL_MASK (1 << PERF_TOOL_PMU_TYPE_TOOL)
+#define PERF_TOOL_PMU_TYPE_HWMON_MASK (1 << PERF_TOOL_PMU_TYPE_HWMON)
+
+#define PERF_TOOL_PMU_TYPE_ALL_MASK (PERF_TOOL_PMU_TYPE_PE_CORE_MASK | \
+ PERF_TOOL_PMU_TYPE_PE_OTHER_MASK | \
+ PERF_TOOL_PMU_TYPE_TOOL_MASK | \
+ PERF_TOOL_PMU_TYPE_HWMON_MASK)
+};
+static unsigned int read_pmu_types;
-static void pmu_read_sysfs(bool core_only);
+static void pmu_read_sysfs(unsigned int to_read_pmus);
size_t pmu_name_len_no_suffix(const char *str)
{
@@ -102,8 +117,7 @@ void perf_pmus__destroy(void)
perf_pmu__delete(pmu);
}
- read_sysfs_core_pmus = false;
- read_sysfs_all_pmus = false;
+ read_pmu_types = 0;
}
static struct perf_pmu *pmu_find(const char *name)
@@ -129,6 +143,7 @@ struct perf_pmu *perf_pmus__find(const char *name)
struct perf_pmu *pmu;
int dirfd;
bool core_pmu;
+ unsigned int to_read_pmus = 0;
/*
* Once PMU is loaded it stays in the list,
@@ -139,11 +154,11 @@ struct perf_pmu *perf_pmus__find(const char *name)
if (pmu)
return pmu;
- if (read_sysfs_all_pmus)
+ if (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK)
return NULL;
core_pmu = is_pmu_core(name);
- if (core_pmu && read_sysfs_core_pmus)
+ if (core_pmu && (read_pmu_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK))
return NULL;
dirfd = perf_pmu__event_source_devices_fd();
@@ -151,15 +166,27 @@ struct perf_pmu *perf_pmus__find(const char *name)
/*eager_load=*/false);
close(dirfd);
- if (!pmu) {
- /*
- * Looking up an inidividual PMU failed. This may mean name is
- * an alias, so read the PMUs from sysfs and try to find again.
- */
- pmu_read_sysfs(core_pmu);
+ if (pmu)
+ return pmu;
+
+ /* Looking up an individual perf event PMU failed, check if a tool PMU should be read. */
+ if (!strncmp(name, "hwmon_", 6))
+ to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK;
+ else if (!strcmp(name, "tool"))
+ to_read_pmus |= PERF_TOOL_PMU_TYPE_TOOL_MASK;
+
+ if (to_read_pmus) {
+ pmu_read_sysfs(to_read_pmus);
pmu = pmu_find(name);
+ if (pmu)
+ return pmu;
}
- return pmu;
+ /* Read all necessary PMUs from sysfs and see if the PMU is found. */
+ to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK;
+ if (!core_pmu)
+ to_read_pmus |= PERF_TOOL_PMU_TYPE_PE_OTHER_MASK;
+ pmu_read_sysfs(to_read_pmus);
+ return pmu_find(name);
}
static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name)
@@ -176,11 +203,11 @@ static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name)
if (pmu)
return pmu;
- if (read_sysfs_all_pmus)
+ if (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK)
return NULL;
core_pmu = is_pmu_core(name);
- if (core_pmu && read_sysfs_core_pmus)
+ if (core_pmu && (read_pmu_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK))
return NULL;
return perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name,
@@ -197,52 +224,57 @@ static int pmus_cmp(void *priv __maybe_unused,
}
/* Add all pmus in sysfs to pmu list: */
-static void pmu_read_sysfs(bool core_only)
+static void pmu_read_sysfs(unsigned int to_read_types)
{
- int fd;
- DIR *dir;
- struct dirent *dent;
struct perf_pmu *tool_pmu;
- if (read_sysfs_all_pmus || (core_only && read_sysfs_core_pmus))
+ if ((read_pmu_types & to_read_types) == to_read_types) {
+ /* All requested PMU types have been read. */
return;
+ }
- fd = perf_pmu__event_source_devices_fd();
- if (fd < 0)
- return;
+ if (to_read_types & (PERF_TOOL_PMU_TYPE_PE_CORE_MASK | PERF_TOOL_PMU_TYPE_PE_OTHER_MASK)) {
+ int fd = perf_pmu__event_source_devices_fd();
+ struct io_dir dir;
+ struct io_dirent64 *dent;
+ bool core_only = (to_read_types & PERF_TOOL_PMU_TYPE_PE_OTHER_MASK) == 0;
- dir = fdopendir(fd);
- if (!dir) {
- close(fd);
- return;
- }
+ if (fd < 0)
+ goto skip_pe_pmus;
- while ((dent = readdir(dir))) {
- if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
- continue;
- if (core_only && !is_pmu_core(dent->d_name))
- continue;
- /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */
- perf_pmu__find2(fd, dent->d_name);
- }
+ io_dir__init(&dir, fd);
+
+ while ((dent = io_dir__readdir(&dir)) != NULL) {
+ if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
+ continue;
+ if (core_only && !is_pmu_core(dent->d_name))
+ continue;
+ /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */
+ perf_pmu__find2(fd, dent->d_name);
+ }
- closedir(dir);
- if (list_empty(&core_pmus)) {
+ close(fd);
+ }
+skip_pe_pmus:
+ if ((to_read_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK) && list_empty(&core_pmus)) {
if (!perf_pmu__create_placeholder_core_pmu(&core_pmus))
pr_err("Failure to set up any core PMUs\n");
}
list_sort(NULL, &core_pmus, pmus_cmp);
- if (!core_only) {
- tool_pmu = perf_pmus__tool_pmu();
- list_add_tail(&tool_pmu->list, &other_pmus);
- perf_pmus__read_hwmon_pmus(&other_pmus);
+
+ if ((to_read_types & PERF_TOOL_PMU_TYPE_TOOL_MASK) != 0 &&
+ (read_pmu_types & PERF_TOOL_PMU_TYPE_TOOL_MASK) == 0) {
+ tool_pmu = tool_pmu__new();
+ if (tool_pmu)
+ list_add_tail(&tool_pmu->list, &other_pmus);
}
+ if ((to_read_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) != 0 &&
+ (read_pmu_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) == 0)
+ perf_pmus__read_hwmon_pmus(&other_pmus);
+
list_sort(NULL, &other_pmus, pmus_cmp);
- if (!list_empty(&core_pmus)) {
- read_sysfs_core_pmus = true;
- if (!core_only)
- read_sysfs_all_pmus = true;
- }
+
+ read_pmu_types |= to_read_types;
}
static struct perf_pmu *__perf_pmus__find_by_type(unsigned int type)
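Folding the two booleans into the read_pmu_types bitmask turns "is everything the caller wants already loaded?" into one mask comparison, and lets each kind of PMU be scanned at most once. A minimal sketch of the idiom, with hypothetical WANT_* masks standing in for PERF_TOOL_PMU_TYPE_*_MASK:

    #include <stdio.h>

    #define WANT_CORE  (1u << 0)
    #define WANT_TOOL  (1u << 1)

    static unsigned int loaded;    /* plays the role of read_pmu_types */

    static void load(unsigned int want)
    {
        if ((loaded & want) == want)
            return;                         /* all requested kinds present */
        if (want & WANT_CORE & ~loaded)
            printf("scanning core PMUs\n");
        if (want & WANT_TOOL & ~loaded)
            printf("adding tool PMU\n");
        loaded |= want;                     /* record what has been read */
    }

    int main(void)
    {
        load(WANT_CORE);                    /* scans core PMUs */
        load(WANT_CORE);                    /* no-op, already loaded */
        load(WANT_CORE | WANT_TOOL);        /* only the tool PMU is added */
        return 0;
    }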
@@ -263,12 +295,21 @@ static struct perf_pmu *__perf_pmus__find_by_type(unsigned int type)
struct perf_pmu *perf_pmus__find_by_type(unsigned int type)
{
+ unsigned int to_read_pmus;
struct perf_pmu *pmu = __perf_pmus__find_by_type(type);
- if (pmu || read_sysfs_all_pmus)
+ if (pmu || (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK))
return pmu;
- pmu_read_sysfs(/*core_only=*/false);
+ if (type >= PERF_PMU_TYPE_PE_START && type <= PERF_PMU_TYPE_PE_END) {
+ to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK |
+ PERF_TOOL_PMU_TYPE_PE_OTHER_MASK;
+ } else if (type >= PERF_PMU_TYPE_HWMON_START && type <= PERF_PMU_TYPE_HWMON_END) {
+ to_read_pmus = PERF_TOOL_PMU_TYPE_HWMON_MASK;
+ } else {
+ to_read_pmus = PERF_TOOL_PMU_TYPE_TOOL_MASK;
+ }
+ pmu_read_sysfs(to_read_pmus);
pmu = __perf_pmus__find_by_type(type);
return pmu;
}
@@ -282,7 +323,7 @@ struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu)
bool use_core_pmus = !pmu || pmu->is_core;
if (!pmu) {
- pmu_read_sysfs(/*core_only=*/false);
+ pmu_read_sysfs(PERF_TOOL_PMU_TYPE_ALL_MASK);
pmu = list_prepare_entry(pmu, &core_pmus, list);
}
if (use_core_pmus) {
@@ -300,7 +341,7 @@ struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu)
struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu)
{
if (!pmu) {
- pmu_read_sysfs(/*core_only=*/true);
+ pmu_read_sysfs(PERF_TOOL_PMU_TYPE_PE_CORE_MASK);
return list_first_entry_or_null(&core_pmus, typeof(*pmu), list);
}
list_for_each_entry_continue(pmu, &core_pmus, list)
@@ -316,7 +357,7 @@ static struct perf_pmu *perf_pmus__scan_skip_duplicates(struct perf_pmu *pmu)
const char *last_pmu_name = (pmu && pmu->name) ? pmu->name : "";
if (!pmu) {
- pmu_read_sysfs(/*core_only=*/false);
+ pmu_read_sysfs(PERF_TOOL_PMU_TYPE_ALL_MASK);
pmu = list_prepare_entry(pmu, &core_pmus, list);
} else
last_pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "");
@@ -674,47 +715,28 @@ bool perf_pmus__supports_extended_type(void)
return perf_pmus__do_support_extended_type;
}
-char *perf_pmus__default_pmu_name(void)
-{
- int fd;
- DIR *dir;
- struct dirent *dent;
- char *result = NULL;
-
- if (!list_empty(&core_pmus))
- return strdup(list_first_entry(&core_pmus, struct perf_pmu, list)->name);
-
- fd = perf_pmu__event_source_devices_fd();
- if (fd < 0)
- return strdup("cpu");
-
- dir = fdopendir(fd);
- if (!dir) {
- close(fd);
- return strdup("cpu");
- }
-
- while ((dent = readdir(dir))) {
- if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
- continue;
- if (is_pmu_core(dent->d_name)) {
- result = strdup(dent->d_name);
- break;
- }
- }
-
- closedir(dir);
- return result ?: strdup("cpu");
-}
-
struct perf_pmu *evsel__find_pmu(const struct evsel *evsel)
{
struct perf_pmu *pmu = evsel->pmu;
+ bool legacy_core_type;
- if (!pmu) {
- pmu = perf_pmus__find_by_type(evsel->core.attr.type);
- ((struct evsel *)evsel)->pmu = pmu;
+ if (pmu)
+ return pmu;
+
+ pmu = perf_pmus__find_by_type(evsel->core.attr.type);
+ legacy_core_type =
+ evsel->core.attr.type == PERF_TYPE_HARDWARE ||
+ evsel->core.attr.type == PERF_TYPE_HW_CACHE;
+ if (!pmu && legacy_core_type) {
+ if (perf_pmus__supports_extended_type()) {
+ u32 type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT;
+
+ pmu = perf_pmus__find_by_type(type);
+ } else {
+ pmu = perf_pmus__find_core_pmu();
+ }
}
+ ((struct evsel *)evsel)->pmu = pmu;
return pmu;
}
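When extended types are supported, a legacy PERF_TYPE_HARDWARE or PERF_TYPE_HW_CACHE event carries its owning PMU in the upper bits of attr.config, which is what the new evsel__find_pmu() path decodes. A sketch of that split, assuming PERF_PMU_TYPE_SHIFT is 32 as in perf's usage:

    #include <stdint.h>
    #include <stdio.h>

    #define PERF_PMU_TYPE_SHIFT 32   /* extended PMU type in config[63:32] */

    /* Split a legacy hardware event config into (PMU type, event id). */
    static void decode(uint64_t config)
    {
        uint32_t pmu_type = config >> PERF_PMU_TYPE_SHIFT;
        uint32_t event    = (uint32_t)config;

        printf("pmu type %u, event %#x\n", pmu_type, event);
    }

    int main(void)
    {
        /* e.g. event 0 (cycles) on a PMU with hypothetical dynamic type 8 */
        decode(((uint64_t)8 << PERF_PMU_TYPE_SHIFT) | 0);
        return 0;
    }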
diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h
index a0cb0eb2ff97..8def20e615ad 100644
--- a/tools/perf/util/pmus.h
+++ b/tools/perf/util/pmus.h
@@ -27,7 +27,6 @@ void perf_pmus__print_raw_pmu_events(const struct print_callbacks *print_cb, voi
bool perf_pmus__have_event(const char *pname, const char *name);
int perf_pmus__num_core_pmus(void);
bool perf_pmus__supports_extended_type(void);
-char *perf_pmus__default_pmu_name(void);
struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name);
struct perf_pmu *perf_pmus__add_test_hwmon_pmu(int hwmon_dir,
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index a786cbfb0ff5..83aaf7cda635 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -268,6 +268,7 @@ bool is_event_supported(u8 type, u64 config)
ret = evsel__open(evsel, NULL, tmap) >= 0;
}
+ evsel__close(evsel);
evsel__delete(evsel);
}
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index eaa0318e9b87..307ad6242a4e 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -1383,20 +1383,20 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
if (p == buf) {
semantic_error("No file/function name in '%s'.\n", p);
err = -EINVAL;
- goto err;
+ goto out;
}
*(p++) = '\0';
err = parse_line_num(&p, &lr->start, "start line");
if (err)
- goto err;
+ goto out;
if (*p == '+' || *p == '-') {
const char c = *(p++);
err = parse_line_num(&p, &lr->end, "end line");
if (err)
- goto err;
+ goto out;
if (c == '+') {
lr->end += lr->start;
@@ -1416,11 +1416,11 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
if (lr->start > lr->end) {
semantic_error("Start line must be smaller"
" than end line.\n");
- goto err;
+ goto out;
}
if (*p != '\0') {
semantic_error("Tailing with invalid str '%s'.\n", p);
- goto err;
+ goto out;
}
}
@@ -1431,7 +1431,7 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
lr->file = strdup_esq(p);
if (lr->file == NULL) {
err = -ENOMEM;
- goto err;
+ goto out;
}
}
if (*buf != '\0')
@@ -1439,7 +1439,7 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
if (!lr->function && !lr->file) {
semantic_error("Only '@*' is not allowed.\n");
err = -EINVAL;
- goto err;
+ goto out;
}
} else if (strpbrk_esq(buf, "/."))
lr->file = strdup_esq(buf);
@@ -1448,10 +1448,10 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
else { /* Invalid name */
semantic_error("'%s' is not a valid function name.\n", buf);
err = -EINVAL;
- goto err;
+ goto out;
}
-err:
+out:
free(buf);
return err;
}
@@ -2775,7 +2775,7 @@ int show_perf_probe_events(struct strfilter *filter)
static int get_new_event_name(char *buf, size_t len, const char *base,
struct strlist *namelist, bool ret_event,
- bool allow_suffix)
+ bool allow_suffix, bool not_C_symname)
{
int i, ret;
char *p, *nbase;
@@ -2786,10 +2786,24 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
if (!nbase)
return -ENOMEM;
- /* Cut off the dot suffixes (e.g. .const, .isra) and version suffixes */
- p = strpbrk(nbase, ".@");
- if (p && p != nbase)
- *p = '\0';
+ if (not_C_symname) {
+		/* Replace each run of non-alphanumeric chars with a single '_' */
+ char *s, *d;
+
+ s = d = nbase;
+ do {
+ if (*s && !isalnum(*s)) {
+ if (d != nbase && *(d - 1) != '_')
+ *d++ = '_';
+ } else
+ *d++ = *s;
+ } while (*s++);
+ } else {
+ /* Cut off the dot suffixes (e.g. .const, .isra) and version suffixes */
+ p = strpbrk(nbase, ".@");
+ if (p && p != nbase)
+ *p = '\0';
+ }
/* Try no suffix number */
ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? "__return" : "");
@@ -2884,6 +2898,7 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev,
bool allow_suffix)
{
const char *event, *group;
+ bool not_C_symname = true;
char buf[MAX_EVENT_NAME_LEN];
int ret;
@@ -2898,8 +2913,10 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev,
(strncmp(pev->point.function, "0x", 2) != 0) &&
!strisglob(pev->point.function))
event = pev->point.function;
- else
+ else {
event = tev->point.realname;
+ not_C_symname = !is_known_C_lang(tev->lang);
+ }
}
if (pev->group && !pev->sdt)
group = pev->group;
@@ -2916,7 +2933,8 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev,
/* Get an unused new event name */
ret = get_new_event_name(buf, sizeof(buf), event, namelist,
- tev->point.retprobe, allow_suffix);
+ tev->point.retprobe, allow_suffix,
+ not_C_symname);
if (ret < 0)
return ret;
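For symbols from languages other than C, the new path rewrites every run of characters that cannot appear in a C identifier into a single underscore, so e.g. a C++ or Rust symbol becomes a legal event name. A standalone sketch of that transform; sanitize() is an illustrative wrapper around the same loop as in the hunk:

    #include <ctype.h>
    #include <stdio.h>

    /* Collapse runs of non-alphanumeric chars into a single '_', as
     * get_new_event_name() now does for non-C symbol names. */
    static void sanitize(char *nbase)
    {
        char *s = nbase, *d = nbase;

        do {
            if (*s && !isalnum((unsigned char)*s)) {
                if (d != nbase && *(d - 1) != '_')
                    *d++ = '_';
            } else {
                *d++ = *s;
            }
        } while (*s++);
    }

    int main(void)
    {
        char name[] = "std::vector<int>::push_back";

        sanitize(name);
        printf("%s\n", name);   /* std_vector_int_push_back */
        return 0;
    }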
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 61a5f4ff4e9c..71905ede0207 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -58,6 +58,7 @@ struct probe_trace_event {
char *group; /* Group name */
struct probe_trace_point point; /* Trace point */
int nargs; /* Number of args */
+ int lang; /* Dwarf language code */
bool uprobes; /* uprobes only */
struct probe_trace_arg *args; /* Arguments */
};
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 7f2ee0cb43ca..3cc7c40f5097 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -35,6 +35,19 @@
/* Kprobe tracer basic type is up to u64 */
#define MAX_BASIC_TYPE_BITS 64
+bool is_known_C_lang(int lang)
+{
+ switch (lang) {
+ case DW_LANG_C89:
+ case DW_LANG_C:
+ case DW_LANG_C99:
+ case DW_LANG_C11:
+ return true;
+ default:
+ return false;
+ }
+}
+
/*
* Probe finder related functions
*/
@@ -960,6 +973,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die),
(unsigned long)dwarf_dieoffset(sp_die));
pf->fname = fname;
+	pf->abstract_dieoffset = dwarf_dieoffset(sp_die);
if (pp->line) { /* Function relative line */
dwarf_decl_line(sp_die, &pf->lno);
pf->lno += pp->line;
@@ -1166,6 +1180,8 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data)
struct local_vars_finder *vf = data;
struct probe_finder *pf = vf->pf;
int tag;
+ Dwarf_Attribute attr;
+ Dwarf_Die var_die;
tag = dwarf_tag(die_mem);
if (tag == DW_TAG_formal_parameter ||
@@ -1183,10 +1199,22 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data)
}
}
- if (dwarf_haspc(die_mem, vf->pf->addr))
+ if (dwarf_haspc(die_mem, vf->pf->addr)) {
+ /*
+		 * When DW_AT_entry_pc contains the instruction address,
+		 * also check that the DW_AT_abstract_origin of die_mem
+		 * points to the correct DIE.
+ */
+ if (dwarf_attr(die_mem, DW_AT_abstract_origin, &attr)) {
+ dwarf_formref_die(&attr, &var_die);
+			if (pf->abstract_dieoffset != dwarf_dieoffset(&var_die))
+ goto out;
+ }
return DIE_FIND_CB_CONTINUE;
- else
- return DIE_FIND_CB_SIBLING;
+ }
+
+out:
+ return DIE_FIND_CB_SIBLING;
}
static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf,
@@ -1270,6 +1298,8 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
goto end;
}
+ tev->lang = dwarf_srclang(dwarf_diecu(sc_die, &pf->cu_die, NULL, NULL));
+
pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
tev->point.offset);
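Only the C dialect codes count as known C, so C++ or Rust symbols take the name-sanitizing path added earlier in this series. A tiny self-check; the DW_LANG_* values are the standard DWARF language codes, defined locally so the sketch builds without dwarf.h:

    #include <stdbool.h>
    #include <stdio.h>

    /* Standard DWARF language codes (values per the DWARF spec). */
    #define DW_LANG_C89         0x0001
    #define DW_LANG_C           0x0002
    #define DW_LANG_C_plus_plus 0x0004
    #define DW_LANG_C99         0x000c
    #define DW_LANG_Rust        0x001c
    #define DW_LANG_C11         0x001d

    static bool is_known_C_lang(int lang)
    {
        switch (lang) {
        case DW_LANG_C89:
        case DW_LANG_C:
        case DW_LANG_C99:
        case DW_LANG_C11:
            return true;
        default:
            return false;
        }
    }

    int main(void)
    {
        printf("C99:  %d\n", is_known_C_lang(DW_LANG_C99));          /* 1 */
        printf("C++:  %d\n", is_known_C_lang(DW_LANG_C_plus_plus));  /* 0 */
        printf("Rust: %d\n", is_known_C_lang(DW_LANG_Rust));         /* 0 */
        return 0;
    }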
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index be7b46ea2460..ecd6d937c592 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -26,6 +26,9 @@ static inline int is_c_varname(const char *name)
#include "dwarf-aux.h"
#include "debuginfo.h"
+/* Check whether the DWARF language code is a known C dialect */
+bool is_known_C_lang(int lang);
+
/* Find probe_trace_events specified by perf_probe_event from debuginfo */
int debuginfo__find_trace_events(struct debuginfo *dbg,
struct perf_probe_event *pev,
@@ -60,6 +63,7 @@ struct probe_finder {
const char *fname; /* Real file name */
Dwarf_Die cu_die; /* Current CU */
Dwarf_Die sp_die;
+	Dwarf_Off abstract_dieoffset;
struct intlist *lcache; /* Line cache for lazy match */
/* For variable searching */
@@ -103,6 +107,8 @@ struct line_finder {
int found;
};
+#else
+#define is_known_C_lang(lang) (false)
#endif /* HAVE_LIBDW_SUPPORT */
#endif /*_PROBE_FINDER_H */
diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c
index a1d1e4ef6257..141ffa129c69 100644
--- a/tools/perf/util/pstack.c
+++ b/tools/perf/util/pstack.c
@@ -63,20 +63,6 @@ void pstack__push(struct pstack *pstack, void *key)
pstack->entries[pstack->top++] = key;
}
-void *pstack__pop(struct pstack *pstack)
-{
- void *ret;
-
- if (pstack->top == 0) {
- pr_err("%s: underflow!\n", __func__);
- return NULL;
- }
-
- ret = pstack->entries[--pstack->top];
- pstack->entries[pstack->top] = NULL;
- return ret;
-}
-
void *pstack__peek(struct pstack *pstack)
{
if (pstack->top == 0)
diff --git a/tools/perf/util/pstack.h b/tools/perf/util/pstack.h
index 8729b8be061d..712051b8130f 100644
--- a/tools/perf/util/pstack.h
+++ b/tools/perf/util/pstack.h
@@ -10,7 +10,6 @@ void pstack__delete(struct pstack *pstack);
bool pstack__empty(const struct pstack *pstack);
void pstack__remove(struct pstack *pstack, void *key);
void pstack__push(struct pstack *pstack, void *key);
-void *pstack__pop(struct pstack *pstack);
void *pstack__peek(struct pstack *pstack);
#endif /* _PERF_PSTACK_ */
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 2096cdbaa53b..f3c05da25b4a 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -9,57 +9,20 @@
#include <event-parse.h>
#endif
#include <perf/mmap.h>
+#include "callchain.h"
#include "evlist.h"
#include "evsel.h"
#include "event.h"
#include "print_binary.h"
+#include "record.h"
+#include "strbuf.h"
#include "thread_map.h"
#include "trace-event.h"
#include "mmap.h"
-#include "util/bpf-filter.h"
-#include "util/env.h"
-#include "util/kvm-stat.h"
-#include "util/stat.h"
-#include "util/kwork.h"
#include "util/sample.h"
-#include "util/lock-contention.h"
#include <internal/lib.h>
-#include "../builtin.h"
-
-#if PY_MAJOR_VERSION < 3
-#define _PyUnicode_FromString(arg) \
- PyString_FromString(arg)
-#define _PyUnicode_AsString(arg) \
- PyString_AsString(arg)
-#define _PyUnicode_FromFormat(...) \
- PyString_FromFormat(__VA_ARGS__)
-#define _PyLong_FromLong(arg) \
- PyInt_FromLong(arg)
-#else
-
-#define _PyUnicode_FromString(arg) \
- PyUnicode_FromString(arg)
-#define _PyUnicode_FromFormat(...) \
- PyUnicode_FromFormat(__VA_ARGS__)
-#define _PyLong_FromLong(arg) \
- PyLong_FromLong(arg)
-#endif
-
-#ifndef Py_TYPE
-#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
-#endif
-
-/* Define PyVarObject_HEAD_INIT for python 2.5 */
-#ifndef PyVarObject_HEAD_INIT
-# define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
-#endif
-
-#if PY_MAJOR_VERSION < 3
-PyMODINIT_FUNC initperf(void);
-#else
PyMODINIT_FUNC PyInit_perf(void);
-#endif
#define member_def(type, member, ptype, help) \
{ #member, ptype, \
@@ -79,7 +42,7 @@ struct pyrf_event {
};
#define sample_members \
- sample_member_def(sample_ip, ip, T_ULONGLONG, "event type"), \
+ sample_member_def(sample_ip, ip, T_ULONGLONG, "event ip"), \
sample_member_def(sample_pid, pid, T_INT, "event pid"), \
sample_member_def(sample_tid, tid, T_INT, "event tid"), \
sample_member_def(sample_time, time, T_ULONGLONG, "event timestamp"), \
@@ -89,7 +52,7 @@ struct pyrf_event {
sample_member_def(sample_period, period, T_ULONGLONG, "event period"), \
sample_member_def(sample_cpu, cpu, T_UINT, "event cpu"),
-static char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object.");
+static const char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object.");
static PyMemberDef pyrf_mmap_event__members[] = {
sample_members
@@ -104,7 +67,7 @@ static PyMemberDef pyrf_mmap_event__members[] = {
{ .name = NULL, },
};
-static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent)
+static PyObject *pyrf_mmap_event__repr(const struct pyrf_event *pevent)
{
PyObject *ret;
char *s;
@@ -117,7 +80,7 @@ static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent)
pevent->event.mmap.pgoff, pevent->event.mmap.filename) < 0) {
ret = PyErr_NoMemory();
} else {
- ret = _PyUnicode_FromString(s);
+ ret = PyUnicode_FromString(s);
free(s);
}
return ret;
@@ -133,7 +96,7 @@ static PyTypeObject pyrf_mmap_event__type = {
.tp_repr = (reprfunc)pyrf_mmap_event__repr,
};
-static char pyrf_task_event__doc[] = PyDoc_STR("perf task (fork/exit) event object.");
+static const char pyrf_task_event__doc[] = PyDoc_STR("perf task (fork/exit) event object.");
static PyMemberDef pyrf_task_event__members[] = {
sample_members
@@ -146,9 +109,9 @@ static PyMemberDef pyrf_task_event__members[] = {
{ .name = NULL, },
};
-static PyObject *pyrf_task_event__repr(struct pyrf_event *pevent)
+static PyObject *pyrf_task_event__repr(const struct pyrf_event *pevent)
{
- return _PyUnicode_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, "
+ return PyUnicode_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, "
"ptid: %u, time: %" PRI_lu64 "}",
pevent->event.header.type == PERF_RECORD_FORK ? "fork" : "exit",
pevent->event.fork.pid,
@@ -168,7 +131,7 @@ static PyTypeObject pyrf_task_event__type = {
.tp_repr = (reprfunc)pyrf_task_event__repr,
};
-static char pyrf_comm_event__doc[] = PyDoc_STR("perf comm event object.");
+static const char pyrf_comm_event__doc[] = PyDoc_STR("perf comm event object.");
static PyMemberDef pyrf_comm_event__members[] = {
sample_members
@@ -179,9 +142,9 @@ static PyMemberDef pyrf_comm_event__members[] = {
{ .name = NULL, },
};
-static PyObject *pyrf_comm_event__repr(struct pyrf_event *pevent)
+static PyObject *pyrf_comm_event__repr(const struct pyrf_event *pevent)
{
- return _PyUnicode_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }",
+ return PyUnicode_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }",
pevent->event.comm.pid,
pevent->event.comm.tid,
pevent->event.comm.comm);
@@ -197,7 +160,7 @@ static PyTypeObject pyrf_comm_event__type = {
.tp_repr = (reprfunc)pyrf_comm_event__repr,
};
-static char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object.");
+static const char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object.");
static PyMemberDef pyrf_throttle_event__members[] = {
sample_members
@@ -208,11 +171,12 @@ static PyMemberDef pyrf_throttle_event__members[] = {
{ .name = NULL, },
};
-static PyObject *pyrf_throttle_event__repr(struct pyrf_event *pevent)
+static PyObject *pyrf_throttle_event__repr(const struct pyrf_event *pevent)
{
- struct perf_record_throttle *te = (struct perf_record_throttle *)(&pevent->event.header + 1);
+ const struct perf_record_throttle *te = (const struct perf_record_throttle *)
+ (&pevent->event.header + 1);
- return _PyUnicode_FromFormat("{ type: %sthrottle, time: %" PRI_lu64 ", id: %" PRI_lu64
+ return PyUnicode_FromFormat("{ type: %sthrottle, time: %" PRI_lu64 ", id: %" PRI_lu64
", stream_id: %" PRI_lu64 " }",
pevent->event.header.type == PERF_RECORD_THROTTLE ? "" : "un",
te->time, te->id, te->stream_id);
@@ -228,7 +192,7 @@ static PyTypeObject pyrf_throttle_event__type = {
.tp_repr = (reprfunc)pyrf_throttle_event__repr,
};
-static char pyrf_lost_event__doc[] = PyDoc_STR("perf lost event object.");
+static const char pyrf_lost_event__doc[] = PyDoc_STR("perf lost event object.");
static PyMemberDef pyrf_lost_event__members[] = {
sample_members
@@ -237,7 +201,7 @@ static PyMemberDef pyrf_lost_event__members[] = {
{ .name = NULL, },
};
-static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent)
+static PyObject *pyrf_lost_event__repr(const struct pyrf_event *pevent)
{
PyObject *ret;
char *s;
@@ -247,7 +211,7 @@ static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent)
pevent->event.lost.id, pevent->event.lost.lost) < 0) {
ret = PyErr_NoMemory();
} else {
- ret = _PyUnicode_FromString(s);
+ ret = PyUnicode_FromString(s);
free(s);
}
return ret;
@@ -263,7 +227,7 @@ static PyTypeObject pyrf_lost_event__type = {
.tp_repr = (reprfunc)pyrf_lost_event__repr,
};
-static char pyrf_read_event__doc[] = PyDoc_STR("perf read event object.");
+static const char pyrf_read_event__doc[] = PyDoc_STR("perf read event object.");
static PyMemberDef pyrf_read_event__members[] = {
sample_members
@@ -272,9 +236,9 @@ static PyMemberDef pyrf_read_event__members[] = {
{ .name = NULL, },
};
-static PyObject *pyrf_read_event__repr(struct pyrf_event *pevent)
+static PyObject *pyrf_read_event__repr(const struct pyrf_event *pevent)
{
- return _PyUnicode_FromFormat("{ type: read, pid: %u, tid: %u }",
+ return PyUnicode_FromFormat("{ type: read, pid: %u, tid: %u }",
pevent->event.read.pid,
pevent->event.read.tid);
/*
@@ -293,7 +257,7 @@ static PyTypeObject pyrf_read_event__type = {
.tp_repr = (reprfunc)pyrf_read_event__repr,
};
-static char pyrf_sample_event__doc[] = PyDoc_STR("perf sample event object.");
+static const char pyrf_sample_event__doc[] = PyDoc_STR("perf sample event object.");
static PyMemberDef pyrf_sample_event__members[] = {
sample_members
@@ -301,7 +265,13 @@ static PyMemberDef pyrf_sample_event__members[] = {
{ .name = NULL, },
};
-static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent)
+static void pyrf_sample_event__delete(struct pyrf_event *pevent)
+{
+ perf_sample__exit(&pevent->sample);
+ Py_TYPE(pevent)->tp_free((PyObject*)pevent);
+}
+
+static PyObject *pyrf_sample_event__repr(const struct pyrf_event *pevent)
{
PyObject *ret;
char *s;
@@ -309,20 +279,20 @@ static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent)
if (asprintf(&s, "{ type: sample }") < 0) {
ret = PyErr_NoMemory();
} else {
- ret = _PyUnicode_FromString(s);
+ ret = PyUnicode_FromString(s);
free(s);
}
return ret;
}
#ifdef HAVE_LIBTRACEEVENT
-static bool is_tracepoint(struct pyrf_event *pevent)
+static bool is_tracepoint(const struct pyrf_event *pevent)
{
return pevent->evsel->core.attr.type == PERF_TYPE_TRACEPOINT;
}
static PyObject*
-tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field)
+tracepoint_field(const struct pyrf_event *pe, struct tep_format_field *field)
{
struct tep_handle *pevent = field->event->tep;
void *data = pe->sample.raw_data;
@@ -343,7 +313,7 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field)
}
if (field->flags & TEP_FIELD_IS_STRING &&
is_printable_array(data + offset, len)) {
- ret = _PyUnicode_FromString((char *)data + offset);
+ ret = PyUnicode_FromString((char *)data + offset);
} else {
ret = PyByteArray_FromStringAndSize((const char *) data + offset, len);
field->flags &= ~TEP_FIELD_IS_STRING;
@@ -367,23 +337,14 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name)
{
const char *str = _PyUnicode_AsString(PyObject_Str(attr_name));
struct evsel *evsel = pevent->evsel;
+ struct tep_event *tp_format = evsel__tp_format(evsel);
struct tep_format_field *field;
- if (!evsel->tp_format) {
- struct tep_event *tp_format;
-
- tp_format = trace_event__tp_format_id(evsel->core.attr.config);
- if (IS_ERR_OR_NULL(tp_format))
- return NULL;
-
- evsel->tp_format = tp_format;
- }
-
- field = tep_find_any_field(evsel->tp_format, str);
- if (!field)
+ if (IS_ERR_OR_NULL(tp_format))
return NULL;
- return tracepoint_field(pevent, field);
+ field = tep_find_any_field(tp_format, str);
+ return field ? tracepoint_field(pevent, field) : NULL;
}
#endif /* HAVE_LIBTRACEEVENT */
@@ -411,7 +372,7 @@ static PyTypeObject pyrf_sample_event__type = {
.tp_getattro = (getattrofunc) pyrf_sample_event__getattro,
};
-static char pyrf_context_switch_event__doc[] = PyDoc_STR("perf context_switch event object.");
+static const char pyrf_context_switch_event__doc[] = PyDoc_STR("perf context_switch event object.");
static PyMemberDef pyrf_context_switch_event__members[] = {
sample_members
@@ -421,7 +382,7 @@ static PyMemberDef pyrf_context_switch_event__members[] = {
{ .name = NULL, },
};
-static PyObject *pyrf_context_switch_event__repr(struct pyrf_event *pevent)
+static PyObject *pyrf_context_switch_event__repr(const struct pyrf_event *pevent)
{
PyObject *ret;
char *s;
@@ -432,7 +393,7 @@ static PyObject *pyrf_context_switch_event__repr(struct pyrf_event *pevent)
!!(pevent->event.header.misc & PERF_RECORD_MISC_SWITCH_OUT)) < 0) {
ret = PyErr_NoMemory();
} else {
- ret = _PyUnicode_FromString(s);
+ ret = PyUnicode_FromString(s);
free(s);
}
return ret;
@@ -459,6 +420,9 @@ static int pyrf_event__setup_types(void)
pyrf_sample_event__type.tp_new =
pyrf_context_switch_event__type.tp_new =
pyrf_throttle_event__type.tp_new = PyType_GenericNew;
+
+	pyrf_sample_event__type.tp_dealloc = (destructor)pyrf_sample_event__delete;
+
err = PyType_Ready(&pyrf_mmap_event__type);
if (err < 0)
goto out;
@@ -501,7 +465,7 @@ static PyTypeObject *pyrf_event__type[] = {
[PERF_RECORD_SWITCH_CPU_WIDE] = &pyrf_context_switch_event__type,
};
-static PyObject *pyrf_event__new(union perf_event *event)
+static PyObject *pyrf_event__new(const union perf_event *event)
{
struct pyrf_event *pevent;
PyTypeObject *ptype;
@@ -512,6 +476,11 @@ static PyObject *pyrf_event__new(union perf_event *event)
event->header.type == PERF_RECORD_SWITCH_CPU_WIDE))
return NULL;
+	// FIXME: this had better be dynamic, or we need to parse everything
+	// before calling perf_mmap__consume(), including tracepoint fields.
+ if (sizeof(pevent->event) < event->header.size)
+ return NULL;
+
ptype = pyrf_event__type[event->header.type];
pevent = PyObject_New(struct pyrf_event, ptype);
if (pevent != NULL)
@@ -569,7 +538,7 @@ static PySequenceMethods pyrf_cpu_map__sequence_methods = {
.sq_item = pyrf_cpu_map__item,
};
-static char pyrf_cpu_map__doc[] = PyDoc_STR("cpu map object.");
+static const char pyrf_cpu_map__doc[] = PyDoc_STR("cpu map object.");
static PyTypeObject pyrf_cpu_map__type = {
PyVarObject_HEAD_INIT(NULL, 0)
@@ -638,7 +607,7 @@ static PySequenceMethods pyrf_thread_map__sequence_methods = {
.sq_item = pyrf_thread_map__item,
};
-static char pyrf_thread_map__doc[] = PyDoc_STR("thread map object.");
+static const char pyrf_thread_map__doc[] = PyDoc_STR("thread map object.");
static PyTypeObject pyrf_thread_map__type = {
PyVarObject_HEAD_INIT(NULL, 0)
@@ -812,6 +781,17 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
return Py_None;
}
+static PyObject *pyrf_evsel__str(PyObject *self)
+{
+ struct pyrf_evsel *pevsel = (void *)self;
+ struct evsel *evsel = &pevsel->evsel;
+
+ if (!evsel->pmu)
+ return PyUnicode_FromFormat("evsel(%s)", evsel__name(evsel));
+
+ return PyUnicode_FromFormat("evsel(%s/%s/)", evsel->pmu->name, evsel__name(evsel));
+}
+
static PyMethodDef pyrf_evsel__methods[] = {
{
.ml_name = "open",
@@ -822,7 +802,29 @@ static PyMethodDef pyrf_evsel__methods[] = {
{ .ml_name = NULL, }
};
-static char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object.");
+#define evsel_member_def(member, ptype, help) \
+ { #member, ptype, \
+ offsetof(struct pyrf_evsel, evsel.member), \
+ 0, help }
+
+#define evsel_attr_member_def(member, ptype, help) \
+ { #member, ptype, \
+ offsetof(struct pyrf_evsel, evsel.core.attr.member), \
+ 0, help }
+
+static PyMemberDef pyrf_evsel__members[] = {
+ evsel_member_def(tracking, T_BOOL, "tracking event."),
+ evsel_attr_member_def(type, T_UINT, "attribute type."),
+ evsel_attr_member_def(size, T_UINT, "attribute size."),
+ evsel_attr_member_def(config, T_ULONGLONG, "attribute config."),
+ evsel_attr_member_def(sample_period, T_ULONGLONG, "attribute sample_period."),
+ evsel_attr_member_def(sample_type, T_ULONGLONG, "attribute sample_type."),
+ evsel_attr_member_def(read_format, T_ULONGLONG, "attribute read_format."),
+ evsel_attr_member_def(wakeup_events, T_UINT, "attribute wakeup_events."),
+ { .name = NULL, },
+};
+
+static const char pyrf_evsel__doc[] = PyDoc_STR("perf event selector object.");
static PyTypeObject pyrf_evsel__type = {
PyVarObject_HEAD_INIT(NULL, 0)
@@ -831,8 +833,11 @@ static PyTypeObject pyrf_evsel__type = {
.tp_dealloc = (destructor)pyrf_evsel__delete,
.tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
.tp_doc = pyrf_evsel__doc,
+ .tp_members = pyrf_evsel__members,
.tp_methods = pyrf_evsel__methods,
.tp_init = (initproc)pyrf_evsel__init,
+ .tp_str = pyrf_evsel__str,
+ .tp_repr = pyrf_evsel__str,
};
static int pyrf_evsel__setup_types(void)
@@ -869,6 +874,16 @@ static void pyrf_evlist__delete(struct pyrf_evlist *pevlist)
Py_TYPE(pevlist)->tp_free((PyObject*)pevlist);
}
+static PyObject *pyrf_evlist__all_cpus(struct pyrf_evlist *pevlist)
+{
+ struct pyrf_cpu_map *pcpu_map = PyObject_New(struct pyrf_cpu_map, &pyrf_cpu_map__type);
+
+ if (pcpu_map)
+ pcpu_map->cpus = perf_cpu_map__get(pevlist->evlist.core.all_cpus);
+
+ return (PyObject *)pcpu_map;
+}
+
static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
PyObject *args, PyObject *kwargs)
{
@@ -918,17 +933,8 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
for (i = 0; i < evlist->core.pollfd.nr; ++i) {
PyObject *file;
-#if PY_MAJOR_VERSION < 3
- FILE *fp = fdopen(evlist->core.pollfd.entries[i].fd, "r");
-
- if (fp == NULL)
- goto free_list;
-
- file = PyFile_FromFile(fp, "perf", "r", NULL);
-#else
file = PyFile_FromFd(evlist->core.pollfd.entries[i].fd, "perf", "r", -1,
NULL, NULL, NULL, 0);
-#endif
if (file == NULL)
goto free_list;
@@ -1011,20 +1017,22 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
evsel = evlist__event2evsel(evlist, event);
if (!evsel) {
+ Py_DECREF(pyevent);
Py_INCREF(Py_None);
return Py_None;
}
pevent->evsel = evsel;
- err = evsel__parse_sample(evsel, event, &pevent->sample);
-
- /* Consume the even only after we parsed it out. */
perf_mmap__consume(&md->core);
- if (err)
+ err = evsel__parse_sample(evsel, &pevent->event, &pevent->sample);
+ if (err) {
+ Py_DECREF(pyevent);
return PyErr_Format(PyExc_OSError,
"perf: can't parse sample, err=%d", err);
+ }
+
return pyevent;
}
end:
@@ -1046,8 +1054,53 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist,
return Py_None;
}
+static PyObject *pyrf_evlist__config(struct pyrf_evlist *pevlist)
+{
+ struct record_opts opts = {
+ .sample_time = true,
+ .mmap_pages = UINT_MAX,
+ .user_freq = UINT_MAX,
+ .user_interval = ULLONG_MAX,
+ .freq = 4000,
+ .target = {
+ .uses_mmap = true,
+ .default_per_cpu = true,
+ },
+ .nr_threads_synthesize = 1,
+ .ctl_fd = -1,
+ .ctl_fd_ack = -1,
+ .no_buffering = true,
+ .no_inherit = true,
+ };
+ struct evlist *evlist = &pevlist->evlist;
+
+ evlist__config(evlist, &opts, &callchain_param);
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyObject *pyrf_evlist__disable(struct pyrf_evlist *pevlist)
+{
+ evlist__disable(&pevlist->evlist);
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyObject *pyrf_evlist__enable(struct pyrf_evlist *pevlist)
+{
+ evlist__enable(&pevlist->evlist);
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
static PyMethodDef pyrf_evlist__methods[] = {
{
+ .ml_name = "all_cpus",
+ .ml_meth = (PyCFunction)pyrf_evlist__all_cpus,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("CPU map union of all evsel CPU maps.")
+ },
+ {
.ml_name = "mmap",
.ml_meth = (PyCFunction)pyrf_evlist__mmap,
.ml_flags = METH_VARARGS | METH_KEYWORDS,
@@ -1083,6 +1136,24 @@ static PyMethodDef pyrf_evlist__methods[] = {
.ml_flags = METH_VARARGS | METH_KEYWORDS,
.ml_doc = PyDoc_STR("reads an event.")
},
+ {
+ .ml_name = "config",
+ .ml_meth = (PyCFunction)pyrf_evlist__config,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("Apply default record options to the evlist.")
+ },
+ {
+ .ml_name = "disable",
+ .ml_meth = (PyCFunction)pyrf_evlist__disable,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("Disable the evsels in the evlist.")
+ },
+ {
+ .ml_name = "enable",
+ .ml_meth = (PyCFunction)pyrf_evlist__enable,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("Enable the evsels in the evlist.")
+ },
{ .ml_name = NULL, }
};
@@ -1098,8 +1169,10 @@ static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i)
struct pyrf_evlist *pevlist = (void *)obj;
struct evsel *pos;
- if (i >= pevlist->evlist.core.nr_entries)
+ if (i >= pevlist->evlist.core.nr_entries) {
+ PyErr_SetString(PyExc_IndexError, "Index out of range");
return NULL;
+ }
evlist__for_each_entry(&pevlist->evlist, pos) {
if (i-- == 0)
@@ -1109,12 +1182,36 @@ static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i)
return Py_BuildValue("O", container_of(pos, struct pyrf_evsel, evsel));
}
+static PyObject *pyrf_evlist__str(PyObject *self)
+{
+ struct pyrf_evlist *pevlist = (void *)self;
+ struct evsel *pos;
+ struct strbuf sb = STRBUF_INIT;
+ bool first = true;
+ PyObject *result;
+
+ strbuf_addstr(&sb, "evlist([");
+ evlist__for_each_entry(&pevlist->evlist, pos) {
+ if (!first)
+ strbuf_addch(&sb, ',');
+ if (!pos->pmu)
+ strbuf_addstr(&sb, evsel__name(pos));
+ else
+ strbuf_addf(&sb, "%s/%s/", pos->pmu->name, evsel__name(pos));
+ first = false;
+ }
+ strbuf_addstr(&sb, "])");
+ result = PyUnicode_FromString(sb.buf);
+ strbuf_release(&sb);
+ return result;
+}
+
static PySequenceMethods pyrf_evlist__sequence_methods = {
.sq_length = pyrf_evlist__length,
.sq_item = pyrf_evlist__item,
};
-static char pyrf_evlist__doc[] = PyDoc_STR("perf event selector list object.");
+static const char pyrf_evlist__doc[] = PyDoc_STR("perf event selector list object.");
static PyTypeObject pyrf_evlist__type = {
PyVarObject_HEAD_INIT(NULL, 0)
@@ -1126,6 +1223,8 @@ static PyTypeObject pyrf_evlist__type = {
.tp_doc = pyrf_evlist__doc,
.tp_methods = pyrf_evlist__methods,
.tp_init = (initproc)pyrf_evlist__init,
+ .tp_repr = pyrf_evlist__str,
+ .tp_str = pyrf_evlist__str,
};
static int pyrf_evlist__setup_types(void)
@@ -1136,10 +1235,12 @@ static int pyrf_evlist__setup_types(void)
#define PERF_CONST(name) { #name, PERF_##name }
-static struct {
+struct perf_constant {
const char *name;
int value;
-} perf__constants[] = {
+};
+
+static const struct perf_constant perf__constants[] = {
PERF_CONST(TYPE_HARDWARE),
PERF_CONST(TYPE_SOFTWARE),
PERF_CONST(TYPE_TRACEPOINT),
@@ -1234,12 +1335,74 @@ static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel,
tp_format = trace_event__tp_format(sys, name);
if (IS_ERR(tp_format))
- return _PyLong_FromLong(-1);
+ return PyLong_FromLong(-1);
- return _PyLong_FromLong(tp_format->id);
+ return PyLong_FromLong(tp_format->id);
#endif // HAVE_LIBTRACEEVENT
}
+static PyObject *pyrf_evsel__from_evsel(struct evsel *evsel)
+{
+ struct pyrf_evsel *pevsel = PyObject_New(struct pyrf_evsel, &pyrf_evsel__type);
+
+ if (!pevsel)
+ return NULL;
+
+ memset(&pevsel->evsel, 0, sizeof(pevsel->evsel));
+ evsel__init(&pevsel->evsel, &evsel->core.attr, evsel->core.idx);
+
+ evsel__clone(&pevsel->evsel, evsel);
+ if (evsel__is_group_leader(evsel))
+ evsel__set_leader(&pevsel->evsel, &pevsel->evsel);
+ return (PyObject *)pevsel;
+}
+
+static PyObject *pyrf_evlist__from_evlist(struct evlist *evlist)
+{
+ struct pyrf_evlist *pevlist = PyObject_New(struct pyrf_evlist, &pyrf_evlist__type);
+ struct evsel *pos;
+
+ if (!pevlist)
+ return NULL;
+
+ memset(&pevlist->evlist, 0, sizeof(pevlist->evlist));
+ evlist__init(&pevlist->evlist, evlist->core.all_cpus, evlist->core.threads);
+ evlist__for_each_entry(evlist, pos) {
+ struct pyrf_evsel *pevsel = (void *)pyrf_evsel__from_evsel(pos);
+
+ evlist__add(&pevlist->evlist, &pevsel->evsel);
+ }
+ return (PyObject *)pevlist;
+}
+
+static PyObject *pyrf__parse_events(PyObject *self, PyObject *args)
+{
+ const char *input;
+ struct evlist evlist = {};
+ struct parse_events_error err;
+ PyObject *result;
+ PyObject *pcpus = NULL, *pthreads = NULL;
+ struct perf_cpu_map *cpus;
+ struct perf_thread_map *threads;
+
+ if (!PyArg_ParseTuple(args, "s|OO", &input, &pcpus, &pthreads))
+ return NULL;
+
+ threads = pthreads ? ((struct pyrf_thread_map *)pthreads)->threads : NULL;
+ cpus = pcpus ? ((struct pyrf_cpu_map *)pcpus)->cpus : NULL;
+
+ parse_events_error__init(&err);
+ evlist__init(&evlist, cpus, threads);
+ if (parse_events(&evlist, input, &err)) {
+ parse_events_error__print(&err, input);
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+ result = pyrf_evlist__from_evlist(&evlist);
+ evlist__exit(&evlist);
+ return result;
+}
+
static PyMethodDef perf__methods[] = {
{
.ml_name = "tracepoint",
@@ -1247,21 +1410,20 @@ static PyMethodDef perf__methods[] = {
.ml_flags = METH_VARARGS | METH_KEYWORDS,
.ml_doc = PyDoc_STR("Get tracepoint config.")
},
+ {
+ .ml_name = "parse_events",
+ .ml_meth = (PyCFunction) pyrf__parse_events,
+ .ml_flags = METH_VARARGS,
+ .ml_doc = PyDoc_STR("Parse a string of events and return an evlist.")
+ },
{ .ml_name = NULL, }
};
-#if PY_MAJOR_VERSION < 3
-PyMODINIT_FUNC initperf(void)
-#else
PyMODINIT_FUNC PyInit_perf(void)
-#endif
{
PyObject *obj;
int i;
PyObject *dict;
-#if PY_MAJOR_VERSION < 3
- PyObject *module = Py_InitModule("perf", perf__methods);
-#else
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"perf", /* m_name */
@@ -1274,7 +1436,6 @@ PyMODINIT_FUNC PyInit_perf(void)
NULL, /* m_free */
};
PyObject *module = PyModule_Create(&moduledef);
-#endif
if (module == NULL ||
pyrf_event__setup_types() < 0 ||
@@ -1282,11 +1443,7 @@ PyMODINIT_FUNC PyInit_perf(void)
pyrf_evsel__setup_types() < 0 ||
pyrf_thread_map__setup_types() < 0 ||
pyrf_cpu_map__setup_types() < 0)
-#if PY_MAJOR_VERSION < 3
- return;
-#else
return module;
-#endif
/* The page_size is placed in util object. */
page_size = sysconf(_SC_PAGE_SIZE);
@@ -1335,7 +1492,7 @@ PyMODINIT_FUNC PyInit_perf(void)
goto error;
for (i = 0; perf__constants[i].name != NULL; i++) {
- obj = _PyLong_FromLong(perf__constants[i].value);
+ obj = PyLong_FromLong(perf__constants[i].value);
if (obj == NULL)
goto error;
PyDict_SetItemString(dict, perf__constants[i].name, obj);
@@ -1345,109 +1502,5 @@ PyMODINIT_FUNC PyInit_perf(void)
error:
if (PyErr_Occurred())
PyErr_SetString(PyExc_ImportError, "perf: Init failed!");
-#if PY_MAJOR_VERSION >= 3
return module;
-#endif
-}
-
-
-/* The following are stubs to avoid dragging in builtin-* objects. */
-/* TODO: move the code out of the builtin-* file into util. */
-
-unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
-
-#ifdef HAVE_KVM_STAT_SUPPORT
-bool kvm_entry_event(struct evsel *evsel __maybe_unused)
-{
- return false;
-}
-
-bool kvm_exit_event(struct evsel *evsel __maybe_unused)
-{
- return false;
-}
-
-bool exit_event_begin(struct evsel *evsel __maybe_unused,
- struct perf_sample *sample __maybe_unused,
- struct event_key *key __maybe_unused)
-{
- return false;
-}
-
-bool exit_event_end(struct evsel *evsel __maybe_unused,
- struct perf_sample *sample __maybe_unused,
- struct event_key *key __maybe_unused)
-{
- return false;
-}
-
-void exit_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
- struct event_key *key __maybe_unused,
- char *decode __maybe_unused)
-{
-}
-#endif // HAVE_KVM_STAT_SUPPORT
-
-int find_scripts(char **scripts_array __maybe_unused, char **scripts_path_array __maybe_unused,
- int num __maybe_unused, int pathlen __maybe_unused)
-{
- return -1;
-}
-
-void perf_stat__set_no_csv_summary(int set __maybe_unused)
-{
-}
-
-void perf_stat__set_big_num(int set __maybe_unused)
-{
-}
-
-int script_spec_register(const char *spec __maybe_unused, struct scripting_ops *ops __maybe_unused)
-{
- return -1;
-}
-
-arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch __maybe_unused)
-{
- return NULL;
-}
-
-struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork __maybe_unused,
- struct kwork_class *class __maybe_unused,
- struct kwork_work *key __maybe_unused)
-{
- return NULL;
-}
-
-void script_fetch_insn(struct perf_sample *sample __maybe_unused,
- struct thread *thread __maybe_unused,
- struct machine *machine __maybe_unused)
-{
-}
-
-int perf_sample__sprintf_flags(u32 flags __maybe_unused, char *str __maybe_unused,
- size_t sz __maybe_unused)
-{
- return -1;
-}
-
-bool match_callstack_filter(struct machine *machine __maybe_unused, u64 *callstack __maybe_unused)
-{
- return false;
-}
-
-struct lock_stat *lock_stat_find(u64 addr __maybe_unused)
-{
- return NULL;
-}
-
-struct lock_stat *lock_stat_findnew(u64 addr __maybe_unused, const char *name __maybe_unused,
- int flags __maybe_unused)
-{
- return NULL;
-}
-
-int cmd_inject(int argc __maybe_unused, const char *argv[] __maybe_unused)
-{
- return -1;
}
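Several of the python.c fixes above are about dropping the freshly created event object's reference on error paths before raising. A compact sketch of that pattern against the plain CPython C API; build_or_fail() is hypothetical, and the program builds with python3-config --cflags/--ldflags --embed:

    #include <Python.h>
    #include <stdio.h>

    static PyObject *build_or_fail(int fail)
    {
        PyObject *list = PyList_New(0);

        if (!list)
            return NULL;
        if (fail) {
            /* Failure after allocation: drop our reference first, the
             * same pattern read_on_cpu now follows before raising. */
            Py_DECREF(list);
            return PyErr_Format(PyExc_OSError, "simulated parse failure");
        }
        return list;
    }

    int main(void)
    {
        PyObject *ok;

        Py_Initialize();
        ok = build_or_fail(0);
        printf("refs on success: %zd\n", Py_REFCNT(ok));
        Py_DECREF(ok);
        if (!build_or_fail(1))
            PyErr_Clear();   /* the error path left no dangling reference */
        return Py_FinalizeEx();
    }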
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
deleted file mode 100644
index d927a0d25052..000000000000
--- a/tools/perf/util/rb_resort.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _PERF_RESORT_RB_H_
-#define _PERF_RESORT_RB_H_
-/*
- * Template for creating a class to resort an existing rb_tree according to
- * a new sort criteria, that must be present in the entries of the source
- * rb_tree.
- *
- * (c) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Quick example, resorting threads by its shortname:
- *
- * First define the prefix (threads) to be used for the functions and data
- * structures created, and provide an expression for the sorting, then the
- * fields to be present in each of the entries in the new, sorted, rb_tree.
- *
- * The body of the init function should collect the fields, maybe
- * pre-calculating them from multiple entries in the original 'entry' from
- * the rb_tree used as a source for the entries to be sorted:
-
-DEFINE_RB_RESORT_RB(threads, strcmp(a->thread->shortname,
- b->thread->shortname) < 0,
- struct thread *thread;
-)
-{
- entry->thread = rb_entry(nd, struct thread, rb_node);
-}
-
- * After this it is just a matter of instantiating it and iterating it,
- * for a few data structures with existing rb_trees, such as 'struct machine',
- * helpers are available to get the rb_root and the nr_entries:
-
- DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr);
-
- * This will instantiate the new rb_tree and a cursor for it, that can be used as:
-
- struct rb_node *nd;
-
- resort_rb__for_each_entry(nd, threads) {
- struct thread *t = threads_entry;
- printf("%s: %d\n", t->shortname, t->tid);
- }
-
- * Then delete it:
-
- resort_rb__delete(threads);
-
- * The name of the data structures and functions will have a _sorted suffix
- * right before the method names, i.e. will look like:
- *
- * struct threads_sorted_entry {}
- * threads_sorted__insert()
- */
-
-#define DEFINE_RESORT_RB(__name, __comp, ...) \
-struct __name##_sorted_entry { \
- struct rb_node rb_node; \
- __VA_ARGS__ \
-}; \
-static void __name##_sorted__init_entry(struct rb_node *nd, \
- struct __name##_sorted_entry *entry); \
- \
-static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb) \
-{ \
- struct __name##_sorted_entry *a, *b; \
- a = rb_entry(nda, struct __name##_sorted_entry, rb_node); \
- b = rb_entry(ndb, struct __name##_sorted_entry, rb_node); \
- return __comp; \
-} \
- \
-struct __name##_sorted { \
- struct rb_root entries; \
- struct __name##_sorted_entry nd[0]; \
-}; \
- \
-static void __name##_sorted__insert(struct __name##_sorted *sorted, \
- struct rb_node *sorted_nd) \
-{ \
- struct rb_node **p = &sorted->entries.rb_node, *parent = NULL; \
- while (*p != NULL) { \
- parent = *p; \
- if (__name##_sorted__cmp(sorted_nd, parent)) \
- p = &(*p)->rb_left; \
- else \
- p = &(*p)->rb_right; \
- } \
- rb_link_node(sorted_nd, parent, p); \
- rb_insert_color(sorted_nd, &sorted->entries); \
-} \
- \
-static void __name##_sorted__sort(struct __name##_sorted *sorted, \
- struct rb_root *entries) \
-{ \
- struct rb_node *nd; \
- unsigned int i = 0; \
- for (nd = rb_first(entries); nd; nd = rb_next(nd)) { \
- struct __name##_sorted_entry *snd = &sorted->nd[i++]; \
- __name##_sorted__init_entry(nd, snd); \
- __name##_sorted__insert(sorted, &snd->rb_node); \
- } \
-} \
- \
-static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries, \
- int nr_entries) \
-{ \
- struct __name##_sorted *sorted; \
- sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries); \
- if (sorted) { \
- sorted->entries = RB_ROOT; \
- __name##_sorted__sort(sorted, entries); \
- } \
- return sorted; \
-} \
- \
-static void __name##_sorted__delete(struct __name##_sorted *sorted) \
-{ \
- free(sorted); \
-} \
- \
-static void __name##_sorted__init_entry(struct rb_node *nd, \
- struct __name##_sorted_entry *entry)
-
-#define DECLARE_RESORT_RB(__name) \
-struct __name##_sorted_entry *__name##_entry; \
-struct __name##_sorted *__name = __name##_sorted__new
-
-#define resort_rb__for_each_entry(__nd, __name) \
- for (__nd = rb_first(&__name->entries); \
- __name##_entry = rb_entry(__nd, struct __name##_sorted_entry, \
- rb_node), __nd; \
- __nd = rb_next(__nd))
-
-#define resort_rb__delete(__name) \
- __name##_sorted__delete(__name), __name = NULL
-
-/*
- * Helpers for other classes that contains both an rbtree and the
- * number of entries in it:
- */
-
-/* For 'struct intlist' */
-#define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \
- DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root, \
- __ilist->rblist.nr_entries)
-
-#endif /* _PERF_RESORT_RB_H_ */
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index 30638653ad2d..0ce52f0280b8 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -513,6 +513,7 @@ static bool s390_cpumsf_make_event(size_t pos,
.period = 1
};
union perf_event event;
+ int ret;
memset(&event, 0, sizeof(event));
if (basic->CL == 1) /* Native LPAR mode */
@@ -536,8 +537,9 @@ static bool s390_cpumsf_make_event(size_t pos,
pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
__func__, pos, sample.ip, basic->P, basic->CL, sample.pid,
sample.tid, sample.cpumode, sample.cpu);
- if (perf_session__deliver_synth_event(sfq->sf->session, &event,
- &sample)) {
+ ret = perf_session__deliver_synth_event(sfq->sf->session, &event, &sample);
+ perf_sample__exit(&sample);
+ if (ret) {
pr_err("s390 Auxiliary Trace: failed to deliver event\n");
return false;
}
diff --git a/tools/perf/util/sample.c b/tools/perf/util/sample.c
new file mode 100644
index 000000000000..605fee971f55
--- /dev/null
+++ b/tools/perf/util/sample.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "sample.h"
+#include "debug.h"
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <string.h>
+
+void perf_sample__init(struct perf_sample *sample, bool all)
+{
+ if (all) {
+ memset(sample, 0, sizeof(*sample));
+ } else {
+ sample->user_regs = NULL;
+ sample->intr_regs = NULL;
+ }
+}
+
+void perf_sample__exit(struct perf_sample *sample)
+{
+ free(sample->user_regs);
+ free(sample->intr_regs);
+}
+
+struct regs_dump *perf_sample__user_regs(struct perf_sample *sample)
+{
+ if (!sample->user_regs) {
+ sample->user_regs = zalloc(sizeof(*sample->user_regs));
+ if (!sample->user_regs)
+			pr_err("Failure to allocate sample user_regs\n");
+ }
+ return sample->user_regs;
+}
+
+struct regs_dump *perf_sample__intr_regs(struct perf_sample *sample)
+{
+ if (!sample->intr_regs) {
+ sample->intr_regs = zalloc(sizeof(*sample->intr_regs));
+ if (!sample->intr_regs)
+			pr_err("Failure to allocate sample intr_regs\n");
+ }
+ return sample->intr_regs;
+}
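The accessors above allocate a register dump only on first use, and perf_sample__exit() frees whichever were created, so samples with no register data stay cheap. A minimal sketch of the same lazy lifecycle with stand-in types (names here are illustrative, not perf's):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct regs { unsigned long mask; };

    struct sample {
        struct regs *user_regs;   /* NULL until first requested */
    };

    static void sample_init(struct sample *s)
    {
        memset(s, 0, sizeof(*s));
    }

    static struct regs *sample_user_regs(struct sample *s)
    {
        if (!s->user_regs)
            s->user_regs = calloc(1, sizeof(*s->user_regs));
        return s->user_regs;      /* NULL on allocation failure */
    }

    static void sample_exit(struct sample *s)
    {
        free(s->user_regs);       /* free(NULL) is a no-op */
    }

    int main(void)
    {
        struct sample s;
        struct regs *r;

        sample_init(&s);
        r = sample_user_regs(&s);   /* allocated on demand */
        if (r)
            r->mask = 0xff;
        sample_exit(&s);            /* frees only what was created */
        return 0;
    }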
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index 70b2c3135555..0e96240052e9 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -67,7 +67,7 @@ struct aux_sample {
};
struct simd_flags {
- u64 arch:1, /* architecture (isa) */
+ u8 arch:1, /* architecture (isa) */
pred:2; /* predication */
};
@@ -114,14 +114,19 @@ struct perf_sample {
struct ip_callchain *callchain;
struct branch_stack *branch_stack;
u64 *branch_stack_cntr;
- struct regs_dump user_regs;
- struct regs_dump intr_regs;
+ struct regs_dump *user_regs;
+ struct regs_dump *intr_regs;
struct stack_dump user_stack;
struct sample_read read;
struct aux_sample aux_sample;
struct simd_flags simd_flags;
};
+void perf_sample__init(struct perf_sample *sample, bool all);
+void perf_sample__exit(struct perf_sample *sample);
+struct regs_dump *perf_sample__user_regs(struct perf_sample *sample);
+struct regs_dump *perf_sample__intr_regs(struct perf_sample *sample);
+
/*
* raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
* 8-byte alignment.
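Narrowing the simd_flags bitfields from u64 to u8 also narrows their storage unit, since a bitfield's declared type sets the allocation unit; with perf_sample now carrying two fewer inline regs_dump structs, every byte in the struct counts. A quick demonstration (bitfields on uint8_t are a near-universal compiler extension; exact sizes are ABI-defined, typically 8 then 1 with GCC on Linux):

    #include <stdint.h>
    #include <stdio.h>

    struct simd_flags_old { uint64_t arch:1, pred:2; };
    struct simd_flags_new { uint8_t  arch:1, pred:2; };

    int main(void)
    {
        printf("u64 bitfields: %zu bytes\n", sizeof(struct simd_flags_old));
        printf("u8  bitfields: %zu bytes\n", sizeof(struct simd_flags_new));
        return 0;
    }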
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 85b7f188f729..e261a57b87d4 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -344,7 +344,7 @@ static void perl_process_tracepoint(struct perf_sample *sample,
struct addr_location *al)
{
struct thread *thread = al->thread;
- struct tep_event *event = evsel->tp_format;
+ struct tep_event *event;
struct tep_format_field *field;
static char handler[256];
unsigned long long val;
@@ -362,6 +362,7 @@ static void perl_process_tracepoint(struct perf_sample *sample,
if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
return;
+ event = evsel__tp_format(evsel);
if (!event) {
pr_debug("ug! no event found for type %" PRIu64, (u64)evsel->core.attr.config);
return;
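Reading evsel->tp_format directly assumed the tracepoint format had already been parsed; evsel__tp_format() resolves it on demand and may return NULL, so every caller now checks before dereferencing. The recurring shape of the conversion, used here and again in the python engine and sort.c below, as a sketch:

	static void print_tp_fields(struct evsel *evsel)
	{
		struct tep_event *tp_format = evsel__tp_format(evsel);
		struct tep_format_field *field;

		if (tp_format == NULL)
			return; /* not a tracepoint, or its format could not be parsed */

		for (field = tp_format->format.fields; field; field = field->next)
			pr_debug("field: %s\n", field->name);
	}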
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 8bdae066e839..520729e78965 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -58,22 +58,6 @@
#include "mem-events.h"
#include "util/perf_regs.h"
-#if PY_MAJOR_VERSION < 3
-#define _PyUnicode_FromString(arg) \
- PyString_FromString(arg)
-#define _PyUnicode_FromStringAndSize(arg1, arg2) \
- PyString_FromStringAndSize((arg1), (arg2))
-#define _PyBytes_FromStringAndSize(arg1, arg2) \
- PyString_FromStringAndSize((arg1), (arg2))
-#define _PyLong_FromLong(arg) \
- PyInt_FromLong(arg)
-#define _PyLong_AsLong(arg) \
- PyInt_AsLong(arg)
-#define _PyCapsule_New(arg1, arg2, arg3) \
- PyCObject_FromVoidPtr((arg1), (arg2))
-
-PyMODINIT_FUNC initperf_trace_context(void);
-#else
#define _PyUnicode_FromString(arg) \
PyUnicode_FromString(arg)
#define _PyUnicode_FromStringAndSize(arg1, arg2) \
@@ -88,7 +72,6 @@ PyMODINIT_FUNC initperf_trace_context(void);
PyCapsule_New((arg1), (arg2), (arg3))
PyMODINIT_FUNC PyInit_perf_trace_context(void);
-#endif
#ifdef HAVE_LIBTRACEEVENT
#define TRACE_EVENT_TYPE_MAX \
@@ -181,17 +164,7 @@ static int get_argument_count(PyObject *handler)
{
int arg_count = 0;
- /*
- * The attribute for the code object is func_code in Python 2,
- * whereas it is __code__ in Python 3.0+.
- */
- PyObject *code_obj = PyObject_GetAttrString(handler,
- "func_code");
- if (PyErr_Occurred()) {
- PyErr_Clear();
- code_obj = PyObject_GetAttrString(handler,
- "__code__");
- }
+	PyObject *code_obj = PyObject_GetAttrString(handler, "__code__");
PyErr_Clear();
if (code_obj) {
PyObject *arg_count_obj = PyObject_GetAttrString(code_obj,
@@ -772,19 +745,30 @@ static int set_regs_in_dict(PyObject *dict,
const char *arch = perf_env__arch(evsel__env(evsel));
int size = (__sw_hweight64(attr->sample_regs_intr) * MAX_REG_SIZE) + 1;
- char *bf = malloc(size);
- if (!bf)
- return -1;
+ char *bf = NULL;
- regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, size);
+ if (sample->intr_regs) {
+ bf = malloc(size);
+ if (!bf)
+ return -1;
- pydict_set_item_string_decref(dict, "iregs",
- _PyUnicode_FromString(bf));
+ regs_map(sample->intr_regs, attr->sample_regs_intr, arch, bf, size);
- regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, size);
+ pydict_set_item_string_decref(dict, "iregs",
+ _PyUnicode_FromString(bf));
+ }
+
+ if (sample->user_regs) {
+ if (!bf) {
+ bf = malloc(size);
+ if (!bf)
+ return -1;
+ }
+ regs_map(sample->user_regs, attr->sample_regs_user, arch, bf, size);
- pydict_set_item_string_decref(dict, "uregs",
- _PyUnicode_FromString(bf));
+ pydict_set_item_string_decref(dict, "uregs",
+ _PyUnicode_FromString(bf));
+ }
free(bf);
return 0;
@@ -949,7 +933,7 @@ static void python_process_tracepoint(struct perf_sample *sample,
struct addr_location *al,
struct addr_location *addr_al)
{
- struct tep_event *event = evsel->tp_format;
+ struct tep_event *event;
PyObject *handler, *context, *t, *obj = NULL, *callchain;
PyObject *dict = NULL, *all_entries_dict = NULL;
static char handler_name[256];
@@ -966,6 +950,7 @@ static void python_process_tracepoint(struct perf_sample *sample,
bitmap_zero(events_defined, TRACE_EVENT_TYPE_MAX);
+ event = evsel__tp_format(evsel);
if (!event) {
snprintf(handler_name, sizeof(handler_name),
"ug! no event found for type %" PRIu64, (u64)evsel->core.attr.config);
@@ -1902,12 +1887,6 @@ static void set_table_handlers(struct tables *tables)
tables->synth_handler = get_handler("synth_data");
}
-#if PY_MAJOR_VERSION < 3
-static void _free_command_line(const char **command_line, int num)
-{
- free(command_line);
-}
-#else
static void _free_command_line(wchar_t **command_line, int num)
{
int i;
@@ -1915,7 +1894,6 @@ static void _free_command_line(wchar_t **command_line, int num)
PyMem_RawFree(command_line[i]);
free(command_line);
}
-#endif
/*
@@ -1925,30 +1903,12 @@ static int python_start_script(const char *script, int argc, const char **argv,
struct perf_session *session)
{
struct tables *tables = &tables_global;
-#if PY_MAJOR_VERSION < 3
- const char **command_line;
-#else
wchar_t **command_line;
-#endif
- /*
- * Use a non-const name variable to cope with python 2.6's
- * PyImport_AppendInittab prototype
- */
- char buf[PATH_MAX], name[19] = "perf_trace_context";
+ char buf[PATH_MAX];
int i, err = 0;
FILE *fp;
scripting_context->session = session;
-#if PY_MAJOR_VERSION < 3
- command_line = malloc((argc + 1) * sizeof(const char *));
- if (!command_line)
- return -1;
-
- command_line[0] = script;
- for (i = 1; i < argc + 1; i++)
- command_line[i] = argv[i - 1];
- PyImport_AppendInittab(name, initperf_trace_context);
-#else
command_line = malloc((argc + 1) * sizeof(wchar_t *));
if (!command_line)
return -1;
@@ -1956,15 +1916,10 @@ static int python_start_script(const char *script, int argc, const char **argv,
command_line[0] = Py_DecodeLocale(script, NULL);
for (i = 1; i < argc + 1; i++)
command_line[i] = Py_DecodeLocale(argv[i - 1], NULL);
- PyImport_AppendInittab(name, PyInit_perf_trace_context);
-#endif
+ PyImport_AppendInittab("perf_trace_context", PyInit_perf_trace_context);
Py_Initialize();
-#if PY_MAJOR_VERSION < 3
- PySys_SetArgv(argc + 1, (char **)command_line);
-#else
PySys_SetArgv(argc + 1, command_line);
-#endif
fp = fopen(script, "r");
if (!fp) {
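With the Python 2 paths gone, interpreter start-up reduces to the standard CPython 3 embedding sequence: register the extension module before Py_Initialize(), then hand argv over as wide strings. A stripped-down sketch of that sequence (assumes PyInit_perf_trace_context from this file):

	static int start_interpreter(const char *script)
	{
		wchar_t *argv0;

		PyImport_AppendInittab("perf_trace_context", PyInit_perf_trace_context);
		Py_Initialize();
		argv0 = Py_DecodeLocale(script, NULL);
		PySys_SetArgv(1, &argv0);
		/* ... run the script, e.g. with PyRun_SimpleFile() ... */
		Py_Finalize();
		PyMem_RawFree(argv0);
		return 0;
	}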
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 507e6cba9545..60fb9997ea0d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -37,6 +37,7 @@
#include "arch/common.h"
#include "units.h"
#include "annotate.h"
+#include "perf.h"
#include <internal/lib.h>
static int perf_session__deliver_event(struct perf_session *session,
@@ -949,7 +950,12 @@ static void regs__printf(const char *type, struct regs_dump *regs, const char *a
static void regs_user__printf(struct perf_sample *sample, const char *arch)
{
- struct regs_dump *user_regs = &sample->user_regs;
+ struct regs_dump *user_regs;
+
+ if (!sample->user_regs)
+ return;
+
+ user_regs = perf_sample__user_regs(sample);
if (user_regs->regs)
regs__printf("user", user_regs, arch);
@@ -957,7 +963,12 @@ static void regs_user__printf(struct perf_sample *sample, const char *arch)
static void regs_intr__printf(struct perf_sample *sample, const char *arch)
{
- struct regs_dump *intr_regs = &sample->intr_regs;
+ struct regs_dump *intr_regs;
+
+ if (!sample->intr_regs)
+ return;
+
+ intr_regs = perf_sample__intr_regs(sample);
if (intr_regs->regs)
regs__printf("intr", intr_regs, arch);
@@ -1350,25 +1361,30 @@ static int perf_session__deliver_event(struct perf_session *session,
const char *file_path)
{
struct perf_sample sample;
- int ret = evlist__parse_sample(session->evlist, event, &sample);
+ int ret;
+ perf_sample__init(&sample, /*all=*/false);
+ ret = evlist__parse_sample(session->evlist, event, &sample);
if (ret) {
pr_err("Can't parse sample, err = %d\n", ret);
- return ret;
+ goto out;
}
ret = auxtrace__process_event(session, event, &sample, tool);
if (ret < 0)
- return ret;
- if (ret > 0)
- return 0;
+ goto out;
+ if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
ret = machines__deliver_event(&session->machines, session->evlist,
event, &sample, tool, file_offset, file_path);
if (dump_trace && sample.aux_sample.size)
auxtrace__dump_auxtrace_sample(session, &sample);
-
+out:
+ perf_sample__exit(&sample);
return ret;
}
@@ -1379,10 +1395,11 @@ static s64 perf_session__process_user_event(struct perf_session *session,
{
struct ordered_events *oe = &session->ordered_events;
const struct perf_tool *tool = session->tool;
- struct perf_sample sample = { .time = 0, };
+ struct perf_sample sample;
int fd = perf_data__fd(session->data);
int err;
+ perf_sample__init(&sample, /*all=*/true);
if (event->header.type != PERF_RECORD_COMPRESSED || perf_tool__compressed_is_stub(tool))
dump_event(session->evlist, event, file_offset, &sample, file_path);
@@ -1394,15 +1411,17 @@ static s64 perf_session__process_user_event(struct perf_session *session,
perf_session__set_id_hdr_size(session);
perf_session__set_comm_exec(session);
}
- return err;
+ break;
case PERF_RECORD_EVENT_UPDATE:
- return tool->event_update(tool, event, &session->evlist);
+ err = tool->event_update(tool, event, &session->evlist);
+ break;
case PERF_RECORD_HEADER_EVENT_TYPE:
/*
* Deprecated, but we need to handle it for sake
* of old data files create in pipe mode.
*/
- return 0;
+ err = 0;
+ break;
case PERF_RECORD_HEADER_TRACING_DATA:
/*
* Setup for reading amidst mmap, but only when we
@@ -1411,15 +1430,20 @@ static s64 perf_session__process_user_event(struct perf_session *session,
*/
if (!perf_data__is_pipe(session->data))
lseek(fd, file_offset, SEEK_SET);
- return tool->tracing_data(session, event);
+ err = tool->tracing_data(session, event);
+ break;
case PERF_RECORD_HEADER_BUILD_ID:
- return tool->build_id(session, event);
+ err = tool->build_id(session, event);
+ break;
case PERF_RECORD_FINISHED_ROUND:
- return tool->finished_round(tool, event, oe);
+ err = tool->finished_round(tool, event, oe);
+ break;
case PERF_RECORD_ID_INDEX:
- return tool->id_index(session, event);
+ err = tool->id_index(session, event);
+ break;
case PERF_RECORD_AUXTRACE_INFO:
- return tool->auxtrace_info(session, event);
+ err = tool->auxtrace_info(session, event);
+ break;
case PERF_RECORD_AUXTRACE:
/*
* Setup for reading amidst mmap, but only when we
@@ -1428,35 +1452,48 @@ static s64 perf_session__process_user_event(struct perf_session *session,
*/
if (!perf_data__is_pipe(session->data))
lseek(fd, file_offset + event->header.size, SEEK_SET);
- return tool->auxtrace(session, event);
+ err = tool->auxtrace(session, event);
+ break;
case PERF_RECORD_AUXTRACE_ERROR:
perf_session__auxtrace_error_inc(session, event);
- return tool->auxtrace_error(session, event);
+ err = tool->auxtrace_error(session, event);
+ break;
case PERF_RECORD_THREAD_MAP:
- return tool->thread_map(session, event);
+ err = tool->thread_map(session, event);
+ break;
case PERF_RECORD_CPU_MAP:
- return tool->cpu_map(session, event);
+ err = tool->cpu_map(session, event);
+ break;
case PERF_RECORD_STAT_CONFIG:
- return tool->stat_config(session, event);
+ err = tool->stat_config(session, event);
+ break;
case PERF_RECORD_STAT:
- return tool->stat(session, event);
+ err = tool->stat(session, event);
+ break;
case PERF_RECORD_STAT_ROUND:
- return tool->stat_round(session, event);
+ err = tool->stat_round(session, event);
+ break;
case PERF_RECORD_TIME_CONV:
session->time_conv = event->time_conv;
- return tool->time_conv(session, event);
+ err = tool->time_conv(session, event);
+ break;
case PERF_RECORD_HEADER_FEATURE:
- return tool->feature(session, event);
+ err = tool->feature(session, event);
+ break;
case PERF_RECORD_COMPRESSED:
err = tool->compressed(session, event, file_offset, file_path);
if (err)
dump_event(session->evlist, event, file_offset, &sample, file_path);
- return err;
+ break;
case PERF_RECORD_FINISHED_INIT:
- return tool->finished_init(session, event);
+ err = tool->finished_init(session, event);
+ break;
default:
- return -EINVAL;
+ err = -EINVAL;
+ break;
}
+ perf_sample__exit(&sample);
+ return err;
}
int perf_session__deliver_synth_event(struct perf_session *session,
@@ -2402,6 +2439,18 @@ bool perf_session__has_traces(struct perf_session *session, const char *msg)
return false;
}
+bool perf_session__has_switch_events(struct perf_session *session)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (evsel->core.attr.context_switch)
+ return true;
+ }
+
+ return false;
+}
+
int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr)
{
char *bracket;
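perf_session__has_switch_events() lets report-side code verify up front that the data was recorded with context switch records before depending on PERF_RECORD_SWITCH. A likely call-site shape (hypothetical error message):

	static int check_switch_data(struct perf_session *session)
	{
		if (!perf_session__has_switch_events(session)) {
			pr_err("Data has no context switches; record with --switch-events\n");
			return -EINVAL;
		}
		return 0;
	}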
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index bcf1bcf06959..db1c120a9e67 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -141,6 +141,7 @@ int perf_session__resolve_callchain(struct perf_session *session,
struct symbol **parent);
bool perf_session__has_traces(struct perf_session *session, const char *msg);
+bool perf_session__has_switch_events(struct perf_session *session);
void perf_event__attr_swap(struct perf_event_attr *attr);
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 649550e9b7aa..dd289d15acfd 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -3,6 +3,7 @@ from subprocess import Popen, PIPE
from re import sub
cc = getenv("CC")
+assert cc, "Environment variable CC not set"
# Check if CC has options, as is the case in yocto, where it uses CC="cc --sysroot..."
cc_tokens = cc.split()
@@ -12,8 +13,13 @@ if len(cc_tokens) > 1:
else:
cc_options = ""
+# mypy flags Popen.stderr as possibly None; stderr is set to PIPE below, so it never is.
+# mypy: disable-error-code="union-attr"
cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline()
-src_feature_tests = getenv('srctree') + '/tools/build/feature'
+
+srctree = getenv('srctree')
+assert srctree, "Environment variable srctree, for the Linux sources, not set"
+src_feature_tests = f'{srctree}/tools/build/feature'
def clang_has_option(option):
cc_output = Popen([cc, cc_options + option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines()
@@ -71,7 +77,7 @@ else:
# The python headers have mixed code with declarations (decls after asserts, for instance)
cflags += [ "-Wno-declaration-after-statement" ]
-src_perf = getenv('srctree') + '/tools/perf'
+src_perf = f'{srctree}/tools/perf'
build_lib = getenv('PYTHON_EXTBUILD_LIB')
build_tmp = getenv('PYTHON_EXTBUILD_TMP')
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 9dd60c7869a2..c51049087e4e 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -892,6 +892,38 @@ struct sort_entry sort_cpu = {
.se_width_idx = HISTC_CPU,
};
+/* --sort parallelism */
+
+static int64_t
+sort__parallelism_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return right->parallelism - left->parallelism;
+}
+
+static int hist_entry__parallelism_filter(struct hist_entry *he, int type, const void *arg)
+{
+ const unsigned long *parallelism_filter = arg;
+
+ if (type != HIST_FILTER__PARALLELISM)
+ return -1;
+
+ return test_bit(he->parallelism, parallelism_filter);
+}
+
+static int hist_entry__parallelism_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%*d", width, he->parallelism);
+}
+
+struct sort_entry sort_parallelism = {
+ .se_header = "Parallelism",
+ .se_cmp = sort__parallelism_cmp,
+ .se_filter = hist_entry__parallelism_filter,
+ .se_snprintf = hist_entry__parallelism_snprintf,
+ .se_width_idx = HISTC_PARALLELISM,
+};
+
/* --sort cgroup_id */
static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev)
@@ -1038,17 +1070,19 @@ static char *get_trace_output(struct hist_entry *he)
.data = he->raw_data,
.size = he->raw_size,
};
+ struct tep_event *tp_format;
evsel = hists_to_evsel(he->hists);
trace_seq_init(&seq);
- if (symbol_conf.raw_trace) {
- tep_print_fields(&seq, he->raw_data, he->raw_size,
- evsel->tp_format);
- } else {
- tep_print_event(evsel->tp_format->tep,
- &seq, &rec, "%s", TEP_PRINT_INFO);
+ tp_format = evsel__tp_format(evsel);
+ if (tp_format) {
+ if (symbol_conf.raw_trace)
+ tep_print_fields(&seq, he->raw_data, he->raw_size, tp_format);
+ else
+ tep_print_event(tp_format->tep, &seq, &rec, "%s", TEP_PRINT_INFO);
}
+
/*
* Trim the buffer, it starts at 4KB and we're not going to
* add anything more to this buffer.
@@ -2369,44 +2403,19 @@ sort__typeoff_sort(struct hist_entry *left, struct hist_entry *right)
return left->mem_type_off - right->mem_type_off;
}
-static void fill_member_name(char *buf, size_t sz, struct annotated_member *m,
- int offset, bool first)
-{
- struct annotated_member *child;
-
- if (list_empty(&m->children))
- return;
-
- list_for_each_entry(child, &m->children, node) {
- if (child->offset <= offset && offset < child->offset + child->size) {
- int len = 0;
-
- /* It can have anonymous struct/union members */
- if (child->var_name) {
- len = scnprintf(buf, sz, "%s%s",
- first ? "" : ".", child->var_name);
- first = false;
- }
-
- fill_member_name(buf + len, sz - len, child, offset, first);
- return;
- }
- }
-}
-
static int hist_entry__typeoff_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width __maybe_unused)
{
struct annotated_data_type *he_type = he->mem_type;
char buf[4096];
- buf[0] = '\0';
- if (list_empty(&he_type->self.children))
- snprintf(buf, sizeof(buf), "no field");
- else
- fill_member_name(buf, sizeof(buf), &he_type->self,
- he->mem_type_off, true);
- buf[4095] = '\0';
+ if (he_type == &unknown_type || he_type == &stackop_type ||
+ he_type == &canary_type)
+ return repsep_snprintf(bf, size, "%s", he_type->self.type_name);
+
+ if (!annotated_data_type__get_member_name(he_type, buf, sizeof(buf),
+ he->mem_type_off))
+ scnprintf(buf, sizeof(buf), "no field");
return repsep_snprintf(bf, size, "%s +%#x (%s)", he_type->self.type_name,
he->mem_type_off, buf);
@@ -2532,6 +2541,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_ANNOTATE_DATA_TYPE_OFFSET, "typeoff", sort_type_offset),
DIM(SORT_SYM_OFFSET, "symoff", sort_sym_offset),
DIM(SORT_ANNOTATE_DATA_TYPE_CACHELINE, "typecln", sort_type_cacheline),
+ DIM(SORT_PARALLELISM, "parallelism", sort_parallelism),
};
#undef DIM
@@ -2587,17 +2597,20 @@ struct hpp_dimension {
const char *name;
struct perf_hpp_fmt *fmt;
int taken;
+ int was_taken;
};
#define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], }
static struct hpp_dimension hpp_sort_dimensions[] = {
DIM(PERF_HPP__OVERHEAD, "overhead"),
+ DIM(PERF_HPP__LATENCY, "latency"),
DIM(PERF_HPP__OVERHEAD_SYS, "overhead_sys"),
DIM(PERF_HPP__OVERHEAD_US, "overhead_us"),
DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"),
DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"),
DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
+ DIM(PERF_HPP__LATENCY_ACC, "latency_children"),
DIM(PERF_HPP__SAMPLES, "sample"),
DIM(PERF_HPP__PERIOD, "period"),
DIM(PERF_HPP__WEIGHT1, "weight1"),
@@ -2733,6 +2746,7 @@ MK_SORT_ENTRY_CHK(thread)
MK_SORT_ENTRY_CHK(comm)
MK_SORT_ENTRY_CHK(dso)
MK_SORT_ENTRY_CHK(sym)
+MK_SORT_ENTRY_CHK(parallelism)
static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
@@ -3293,9 +3307,8 @@ static int __dynamic_dimension__add(struct evsel *evsel,
static int add_evsel_fields(struct evsel *evsel, bool raw_trace, int level)
{
int ret;
- struct tep_format_field *field;
-
- field = evsel->tp_format->format.fields;
+ struct tep_event *tp_format = evsel__tp_format(evsel);
+ struct tep_format_field *field = tp_format ? tp_format->format.fields : NULL;
while (field) {
ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
if (ret < 0)
@@ -3328,13 +3341,19 @@ static int add_all_matching_fields(struct evlist *evlist,
{
int ret = -ESRCH;
struct evsel *evsel;
- struct tep_format_field *field;
evlist__for_each_entry(evlist, evsel) {
+ struct tep_event *tp_format;
+ struct tep_format_field *field;
+
if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
continue;
- field = tep_find_any_field(evsel->tp_format, field_name);
+ tp_format = evsel__tp_format(evsel);
+ if (tp_format == NULL)
+ continue;
+
+ field = tep_find_any_field(tp_format, field_name);
if (field == NULL)
continue;
@@ -3416,7 +3435,9 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok,
if (!strcmp(field_name, "*")) {
ret = add_evsel_fields(evsel, raw_trace, level);
} else {
- struct tep_format_field *field = tep_find_any_field(evsel->tp_format, field_name);
+ struct tep_event *tp_format = evsel__tp_format(evsel);
+ struct tep_format_field *field =
+ tp_format ? tep_find_any_field(tp_format, field_name) : NULL;
if (field == NULL) {
pr_debug("Cannot find event field for %s.%s\n",
@@ -3468,6 +3489,7 @@ static int __hpp_dimension__add(struct hpp_dimension *hd,
return -1;
hd->taken = 1;
+ hd->was_taken = 1;
perf_hpp_list__register_sort_field(list, fmt);
return 0;
}
@@ -3502,10 +3524,15 @@ static int __hpp_dimension__add_output(struct perf_hpp_list *list,
return 0;
}
-int hpp_dimension__add_output(unsigned col)
+int hpp_dimension__add_output(unsigned col, bool implicit)
{
+ struct hpp_dimension *hd;
+
BUG_ON(col >= PERF_HPP__MAX_INDEX);
- return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
+ hd = &hpp_sort_dimensions[col];
+ if (implicit && !hd->was_taken)
+ return 0;
+ return __hpp_dimension__add_output(&perf_hpp_list, hd);
}
int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
@@ -3630,6 +3657,34 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
return -ESRCH;
}
+/* This should match with sort_dimension__add() above */
+static bool is_hpp_sort_key(const char *key)
+{
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(arch_specific_sort_keys); i++) {
+ if (!strcmp(arch_specific_sort_keys[i], key) &&
+ !arch_support_sort_key(key)) {
+ return false;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
+ struct sort_dimension *sd = &common_sort_dimensions[i];
+
+ if (sd->name && !strncasecmp(key, sd->name, strlen(key)))
+ return false;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
+ struct hpp_dimension *hd = &hpp_sort_dimensions[i];
+
+ if (!strncasecmp(key, hd->name, strlen(key)))
+ return true;
+ }
+ return false;
+}
+
static int setup_sort_list(struct perf_hpp_list *list, char *str,
struct evlist *evlist)
{
@@ -3637,7 +3692,9 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
int ret = 0;
int level = 0;
int next_level = 1;
+ int prev_level = 0;
bool in_group = false;
+ bool prev_was_hpp = false;
do {
tok = str;
@@ -3658,6 +3715,19 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
}
if (*tok) {
+ if (is_hpp_sort_key(tok)) {
+ /* keep output (hpp) sort keys in the same level */
+ if (prev_was_hpp) {
+ bool next_same = (level == next_level);
+
+ level = prev_level;
+					next_level = next_same ? level : level + 1;
+ }
+ prev_was_hpp = true;
+ } else {
+ prev_was_hpp = false;
+ }
+
ret = sort_dimension__add(list, tok, evlist, level);
if (ret == -EINVAL) {
if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok)))
@@ -3669,6 +3739,7 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
ui__error("Unknown --sort key: `%s'", tok);
break;
}
+ prev_level = level;
}
level = next_level;
@@ -3764,10 +3835,24 @@ static char *setup_overhead(char *keys)
if (sort__mode == SORT_MODE__DIFF)
return keys;
- keys = prefix_if_not_in("overhead", keys);
-
- if (symbol_conf.cumulate_callchain)
- keys = prefix_if_not_in("overhead_children", keys);
+ if (symbol_conf.prefer_latency) {
+ keys = prefix_if_not_in("overhead", keys);
+ keys = prefix_if_not_in("latency", keys);
+ if (symbol_conf.cumulate_callchain) {
+ keys = prefix_if_not_in("overhead_children", keys);
+ keys = prefix_if_not_in("latency_children", keys);
+ }
+ } else if (!keys || (!strstr(keys, "overhead") &&
+ !strstr(keys, "latency"))) {
+ if (symbol_conf.enable_latency)
+ keys = prefix_if_not_in("latency", keys);
+ keys = prefix_if_not_in("overhead", keys);
+ if (symbol_conf.cumulate_callchain) {
+ if (symbol_conf.enable_latency)
+ keys = prefix_if_not_in("latency_children", keys);
+ keys = prefix_if_not_in("overhead_children", keys);
+ }
+ }
return keys;
}
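The implicit flag on hpp_dimension__add_output() ties output columns to the sort keys that were actually taken: an implicit request is dropped unless the column was registered as a sort key earlier (was_taken), so the latency columns only appear when setup_overhead() or the user pulled them into the sort order. A caller sketch:

	/* Sketch: how report-side setup might register output columns. */
	static void add_default_columns(void)
	{
		/* Explicit: always adds the column. */
		hpp_dimension__add_output(PERF_HPP__OVERHEAD, /*implicit=*/false);
		/* Implicit: dropped unless "latency" was taken as a sort key. */
		hpp_dimension__add_output(PERF_HPP__LATENCY, /*implicit=*/true);
	}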
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index a8572574e168..180d36a2bea3 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -72,6 +72,7 @@ enum sort_type {
SORT_ANNOTATE_DATA_TYPE_OFFSET,
SORT_SYM_OFFSET,
SORT_ANNOTATE_DATA_TYPE_CACHELINE,
+ SORT_PARALLELISM,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
@@ -140,7 +141,7 @@ int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, i
bool is_strict_order(const char *order);
-int hpp_dimension__add_output(unsigned col);
+int hpp_dimension__add_output(unsigned col, bool implicit);
void reset_dimensions(void);
int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
struct evlist *evlist,
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index a5d72f4a515c..e852ac0d9847 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -115,14 +115,29 @@ static void print_running_csv(struct perf_stat_config *config, u64 run, u64 ena)
config->csv_sep, run, config->csv_sep, enabled_percent);
}
struct outstate {
- FILE *fh;
+ /* Std mode: insert a newline before the next metric */
bool newline;
+ /* JSON mode: track need for comma for a previous field or not */
bool first;
- const char *prefix;
- int nfields;
- int aggr_nr;
+ /* Num CSV separators remaining to pad out when not all fields are printed */
+ int csv_col_pad;
+
+ /*
+ * The following don't track state across fields, but are here as a shortcut to
+ * pass data to the print functions. The alternative would be to update the
+ * function signatures of the entire print stack to pass them through.
+ */
+ /* Place to output to */
+ FILE * const fh;
+ /* Lines are timestamped in --interval-print mode */
+ char timestamp[64];
+ /* Num items aggregated in current line. See struct perf_stat_aggr.nr */
+ int aggr_nr;
+ /* Core/socket/die etc ID for the current line */
struct aggr_cpu_id id;
+ /* Event for current line */
struct evsel *evsel;
+ /* Cgroup for current line */
struct cgroup *cgrp;
};
@@ -419,8 +434,8 @@ static inline void __new_line_std_csv(struct perf_stat_config *config,
struct outstate *os)
{
fputc('\n', os->fh);
- if (os->prefix)
- fputs(os->prefix, os->fh);
+ if (config->interval)
+ fputs(os->timestamp, os->fh);
aggr_printout(config, os, os->evsel, os->id, os->aggr_nr);
}
@@ -472,7 +487,7 @@ static void new_line_csv(struct perf_stat_config *config, void *ctx)
int i;
__new_line_std_csv(config, os);
- for (i = 0; i < os->nfields; i++)
+ for (i = 0; i < os->csv_col_pad; i++)
fputs(config->csv_sep, os->fh);
}
@@ -523,8 +538,8 @@ static void new_line_json(struct perf_stat_config *config, void *ctx)
fputs("\n{", os->fh);
os->first = true;
- if (os->prefix)
- json_out(os, "%s", os->prefix);
+ if (config->interval)
+ json_out(os, "%s", os->timestamp);
aggr_printout(config, os, os->evsel, os->id, os->aggr_nr);
}
@@ -549,12 +564,12 @@ static void print_metricgroup_header_csv(struct perf_stat_config *config,
if (!metricgroup_name) {
/* Leave space for running and enabling */
- for (i = 0; i < os->nfields - 2; i++)
+ for (i = 0; i < os->csv_col_pad - 2; i++)
fputs(config->csv_sep, os->fh);
return;
}
- for (i = 0; i < os->nfields; i++)
+ for (i = 0; i < os->csv_col_pad; i++)
fputs(config->csv_sep, os->fh);
fprintf(config->output, "%s", metricgroup_name);
new_line_csv(config, ctx);
@@ -673,11 +688,6 @@ static void print_metric_only_json(struct perf_stat_config *config __maybe_unuse
json_out(os, "\"%s\" : \"%s\"", unit, vals);
}
-static void new_line_metric(struct perf_stat_config *config __maybe_unused,
- void *ctx __maybe_unused)
-{
-}
-
static void print_metric_header(struct perf_stat_config *config,
void *ctx,
enum metric_threshold_classify thresh __maybe_unused,
@@ -839,22 +849,23 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
if (config->csv_output) {
pm = config->metric_only ? print_metric_only_csv : print_metric_csv;
- nl = config->metric_only ? new_line_metric : new_line_csv;
+ nl = config->metric_only ? NULL : new_line_csv;
pmh = print_metricgroup_header_csv;
- os->nfields = 4 + (counter->cgrp ? 1 : 0);
+ os->csv_col_pad = 4 + (counter->cgrp ? 1 : 0);
} else if (config->json_output) {
pm = config->metric_only ? print_metric_only_json : print_metric_json;
- nl = config->metric_only ? new_line_metric : new_line_json;
+ nl = config->metric_only ? NULL : new_line_json;
pmh = print_metricgroup_header_json;
} else {
pm = config->metric_only ? print_metric_only : print_metric_std;
- nl = config->metric_only ? new_line_metric : new_line_std;
+ nl = config->metric_only ? NULL : new_line_std;
pmh = print_metricgroup_header_std;
}
if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
if (config->metric_only) {
- pm(config, os, METRIC_THRESHOLD_UNKNOWN, "", "", 0);
+ pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL,
+ /*unit=*/NULL, /*val=*/0);
return;
}
@@ -909,7 +920,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
perf_stat__print_shadow_stats(config, counter, uval, aggr_idx,
&out, &config->metric_events);
} else {
- pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL, /*unit=*/"", /*val=*/0);
+ pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL, /*unit=*/NULL, /*val=*/0);
}
if (!config->metric_only) {
@@ -918,12 +929,16 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
}
}
-static void uniquify_event_name(struct evsel *counter)
+static void evsel__uniquify_counter(struct evsel *counter)
{
const char *name, *pmu_name;
char *new_name, *config;
int ret;
+ /* No uniquification necessary. */
+ if (!counter->needs_uniquify)
+ return;
+
/* The evsel was already uniquified. */
if (counter->uniquified_name)
return;
@@ -931,19 +946,6 @@ static void uniquify_event_name(struct evsel *counter)
/* Avoid checking to uniquify twice. */
counter->uniquified_name = true;
- /* The evsel has a "name=" config term or is from libpfm. */
- if (counter->use_config_name || counter->is_libpfm_event)
- return;
-
- /* Legacy no PMU event, don't uniquify. */
- if (!counter->pmu ||
- (counter->pmu->type < PERF_TYPE_MAX && counter->pmu->type != PERF_TYPE_RAW))
- return;
-
- /* A sysfs or json event replacing a legacy event, don't uniquify. */
- if (counter->pmu->is_core && counter->alternate_hw_config != PERF_COUNT_HW_MAX)
- return;
-
name = evsel__name(counter);
pmu_name = counter->pmu->name;
/* Already prefixed by the PMU name. */
@@ -982,17 +984,6 @@ static void uniquify_event_name(struct evsel *counter)
}
}
-static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config)
-{
- return evsel__is_hybrid(evsel) && !config->hybrid_merge;
-}
-
-static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter)
-{
- if (config->aggr_mode == AGGR_NONE || hybrid_uniquify(counter, config))
- uniquify_event_name(counter);
-}
-
/**
* should_skip_zero_count() - Check if the event should print 0 values.
* @config: The perf stat configuration (including aggregation mode).
@@ -1078,7 +1069,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
if (counter->merged_stat)
return;
- uniquify_counter(config, counter);
+ evsel__uniquify_counter(counter);
val = aggr->counts.val;
ena = aggr->counts.ena;
@@ -1095,13 +1086,13 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
os->first = true;
fputc('{', output);
}
- if (os->prefix) {
+ if (config->interval) {
if (config->json_output)
- json_out(os, "%s", os->prefix);
+ json_out(os, "%s", os->timestamp);
else
- fprintf(output, "%s", os->prefix);
+ fprintf(output, "%s", os->timestamp);
} else if (config->summary && config->csv_output &&
- !config->no_csv_summary && !config->interval)
+ !config->no_csv_summary)
fprintf(output, "%s%s", "summary", config->csv_sep);
}
@@ -1128,11 +1119,11 @@ static void print_metric_begin(struct perf_stat_config *config,
if (config->json_output)
fputc('{', config->output);
- if (os->prefix) {
+ if (config->interval) {
if (config->json_output)
- json_out(os, "%s", os->prefix);
+ json_out(os, "%s", os->timestamp);
else
- fprintf(config->output, "%s", os->prefix);
+ fprintf(config->output, "%s", os->timestamp);
}
evsel = evlist__first(evlist);
id = config->aggr_map->map[aggr_idx];
@@ -1318,7 +1309,7 @@ static void print_metric_headers(struct perf_stat_config *config,
struct perf_stat_output_ctx out = {
.ctx = &os,
.print_metric = print_metric_header,
- .new_line = new_line_metric,
+ .new_line = NULL,
.force_header = true,
};
@@ -1353,20 +1344,20 @@ static void print_metric_headers(struct perf_stat_config *config,
fputc('\n', config->output);
}
-static void prepare_interval(struct perf_stat_config *config,
- char *prefix, size_t len, struct timespec *ts)
+static void prepare_timestamp(struct perf_stat_config *config,
+ struct outstate *os, struct timespec *ts)
{
if (config->iostat_run)
return;
if (config->json_output)
- scnprintf(prefix, len, "\"interval\" : %lu.%09lu",
+ scnprintf(os->timestamp, sizeof(os->timestamp), "\"interval\" : %lu.%09lu",
(unsigned long) ts->tv_sec, ts->tv_nsec);
else if (config->csv_output)
- scnprintf(prefix, len, "%lu.%09lu%s",
+ scnprintf(os->timestamp, sizeof(os->timestamp), "%lu.%09lu%s",
(unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep);
else
- scnprintf(prefix, len, "%6lu.%09lu ",
+ scnprintf(os->timestamp, sizeof(os->timestamp), "%6lu.%09lu ",
(unsigned long) ts->tv_sec, ts->tv_nsec);
}
@@ -1659,7 +1650,8 @@ static void print_cgroup_counter(struct perf_stat_config *config, struct evlist
print_metric_end(config, os);
}
-static void disable_uniquify(struct evlist *evlist)
+/* Should uniquify be disabled for the evlist? */
+static bool evlist__disable_uniquify(const struct evlist *evlist)
{
struct evsel *counter;
struct perf_pmu *last_pmu = NULL;
@@ -1668,20 +1660,84 @@ static void disable_uniquify(struct evlist *evlist)
evlist__for_each_entry(evlist, counter) {
/* If PMUs vary then uniquify can be useful. */
if (!first && counter->pmu != last_pmu)
- return;
+ return false;
first = false;
if (counter->pmu) {
/* Allow uniquify for uncore PMUs. */
if (!counter->pmu->is_core)
- return;
+ return false;
/* Keep hybrid event names uniquified for clarity. */
if (perf_pmus__num_core_pmus() > 1)
- return;
+ return false;
+ }
+ }
+ return true;
+}
+
+static void evsel__set_needs_uniquify(struct evsel *counter, const struct perf_stat_config *config)
+{
+ struct evsel *evsel;
+
+ if (counter->merged_stat) {
+ /* Counter won't be shown. */
+ return;
+ }
+
+ if (counter->use_config_name || counter->is_libpfm_event) {
+ /* Original name will be used. */
+ return;
+ }
+
+ if (!config->hybrid_merge && evsel__is_hybrid(counter)) {
+ /* Unique hybrid counters necessary. */
+ counter->needs_uniquify = true;
+ return;
+ }
+
+ if (counter->core.attr.type < PERF_TYPE_MAX && counter->core.attr.type != PERF_TYPE_RAW) {
+ /* Legacy event, don't uniquify. */
+ return;
+ }
+
+ if (counter->pmu && counter->pmu->is_core &&
+ counter->alternate_hw_config != PERF_COUNT_HW_MAX) {
+ /* A sysfs or json event replacing a legacy event, don't uniquify. */
+ return;
+ }
+
+ if (config->aggr_mode == AGGR_NONE) {
+ /* Always unique with no aggregation. */
+ counter->needs_uniquify = true;
+ return;
+ }
+
+ /*
+ * Do other non-merged events in the evlist have the same name? If so
+ * uniquify is necessary.
+ */
+ evlist__for_each_entry(counter->evlist, evsel) {
+ if (evsel == counter || evsel->merged_stat)
+ continue;
+
+ if (evsel__name_is(counter, evsel__name(evsel))) {
+ counter->needs_uniquify = true;
+ return;
}
}
- evlist__for_each_entry_continue(evlist, counter) {
- counter->uniquified_name = true;
+}
+
+static void evlist__set_needs_uniquify(struct evlist *evlist, const struct perf_stat_config *config)
+{
+ struct evsel *counter;
+
+ if (evlist__disable_uniquify(evlist)) {
+ evlist__for_each_entry(evlist, counter)
+ counter->uniquified_name = true;
+ return;
}
+
+ evlist__for_each_entry(evlist, counter)
+ evsel__set_needs_uniquify(counter, config);
}
void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
@@ -1689,23 +1745,19 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
int argc, const char **argv)
{
bool metric_only = config->metric_only;
- int interval = config->interval;
struct evsel *counter;
- char buf[64];
struct outstate os = {
.fh = config->output,
.first = true,
};
- disable_uniquify(evlist);
+ evlist__set_needs_uniquify(evlist, config);
if (config->iostat_run)
evlist->selected = evlist__first(evlist);
- if (interval) {
- os.prefix = buf;
- prepare_interval(config, buf, sizeof(buf), ts);
- }
+ if (config->interval)
+ prepare_timestamp(config, &os, ts);
print_header(config, _target, evlist, argc, argv);
@@ -1724,7 +1776,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
case AGGR_THREAD:
case AGGR_GLOBAL:
if (config->iostat_run) {
- iostat_print_counters(evlist, config, ts, buf,
+ iostat_print_counters(evlist, config, ts, os.timestamp,
(iostat_print_counter_t)print_counter, &os);
} else if (config->cgroup_list) {
print_cgroup_counter(config, evlist, &os);
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 47718610d5d8..d83bda5824d2 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -151,6 +151,7 @@ static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type
{
struct evsel *cur;
int evsel_ctx = evsel_context(evsel);
+ struct perf_pmu *evsel_pmu = evsel__find_pmu(evsel);
evlist__for_each_entry(evsel->evlist, cur) {
struct perf_stat_aggr *aggr;
@@ -177,7 +178,7 @@ static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type
* Except the SW CLOCK events,
* ignore if not the PMU we're looking for.
*/
- if ((type != STAT_NSECS) && (evsel->pmu != cur->pmu))
+ if ((type != STAT_NSECS) && (evsel_pmu != evsel__find_pmu(cur)))
continue;
aggr = &cur->stats->aggr[aggr_idx];
@@ -327,7 +328,8 @@ static void print_instructions(struct perf_stat_config *config,
"insn per cycle", 0);
}
if (max_stalled && instructions) {
- out->new_line(config, ctxp);
+ if (out->new_line)
+ out->new_line(config, ctxp);
print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%7.2f ",
"stalled cycles per insn", max_stalled / instructions);
}
@@ -670,7 +672,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
}
}
- if ((*num)++ > 0)
+ if ((*num)++ > 0 && out->new_line)
out->new_line(config, ctxp);
generic_metric(config, mexp, evsel, aggr_idx, out);
}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 7c2ccdcc3fdb..1f7abd8754c7 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -535,7 +535,10 @@ static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias)
return 0;
}
-/* events should have the same name, scale, unit, cgroup but on different PMUs */
+/*
+ * Events should have the same name, scale, unit, cgroup but on different core
+ * PMUs or on different but matching uncore PMUs.
+ */
static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b)
{
if (strcmp(evsel__name(evsel_a), evsel__name(evsel_b)))
@@ -553,7 +556,13 @@ static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b)
if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b))
return false;
- return evsel_a->pmu != evsel_b->pmu;
+ if (evsel_a->pmu == evsel_b->pmu || evsel_a->pmu == NULL || evsel_b->pmu == NULL)
+ return false;
+
+ if (evsel_a->pmu->is_core)
+ return evsel_b->pmu->is_core;
+
+ return perf_pmu__name_no_suffix_match(evsel_a->pmu, evsel_b->pmu->name);
}
static void evsel__merge_aliases(struct evsel *evsel)
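Alias merging now distinguishes the two legitimate cross-PMU cases: the same event on two core PMUs (hybrid systems), and the same event on multiple instances of one uncore PMU, whose names differ only by a numeric suffix. The latter is what perf_pmu__name_no_suffix_match() checks; for instance (a sketch, with hypothetical pmu_a/pmu_b variables):

	/* "uncore_imc_0" and "uncore_imc_1" differ only by the instance suffix. */
	if (perf_pmu__name_no_suffix_match(pmu_a, pmu_b->name))
		pr_debug("%s and %s are instances of the same uncore PMU\n",
			 pmu_a->name, pmu_b->name);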
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 6f8cff3cd39a..2fda9acd7374 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -117,8 +117,9 @@ struct perf_stat_config {
unsigned int topdown_level;
};
+extern struct perf_stat_config stat_config;
+
void perf_stat__set_big_num(int set);
-void perf_stat__set_no_csv_summary(int set);
void update_stats(struct stats *stats, u64 val);
double avg_stats(struct stats *stats);
diff --git a/tools/perf/util/stream.c b/tools/perf/util/stream.c
index 545e44981a27..3de4a6130853 100644
--- a/tools/perf/util/stream.c
+++ b/tools/perf/util/stream.c
@@ -52,7 +52,6 @@ static struct evlist_streams *evlist_streams__new(int nr_evsel,
goto err;
s->nr_streams_max = nr_streams_max;
- s->evsel_idx = -1;
}
els->ev_streams = es;
@@ -139,7 +138,7 @@ static int evlist__init_callchain_streams(struct evlist *evlist,
hists__output_resort(hists, NULL);
init_hot_callchain(hists, &es[i]);
- es[i].evsel_idx = pos->core.idx;
+ es[i].evsel = pos;
i++;
}
@@ -166,12 +165,12 @@ struct evlist_streams *evlist__create_streams(struct evlist *evlist,
}
struct evsel_streams *evsel_streams__entry(struct evlist_streams *els,
- int evsel_idx)
+ const struct evsel *evsel)
{
struct evsel_streams *es = els->ev_streams;
for (int i = 0; i < els->nr_evsel; i++) {
- if (es[i].evsel_idx == evsel_idx)
+ if (es[i].evsel == evsel)
return &es[i];
}
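Looking streams up by evsel pointer instead of by index lets the stream-comparison code work directly on the evsels of two sessions, without assuming both evlists index their events identically. A pairing sketch (assuming both evlists were passed through evlist__create_streams()):

	static void match_evsel_streams(struct evlist_streams *els_base,
					struct evlist_streams *els_pair,
					struct evsel *base, struct evsel *pair)
	{
		struct evsel_streams *es_base = evsel_streams__entry(els_base, base);
		struct evsel_streams *es_pair = evsel_streams__entry(els_pair, pair);

		if (es_base && es_pair)
			evsel_streams__match(es_base, es_pair);
	}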
diff --git a/tools/perf/util/stream.h b/tools/perf/util/stream.h
index bee768874fea..50f7e6e04982 100644
--- a/tools/perf/util/stream.h
+++ b/tools/perf/util/stream.h
@@ -2,7 +2,9 @@
#ifndef __PERF_STREAM_H
#define __PERF_STREAM_H
-#include "callchain.h"
+struct callchain_node;
+struct evlist;
+struct evsel;
struct stream {
struct callchain_node *cnode;
@@ -11,9 +13,9 @@ struct stream {
struct evsel_streams {
struct stream *streams;
+ const struct evsel *evsel;
int nr_streams_max;
int nr_streams;
- int evsel_idx;
u64 streams_hits;
};
@@ -22,15 +24,13 @@ struct evlist_streams {
int nr_evsel;
};
-struct evlist;
-
void evlist_streams__delete(struct evlist_streams *els);
struct evlist_streams *evlist__create_streams(struct evlist *evlist,
int nr_streams_max);
struct evsel_streams *evsel_streams__entry(struct evlist_streams *els,
- int evsel_idx);
+ const struct evsel *evsel);
void evsel_streams__match(struct evsel_streams *es_base,
struct evsel_streams *es_pair);
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index 308fc7ec88cc..c0e927bbadf6 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -254,11 +254,20 @@ char *strpbrk_esc(char *str, const char *stopset)
do {
ptr = strpbrk(str, stopset);
- if (ptr == str ||
- (ptr == str + 1 && *(ptr - 1) != '\\'))
+ if (!ptr) {
+ /* stopset not in str. */
break;
+ }
+ if (ptr == str) {
+ /* stopset character is first in str. */
+ break;
+ }
+ if (ptr == str + 1 && str[0] != '\\') {
+			/* stopset character is second and wasn't preceded by a '\'. */
+ break;
+ }
str = ptr + 1;
- } while (ptr && *(ptr - 1) == '\\' && *(ptr - 2) != '\\');
+ } while (ptr[-1] == '\\' && ptr[-2] != '\\');
return ptr;
}
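strpbrk_esc() behaves like strpbrk() except that stop characters preceded by a single (unescaped) backslash are treated as escaped and skipped. A small illustration:

	static void strpbrk_esc_example(void)
	{
		char str[] = "a\\,b,c";	/* buffer contents: a\,b,c */

		/* Returns the ',' before 'c'; the escaped "\," before 'b' is skipped. */
		pr_debug("%s\n", strpbrk_esc(str, ","));
	}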
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 2b04f47f4db0..b1d259f590e9 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -21,6 +21,7 @@
#include <perf/cpumap.h>
#include "env.h"
+#include "perf.h"
#include "svghelper.h"
static u64 first_time, last_time;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index e398abfd13a0..fbf6d0f73af9 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -7,6 +7,7 @@
#include <unistd.h>
#include <inttypes.h>
+#include "compress.h"
#include "dso.h"
#include "map.h"
#include "maps.h"
@@ -287,8 +288,9 @@ static bool want_demangle(bool is_kernel_sym)
* Demangle C++ function signature, typically replaced by demangle-cxx.cpp
* version.
*/
-__weak char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused,
- bool modifiers __maybe_unused)
+#ifndef HAVE_CXA_DEMANGLE_SUPPORT
+char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused,
+ bool modifiers __maybe_unused)
{
#ifdef HAVE_LIBBFD_SUPPORT
int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0);
@@ -302,6 +304,7 @@ __weak char *cxx_demangle_sym(const char *str __maybe_unused, bool params __mayb
return NULL;
#endif
}
+#endif /* !HAVE_CXA_DEMANGLE_SUPPORT */
static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
{
@@ -1171,33 +1174,6 @@ out:
#endif
-static int dso__swap_init(struct dso *dso, unsigned char eidata)
-{
- static unsigned int const endian = 1;
-
- dso__set_needs_swap(dso, DSO_SWAP__NO);
-
- switch (eidata) {
- case ELFDATA2LSB:
- /* We are big endian, DSO is little endian. */
- if (*(unsigned char const *)&endian != 1)
- dso__set_needs_swap(dso, DSO_SWAP__YES);
- break;
-
- case ELFDATA2MSB:
- /* We are little endian, DSO is big endian. */
- if (*(unsigned char const *)&endian != 0)
- dso__set_needs_swap(dso, DSO_SWAP__YES);
- break;
-
- default:
- pr_err("unrecognized DSO data encoding %d\n", eidata);
- return -EINVAL;
- }
-
- return 0;
-}
-
bool symsrc__possibly_runtime(struct symsrc *ss)
{
return ss->dynsym || ss->opdsec;
@@ -1226,6 +1202,81 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
ehdr.e_type == ET_DYN;
}
+static Elf *read_gnu_debugdata(struct dso *dso, Elf *elf, const char *name, int *fd_ret)
+{
+ Elf *elf_embedded;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ Elf_Scn *scn;
+ Elf_Data *scn_data;
+ FILE *wrapped;
+ size_t shndx;
+ char temp_filename[] = "/tmp/perf.gnu_debugdata.elf.XXXXXX";
+ int ret, temp_fd;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL) {
+ pr_debug("%s: cannot read %s ELF file.\n", __func__, name);
+ *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF;
+ return NULL;
+ }
+
+ scn = elf_section_by_name(elf, &ehdr, &shdr, ".gnu_debugdata", &shndx);
+ if (!scn) {
+ *dso__load_errno(dso) = -ENOENT;
+ return NULL;
+ }
+
+ if (shdr.sh_type == SHT_NOBITS) {
+ pr_debug("%s: .gnu_debugdata of ELF file %s has no data.\n", __func__, name);
+ *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF;
+ return NULL;
+ }
+
+ scn_data = elf_rawdata(scn, NULL);
+ if (!scn_data) {
+ pr_debug("%s: error reading .gnu_debugdata of %s: %s\n", __func__,
+ name, elf_errmsg(-1));
+ *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF;
+ return NULL;
+ }
+
+ wrapped = fmemopen(scn_data->d_buf, scn_data->d_size, "r");
+ if (!wrapped) {
+ pr_debug("%s: fmemopen: %s\n", __func__, strerror(errno));
+ *dso__load_errno(dso) = -errno;
+ return NULL;
+ }
+
+ temp_fd = mkstemp(temp_filename);
+ if (temp_fd < 0) {
+ pr_debug("%s: mkstemp: %s\n", __func__, strerror(errno));
+ *dso__load_errno(dso) = -errno;
+ fclose(wrapped);
+ return NULL;
+ }
+ unlink(temp_filename);
+
+ ret = lzma_decompress_stream_to_file(wrapped, temp_fd);
+ fclose(wrapped);
+ if (ret < 0) {
+ *dso__load_errno(dso) = -errno;
+ close(temp_fd);
+ return NULL;
+ }
+
+ elf_embedded = elf_begin(temp_fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (!elf_embedded) {
+ pr_debug("%s: error reading .gnu_debugdata of %s: %s\n", __func__,
+ name, elf_errmsg(-1));
+ *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF;
+ close(temp_fd);
+ return NULL;
+ }
+ pr_debug("%s: using .gnu_debugdata of %s\n", __func__, name);
+ *fd_ret = temp_fd;
+ return elf_embedded;
+}
+
int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
enum dso_binary_type type)
{
@@ -1254,6 +1305,19 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
goto out_close;
}
+ if (type == DSO_BINARY_TYPE__GNU_DEBUGDATA) {
+ int new_fd;
+ Elf *embedded = read_gnu_debugdata(dso, elf, name, &new_fd);
+
+ if (!embedded)
+ goto out_close;
+
+ elf_end(elf);
+ close(fd);
+ fd = new_fd;
+ elf = embedded;
+ }
+
if (gelf_getehdr(elf, &ehdr) == NULL) {
*dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF;
pr_debug("%s: cannot get elf header.\n", __func__);
@@ -1852,10 +1916,23 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
kmodule, 1);
if (err < 0)
return err;
- err += nr;
+ nr += err;
}
- return err;
+ /*
+ * The .gnu_debugdata is a special situation: it contains a symbol
+ * table, but the runtime file may also contain dynsym entries which are
+ * not present there. We need to load both.
+ */
+ if (syms_ss->type == DSO_BINARY_TYPE__GNU_DEBUGDATA && runtime_ss->dynsym) {
+ err = dso__load_sym_internal(dso, map, runtime_ss, runtime_ss,
+ kmodule, 1);
+ if (err < 0)
+ return err;
+ nr += err;
+ }
+
+ return nr;
}
static int elf_read_maps(Elf *elf, bool exe, mapfn_t mapfn, void *data)
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index c6f369b5d893..36c1d3090689 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -90,11 +90,23 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
{
FILE *fp;
int ret = -1;
- bool need_swap = false;
+ bool need_swap = false, elf32;
u8 e_ident[EI_NIDENT];
- size_t buf_size;
- void *buf;
int i;
+ union {
+ struct {
+ Elf32_Ehdr ehdr32;
+ Elf32_Phdr *phdr32;
+ };
+ struct {
+ Elf64_Ehdr ehdr64;
+ Elf64_Phdr *phdr64;
+ };
+ } hdrs;
+ void *phdr;
+ size_t phdr_size;
+ void *buf = NULL;
+ size_t buf_size = 0;
fp = fopen(filename, "r");
if (fp == NULL)
@@ -108,117 +120,83 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
goto out;
need_swap = check_need_swap(e_ident[EI_DATA]);
+ elf32 = e_ident[EI_CLASS] == ELFCLASS32;
- /* for simplicity */
- fseek(fp, 0, SEEK_SET);
-
- if (e_ident[EI_CLASS] == ELFCLASS32) {
- Elf32_Ehdr ehdr;
- Elf32_Phdr *phdr;
-
- if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
- goto out;
+	/* Rewind: the full ELF header starts with the e_ident bytes already read. */
+	if (fseek(fp, 0, SEEK_SET))
+		goto out;
+
+	if (fread(elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64,
+		  elf32 ? sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64),
+		  1, fp) != 1)
+		goto out;
- if (need_swap) {
- ehdr.e_phoff = bswap_32(ehdr.e_phoff);
- ehdr.e_phentsize = bswap_16(ehdr.e_phentsize);
- ehdr.e_phnum = bswap_16(ehdr.e_phnum);
+ if (need_swap) {
+ if (elf32) {
+ hdrs.ehdr32.e_phoff = bswap_32(hdrs.ehdr32.e_phoff);
+ hdrs.ehdr32.e_phentsize = bswap_16(hdrs.ehdr32.e_phentsize);
+ hdrs.ehdr32.e_phnum = bswap_16(hdrs.ehdr32.e_phnum);
+ } else {
+ hdrs.ehdr64.e_phoff = bswap_64(hdrs.ehdr64.e_phoff);
+ hdrs.ehdr64.e_phentsize = bswap_16(hdrs.ehdr64.e_phentsize);
+ hdrs.ehdr64.e_phnum = bswap_16(hdrs.ehdr64.e_phnum);
}
+ }
+ phdr_size = elf32 ? hdrs.ehdr32.e_phentsize * hdrs.ehdr32.e_phnum
+ : hdrs.ehdr64.e_phentsize * hdrs.ehdr64.e_phnum;
+ phdr = malloc(phdr_size);
+ if (phdr == NULL)
+ goto out;
- buf_size = ehdr.e_phentsize * ehdr.e_phnum;
- buf = malloc(buf_size);
- if (buf == NULL)
- goto out;
-
- fseek(fp, ehdr.e_phoff, SEEK_SET);
- if (fread(buf, buf_size, 1, fp) != 1)
- goto out_free;
-
- for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
- void *tmp;
- long offset;
-
- if (need_swap) {
- phdr->p_type = bswap_32(phdr->p_type);
- phdr->p_offset = bswap_32(phdr->p_offset);
- phdr->p_filesz = bswap_32(phdr->p_filesz);
- }
-
- if (phdr->p_type != PT_NOTE)
- continue;
-
- buf_size = phdr->p_filesz;
- offset = phdr->p_offset;
- tmp = realloc(buf, buf_size);
- if (tmp == NULL)
- goto out_free;
-
- buf = tmp;
- fseek(fp, offset, SEEK_SET);
- if (fread(buf, buf_size, 1, fp) != 1)
- goto out_free;
+ fseek(fp, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET);
+ if (fread(phdr, phdr_size, 1, fp) != 1)
+ goto out_free;
- ret = read_build_id(buf, buf_size, bid, need_swap);
- if (ret == 0) {
- ret = bid->size;
- break;
- }
- }
- } else {
- Elf64_Ehdr ehdr;
- Elf64_Phdr *phdr;
+ if (elf32)
+ hdrs.phdr32 = phdr;
+ else
+ hdrs.phdr64 = phdr;
- if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
- goto out;
+	for (i = 0; i < (elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum); i++) {
+ size_t p_filesz;
if (need_swap) {
- ehdr.e_phoff = bswap_64(ehdr.e_phoff);
- ehdr.e_phentsize = bswap_16(ehdr.e_phentsize);
- ehdr.e_phnum = bswap_16(ehdr.e_phnum);
+ if (elf32) {
+ hdrs.phdr32[i].p_type = bswap_32(hdrs.phdr32[i].p_type);
+ hdrs.phdr32[i].p_offset = bswap_32(hdrs.phdr32[i].p_offset);
+				hdrs.phdr32[i].p_filesz = bswap_32(hdrs.phdr32[i].p_filesz);
+ } else {
+ hdrs.phdr64[i].p_type = bswap_32(hdrs.phdr64[i].p_type);
+ hdrs.phdr64[i].p_offset = bswap_64(hdrs.phdr64[i].p_offset);
+ hdrs.phdr64[i].p_filesz = bswap_64(hdrs.phdr64[i].p_filesz);
+ }
}
+ if ((elf32 ? hdrs.phdr32[i].p_type : hdrs.phdr64[i].p_type) != PT_NOTE)
+ continue;
- buf_size = ehdr.e_phentsize * ehdr.e_phnum;
- buf = malloc(buf_size);
- if (buf == NULL)
- goto out;
-
- fseek(fp, ehdr.e_phoff, SEEK_SET);
- if (fread(buf, buf_size, 1, fp) != 1)
- goto out_free;
-
- for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
+ p_filesz = elf32 ? hdrs.phdr32[i].p_filesz : hdrs.phdr64[i].p_filesz;
+ if (p_filesz > buf_size) {
void *tmp;
- long offset;
-
- if (need_swap) {
- phdr->p_type = bswap_32(phdr->p_type);
- phdr->p_offset = bswap_64(phdr->p_offset);
- phdr->p_filesz = bswap_64(phdr->p_filesz);
- }
- if (phdr->p_type != PT_NOTE)
- continue;
-
- buf_size = phdr->p_filesz;
- offset = phdr->p_offset;
+ buf_size = p_filesz;
tmp = realloc(buf, buf_size);
if (tmp == NULL)
goto out_free;
-
buf = tmp;
- fseek(fp, offset, SEEK_SET);
- if (fread(buf, buf_size, 1, fp) != 1)
- goto out_free;
+ }
+ fseek(fp, elf32 ? hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET);
+ if (fread(buf, p_filesz, 1, fp) != 1)
+ goto out_free;
- ret = read_build_id(buf, buf_size, bid, need_swap);
- if (ret == 0) {
- ret = bid->size;
- break;
- }
+ ret = read_build_id(buf, p_filesz, bid, need_swap);
+ if (ret == 0) {
+ ret = bid->size;
+ break;
}
}
out_free:
free(buf);
+ free(phdr);
out:
fclose(fp);
return ret;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 49b08adc6ee3..11540219481b 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -18,6 +18,7 @@
#include "annotate.h"
#include "build-id.h"
#include "cap.h"
+#include "cpumap.h"
#include "dso.h"
#include "util.h" // lsdir()
#include "debug.h"
@@ -84,6 +85,7 @@ static enum dso_binary_type binary_type_symtab[] = {
DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+ DSO_BINARY_TYPE__GNU_DEBUGDATA,
DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
DSO_BINARY_TYPE__GUEST_KMODULE,
DSO_BINARY_TYPE__GUEST_KMODULE_COMP,
@@ -1716,6 +1718,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+ case DSO_BINARY_TYPE__GNU_DEBUGDATA:
return !kmod && dso__kernel(dso) == DSO_SPACE__USER;
case DSO_BINARY_TYPE__KALLSYMS:
@@ -2471,6 +2474,36 @@ int symbol__annotation_init(void)
return 0;
}
+static int setup_parallelism_bitmap(void)
+{
+ struct perf_cpu_map *map;
+ struct perf_cpu cpu;
+ int i, err = -1;
+
+ if (symbol_conf.parallelism_list_str == NULL)
+ return 0;
+
+ map = perf_cpu_map__new(symbol_conf.parallelism_list_str);
+ if (map == NULL) {
+ pr_err("failed to parse parallelism filter list\n");
+ return -1;
+ }
+
+ bitmap_fill(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ if (cpu.cpu <= 0 || cpu.cpu > MAX_NR_CPUS) {
+ pr_err("Requested parallelism level %d is invalid.\n", cpu.cpu);
+ goto out_delete_map;
+ }
+ __clear_bit(cpu.cpu, symbol_conf.parallelism_filter);
+ }
+
+ err = 0;
+out_delete_map:
+ perf_cpu_map__put(map);
+ return err;
+}
+
int symbol__init(struct perf_env *env)
{
const char *symfs;
@@ -2490,6 +2523,9 @@ int symbol__init(struct perf_env *env)
return -1;
}
+ if (setup_parallelism_bitmap())
+ return -1;
+
if (setup_list(&symbol_conf.dso_list,
symbol_conf.dso_list_str, "dso") < 0)
return -1;
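The filter reuses the cpu-list parser, so the string that fills parallelism_list_str accepts the usual "1,2-4" syntax; a set bit in parallelism_filter means that parallelism level is filtered out, which is why the requested levels are cleared after bitmap_fill(). A sketch of the resulting semantics:

	/* After symbol__init() ran with parallelism_list_str = "2-4": */
	static void dump_parallelism_filter(void)
	{
		for (int level = 1; level <= 8; level++)
			pr_debug("parallelism %d: %s\n", level,
				 test_bit(level, symbol_conf.parallelism_filter) ?
				 "filtered" : "shown");
	}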
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index a9c51acc722f..cd9aa82c7d5a 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -3,6 +3,8 @@
#define __PERF_SYMBOL_CONF 1
#include <stdbool.h>
+#include <linux/bitmap.h>
+#include "perf.h"
struct strlist;
struct intlist;
@@ -47,7 +49,9 @@ struct symbol_conf {
keep_exited_threads,
annotate_data_member,
annotate_data_sample,
- skip_empty;
+ skip_empty,
+ enable_latency,
+ prefer_latency;
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,
@@ -62,6 +66,7 @@ struct symbol_conf {
*pid_list_str,
*tid_list_str,
*sym_list_str,
+ *parallelism_list_str,
*col_width_list_str,
*bt_stop_list_str;
const char *addr2line_path;
@@ -82,6 +87,7 @@ struct symbol_conf {
int pad_output_len_dso;
int group_sort_idx;
int addr_range;
+ DECLARE_BITMAP(parallelism_filter, MAX_NR_CPUS + 1);
};
extern struct symbol_conf symbol_conf;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index a58444c4aed1..2fc4d0537840 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -38,6 +38,7 @@
#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
#include <api/fs/fs.h>
#include <api/io.h>
+#include <api/io_dir.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@@ -767,10 +768,10 @@ static int __event__synthesize_thread(union perf_event *comm_event,
bool needs_mmap, bool mmap_data)
{
char filename[PATH_MAX];
- struct dirent **dirent;
+ struct io_dir iod;
+ struct io_dirent64 *dent;
pid_t tgid, ppid;
int rc = 0;
- int i, n;
/* special case: only send one comm event using passed in pid */
if (!full) {
@@ -802,16 +803,19 @@ static int __event__synthesize_thread(union perf_event *comm_event,
snprintf(filename, sizeof(filename), "%s/proc/%d/task",
machine->root_dir, pid);
- n = scandir(filename, &dirent, filter_task, NULL);
- if (n < 0)
- return n;
+ io_dir__init(&iod, open(filename, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ if (iod.dirfd < 0)
+ return -1;
- for (i = 0; i < n; i++) {
+ while ((dent = io_dir__readdir(&iod)) != NULL) {
char *end;
pid_t _pid;
bool kernel_thread = false;
- _pid = strtol(dirent[i]->d_name, &end, 10);
+ if (!isdigit(dent->d_name[0]))
+ continue;
+
+ _pid = strtol(dent->d_name, &end, 10);
if (*end)
continue;
@@ -845,9 +849,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
}
}
- for (i = 0; i < n; i++)
- zfree(&dirent[i]);
- free(dirent);
+ close(iod.dirfd);
return rc;
}
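Note: io_dir (tools/lib/api/io_dir.h) is a thin getdents64-based replacement for scandir() that avoids per-entry heap allocation; the isdigit() check takes over the job of the dropped filter_task() callback. The iteration pattern on its own, as a sketch (error handling trimmed; the directory path is just an example):

	struct io_dir iod;
	struct io_dirent64 *dent;

	io_dir__init(&iod, open("/proc/self/task", O_CLOEXEC | O_DIRECTORY | O_RDONLY));
	if (iod.dirfd >= 0) {
		while ((dent = io_dir__readdir(&iod)) != NULL) {
			if (isdigit(dent->d_name[0]))	/* skips "." and ".." too */
				printf("tid %s\n", dent->d_name);
		}
		close(iod.dirfd);
	}
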
@@ -1508,9 +1510,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
}
if (type & PERF_SAMPLE_REGS_USER) {
- if (sample->user_regs.abi) {
+ if (sample->user_regs && sample->user_regs->abi) {
result += sizeof(u64);
- sz = hweight64(sample->user_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->user_regs->mask) * sizeof(u64);
result += sz;
} else {
result += sizeof(u64);
@@ -1536,9 +1538,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
result += sizeof(u64);
if (type & PERF_SAMPLE_REGS_INTR) {
- if (sample->intr_regs.abi) {
+ if (sample->intr_regs && sample->intr_regs->abi) {
result += sizeof(u64);
- sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->intr_regs->mask) * sizeof(u64);
result += sz;
} else {
result += sizeof(u64);
@@ -1686,12 +1688,16 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
}
if (type & PERF_SAMPLE_RAW) {
- u.val32[0] = sample->raw_size;
- *array = u.val64;
- array = (void *)array + sizeof(u32);
+ u32 *array32 = (void *)array;
+
+ *array32 = sample->raw_size;
+ array32++;
+
+ memcpy(array32, sample->raw_data, sample->raw_size);
+ array = (void *)(array32 + (sample->raw_size / sizeof(u32)));
- memcpy(array, sample->raw_data, sample->raw_size);
- array = (void *)array + sample->raw_size;
+ /* make sure the array is 64-bit aligned */
+ BUG_ON(((long)array) % sizeof(u64));
}
if (type & PERF_SAMPLE_BRANCH_STACK) {
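Note: the PERF_SAMPLE_RAW rewrite sidesteps an unaligned u64 store. The raw payload is prefixed by a u32 size, and the kernel pads raw_size so that prefix plus data end on a u64 boundary. A worked example, assuming that invariant:

	/* Illustrative sizes only, assuming (4 + raw_size) % 8 == 0:
	 *   [u32 raw_size = 28][28 bytes raw_data]  -> 32 bytes total
	 * array32 advances 1 + 28/4 = 8 u32 slots, i.e. 32 bytes, so the
	 * next field stays u64-aligned, which is what the BUG_ON asserts.
	 */
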
@@ -1703,10 +1709,10 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
}
if (type & PERF_SAMPLE_REGS_USER) {
- if (sample->user_regs.abi) {
- *array++ = sample->user_regs.abi;
- sz = hweight64(sample->user_regs.mask) * sizeof(u64);
- memcpy(array, sample->user_regs.regs, sz);
+ if (sample->user_regs && sample->user_regs->abi) {
+ *array++ = sample->user_regs->abi;
+ sz = hweight64(sample->user_regs->mask) * sizeof(u64);
+ memcpy(array, sample->user_regs->regs, sz);
array = (void *)array + sz;
} else {
*array++ = 0;
@@ -1739,10 +1745,10 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
}
if (type & PERF_SAMPLE_REGS_INTR) {
- if (sample->intr_regs.abi) {
- *array++ = sample->intr_regs.abi;
- sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
- memcpy(array, sample->intr_regs.regs, sz);
+ if (sample->intr_regs && sample->intr_regs->abi) {
+ *array++ = sample->intr_regs->abi;
+ sz = hweight64(sample->intr_regs->mask) * sizeof(u64);
+ memcpy(array, sample->intr_regs->regs, sz);
array = (void *)array + sz;
} else {
*array++ = 0;
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index 69d8dcf5cf28..67a8ec10e9e4 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -7,203 +7,127 @@
#include "syscalltbl.h"
#include <stdlib.h>
+#include <asm/bitsperlong.h>
#include <linux/compiler.h>
+#include <linux/kernel.h>
#include <linux/zalloc.h>
-#ifdef HAVE_SYSCALL_TABLE_SUPPORT
#include <string.h>
#include "string2.h"
-#if defined(__x86_64__)
-#include <asm/syscalls_64.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_x86_64;
-#elif defined(__i386__)
-#include <asm/syscalls_32.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_x86_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_x86;
-#elif defined(__s390x__)
-#include <asm/syscalls_64.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_s390_64;
-#elif defined(__powerpc64__)
-#include <asm/syscalls_64.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_powerpc_64;
-#elif defined(__powerpc__)
-#include <asm/syscalls_32.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_powerpc_32;
-#elif defined(__aarch64__)
-#include <asm/syscalls.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_arm64;
-#elif defined(__mips__)
-#include <asm/syscalls_n64.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_MIPS_N64_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_mips_n64;
-#elif defined(__loongarch__)
-#include <asm/syscalls.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_loongarch;
-#elif defined(__riscv)
-#include <asm/syscalls.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_RISCV_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_riscv;
-#else
-const int syscalltbl_native_max_id = 0;
-static const char *const syscalltbl_native[] = {
- [0] = "unknown",
-};
-#endif
-
-struct syscall {
- int id;
- const char *name;
-};
-
-static int syscallcmpname(const void *vkey, const void *ventry)
-{
- const char *key = vkey;
- const struct syscall *entry = ventry;
-
- return strcmp(key, entry->name);
-}
+#include "trace/beauty/generated/syscalltbl.c"
-static int syscallcmp(const void *va, const void *vb)
+static const struct syscalltbl *find_table(int e_machine)
{
- const struct syscall *a = va, *b = vb;
+ static const struct syscalltbl *last_table;
+ static int last_table_machine = EM_NONE;
- return strcmp(a->name, b->name);
-}
+ /* Tables only exist for EM_SPARC. */
+ if (e_machine == EM_SPARCV9)
+ e_machine = EM_SPARC;
-static int syscalltbl__init_native(struct syscalltbl *tbl)
-{
- int nr_entries = 0, i, j;
- struct syscall *entries;
+ if (last_table_machine == e_machine && last_table != NULL)
+ return last_table;
- for (i = 0; i <= syscalltbl_native_max_id; ++i)
- if (syscalltbl_native[i])
- ++nr_entries;
+ for (size_t i = 0; i < ARRAY_SIZE(syscalltbls); i++) {
+ const struct syscalltbl *entry = &syscalltbls[i];
- entries = tbl->syscalls.entries = malloc(sizeof(struct syscall) * nr_entries);
- if (tbl->syscalls.entries == NULL)
- return -1;
+ if (entry->e_machine != e_machine && entry->e_machine != EM_NONE)
+ continue;
- for (i = 0, j = 0; i <= syscalltbl_native_max_id; ++i) {
- if (syscalltbl_native[i]) {
- entries[j].name = syscalltbl_native[i];
- entries[j].id = i;
- ++j;
- }
+ last_table = entry;
+ last_table_machine = e_machine;
+ return entry;
}
-
- qsort(tbl->syscalls.entries, nr_entries, sizeof(struct syscall), syscallcmp);
- tbl->syscalls.nr_entries = nr_entries;
- tbl->syscalls.max_id = syscalltbl_native_max_id;
- return 0;
+ return NULL;
}
-struct syscalltbl *syscalltbl__new(void)
+const char *syscalltbl__name(int e_machine, int id)
{
- struct syscalltbl *tbl = malloc(sizeof(*tbl));
- if (tbl) {
- if (syscalltbl__init_native(tbl)) {
- free(tbl);
- return NULL;
- }
+ const struct syscalltbl *table = find_table(e_machine);
+
+ if (e_machine == EM_MIPS && id > 1000) {
+ /*
+ * MIPS may encode the N32/64/O32 type in the high part of
+ * syscall number. Mask this off if present. See the values of
+ * __NR_N32_Linux, __NR_64_Linux, __NR_O32_Linux and __NR_Linux.
+ */
+ id = id % 1000;
}
- return tbl;
+ if (table && id >= 0 && id < table->num_to_name_len)
+ return table->num_to_name[id];
+ return NULL;
}
-void syscalltbl__delete(struct syscalltbl *tbl)
-{
- zfree(&tbl->syscalls.entries);
- free(tbl);
-}
+struct syscall_cmp_key {
+ const char *name;
+ const char *const *tbl;
+};
-const char *syscalltbl__name(const struct syscalltbl *tbl __maybe_unused, int id)
+static int syscallcmpname(const void *vkey, const void *ventry)
{
- return id <= syscalltbl_native_max_id ? syscalltbl_native[id]: NULL;
+ const struct syscall_cmp_key *key = vkey;
+ const uint16_t *entry = ventry;
+
+ return strcmp(key->name, key->tbl[*entry]);
}
-int syscalltbl__id(struct syscalltbl *tbl, const char *name)
+int syscalltbl__id(int e_machine, const char *name)
{
- struct syscall *sc = bsearch(name, tbl->syscalls.entries,
- tbl->syscalls.nr_entries, sizeof(*sc),
- syscallcmpname);
+ const struct syscalltbl *table = find_table(e_machine);
+ struct syscall_cmp_key key;
+ const uint16_t *id;
- return sc ? sc->id : -1;
-}
+ if (!table)
+ return -1;
-int syscalltbl__id_at_idx(struct syscalltbl *tbl, int idx)
-{
- struct syscall *syscalls = tbl->syscalls.entries;
+ key.name = name;
+ key.tbl = table->num_to_name;
+ id = bsearch(&key, table->sorted_names, table->sorted_names_len,
+ sizeof(table->sorted_names[0]), syscallcmpname);
- return idx < tbl->syscalls.nr_entries ? syscalls[idx].id : -1;
+ return id ? *id : -1;
}
-int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+int syscalltbl__num_idx(int e_machine)
{
- int i;
- struct syscall *syscalls = tbl->syscalls.entries;
+ const struct syscalltbl *table = find_table(e_machine);
- for (i = *idx + 1; i < tbl->syscalls.nr_entries; ++i) {
- if (strglobmatch(syscalls[i].name, syscall_glob)) {
- *idx = i;
- return syscalls[i].id;
- }
- }
+ if (!table)
+ return 0;
- return -1;
+ return table->sorted_names_len;
}
-int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+int syscalltbl__id_at_idx(int e_machine, int idx)
{
- *idx = -1;
- return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
-}
-
-#else /* HAVE_SYSCALL_TABLE_SUPPORT */
+ const struct syscalltbl *table = find_table(e_machine);
-#include <libaudit.h>
+ if (!table)
+ return -1;
-struct syscalltbl *syscalltbl__new(void)
-{
- struct syscalltbl *tbl = zalloc(sizeof(*tbl));
- if (tbl)
- tbl->audit_machine = audit_detect_machine();
- return tbl;
+ assert(idx >= 0 && idx < table->sorted_names_len);
+ return table->sorted_names[idx];
}
-void syscalltbl__delete(struct syscalltbl *tbl)
+int syscalltbl__strglobmatch_next(int e_machine, const char *syscall_glob, int *idx)
{
- free(tbl);
-}
+ const struct syscalltbl *table = find_table(e_machine);
-const char *syscalltbl__name(const struct syscalltbl *tbl, int id)
-{
- return audit_syscall_to_name(id, tbl->audit_machine);
-}
-
-int syscalltbl__id(struct syscalltbl *tbl, const char *name)
-{
- return audit_name_to_syscall(name, tbl->audit_machine);
-}
+ for (int i = *idx + 1; table && i < table->sorted_names_len; ++i) {
+ const char *name = table->num_to_name[table->sorted_names[i]];
-int syscalltbl__id_at_idx(struct syscalltbl *tbl __maybe_unused, int idx)
-{
- return idx;
-}
+ if (strglobmatch(name, syscall_glob)) {
+ *idx = i;
+ return table->sorted_names[i];
+ }
+ }
-int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused,
- const char *syscall_glob __maybe_unused, int *idx __maybe_unused)
-{
return -1;
}
-int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+int syscalltbl__strglobmatch_first(int e_machine, const char *syscall_glob, int *idx)
{
- return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
+ *idx = -1;
+ return syscalltbl__strglobmatch_next(e_machine, syscall_glob, idx);
}
-#endif /* HAVE_SYSCALL_TABLE_SUPPORT */
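Note: with the generated trace/beauty/generated/syscalltbl.c providing per-machine tables, lookups are keyed by the ELF e_machine of the workload instead of a heap-allocated syscalltbl object. sorted_names holds uint16_t indices into num_to_name ordered by syscall name, which is why the bsearch comparator dereferences through key->tbl. A hedged usage sketch (EM_X86_64 comes from elf.h; "openat" is just an example name):

	#include <elf.h>

	int id = syscalltbl__id(EM_X86_64, "openat");

	if (id >= 0)
		printf("openat = syscall %d (%s)\n", id, syscalltbl__name(EM_X86_64, id));
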
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h
index 2b53b7ed25a6..2bb628eff367 100644
--- a/tools/perf/util/syscalltbl.h
+++ b/tools/perf/util/syscalltbl.h
@@ -2,23 +2,12 @@
#ifndef __PERF_SYSCALLTBL_H
#define __PERF_SYSCALLTBL_H
-struct syscalltbl {
- int audit_machine;
- struct {
- int max_id;
- int nr_entries;
- void *entries;
- } syscalls;
-};
+const char *syscalltbl__name(int e_machine, int id);
+int syscalltbl__id(int e_machine, const char *name);
+int syscalltbl__num_idx(int e_machine);
+int syscalltbl__id_at_idx(int e_machine, int idx);
-struct syscalltbl *syscalltbl__new(void);
-void syscalltbl__delete(struct syscalltbl *tbl);
-
-const char *syscalltbl__name(const struct syscalltbl *tbl, int id);
-int syscalltbl__id(struct syscalltbl *tbl, const char *name);
-int syscalltbl__id_at_idx(struct syscalltbl *tbl, int idx);
-
-int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
-int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
+int syscalltbl__strglobmatch_first(int e_machine, const char *syscall_glob, int *idx);
+int syscalltbl__strglobmatch_next(int e_machine, const char *syscall_glob, int *idx);
#endif /* __PERF_SYSCALLTBL_H */
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 0ffdd52d86d7..10a01f8fbd40 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
+#include <elf.h>
#include <errno.h>
+#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
@@ -16,6 +18,7 @@
#include "symbol.h"
#include "unwind.h"
#include "callchain.h"
+#include "dwarf-regs.h"
#include <api/fs/fs.h>
@@ -51,6 +54,7 @@ struct thread *thread__new(pid_t pid, pid_t tid)
thread__set_ppid(thread, -1);
thread__set_cpu(thread, -1);
thread__set_guest_cpu(thread, -1);
+ thread__set_e_machine(thread, EM_NONE);
thread__set_lbr_stitch_enable(thread, false);
INIT_LIST_HEAD(thread__namespaces_list(thread));
INIT_LIST_HEAD(thread__comm_list(thread));
@@ -406,7 +410,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bo
}
void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
- struct addr_location *al)
+ bool symbols, struct addr_location *al)
{
size_t i;
const u8 cpumodes[] = {
@@ -417,12 +421,92 @@ void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
};
for (i = 0; i < ARRAY_SIZE(cpumodes); i++) {
- thread__find_symbol(thread, cpumodes[i], addr, al);
+ if (symbols)
+ thread__find_symbol(thread, cpumodes[i], addr, al);
+ else
+ thread__find_map(thread, cpumodes[i], addr, al);
+
if (al->map)
break;
}
}
+static uint16_t read_proc_e_machine_for_pid(pid_t pid)
+{
+ char path[6 /* "/proc/" */ + 11 /* max length of pid */ + 5 /* "/exe\0" */];
+ int fd;
+ uint16_t e_machine = EM_NONE;
+
+ snprintf(path, sizeof(path), "/proc/%d/exe", pid);
+ fd = open(path, O_RDONLY);
+ if (fd >= 0) {
+ _Static_assert(offsetof(Elf32_Ehdr, e_machine) == 18, "Unexpected offset");
+ _Static_assert(offsetof(Elf64_Ehdr, e_machine) == 18, "Unexpected offset");
+ if (pread(fd, &e_machine, sizeof(e_machine), 18) != sizeof(e_machine))
+ e_machine = EM_NONE;
+ close(fd);
+ }
+ return e_machine;
+}
+
+static int thread__e_machine_callback(struct map *map, void *machine)
+{
+ struct dso *dso = map__dso(map);
+
+ _Static_assert(0 == EM_NONE, "Unexpected EM_NONE");
+ if (!dso)
+ return EM_NONE;
+
+ return dso__e_machine(dso, machine);
+}
+
+uint16_t thread__e_machine(struct thread *thread, struct machine *machine)
+{
+ pid_t tid, pid;
+ uint16_t e_machine = RC_CHK_ACCESS(thread)->e_machine;
+
+ if (e_machine != EM_NONE)
+ return e_machine;
+
+ tid = thread__tid(thread);
+ pid = thread__pid(thread);
+ if (pid != tid) {
+ struct thread *parent = machine__findnew_thread(machine, pid, pid);
+
+ if (parent) {
+ e_machine = thread__e_machine(parent, machine);
+ thread__set_e_machine(thread, e_machine);
+ return e_machine;
+ }
+ /* Something went wrong, fallback. */
+ }
+ /* Reading on the PID thread. First try to find from the maps. */
+ e_machine = maps__for_each_map(thread__maps(thread),
+ thread__e_machine_callback,
+ machine);
+ if (e_machine == EM_NONE) {
+ /* Maps failed, perhaps we're live with map events disabled. */
+ bool is_live = machine->machines == NULL;
+
+ if (!is_live) {
+ /* Check if the session has a data file. */
+ struct perf_session *session = container_of(machine->machines,
+ struct perf_session,
+ machines);
+
+ is_live = !session->data;
+ }
+ /* Read from /proc/pid/exe if live. */
+ if (is_live)
+ e_machine = read_proc_e_machine_for_pid(pid);
+ }
+ if (e_machine != EM_NONE)
+ thread__set_e_machine(thread, e_machine);
+ else
+ e_machine = EM_HOST;
+ return e_machine;
+}
+
struct thread *thread__main_thread(struct machine *machine, struct thread *thread)
{
if (thread__pid(thread) == thread__tid(thread))
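Note: the pread() at offset 18 works because e_ident is 16 bytes and e_type is a 16-bit field in both ELF classes, so e_machine sits at the same byte offset for 32-bit and 64-bit headers, as the two _Static_asserts document. The same trick on an arbitrary file, as a standalone sketch:

	#include <elf.h>
	#include <fcntl.h>
	#include <stdint.h>
	#include <unistd.h>

	static uint16_t elf_file_e_machine(const char *path)
	{
		uint16_t e_machine = EM_NONE;
		int fd = open(path, O_RDONLY);

		if (fd >= 0) {
			/* e_ident[16] + u16 e_type == byte offset 18 in both classes. */
			if (pread(fd, &e_machine, sizeof(e_machine), 18) != sizeof(e_machine))
				e_machine = EM_NONE;
			close(fd);
		}
		return e_machine;
	}
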
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 6cbf6eb2812e..2b90bbed7a61 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -60,7 +60,11 @@ DECLARE_RC_STRUCT(thread) {
struct srccode_state srccode_state;
bool filter;
int filter_entry_depth;
-
+ /**
+ * @e_machine: The ELF EM_* associated with the thread. EM_NONE if not
+ * computed.
+ */
+ uint16_t e_machine;
/* LBR call stack stitch */
bool lbr_stitch_enable;
struct lbr_stitch *lbr_stitch;
@@ -122,7 +126,7 @@ struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode,
u64 addr, struct addr_location *al);
void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
- struct addr_location *al);
+ bool symbols, struct addr_location *al);
int thread__memcpy(struct thread *thread, struct machine *machine,
void *buf, u64 ip, int len, bool *is64bit);
@@ -302,6 +306,14 @@ static inline void thread__set_filter_entry_depth(struct thread *thread, int dep
RC_CHK_ACCESS(thread)->filter_entry_depth = depth;
}
+uint16_t thread__e_machine(struct thread *thread, struct machine *machine);
+
+static inline void thread__set_e_machine(struct thread *thread, uint16_t e_machine)
+{
+ RC_CHK_ACCESS(thread)->e_machine = e_machine;
+}
+
+
static inline bool thread__lbr_stitch_enable(const struct thread *thread)
{
return RC_CHK_ACCESS(thread)->lbr_stitch_enable;
diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c
index 4fb097578479..727a10e3f990 100644
--- a/tools/perf/util/tool_pmu.c
+++ b/tools/perf/util/tool_pmu.c
@@ -62,7 +62,8 @@ int tool_pmu__num_skip_events(void)
const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
{
- if (ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX)
+ if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) &&
+ !tool_pmu__skip_event(tool_pmu__event_names[ev]))
return tool_pmu__event_names[ev];
return NULL;
@@ -354,6 +355,7 @@ bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result)
if (online) {
*result = perf_cpu_map__nr(online);
+ perf_cpu_map__put(online);
return true;
}
return false;
@@ -484,22 +486,35 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
delta_start *= 1000000000 / ticks_per_sec;
}
count->val = delta_start;
- count->ena = count->run = delta_start;
count->lost = 0;
+ /*
+ * The values of enabled and running must make a ratio of 100%. The
+ * exact values don't matter as long as they are non-zero to avoid
+ * issues with evsel__count_has_error.
+ */
+ count->ena++;
+ count->run++;
return 0;
}
-struct perf_pmu *perf_pmus__tool_pmu(void)
+struct perf_pmu *tool_pmu__new(void)
{
- static struct perf_pmu tool = {
- .name = "tool",
- .type = PERF_PMU_TYPE_TOOL,
- .aliases = LIST_HEAD_INIT(tool.aliases),
- .caps = LIST_HEAD_INIT(tool.caps),
- .format = LIST_HEAD_INIT(tool.format),
- };
- if (!tool.events_table)
- tool.events_table = find_core_events_table("common", "common");
-
- return &tool;
+ struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu));
+
+ if (!tool)
+ goto out;
+ tool->name = strdup("tool");
+ if (!tool->name) {
+ zfree(&tool);
+ goto out;
+ }
+
+ tool->type = PERF_PMU_TYPE_TOOL;
+ INIT_LIST_HEAD(&tool->aliases);
+ INIT_LIST_HEAD(&tool->caps);
+ INIT_LIST_HEAD(&tool->format);
+ tool->events_table = find_core_events_table("common", "common");
+
+out:
+ return tool;
}
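Note: the ena/run tweak matters because readers derive the multiplexing scale factor from those fields; keeping them equal and non-zero yields a 100% ratio, so tool-event values are reported unscaled and the zero checks in evsel__count_has_error() are satisfied. The arithmetic, sketched with illustrative numbers:

	/* Illustrative scaling math, not code from this patch. */
	u64 val = 1000, ena = 4, run = 4;
	u64 scaled = val * ena / run;	/* ena == run, so scaled == val: no rescaling */
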
diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h
index a60184859080..c6ad1dd90a56 100644
--- a/tools/perf/util/tool_pmu.h
+++ b/tools/perf/util/tool_pmu.h
@@ -51,6 +51,6 @@ int evsel__tool_pmu_open(struct evsel *evsel,
int start_cpu_map_idx, int end_cpu_map_idx);
int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread);
-struct perf_pmu *perf_pmus__tool_pmu(void);
+struct perf_pmu *tool_pmu__new(void);
#endif /* __TOOL_PMU_H */
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 41d53e1b43e7..9c015fc2bcfb 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -99,7 +99,7 @@ unsigned long long read_size(struct tep_event *event, void *ptr, int size)
return tep_read_number(event->tep, ptr, size);
}
-void event_format__fprintf(struct tep_event *event,
+void event_format__fprintf(const struct tep_event *event,
int cpu, void *data, int size, FILE *fp)
{
struct tep_record record;
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index 5596fcda2c10..72abb28b7b5a 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -13,14 +13,94 @@
#include <event-parse.h>
#endif
+#include "archinsn.h"
#include "debug.h"
+#include "event.h"
#include "trace-event.h"
#include "evsel.h"
+#include <linux/perf_event.h>
#include <linux/zalloc.h>
#include "util/sample.h"
+unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
+
struct scripting_context *scripting_context;
+struct script_spec {
+ struct list_head node;
+ struct scripting_ops *ops;
+ char spec[];
+};
+
+static LIST_HEAD(script_specs);
+
+static struct script_spec *script_spec__new(const char *spec,
+ struct scripting_ops *ops)
+{
+ struct script_spec *s = malloc(sizeof(*s) + strlen(spec) + 1);
+
+ if (s != NULL) {
+ strcpy(s->spec, spec);
+ s->ops = ops;
+ }
+
+ return s;
+}
+
+static void script_spec__add(struct script_spec *s)
+{
+ list_add_tail(&s->node, &script_specs);
+}
+
+static struct script_spec *script_spec__find(const char *spec)
+{
+ struct script_spec *s;
+
+ list_for_each_entry(s, &script_specs, node)
+ if (strcasecmp(s->spec, spec) == 0)
+ return s;
+ return NULL;
+}
+
+static int script_spec_register(const char *spec, struct scripting_ops *ops)
+{
+ struct script_spec *s;
+
+ s = script_spec__find(spec);
+ if (s)
+ return -1;
+
+ s = script_spec__new(spec, ops);
+ if (!s)
+ return -1;
+
+ script_spec__add(s);
+ return 0;
+}
+
+struct scripting_ops *script_spec__lookup(const char *spec)
+{
+ struct script_spec *s = script_spec__find(spec);
+
+ if (!s)
+ return NULL;
+
+ return s->ops;
+}
+
+int script_spec__for_each(int (*cb)(struct scripting_ops *ops, const char *spec))
+{
+ struct script_spec *s;
+ int ret = 0;
+
+ list_for_each_entry(s, &script_specs, node) {
+ ret = cb(s->ops, s->spec);
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
void scripting_context__update(struct scripting_context *c,
union perf_event *event,
struct perf_sample *sample,
@@ -28,12 +108,14 @@ void scripting_context__update(struct scripting_context *c,
struct addr_location *al,
struct addr_location *addr_al)
{
- c->event_data = sample->raw_data;
- c->pevent = NULL;
#ifdef HAVE_LIBTRACEEVENT
- if (evsel->tp_format)
- c->pevent = evsel->tp_format->tep;
+ const struct tep_event *tp_format = evsel__tp_format(evsel);
+
+ c->pevent = tp_format ? tp_format->tep : NULL;
+#else
+ c->pevent = NULL;
#endif
+ c->event_data = sample->raw_data;
c->event = event;
c->sample = sample;
c->evsel = evsel;
@@ -191,3 +273,154 @@ void setup_perl_scripting(void)
}
#endif
#endif
+
+#if !defined(__i386__) && !defined(__x86_64__)
+void arch_fetch_insn(struct perf_sample *sample __maybe_unused,
+ struct thread *thread __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+}
+#endif
+
+void script_fetch_insn(struct perf_sample *sample, struct thread *thread,
+ struct machine *machine, bool native_arch)
+{
+ if (sample->insn_len == 0 && native_arch)
+ arch_fetch_insn(sample, thread, machine);
+}
+
+static const struct {
+ u32 flags;
+ const char *name;
+} sample_flags[] = {
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL, "call"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN, "return"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL, "jcc"},
+ {PERF_IP_FLAG_BRANCH, "jmp"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT, "int"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT, "iret"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET, "syscall"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET, "sysret"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_ASYNC, "async"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_INTERRUPT,
+ "hw int"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "tx abrt"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "tr strt"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMENTRY, "vmentry"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMEXIT, "vmexit"},
+ {0, NULL}
+};
+
+static const struct {
+ u32 flags;
+ const char *name;
+} branch_events[] = {
+ {PERF_IP_FLAG_BRANCH_MISS, "miss"},
+ {PERF_IP_FLAG_NOT_TAKEN, "not_taken"},
+ {0, NULL}
+};
+
+static int sample_flags_to_name(u32 flags, char *str, size_t size)
+{
+ int i;
+ const char *prefix;
+ int pos = 0, ret, ev_idx = 0;
+ u32 xf = flags & PERF_ADDITIONAL_STATE_MASK;
+ u32 types, events;
+ char xs[16] = { 0 };
+
+ /* Clear additional state bits */
+ flags &= ~PERF_ADDITIONAL_STATE_MASK;
+
+ if (flags & PERF_IP_FLAG_TRACE_BEGIN)
+ prefix = "tr strt ";
+ else if (flags & PERF_IP_FLAG_TRACE_END)
+ prefix = "tr end ";
+ else
+ prefix = "";
+
+ ret = snprintf(str + pos, size - pos, "%s", prefix);
+ if (ret < 0)
+ return ret;
+ pos += ret;
+
+ flags &= ~(PERF_IP_FLAG_TRACE_BEGIN | PERF_IP_FLAG_TRACE_END);
+
+ types = flags & ~PERF_IP_FLAG_BRANCH_EVENT_MASK;
+ for (i = 0; sample_flags[i].name; i++) {
+ if (sample_flags[i].flags != types)
+ continue;
+
+ ret = snprintf(str + pos, size - pos, "%s", sample_flags[i].name);
+ if (ret < 0)
+ return ret;
+ pos += ret;
+ break;
+ }
+
+ events = flags & PERF_IP_FLAG_BRANCH_EVENT_MASK;
+ for (i = 0; branch_events[i].name; i++) {
+ if (!(branch_events[i].flags & events))
+ continue;
+
+ ret = snprintf(str + pos, size - pos, !ev_idx ? "/%s" : ",%s",
+ branch_events[i].name);
+ if (ret < 0)
+ return ret;
+ pos += ret;
+ ev_idx++;
+ }
+
+ /* Add an end character '/' for events */
+ if (ev_idx) {
+ ret = snprintf(str + pos, size - pos, "/");
+ if (ret < 0)
+ return ret;
+ pos += ret;
+ }
+
+ if (!xf)
+ return pos;
+
+ snprintf(xs, sizeof(xs), "(%s%s%s)",
+ xf & PERF_IP_FLAG_IN_TX ? "x" : "",
+ xf & PERF_IP_FLAG_INTR_DISABLE ? "D" : "",
+ xf & PERF_IP_FLAG_INTR_TOGGLE ? "t" : "");
+
+ /* Right align the string if its length is less than the limit */
+ if ((pos + strlen(xs)) < SAMPLE_FLAGS_STR_ALIGNED_SIZE)
+ ret = snprintf(str + pos, size - pos, "%*s",
+ (int)(SAMPLE_FLAGS_STR_ALIGNED_SIZE - pos), xs);
+ else
+ ret = snprintf(str + pos, size - pos, " %s", xs);
+ if (ret < 0)
+ return ret;
+
+ return pos + ret;
+}
+
+int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz)
+{
+ const char *chars = PERF_IP_FLAG_CHARS;
+ const size_t n = strlen(PERF_IP_FLAG_CHARS);
+ size_t i, pos = 0;
+ int ret;
+
+ ret = sample_flags_to_name(flags, str, sz);
+ if (ret > 0)
+ return ret;
+
+ for (i = 0; i < n; i++, flags >>= 1) {
+ if ((flags & 1) && pos < sz)
+ str[pos++] = chars[i];
+ }
+ for (; i < 32; i++, flags >>= 1) {
+ if ((flags & 1) && pos < sz)
+ str[pos++] = '?';
+ }
+ if (pos < sz)
+ str[pos] = 0;
+
+ return pos;
+}
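Note: end to end, a branch sample with PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL renders as "call", branch-event bits append a "/miss/"-style suffix, the additional-state bits become the right-aligned "(xDt)"-style marker, and anything the tables cannot name falls back to the per-bit character string. A hedged usage sketch with the buffer size declared in trace-event.h:

	char str[SAMPLE_FLAGS_BUF_SIZE];

	perf_sample__sprintf_flags(PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL, str, sizeof(str));
	/* str is expected to hold "call"; exact padding depends on the flags set. */
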
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 79b939f947dd..71e680bc3d4b 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -39,7 +39,7 @@ trace_event__tp_format(const char *sys, const char *name);
struct tep_event *trace_event__tp_format_id(int id);
-void event_format__fprintf(struct tep_event *event,
+void event_format__fprintf(const struct tep_event *event,
int cpu, void *data, int size, FILE *fp);
int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size);
@@ -113,10 +113,11 @@ struct scripting_ops {
extern unsigned int scripting_max_stack;
-int script_spec_register(const char *spec, struct scripting_ops *ops);
+struct scripting_ops *script_spec__lookup(const char *spec);
+int script_spec__for_each(int (*cb)(struct scripting_ops *ops, const char *spec));
void script_fetch_insn(struct perf_sample *sample, struct thread *thread,
- struct machine *machine);
+ struct machine *machine, bool native_arch);
void setup_perl_scripting(void);
void setup_python_scripting(void);
@@ -144,6 +145,8 @@ int common_flags(struct scripting_context *context);
int common_lock_depth(struct scripting_context *context);
#define SAMPLE_FLAGS_BUF_SIZE 64
+#define SAMPLE_FLAGS_STR_ALIGNED_SIZE 21
+
int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz);
#if defined(LIBTRACEEVENT_VERSION) && LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0)
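Note: script_specs is a simple case-insensitive linked list and the old exported script_spec_register() is now file-local, so external users enumerate or resolve specs through the two declarations above. A hedged sketch of the iterator (assuming scripting_ops exposes its usual name field):

	static int print_spec(struct scripting_ops *ops, const char *spec)
	{
		printf("%-10s -> %s\n", spec, ops->name);
		return 0;	/* a non-zero return would stop the iteration */
	}

	/* ... */
	script_spec__for_each(print_spec);
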
diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
index 32c39cfe209b..4c6a86e1cb54 100644
--- a/tools/perf/util/units.c
+++ b/tools/perf/util/units.c
@@ -64,7 +64,7 @@ unsigned long convert_unit(unsigned long value, char *unit)
int unit_number__scnprintf(char *buf, size_t size, u64 n)
{
- char unit[4] = "BKMG";
+ char unit[] = "BKMG";
int i = 0;
while (((n / 1024) > 1) && (i < 3)) {
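Note: the units.c change fixes a classic C trap. With an explicit length, char unit[4] = "BKMG" is legal but drops the terminating NUL because the literal fills the array exactly, so the array is not a valid string; the unsized form allocates five bytes. Sketch:

	char a[4] = "BKMG";	/* legal, but NOT NUL-terminated: sizeof(a) == 4 */
	char b[]  = "BKMG";	/* sizeof(b) == 5, trailing '\0' included */
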
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index bde216e630d2..793d11832694 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -190,7 +190,10 @@ static bool memory_read(Dwfl *dwfl __maybe_unused, Dwarf_Addr addr, Dwarf_Word *
int offset;
int ret;
- ret = perf_reg_value(&start, &ui->sample->user_regs,
+ if (!ui->sample->user_regs)
+ return false;
+
+ ret = perf_reg_value(&start, ui->sample->user_regs,
perf_arch_reg_sp(arch));
if (ret)
return false;
@@ -273,7 +276,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
Dwarf_Word ip;
int err = -EINVAL, i;
- if (!data->user_regs.regs)
+ if (!data->user_regs || !data->user_regs->regs)
return -EINVAL;
ui = zalloc(sizeof(ui_buf) + sizeof(ui_buf.entries[0]) * max_stack);
@@ -286,7 +289,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
if (!ui->dwfl)
goto out;
- err = perf_reg_value(&ip, &data->user_regs, perf_arch_reg_ip(arch));
+ err = perf_reg_value(&ip, data->user_regs, perf_arch_reg_ip(arch));
if (err)
goto out;
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 16c2b03831f3..0b037e7389a0 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -330,8 +330,7 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui,
int ret, fd;
if (dso__data(dso)->eh_frame_hdr_offset == 0) {
- fd = dso__data_get_fd(dso, ui->machine);
- if (fd < 0)
+ if (!dso__data_get_fd(dso, ui->machine, &fd))
return -EINVAL;
/* Check the .eh_frame section for unwinding info */
@@ -372,8 +371,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
* has to be pointed by symsrc_filename
*/
if (ofs == 0) {
- fd = dso__data_get_fd(dso, machine);
- if (fd >= 0) {
+ if (dso__data_get_fd(dso, machine, &fd)) {
ofs = elf_section_offset(fd, ".debug_frame");
dso__data_put_fd(dso);
}
@@ -485,14 +483,16 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
/* Check the .debug_frame section for unwinding info */
if (ret < 0 &&
!read_unwind_spec_debug_frame(dso, ui->machine, &segbase)) {
- int fd = dso__data_get_fd(dso, ui->machine);
- int is_exec = elf_is_exec(fd, dso__name(dso));
+ int fd;
u64 start = map__start(map);
- unw_word_t base = is_exec ? 0 : start;
+ unw_word_t base = start;
const char *symfile;
- if (fd >= 0)
+ if (dso__data_get_fd(dso, ui->machine, &fd)) {
+ if (elf_is_exec(fd, dso__name(dso)))
+ base = 0;
dso__data_put_fd(dso);
+ }
symfile = dso__symsrc_filename(dso) ?: dso__name(dso);
@@ -579,12 +579,12 @@ static int access_mem(unw_addr_space_t __maybe_unused as,
int ret;
/* Don't support write, probably not needed. */
- if (__write || !stack || !ui->sample->user_regs.regs) {
+ if (__write || !stack || !ui->sample->user_regs || !ui->sample->user_regs->regs) {
*valp = 0;
return 0;
}
- ret = perf_reg_value(&start, &ui->sample->user_regs,
+ ret = perf_reg_value(&start, perf_sample__user_regs(ui->sample),
perf_arch_reg_sp(arch));
if (ret)
return ret;
@@ -628,7 +628,7 @@ static int access_reg(unw_addr_space_t __maybe_unused as,
return 0;
}
- if (!ui->sample->user_regs.regs) {
+ if (!ui->sample->user_regs || !ui->sample->user_regs->regs) {
*valp = 0;
return 0;
}
@@ -637,7 +637,7 @@ static int access_reg(unw_addr_space_t __maybe_unused as,
if (id < 0)
return -EINVAL;
- ret = perf_reg_value(&val, &ui->sample->user_regs, id);
+ ret = perf_reg_value(&val, perf_sample__user_regs(ui->sample), id);
if (ret) {
if (!ui->best_effort)
pr_err("unwind: can't read reg %d\n", regnum);
@@ -741,7 +741,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
unw_cursor_t c;
int ret, i = 0;
- ret = perf_reg_value(&val, &ui->sample->user_regs,
+ ret = perf_reg_value(&val, perf_sample__user_regs(ui->sample),
perf_arch_reg_ip(arch));
if (ret)
return ret;
@@ -808,7 +808,7 @@ static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg,
.best_effort = best_effort
};
- if (!data->user_regs.regs)
+ if (!data->user_regs || !data->user_regs->regs)
return -EINVAL;
if (max_stack <= 0)
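Note: throughout the unwinders, user_regs is now a lazily-allocated pointer on perf_sample rather than an embedded struct, so each access gains a NULL check before touching ->regs or ->abi, and perf_sample__user_regs() (assumed here, per its use above, to return that pointer once it is known to exist) replaces the old &sample->user_regs. The guarded pattern as a sketch (arch comes from the unwind context, as in the hunks above):

	u64 ip = 0;

	if (sample->user_regs && sample->user_regs->regs)
		perf_reg_value(&ip, perf_sample__user_regs(sample), perf_arch_reg_ip(arch));
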
diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c
index b9823f414f10..ec72d29f3d58 100644
--- a/tools/perf/util/values.c
+++ b/tools/perf/util/values.c
@@ -8,6 +8,7 @@
#include "values.h"
#include "debug.h"
+#include "evsel.h"
int perf_read_values_init(struct perf_read_values *values)
{
@@ -22,21 +23,17 @@ int perf_read_values_init(struct perf_read_values *values)
values->threads = 0;
values->counters_max = 16;
- values->counterrawid = malloc(values->counters_max
- * sizeof(*values->counterrawid));
- values->countername = malloc(values->counters_max
- * sizeof(*values->countername));
- if (!values->counterrawid || !values->countername) {
- pr_debug("failed to allocate read_values counters arrays");
+ values->counters = malloc(values->counters_max * sizeof(*values->counters));
+ if (!values->counters) {
+ pr_debug("failed to allocate read_values counters array");
goto out_free_counter;
}
- values->counters = 0;
+ values->num_counters = 0;
return 0;
out_free_counter:
- zfree(&values->counterrawid);
- zfree(&values->countername);
+ zfree(&values->counters);
out_free_pid:
zfree(&values->pid);
zfree(&values->tid);
@@ -56,10 +53,7 @@ void perf_read_values_destroy(struct perf_read_values *values)
zfree(&values->value);
zfree(&values->pid);
zfree(&values->tid);
- zfree(&values->counterrawid);
- for (i = 0; i < values->counters; i++)
- zfree(&values->countername[i]);
- zfree(&values->countername);
+ zfree(&values->counters);
}
static int perf_read_values__enlarge_threads(struct perf_read_values *values)
@@ -116,81 +110,71 @@ static int perf_read_values__findnew_thread(struct perf_read_values *values,
static int perf_read_values__enlarge_counters(struct perf_read_values *values)
{
- char **countername;
- int i, counters_max = values->counters_max * 2;
- u64 *counterrawid = realloc(values->counterrawid, counters_max * sizeof(*values->counterrawid));
+ int counters_max = values->counters_max * 2;
+ struct evsel **new_counters = realloc(values->counters,
+ counters_max * sizeof(*values->counters));
- if (!counterrawid) {
- pr_debug("failed to enlarge read_values rawid array");
+ if (!new_counters) {
+ pr_debug("failed to enlarge read_values counters array");
goto out_enomem;
}
- countername = realloc(values->countername, counters_max * sizeof(*values->countername));
- if (!countername) {
- pr_debug("failed to enlarge read_values rawid array");
- goto out_free_rawid;
- }
-
- for (i = 0; i < values->threads; i++) {
+ for (int i = 0; i < values->threads; i++) {
u64 *value = realloc(values->value[i], counters_max * sizeof(**values->value));
- int j;
if (!value) {
pr_debug("failed to enlarge read_values ->values array");
- goto out_free_name;
+ goto out_free_counters;
}
- for (j = values->counters_max; j < counters_max; j++)
+ for (int j = values->counters_max; j < counters_max; j++)
value[j] = 0;
values->value[i] = value;
}
values->counters_max = counters_max;
- values->counterrawid = counterrawid;
- values->countername = countername;
+ values->counters = new_counters;
return 0;
-out_free_name:
- free(countername);
-out_free_rawid:
- free(counterrawid);
+out_free_counters:
+ free(new_counters);
out_enomem:
return -ENOMEM;
}
static int perf_read_values__findnew_counter(struct perf_read_values *values,
- u64 rawid, const char *name)
+ struct evsel *evsel)
{
int i;
- for (i = 0; i < values->counters; i++)
- if (values->counterrawid[i] == rawid)
+ for (i = 0; i < values->num_counters; i++)
+ if (values->counters[i] == evsel)
return i;
- if (values->counters == values->counters_max) {
- i = perf_read_values__enlarge_counters(values);
- if (i)
- return i;
+ if (values->num_counters == values->counters_max) {
+ int err = perf_read_values__enlarge_counters(values);
+
+ if (err)
+ return err;
}
- i = values->counters++;
- values->counterrawid[i] = rawid;
- values->countername[i] = strdup(name);
+ i = values->num_counters++;
+ values->counters[i] = evsel;
return i;
}
int perf_read_values_add_value(struct perf_read_values *values,
u32 pid, u32 tid,
- u64 rawid, const char *name, u64 value)
+ struct evsel *evsel, u64 value)
{
int tindex, cindex;
tindex = perf_read_values__findnew_thread(values, pid, tid);
if (tindex < 0)
return tindex;
- cindex = perf_read_values__findnew_counter(values, rawid, name);
+ cindex = perf_read_values__findnew_counter(values, evsel);
if (cindex < 0)
return cindex;
@@ -205,15 +189,15 @@ static void perf_read_values__display_pretty(FILE *fp,
int pidwidth, tidwidth;
int *counterwidth;
- counterwidth = malloc(values->counters * sizeof(*counterwidth));
+ counterwidth = malloc(values->num_counters * sizeof(*counterwidth));
if (!counterwidth) {
fprintf(fp, "INTERNAL ERROR: Failed to allocate counterwidth array\n");
return;
}
tidwidth = 3;
pidwidth = 3;
- for (j = 0; j < values->counters; j++)
- counterwidth[j] = strlen(values->countername[j]);
+ for (j = 0; j < values->num_counters; j++)
+ counterwidth[j] = strlen(evsel__name(values->counters[j]));
for (i = 0; i < values->threads; i++) {
int width;
@@ -223,7 +207,7 @@ static void perf_read_values__display_pretty(FILE *fp,
width = snprintf(NULL, 0, "%d", values->tid[i]);
if (width > tidwidth)
tidwidth = width;
- for (j = 0; j < values->counters; j++) {
+ for (j = 0; j < values->num_counters; j++) {
width = snprintf(NULL, 0, "%" PRIu64, values->value[i][j]);
if (width > counterwidth[j])
counterwidth[j] = width;
@@ -231,14 +215,14 @@ static void perf_read_values__display_pretty(FILE *fp,
}
fprintf(fp, "# %*s %*s", pidwidth, "PID", tidwidth, "TID");
- for (j = 0; j < values->counters; j++)
- fprintf(fp, " %*s", counterwidth[j], values->countername[j]);
+ for (j = 0; j < values->num_counters; j++)
+ fprintf(fp, " %*s", counterwidth[j], evsel__name(values->counters[j]));
fprintf(fp, "\n");
for (i = 0; i < values->threads; i++) {
fprintf(fp, " %*d %*d", pidwidth, values->pid[i],
tidwidth, values->tid[i]);
- for (j = 0; j < values->counters; j++)
+ for (j = 0; j < values->num_counters; j++)
fprintf(fp, " %*" PRIu64,
counterwidth[j], values->value[i][j]);
fprintf(fp, "\n");
@@ -266,16 +250,16 @@ static void perf_read_values__display_raw(FILE *fp,
if (width > tidwidth)
tidwidth = width;
}
- for (j = 0; j < values->counters; j++) {
- width = strlen(values->countername[j]);
+ for (j = 0; j < values->num_counters; j++) {
+ width = strlen(evsel__name(values->counters[j]));
if (width > namewidth)
namewidth = width;
- width = snprintf(NULL, 0, "%" PRIx64, values->counterrawid[j]);
+ width = snprintf(NULL, 0, "%x", values->counters[j]->core.idx);
if (width > rawwidth)
rawwidth = width;
}
for (i = 0; i < values->threads; i++) {
- for (j = 0; j < values->counters; j++) {
+ for (j = 0; j < values->num_counters; j++) {
width = snprintf(NULL, 0, "%" PRIu64, values->value[i][j]);
if (width > countwidth)
countwidth = width;
@@ -287,12 +271,12 @@ static void perf_read_values__display_raw(FILE *fp,
namewidth, "Name", rawwidth, "Raw",
countwidth, "Count");
for (i = 0; i < values->threads; i++)
- for (j = 0; j < values->counters; j++)
- fprintf(fp, " %*d %*d %*s %*" PRIx64 " %*" PRIu64,
+ for (j = 0; j < values->num_counters; j++)
+ fprintf(fp, " %*d %*d %*s %*x %*" PRIu64,
pidwidth, values->pid[i],
tidwidth, values->tid[i],
- namewidth, values->countername[j],
- rawwidth, values->counterrawid[j],
+ namewidth, evsel__name(values->counters[j]),
+ rawwidth, values->counters[j]->core.idx,
countwidth, values->value[i][j]);
}
diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h
index 791c1ad606c2..bbca33daca19 100644
--- a/tools/perf/util/values.h
+++ b/tools/perf/util/values.h
@@ -5,14 +5,15 @@
#include <stdio.h>
#include <linux/types.h>
+struct evsel;
+
struct perf_read_values {
int threads;
int threads_max;
u32 *pid, *tid;
- int counters;
+ int num_counters;
int counters_max;
- u64 *counterrawid;
- char **countername;
+ struct evsel **counters;
u64 **value;
};
@@ -21,7 +22,7 @@ void perf_read_values_destroy(struct perf_read_values *values);
int perf_read_values_add_value(struct perf_read_values *values,
u32 pid, u32 tid,
- u64 rawid, const char *name, u64 value);
+ struct evsel *evsel, u64 value);
void perf_read_values_display(FILE *fp, struct perf_read_values *values,
int raw);
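Note: counters are now identified by their struct evsel rather than a (rawid, name) pair, so perf_read_values__findnew_counter() deduplicates by pointer equality and the display paths derive names and raw ids on demand via evsel__name() and core.idx. Hedged usage sketch (pid, tid, evsel and count stand for caller-provided values):

	struct perf_read_values values;

	if (perf_read_values_init(&values) < 0)
		return;
	perf_read_values_add_value(&values, pid, tid, evsel, count);
	perf_read_values_display(stdout, &values, /*raw=*/0);
	perf_read_values_destroy(&values);
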