summaryrefslogtreecommitdiff
path: root/tools/perf/util
diff options
context:
space:
mode:
authorDmitry Torokhov <dmitry.torokhov@gmail.com>2025-01-21 08:37:39 +0300
committerDmitry Torokhov <dmitry.torokhov@gmail.com>2025-01-21 08:37:39 +0300
commit25768de50b1f2dbb6ea44bd5148a87fe2c9c3688 (patch)
tree91f4e0c1ea9acb1e8d477a5f4dfedd00de67ae13 /tools/perf/util
parent3a6e5ed2372bcb2a3c554fda32419efd91ff9b0c (diff)
parent08bd5b7c9a2401faabdaa1472d45c7de0755fd7e (diff)
downloadlinux-25768de50b1f2dbb6ea44bd5148a87fe2c9c3688.tar.xz
Merge branch 'next' into for-linus
Prepare input updates for 6.14 merge window.
Diffstat (limited to 'tools/perf/util')
-rw-r--r--tools/perf/util/Build27
-rw-r--r--tools/perf/util/annotate-data.c1164
-rw-r--r--tools/perf/util/annotate-data.h92
-rw-r--r--tools/perf/util/annotate.c372
-rw-r--r--tools/perf/util/annotate.h39
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.h18
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c2
-rw-r--r--tools/perf/util/arm-spe.c368
-rw-r--r--tools/perf/util/arm-spe.h38
-rw-r--r--tools/perf/util/auxtrace.c16
-rw-r--r--tools/perf/util/auxtrace.h24
-rw-r--r--tools/perf/util/block-info.c66
-rw-r--r--tools/perf/util/block-info.h8
-rw-r--r--tools/perf/util/bpf-event.c4
-rw-r--r--tools/perf/util/bpf-filter.c631
-rw-r--r--tools/perf/util/bpf-filter.h19
-rw-r--r--tools/perf/util/bpf-filter.l28
-rw-r--r--tools/perf/util/bpf-filter.y28
-rw-r--r--tools/perf/util/bpf-prologue.h37
-rw-r--r--tools/perf/util/bpf_counter.c35
-rw-r--r--tools/perf/util/bpf_counter_cgroup.c6
-rw-r--r--tools/perf/util/bpf_ftrace.c8
-rw-r--r--tools/perf/util/bpf_kwork.c9
-rw-r--r--tools/perf/util/bpf_kwork_top.c7
-rw-r--r--tools/perf/util/bpf_lock_contention.c48
-rw-r--r--tools/perf/util/bpf_map.c3
-rw-r--r--tools/perf/util/bpf_off_cpu.c16
-rw-r--r--tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c240
-rw-r--r--tools/perf/util/bpf_skel/bperf_cgroup.bpf.c2
-rw-r--r--tools/perf/util/bpf_skel/bperf_follower.bpf.c98
-rw-r--r--tools/perf/util/bpf_skel/bperf_u.h5
-rw-r--r--tools/perf/util/bpf_skel/func_latency.bpf.c7
-rw-r--r--tools/perf/util/bpf_skel/kwork_top.bpf.c2
-rw-r--r--tools/perf/util/bpf_skel/kwork_trace.bpf.c5
-rw-r--r--tools/perf/util/bpf_skel/lock_contention.bpf.c53
-rw-r--r--tools/perf/util/bpf_skel/lock_data.h4
-rw-r--r--tools/perf/util/bpf_skel/off_cpu.bpf.c9
-rw-r--r--tools/perf/util/bpf_skel/sample-filter.h13
-rw-r--r--tools/perf/util/bpf_skel/sample_filter.bpf.c105
-rw-r--r--tools/perf/util/bpf_skel/vmlinux/vmlinux.h7
-rw-r--r--tools/perf/util/branch.h1
-rw-r--r--tools/perf/util/build-id.c36
-rw-r--r--tools/perf/util/build-id.h8
-rw-r--r--tools/perf/util/callchain.c35
-rw-r--r--tools/perf/util/callchain.h6
-rw-r--r--tools/perf/util/cap.c61
-rw-r--r--tools/perf/util/cap.h23
-rw-r--r--tools/perf/util/color.c28
-rw-r--r--tools/perf/util/color.h11
-rw-r--r--tools/perf/util/config.c22
-rw-r--r--tools/perf/util/config.h1
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c43
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.h2
-rw-r--r--tools/perf/util/cs-etm.c700
-rw-r--r--tools/perf/util/cs-etm.h12
-rw-r--r--tools/perf/util/data-convert-bt.c36
-rw-r--r--tools/perf/util/data-convert-json.c49
-rw-r--r--tools/perf/util/data.c7
-rw-r--r--tools/perf/util/debug.c2
-rw-r--r--tools/perf/util/debuginfo.h8
-rw-r--r--tools/perf/util/disasm.c1058
-rw-r--r--tools/perf/util/disasm.h23
-rw-r--r--tools/perf/util/disasm_bpf.c195
-rw-r--r--tools/perf/util/disasm_bpf.h12
-rw-r--r--tools/perf/util/dso.c4
-rw-r--r--tools/perf/util/dso.h4
-rw-r--r--tools/perf/util/dsos.c12
-rw-r--r--tools/perf/util/dsos.h2
-rw-r--r--tools/perf/util/dump-insn.c2
-rw-r--r--tools/perf/util/dump-insn.h2
-rw-r--r--tools/perf/util/dwarf-aux.c24
-rw-r--r--tools/perf/util/dwarf-aux.h55
-rw-r--r--tools/perf/util/dwarf-regs-csky.c50
-rw-r--r--tools/perf/util/dwarf-regs-powerpc.c61
-rw-r--r--tools/perf/util/dwarf-regs-x86.c50
-rw-r--r--tools/perf/util/dwarf-regs.c38
-rw-r--r--tools/perf/util/env.c42
-rw-r--r--tools/perf/util/env.h7
-rw-r--r--tools/perf/util/event.c54
-rw-r--r--tools/perf/util/event.h39
-rw-r--r--tools/perf/util/events_stats.h15
-rw-r--r--tools/perf/util/evlist.c172
-rw-r--r--tools/perf/util/evlist.h22
-rw-r--r--tools/perf/util/evsel.c984
-rw-r--r--tools/perf/util/evsel.h63
-rw-r--r--tools/perf/util/evsel_fprintf.c4
-rw-r--r--tools/perf/util/expr.c97
-rw-r--r--tools/perf/util/ftrace.h3
-rw-r--r--tools/perf/util/genelf.c4
-rw-r--r--tools/perf/util/genelf.h2
-rw-r--r--tools/perf/util/hashmap.h20
-rw-r--r--tools/perf/util/header.c241
-rw-r--r--tools/perf/util/header.h48
-rw-r--r--tools/perf/util/hisi-ptt.c6
-rw-r--r--tools/perf/util/hist.c66
-rw-r--r--tools/perf/util/hist.h7
-rw-r--r--tools/perf/util/hwmon_pmu.c839
-rw-r--r--tools/perf/util/hwmon_pmu.h151
-rw-r--r--tools/perf/util/include/dwarf-regs.h127
-rw-r--r--tools/perf/util/intel-bts.c41
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c5
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c2
-rw-r--r--tools/perf/util/intel-pt.c34
-rw-r--r--tools/perf/util/intel-tpebs.c432
-rw-r--r--tools/perf/util/intel-tpebs.h35
-rw-r--r--tools/perf/util/jit.h3
-rw-r--r--tools/perf/util/jitdump.c10
-rw-r--r--tools/perf/util/llvm-c-helpers.cpp197
-rw-r--r--tools/perf/util/llvm-c-helpers.h60
-rw-r--r--tools/perf/util/machine.c124
-rw-r--r--tools/perf/util/machine.h36
-rw-r--r--tools/perf/util/map.c25
-rw-r--r--tools/perf/util/map.h22
-rw-r--r--tools/perf/util/map_symbol.c18
-rw-r--r--tools/perf/util/map_symbol.h3
-rw-r--r--tools/perf/util/mem-events.c28
-rw-r--r--tools/perf/util/mem-events.h4
-rw-r--r--tools/perf/util/mem-info.c13
-rw-r--r--tools/perf/util/mem-info.h1
-rw-r--r--tools/perf/util/metricgroup.c49
-rw-r--r--tools/perf/util/mmap.c4
-rw-r--r--tools/perf/util/parse-events.c180
-rw-r--r--tools/perf/util/parse-events.h24
-rw-r--r--tools/perf/util/parse-events.l14
-rw-r--r--tools/perf/util/parse-events.y18
-rw-r--r--tools/perf/util/pfm.c4
-rw-r--r--tools/perf/util/pmu.c210
-rw-r--r--tools/perf/util/pmu.h26
-rw-r--r--tools/perf/util/pmus.c40
-rw-r--r--tools/perf/util/pmus.h4
-rw-r--r--tools/perf/util/print-events.c39
-rw-r--r--tools/perf/util/print-events.h1
-rw-r--r--tools/perf/util/print_insn.c14
-rw-r--r--tools/perf/util/probe-event.c184
-rw-r--r--tools/perf/util/probe-event.h3
-rw-r--r--tools/perf/util/probe-file.c19
-rw-r--r--tools/perf/util/probe-file.h1
-rw-r--r--tools/perf/util/probe-finder.c42
-rw-r--r--tools/perf/util/probe-finder.h13
-rw-r--r--tools/perf/util/python.c6
-rw-r--r--tools/perf/util/s390-cpumsf.c13
-rw-r--r--tools/perf/util/s390-sample-raw.c8
-rw-r--r--tools/perf/util/scripting-engines/trace-event-perl.c2
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c27
-rw-r--r--tools/perf/util/session.c420
-rw-r--r--tools/perf/util/session.h61
-rw-r--r--tools/perf/util/setup.py4
-rw-r--r--tools/perf/util/sort.c181
-rw-r--r--tools/perf/util/sort.h6
-rw-r--r--tools/perf/util/srcline.c59
-rw-r--r--tools/perf/util/stat-display.c199
-rw-r--r--tools/perf/util/stat-shadow.c168
-rw-r--r--tools/perf/util/stat.c2
-rw-r--r--tools/perf/util/stat.h16
-rw-r--r--tools/perf/util/string.c100
-rw-r--r--tools/perf/util/string2.h2
-rw-r--r--tools/perf/util/symbol.c13
-rw-r--r--tools/perf/util/symbol_conf.h2
-rw-r--r--tools/perf/util/synthetic-events.c181
-rw-r--r--tools/perf/util/synthetic-events.h89
-rw-r--r--tools/perf/util/syscalltbl.c18
-rw-r--r--tools/perf/util/target.h1
-rw-r--r--tools/perf/util/thread.c4
-rw-r--r--tools/perf/util/thread.h1
-rw-r--r--tools/perf/util/threads.c4
-rw-r--r--tools/perf/util/time-utils.c4
-rw-r--r--tools/perf/util/tool.c294
-rw-r--r--tools/perf/util/tool.h19
-rw-r--r--tools/perf/util/tool_pmu.c505
-rw-r--r--tools/perf/util/tool_pmu.h56
-rw-r--r--tools/perf/util/trace-event-parse.c8
-rw-r--r--tools/perf/util/trace-event-read.c2
-rw-r--r--tools/perf/util/trace-event-scripting.c2
-rw-r--r--tools/perf/util/trace-event.c2
-rw-r--r--tools/perf/util/trace-event.h5
-rw-r--r--tools/perf/util/trace_augment.h6
-rw-r--r--tools/perf/util/tsc.c4
-rw-r--r--tools/perf/util/tsc.h2
-rw-r--r--tools/perf/util/util.c101
-rw-r--r--tools/perf/util/util.h18
-rw-r--r--tools/perf/util/vdso.c4
181 files changed, 9902 insertions, 4058 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 0f18fe81ef0b..c06d2ee9024c 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -13,6 +13,7 @@ perf-util-y += copyfile.o
perf-util-y += ctype.o
perf-util-y += db-export.o
perf-util-y += disasm.o
+perf-util-y += disasm_bpf.o
perf-util-y += env.o
perf-util-y += event.o
perf-util-y += evlist.o
@@ -65,6 +66,7 @@ perf-util-y += map.o
perf-util-y += maps.o
perf-util-y += pstack.o
perf-util-y += session.o
+perf-util-y += tool.o
perf-util-y += sample-raw.o
perf-util-y += s390-sample-raw.o
perf-util-y += amd-sample-raw.o
@@ -81,6 +83,8 @@ perf-util-y += pmu.o
perf-util-y += pmus.o
perf-util-y += pmu-flex.o
perf-util-y += pmu-bison.o
+perf-util-y += hwmon_pmu.o
+perf-util-y += tool_pmu.o
perf-util-y += svghelper.o
perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-info.o
perf-util-y += trace-event-scripting.o
@@ -154,6 +158,7 @@ perf-util-y += clockid.o
perf-util-y += list_sort.o
perf-util-y += mutex.o
perf-util-y += sharded_mutex.o
+perf-util-$(CONFIG_X86_64) += intel-tpebs.o
perf-util-$(CONFIG_LIBBPF) += bpf_map.o
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
@@ -196,11 +201,14 @@ ifndef CONFIG_SETNS
perf-util-y += setns.o
endif
-perf-util-$(CONFIG_DWARF) += probe-finder.o
-perf-util-$(CONFIG_DWARF) += dwarf-aux.o
-perf-util-$(CONFIG_DWARF) += dwarf-regs.o
-perf-util-$(CONFIG_DWARF) += debuginfo.o
-perf-util-$(CONFIG_DWARF) += annotate-data.o
+perf-util-$(CONFIG_LIBDW) += probe-finder.o
+perf-util-$(CONFIG_LIBDW) += dwarf-aux.o
+perf-util-$(CONFIG_LIBDW) += dwarf-regs.o
+perf-util-$(CONFIG_LIBDW) += dwarf-regs-csky.o
+perf-util-$(CONFIG_LIBDW) += dwarf-regs-powerpc.o
+perf-util-$(CONFIG_LIBDW) += dwarf-regs-x86.o
+perf-util-$(CONFIG_LIBDW) += debuginfo.o
+perf-util-$(CONFIG_LIBDW) += annotate-data.o
perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o
@@ -220,17 +228,18 @@ perf-util-$(CONFIG_ZLIB) += zlib.o
perf-util-$(CONFIG_LZMA) += lzma.o
perf-util-$(CONFIG_ZSTD) += zstd.o
-perf-util-$(CONFIG_LIBCAP) += cap.o
+perf-util-y += cap.o
perf-util-$(CONFIG_CXX_DEMANGLE) += demangle-cxx.o
perf-util-y += demangle-ocaml.o
perf-util-y += demangle-java.o
perf-util-y += demangle-rust.o
+perf-util-$(CONFIG_LIBLLVM) += llvm-c-helpers.o
ifdef CONFIG_JITDUMP
perf-util-$(CONFIG_LIBELF) += jitdump.o
perf-util-$(CONFIG_LIBELF) += genelf.o
-perf-util-$(CONFIG_DWARF) += genelf_debug.o
+perf-util-$(CONFIG_LIBDW) += genelf_debug.o
endif
perf-util-y += perf-hooks.o
@@ -275,12 +284,12 @@ $(OUTPUT)util/pmu-bison.c $(OUTPUT)util/pmu-bison.h: util/pmu.y
$(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
-o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
-$(OUTPUT)util/bpf-filter-flex.c $(OUTPUT)util/bpf-filter-flex.h: util/bpf-filter.l $(OUTPUT)util/bpf-filter-bison.c
+$(OUTPUT)util/bpf-filter-flex.c $(OUTPUT)util/bpf-filter-flex.h: util/bpf-filter.l $(OUTPUT)util/bpf-filter-bison.c util/bpf-filter.h util/bpf_skel/sample-filter.h
$(call rule_mkdir)
$(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/bpf-filter-flex.c \
--header-file=$(OUTPUT)util/bpf-filter-flex.h $(PARSER_DEBUG_FLEX) $<
-$(OUTPUT)util/bpf-filter-bison.c $(OUTPUT)util/bpf-filter-bison.h: util/bpf-filter.y
+$(OUTPUT)util/bpf-filter-bison.c $(OUTPUT)util/bpf-filter-bison.h: util/bpf-filter.y util/bpf-filter.h util/bpf_skel/sample-filter.h
$(call rule_mkdir)
$(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
-o $(OUTPUT)util/bpf-filter-bison.c -p perf_bpf_filter_
diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index 965da6c0b542..976abedca09e 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -31,15 +31,6 @@
static void delete_var_types(struct die_var_type *var_types);
-enum type_state_kind {
- TSR_KIND_INVALID = 0,
- TSR_KIND_TYPE,
- TSR_KIND_PERCPU_BASE,
- TSR_KIND_CONST,
- TSR_KIND_POINTER,
- TSR_KIND_CANARY,
-};
-
#define pr_debug_dtp(fmt, ...) \
do { \
if (debug_type_profile) \
@@ -48,7 +39,7 @@ do { \
pr_debug3(fmt, ##__VA_ARGS__); \
} while (0)
-static void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind)
+void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind)
{
struct strbuf sb;
char *str;
@@ -104,7 +95,7 @@ static void pr_debug_location(Dwarf_Die *die, u64 pc, int reg)
return;
while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) {
- if (reg != DWARF_REG_PC && end < pc)
+ if (reg != DWARF_REG_PC && end <= pc)
continue;
if (reg != DWARF_REG_PC && start > pc)
break;
@@ -140,49 +131,27 @@ static void pr_debug_location(Dwarf_Die *die, u64 pc, int reg)
}
}
-/*
- * Type information in a register, valid when @ok is true.
- * The @caller_saved registers are invalidated after a function call.
- */
-struct type_state_reg {
- Dwarf_Die type;
- u32 imm_value;
- bool ok;
- bool caller_saved;
- u8 kind;
-};
+static void pr_debug_scope(Dwarf_Die *scope_die)
+{
+ int tag;
-/* Type information in a stack location, dynamically allocated */
-struct type_state_stack {
- struct list_head list;
- Dwarf_Die type;
- int offset;
- int size;
- bool compound;
- u8 kind;
-};
+ if (!debug_type_profile && verbose < 3)
+ return;
-/* FIXME: This should be arch-dependent */
-#define TYPE_STATE_MAX_REGS 16
+ pr_info("(die:%lx) ", (long)dwarf_dieoffset(scope_die));
-/*
- * State table to maintain type info in each register and stack location.
- * It'll be updated when new variable is allocated or type info is moved
- * to a new location (register or stack). As it'd be used with the
- * shortest path of basic blocks, it only maintains a single table.
- */
-struct type_state {
- /* state of general purpose registers */
- struct type_state_reg regs[TYPE_STATE_MAX_REGS];
- /* state of stack location */
- struct list_head stack_vars;
- /* return value register */
- int ret_reg;
- /* stack pointer register */
- int stack_reg;
-};
+ tag = dwarf_tag(scope_die);
+ if (tag == DW_TAG_subprogram)
+ pr_info("[function] %s\n", dwarf_diename(scope_die));
+ else if (tag == DW_TAG_inlined_subroutine)
+ pr_info("[inlined] %s\n", dwarf_diename(scope_die));
+ else if (tag == DW_TAG_lexical_block)
+ pr_info("[block]\n");
+ else
+ pr_info("[unknown] tag=%x\n", tag);
+}
-static bool has_reg_type(struct type_state *state, int reg)
+bool has_reg_type(struct type_state *state, int reg)
{
return (unsigned)reg < ARRAY_SIZE(state->regs);
}
@@ -253,7 +222,7 @@ static int __add_member_cb(Dwarf_Die *die, void *arg)
struct annotated_member *parent = arg;
struct annotated_member *member;
Dwarf_Die member_type, die_mem;
- Dwarf_Word size, loc;
+ Dwarf_Word size, loc, bit_size = 0;
Dwarf_Attribute attr;
struct strbuf sb;
int tag;
@@ -268,29 +237,56 @@ static int __add_member_cb(Dwarf_Die *die, void *arg)
strbuf_init(&sb, 32);
die_get_typename(die, &sb);
- die_get_real_type(die, &member_type);
- if (dwarf_aggregate_size(&member_type, &size) < 0)
+ __die_get_real_type(die, &member_type);
+ if (dwarf_tag(&member_type) == DW_TAG_typedef)
+ die_get_real_type(&member_type, &die_mem);
+ else
+ die_mem = member_type;
+
+ if (dwarf_aggregate_size(&die_mem, &size) < 0)
size = 0;
- if (!dwarf_attr_integrate(die, DW_AT_data_member_location, &attr))
- loc = 0;
- else
+ if (dwarf_attr_integrate(die, DW_AT_data_member_location, &attr))
dwarf_formudata(&attr, &loc);
+ else {
+ /* bitfield member */
+ if (dwarf_attr_integrate(die, DW_AT_data_bit_offset, &attr) &&
+ dwarf_formudata(&attr, &loc) == 0)
+ loc /= 8;
+ else
+ loc = 0;
+
+ if (dwarf_attr_integrate(die, DW_AT_bit_size, &attr) &&
+ dwarf_formudata(&attr, &bit_size) == 0)
+ size = (bit_size + 7) / 8;
+ }
member->type_name = strbuf_detach(&sb, NULL);
/* member->var_name can be NULL */
- if (dwarf_diename(die))
- member->var_name = strdup(dwarf_diename(die));
+ if (dwarf_diename(die)) {
+ if (bit_size) {
+ if (asprintf(&member->var_name, "%s:%ld",
+ dwarf_diename(die), (long)bit_size) < 0)
+ member->var_name = NULL;
+ } else {
+ member->var_name = strdup(dwarf_diename(die));
+ }
+
+ if (member->var_name == NULL) {
+ free(member);
+ return DIE_FIND_CB_END;
+ }
+ }
member->size = size;
member->offset = loc + parent->offset;
INIT_LIST_HEAD(&member->children);
list_add_tail(&member->node, &parent->children);
- tag = dwarf_tag(&member_type);
+ tag = dwarf_tag(&die_mem);
switch (tag) {
case DW_TAG_structure_type:
case DW_TAG_union_type:
- die_find_child(&member_type, __add_member_cb, member, &die_mem);
+ die_find_child(&die_mem, __add_member_cb, member, &die_mem);
break;
default:
break;
@@ -332,6 +328,10 @@ static struct annotated_data_type *dso__findnew_data_type(struct dso *dso,
if (die_get_typename_from_type(type_die, &sb) < 0)
strbuf_add(&sb, "(unknown type)", 14);
type_name = strbuf_detach(&sb, NULL);
+
+ if (dwarf_tag(type_die) == DW_TAG_typedef)
+ die_get_real_type(type_die, type_die);
+
dwarf_aggregate_size(type_die, &size);
/* Check existing nodes in dso->data_types tree */
@@ -387,61 +387,142 @@ static bool find_cu_die(struct debuginfo *di, u64 pc, Dwarf_Die *cu_die)
return false;
}
+enum type_match_result {
+ PERF_TMR_UNKNOWN = 0,
+ PERF_TMR_OK,
+ PERF_TMR_NO_TYPE,
+ PERF_TMR_NO_POINTER,
+ PERF_TMR_NO_SIZE,
+ PERF_TMR_BAD_OFFSET,
+ PERF_TMR_BAIL_OUT,
+};
+
+static const char *match_result_str(enum type_match_result tmr)
+{
+ switch (tmr) {
+ case PERF_TMR_OK:
+ return "Good!";
+ case PERF_TMR_NO_TYPE:
+ return "no type information";
+ case PERF_TMR_NO_POINTER:
+ return "no/void pointer";
+ case PERF_TMR_NO_SIZE:
+ return "type size is unknown";
+ case PERF_TMR_BAD_OFFSET:
+ return "offset bigger than size";
+ case PERF_TMR_UNKNOWN:
+ case PERF_TMR_BAIL_OUT:
+ default:
+ return "invalid state";
+ }
+}
+
+static bool is_pointer_type(Dwarf_Die *type_die)
+{
+ int tag = dwarf_tag(type_die);
+
+ return tag == DW_TAG_pointer_type || tag == DW_TAG_array_type;
+}
+
+static bool is_compound_type(Dwarf_Die *type_die)
+{
+ int tag = dwarf_tag(type_die);
+
+ return tag == DW_TAG_structure_type || tag == DW_TAG_union_type;
+}
+
+/* returns if Type B has better information than Type A */
+static bool is_better_type(Dwarf_Die *type_a, Dwarf_Die *type_b)
+{
+ Dwarf_Word size_a, size_b;
+ Dwarf_Die die_a, die_b;
+
+ /* pointer type is preferred */
+ if (is_pointer_type(type_a) != is_pointer_type(type_b))
+ return is_pointer_type(type_b);
+
+ if (is_pointer_type(type_b)) {
+ /*
+ * We want to compare the target type, but 'void *' can fail to
+ * get the target type.
+ */
+ if (die_get_real_type(type_a, &die_a) == NULL)
+ return true;
+ if (die_get_real_type(type_b, &die_b) == NULL)
+ return false;
+
+ type_a = &die_a;
+ type_b = &die_b;
+ }
+
+ /* bigger type is preferred */
+ if (dwarf_aggregate_size(type_a, &size_a) < 0 ||
+ dwarf_aggregate_size(type_b, &size_b) < 0)
+ return false;
+
+ if (size_a != size_b)
+ return size_a < size_b;
+
+ /* struct or union is preferred */
+ if (is_compound_type(type_a) != is_compound_type(type_b))
+ return is_compound_type(type_b);
+
+ /* typedef is preferred */
+ if (dwarf_tag(type_b) == DW_TAG_typedef)
+ return true;
+
+ return false;
+}
+
/* The type info will be saved in @type_die */
-static int check_variable(struct data_loc_info *dloc, Dwarf_Die *var_die,
- Dwarf_Die *type_die, int reg, int offset, bool is_fbreg)
+static enum type_match_result check_variable(struct data_loc_info *dloc,
+ Dwarf_Die *var_die,
+ Dwarf_Die *type_die, int reg,
+ int offset, bool is_fbreg)
{
Dwarf_Word size;
- bool is_pointer = true;
+ bool needs_pointer = true;
+ Dwarf_Die sized_type;
if (reg == DWARF_REG_PC)
- is_pointer = false;
+ needs_pointer = false;
else if (reg == dloc->fbreg || is_fbreg)
- is_pointer = false;
+ needs_pointer = false;
else if (arch__is(dloc->arch, "x86") && reg == X86_REG_SP)
- is_pointer = false;
+ needs_pointer = false;
/* Get the type of the variable */
- if (die_get_real_type(var_die, type_die) == NULL) {
- pr_debug_dtp("variable has no type\n");
- ann_data_stat.no_typeinfo++;
- return -1;
- }
+ if (__die_get_real_type(var_die, type_die) == NULL)
+ return PERF_TMR_NO_TYPE;
/*
* Usually it expects a pointer type for a memory access.
* Convert to a real type it points to. But global variables
* and local variables are accessed directly without a pointer.
*/
- if (is_pointer) {
- if ((dwarf_tag(type_die) != DW_TAG_pointer_type &&
- dwarf_tag(type_die) != DW_TAG_array_type) ||
- die_get_real_type(type_die, type_die) == NULL) {
- pr_debug_dtp("no pointer or no type\n");
- ann_data_stat.no_typeinfo++;
- return -1;
- }
+ if (needs_pointer) {
+ if (!is_pointer_type(type_die) ||
+ __die_get_real_type(type_die, type_die) == NULL)
+ return PERF_TMR_NO_POINTER;
}
+ if (dwarf_tag(type_die) == DW_TAG_typedef)
+ die_get_real_type(type_die, &sized_type);
+ else
+ sized_type = *type_die;
+
/* Get the size of the actual type */
- if (dwarf_aggregate_size(type_die, &size) < 0) {
- pr_debug_dtp("type size is unknown\n");
- ann_data_stat.invalid_size++;
- return -1;
- }
+ if (dwarf_aggregate_size(&sized_type, &size) < 0)
+ return PERF_TMR_NO_SIZE;
/* Minimal sanity check */
- if ((unsigned)offset >= size) {
- pr_debug_dtp("offset: %d is bigger than size: %"PRIu64"\n",
- offset, size);
- ann_data_stat.bad_offset++;
- return -1;
- }
+ if ((unsigned)offset >= size)
+ return PERF_TMR_BAD_OFFSET;
- return 0;
+ return PERF_TMR_OK;
}
-static struct type_state_stack *find_stack_state(struct type_state *state,
+struct type_state_stack *find_stack_state(struct type_state *state,
int offset)
{
struct type_state_stack *stack;
@@ -457,7 +538,7 @@ static struct type_state_stack *find_stack_state(struct type_state *state,
return NULL;
}
-static void set_stack_state(struct type_state_stack *stack, int offset, u8 kind,
+void set_stack_state(struct type_state_stack *stack, int offset, u8 kind,
Dwarf_Die *type_die)
{
int tag;
@@ -484,7 +565,7 @@ static void set_stack_state(struct type_state_stack *stack, int offset, u8 kind,
}
}
-static struct type_state_stack *findnew_stack_state(struct type_state *state,
+struct type_state_stack *findnew_stack_state(struct type_state *state,
int offset, u8 kind,
Dwarf_Die *type_die)
{
@@ -588,7 +669,7 @@ void global_var_type__tree_delete(struct rb_root *root)
}
}
-static bool get_global_var_info(struct data_loc_info *dloc, u64 addr,
+bool get_global_var_info(struct data_loc_info *dloc, u64 addr,
const char **var_name, int *var_offset)
{
struct addr_location al;
@@ -662,7 +743,7 @@ static void global_var__collect(struct data_loc_info *dloc)
}
}
-static bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
+bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
u64 ip, u64 var_addr, int *var_offset,
Dwarf_Die *type_die)
{
@@ -688,7 +769,7 @@ static bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
/* Try to get the variable by address first */
if (die_find_variable_by_addr(cu_die, var_addr, &var_die, &offset) &&
check_variable(dloc, &var_die, type_die, DWARF_REG_PC, offset,
- /*is_fbreg=*/false) == 0) {
+ /*is_fbreg=*/false) == PERF_TMR_OK) {
var_name = dwarf_diename(&var_die);
*var_offset = offset;
goto ok;
@@ -702,7 +783,7 @@ static bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
/* Try to get the name of global variable */
if (die_find_variable_at(cu_die, var_name, pc, &var_die) &&
check_variable(dloc, &var_die, type_die, DWARF_REG_PC, *var_offset,
- /*is_fbreg=*/false) == 0)
+ /*is_fbreg=*/false) == PERF_TMR_OK)
goto ok;
return false;
@@ -713,6 +794,11 @@ ok:
return true;
}
+static bool die_is_same(Dwarf_Die *die_a, Dwarf_Die *die_b)
+{
+ return (die_a->cu == die_b->cu) && (die_a->addr == die_b->addr);
+}
+
/**
* update_var_state - Update type state using given variables
* @state: type state table
@@ -744,24 +830,36 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo
if (!dwarf_offdie(dloc->di->dbg, var->die_off, &mem_die))
continue;
- if (var->reg == DWARF_REG_FB) {
- findnew_stack_state(state, var->offset, TSR_KIND_TYPE,
- &mem_die);
+ if (var->reg == DWARF_REG_FB || var->reg == fbreg) {
+ int offset = var->offset;
+ struct type_state_stack *stack;
- pr_debug_dtp("var [%"PRIx64"] -%#x(stack)",
- insn_offset, -var->offset);
- pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
- } else if (var->reg == fbreg) {
- findnew_stack_state(state, var->offset - fb_offset,
- TSR_KIND_TYPE, &mem_die);
+ if (var->reg != DWARF_REG_FB)
+ offset -= fb_offset;
+
+ stack = find_stack_state(state, offset);
+ if (stack && stack->kind == TSR_KIND_TYPE &&
+ !is_better_type(&stack->type, &mem_die))
+ continue;
+
+ findnew_stack_state(state, offset, TSR_KIND_TYPE,
+ &mem_die);
pr_debug_dtp("var [%"PRIx64"] -%#x(stack)",
- insn_offset, -var->offset + fb_offset);
+ insn_offset, -offset);
pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
} else if (has_reg_type(state, var->reg) && var->offset == 0) {
struct type_state_reg *reg;
+ Dwarf_Die orig_type;
reg = &state->regs[var->reg];
+
+ if (reg->ok && reg->kind == TSR_KIND_TYPE &&
+ !is_better_type(&reg->type, &mem_die))
+ continue;
+
+ orig_type = reg->type;
+
reg->type = mem_die;
reg->kind = TSR_KIND_TYPE;
reg->ok = true;
@@ -769,383 +867,31 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo
pr_debug_dtp("var [%"PRIx64"] reg%d",
insn_offset, var->reg);
pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
- }
- }
-}
-
-static void update_insn_state_x86(struct type_state *state,
- struct data_loc_info *dloc, Dwarf_Die *cu_die,
- struct disasm_line *dl)
-{
- struct annotated_insn_loc loc;
- struct annotated_op_loc *src = &loc.ops[INSN_OP_SOURCE];
- struct annotated_op_loc *dst = &loc.ops[INSN_OP_TARGET];
- struct type_state_reg *tsr;
- Dwarf_Die type_die;
- u32 insn_offset = dl->al.offset;
- int fbreg = dloc->fbreg;
- int fboff = 0;
-
- if (annotate_get_insn_location(dloc->arch, dl, &loc) < 0)
- return;
-
- if (ins__is_call(&dl->ins)) {
- struct symbol *func = dl->ops.target.sym;
-
- if (func == NULL)
- return;
-
- /* __fentry__ will preserve all registers */
- if (!strcmp(func->name, "__fentry__"))
- return;
-
- pr_debug_dtp("call [%x] %s\n", insn_offset, func->name);
-
- /* Otherwise invalidate caller-saved registers after call */
- for (unsigned i = 0; i < ARRAY_SIZE(state->regs); i++) {
- if (state->regs[i].caller_saved)
- state->regs[i].ok = false;
- }
-
- /* Update register with the return type (if any) */
- if (die_find_func_rettype(cu_die, func->name, &type_die)) {
- tsr = &state->regs[state->ret_reg];
- tsr->type = type_die;
- tsr->kind = TSR_KIND_TYPE;
- tsr->ok = true;
-
- pr_debug_dtp("call [%x] return -> reg%d",
- insn_offset, state->ret_reg);
- pr_debug_type_name(&type_die, tsr->kind);
- }
- return;
- }
-
- if (!strncmp(dl->ins.name, "add", 3)) {
- u64 imm_value = -1ULL;
- int offset;
- const char *var_name = NULL;
- struct map_symbol *ms = dloc->ms;
- u64 ip = ms->sym->start + dl->al.offset;
-
- if (!has_reg_type(state, dst->reg1))
- return;
-
- tsr = &state->regs[dst->reg1];
-
- if (src->imm)
- imm_value = src->offset;
- else if (has_reg_type(state, src->reg1) &&
- state->regs[src->reg1].kind == TSR_KIND_CONST)
- imm_value = state->regs[src->reg1].imm_value;
- else if (src->reg1 == DWARF_REG_PC) {
- u64 var_addr = annotate_calc_pcrel(dloc->ms, ip,
- src->offset, dl);
-
- if (get_global_var_info(dloc, var_addr,
- &var_name, &offset) &&
- !strcmp(var_name, "this_cpu_off") &&
- tsr->kind == TSR_KIND_CONST) {
- tsr->kind = TSR_KIND_PERCPU_BASE;
- imm_value = tsr->imm_value;
- }
- }
- else
- return;
-
- if (tsr->kind != TSR_KIND_PERCPU_BASE)
- return;
- if (get_global_var_type(cu_die, dloc, ip, imm_value, &offset,
- &type_die) && offset == 0) {
/*
- * This is not a pointer type, but it should be treated
- * as a pointer.
+ * If this register is directly copied from another and it gets a
+ * better type, also update the type of the source register. This
+ * is usually the case of container_of() macro with offset of 0.
*/
- tsr->type = type_die;
- tsr->kind = TSR_KIND_POINTER;
- tsr->ok = true;
-
- pr_debug_dtp("add [%x] percpu %#"PRIx64" -> reg%d",
- insn_offset, imm_value, dst->reg1);
- pr_debug_type_name(&tsr->type, tsr->kind);
- }
- return;
- }
-
- if (strncmp(dl->ins.name, "mov", 3))
- return;
+ if (has_reg_type(state, reg->copied_from)) {
+ struct type_state_reg *copy_reg;
- if (dloc->fb_cfa) {
- u64 ip = dloc->ms->sym->start + dl->al.offset;
- u64 pc = map__rip_2objdump(dloc->ms->map, ip);
+ copy_reg = &state->regs[reg->copied_from];
- if (die_get_cfa(dloc->di->dbg, pc, &fbreg, &fboff) < 0)
- fbreg = -1;
- }
-
- /* Case 1. register to register or segment:offset to register transfers */
- if (!src->mem_ref && !dst->mem_ref) {
- if (!has_reg_type(state, dst->reg1))
- return;
-
- tsr = &state->regs[dst->reg1];
- if (dso__kernel(map__dso(dloc->ms->map)) &&
- src->segment == INSN_SEG_X86_GS && src->imm) {
- u64 ip = dloc->ms->sym->start + dl->al.offset;
- u64 var_addr;
- int offset;
-
- /*
- * In kernel, %gs points to a per-cpu region for the
- * current CPU. Access with a constant offset should
- * be treated as a global variable access.
- */
- var_addr = src->offset;
-
- if (var_addr == 40) {
- tsr->kind = TSR_KIND_CANARY;
- tsr->ok = true;
-
- pr_debug_dtp("mov [%x] stack canary -> reg%d\n",
- insn_offset, dst->reg1);
- return;
- }
-
- if (!get_global_var_type(cu_die, dloc, ip, var_addr,
- &offset, &type_die) ||
- !die_get_member_type(&type_die, offset, &type_die)) {
- tsr->ok = false;
- return;
- }
+ /* TODO: check if type is compatible or embedded */
+ if (!copy_reg->ok || (copy_reg->kind != TSR_KIND_TYPE) ||
+ !die_is_same(&copy_reg->type, &orig_type) ||
+ !is_better_type(&copy_reg->type, &mem_die))
+ continue;
- tsr->type = type_die;
- tsr->kind = TSR_KIND_TYPE;
- tsr->ok = true;
+ copy_reg->type = mem_die;
- pr_debug_dtp("mov [%x] this-cpu addr=%#"PRIx64" -> reg%d",
- insn_offset, var_addr, dst->reg1);
- pr_debug_type_name(&tsr->type, tsr->kind);
- return;
- }
-
- if (src->imm) {
- tsr->kind = TSR_KIND_CONST;
- tsr->imm_value = src->offset;
- tsr->ok = true;
-
- pr_debug_dtp("mov [%x] imm=%#x -> reg%d\n",
- insn_offset, tsr->imm_value, dst->reg1);
- return;
- }
-
- if (!has_reg_type(state, src->reg1) ||
- !state->regs[src->reg1].ok) {
- tsr->ok = false;
- return;
- }
-
- tsr->type = state->regs[src->reg1].type;
- tsr->kind = state->regs[src->reg1].kind;
- tsr->ok = true;
-
- pr_debug_dtp("mov [%x] reg%d -> reg%d",
- insn_offset, src->reg1, dst->reg1);
- pr_debug_type_name(&tsr->type, tsr->kind);
- }
- /* Case 2. memory to register transers */
- if (src->mem_ref && !dst->mem_ref) {
- int sreg = src->reg1;
-
- if (!has_reg_type(state, dst->reg1))
- return;
-
- tsr = &state->regs[dst->reg1];
-
-retry:
- /* Check stack variables with offset */
- if (sreg == fbreg) {
- struct type_state_stack *stack;
- int offset = src->offset - fboff;
-
- stack = find_stack_state(state, offset);
- if (stack == NULL) {
- tsr->ok = false;
- return;
- } else if (!stack->compound) {
- tsr->type = stack->type;
- tsr->kind = stack->kind;
- tsr->ok = true;
- } else if (die_get_member_type(&stack->type,
- offset - stack->offset,
- &type_die)) {
- tsr->type = type_die;
- tsr->kind = TSR_KIND_TYPE;
- tsr->ok = true;
- } else {
- tsr->ok = false;
- return;
- }
-
- pr_debug_dtp("mov [%x] -%#x(stack) -> reg%d",
- insn_offset, -offset, dst->reg1);
- pr_debug_type_name(&tsr->type, tsr->kind);
- }
- /* And then dereference the pointer if it has one */
- else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
- state->regs[sreg].kind == TSR_KIND_TYPE &&
- die_deref_ptr_type(&state->regs[sreg].type,
- src->offset, &type_die)) {
- tsr->type = type_die;
- tsr->kind = TSR_KIND_TYPE;
- tsr->ok = true;
-
- pr_debug_dtp("mov [%x] %#x(reg%d) -> reg%d",
- insn_offset, src->offset, sreg, dst->reg1);
- pr_debug_type_name(&tsr->type, tsr->kind);
- }
- /* Or check if it's a global variable */
- else if (sreg == DWARF_REG_PC) {
- struct map_symbol *ms = dloc->ms;
- u64 ip = ms->sym->start + dl->al.offset;
- u64 addr;
- int offset;
-
- addr = annotate_calc_pcrel(ms, ip, src->offset, dl);
-
- if (!get_global_var_type(cu_die, dloc, ip, addr, &offset,
- &type_die) ||
- !die_get_member_type(&type_die, offset, &type_die)) {
- tsr->ok = false;
- return;
- }
-
- tsr->type = type_die;
- tsr->kind = TSR_KIND_TYPE;
- tsr->ok = true;
-
- pr_debug_dtp("mov [%x] global addr=%"PRIx64" -> reg%d",
- insn_offset, addr, dst->reg1);
- pr_debug_type_name(&type_die, tsr->kind);
- }
- /* And check percpu access with base register */
- else if (has_reg_type(state, sreg) &&
- state->regs[sreg].kind == TSR_KIND_PERCPU_BASE) {
- u64 ip = dloc->ms->sym->start + dl->al.offset;
- u64 var_addr = src->offset;
- int offset;
-
- if (src->multi_regs) {
- int reg2 = (sreg == src->reg1) ? src->reg2 : src->reg1;
-
- if (has_reg_type(state, reg2) && state->regs[reg2].ok &&
- state->regs[reg2].kind == TSR_KIND_CONST)
- var_addr += state->regs[reg2].imm_value;
- }
-
- /*
- * In kernel, %gs points to a per-cpu region for the
- * current CPU. Access with a constant offset should
- * be treated as a global variable access.
- */
- if (get_global_var_type(cu_die, dloc, ip, var_addr,
- &offset, &type_die) &&
- die_get_member_type(&type_die, offset, &type_die)) {
- tsr->type = type_die;
- tsr->kind = TSR_KIND_TYPE;
- tsr->ok = true;
-
- if (src->multi_regs) {
- pr_debug_dtp("mov [%x] percpu %#x(reg%d,reg%d) -> reg%d",
- insn_offset, src->offset, src->reg1,
- src->reg2, dst->reg1);
- } else {
- pr_debug_dtp("mov [%x] percpu %#x(reg%d) -> reg%d",
- insn_offset, src->offset, sreg, dst->reg1);
- }
- pr_debug_type_name(&tsr->type, tsr->kind);
- } else {
- tsr->ok = false;
- }
- }
- /* And then dereference the calculated pointer if it has one */
- else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
- state->regs[sreg].kind == TSR_KIND_POINTER &&
- die_get_member_type(&state->regs[sreg].type,
- src->offset, &type_die)) {
- tsr->type = type_die;
- tsr->kind = TSR_KIND_TYPE;
- tsr->ok = true;
-
- pr_debug_dtp("mov [%x] pointer %#x(reg%d) -> reg%d",
- insn_offset, src->offset, sreg, dst->reg1);
- pr_debug_type_name(&tsr->type, tsr->kind);
- }
- /* Or try another register if any */
- else if (src->multi_regs && sreg == src->reg1 &&
- src->reg1 != src->reg2) {
- sreg = src->reg2;
- goto retry;
- }
- else {
- int offset;
- const char *var_name = NULL;
-
- /* it might be per-cpu variable (in kernel) access */
- if (src->offset < 0) {
- if (get_global_var_info(dloc, (s64)src->offset,
- &var_name, &offset) &&
- !strcmp(var_name, "__per_cpu_offset")) {
- tsr->kind = TSR_KIND_PERCPU_BASE;
-
- pr_debug_dtp("mov [%x] percpu base reg%d\n",
- insn_offset, dst->reg1);
- }
- }
-
- tsr->ok = false;
- }
- }
- /* Case 3. register to memory transfers */
- if (!src->mem_ref && dst->mem_ref) {
- if (!has_reg_type(state, src->reg1) ||
- !state->regs[src->reg1].ok)
- return;
-
- /* Check stack variables with offset */
- if (dst->reg1 == fbreg) {
- struct type_state_stack *stack;
- int offset = dst->offset - fboff;
-
- tsr = &state->regs[src->reg1];
-
- stack = find_stack_state(state, offset);
- if (stack) {
- /*
- * The source register is likely to hold a type
- * of member if it's a compound type. Do not
- * update the stack variable type since we can
- * get the member type later by using the
- * die_get_member_type().
- */
- if (!stack->compound)
- set_stack_state(stack, offset, tsr->kind,
- &tsr->type);
- } else {
- findnew_stack_state(state, offset, tsr->kind,
- &tsr->type);
+ pr_debug_dtp("var [%"PRIx64"] copyback reg%d",
+ insn_offset, reg->copied_from);
+ pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
}
-
- pr_debug_dtp("mov [%x] reg%d -> -%#x(stack)",
- insn_offset, src->reg1, -offset);
- pr_debug_type_name(&tsr->type, tsr->kind);
}
- /*
- * Ignore other transfers since it'd set a value in a struct
- * and won't change the type.
- */
}
- /* Case 4. memory to memory transfers (not handled for now) */
}
/**
@@ -1166,8 +912,8 @@ retry:
static void update_insn_state(struct type_state *state, struct data_loc_info *dloc,
Dwarf_Die *cu_die, struct disasm_line *dl)
{
- if (arch__is(dloc->arch, "x86"))
- update_insn_state_x86(state, dloc, cu_die, dl);
+ if (dloc->arch->update_insn_state)
+ dloc->arch->update_insn_state(state, dloc, cu_die, dl);
}
/*
@@ -1254,75 +1000,164 @@ static void setup_stack_canary(struct data_loc_info *dloc)
/*
* It's at the target address, check if it has a matching type.
- * It returns 1 if found, 0 if not or -1 if not found but no need to
- * repeat the search. The last case is for per-cpu variables which
+ * It returns PERF_TMR_BAIL_OUT when it looks up per-cpu variables which
* are similar to global variables and no additional info is needed.
*/
-static int check_matching_type(struct type_state *state,
- struct data_loc_info *dloc,
- Dwarf_Die *cu_die, Dwarf_Die *type_die)
+static enum type_match_result check_matching_type(struct type_state *state,
+ struct data_loc_info *dloc,
+ Dwarf_Die *cu_die,
+ struct disasm_line *dl,
+ Dwarf_Die *type_die)
{
Dwarf_Word size;
- u32 insn_offset = dloc->ip - dloc->ms->sym->start;
+ u32 insn_offset = dl->al.offset;
int reg = dloc->op->reg1;
+ int offset = dloc->op->offset;
+ const char *offset_sign = "";
+ bool retry = true;
+
+ if (offset < 0) {
+ offset = -offset;
+ offset_sign = "-";
+ }
- pr_debug_dtp("chk [%x] reg%d offset=%#x ok=%d kind=%d",
- insn_offset, reg, dloc->op->offset,
+again:
+ pr_debug_dtp("chk [%x] reg%d offset=%s%#x ok=%d kind=%d ",
+ insn_offset, reg, offset_sign, offset,
state->regs[reg].ok, state->regs[reg].kind);
- if (state->regs[reg].ok && state->regs[reg].kind == TSR_KIND_TYPE) {
- int tag = dwarf_tag(&state->regs[reg].type);
+ if (!state->regs[reg].ok)
+ goto check_non_register;
+
+ if (state->regs[reg].kind == TSR_KIND_TYPE) {
+ Dwarf_Die sized_type;
+ struct strbuf sb;
+
+ strbuf_init(&sb, 32);
+ die_get_typename_from_type(&state->regs[reg].type, &sb);
+ pr_debug_dtp("(%s)", sb.buf);
+ strbuf_release(&sb);
/*
* Normal registers should hold a pointer (or array) to
* dereference a memory location.
*/
- if (tag != DW_TAG_pointer_type && tag != DW_TAG_array_type) {
+ if (!is_pointer_type(&state->regs[reg].type)) {
if (dloc->op->offset < 0 && reg != state->stack_reg)
goto check_kernel;
- pr_debug_dtp("\n");
- return -1;
+ return PERF_TMR_NO_POINTER;
}
- pr_debug_dtp("\n");
-
/* Remove the pointer and get the target type */
- if (die_get_real_type(&state->regs[reg].type, type_die) == NULL)
- return -1;
+ if (__die_get_real_type(&state->regs[reg].type, type_die) == NULL)
+ return PERF_TMR_NO_POINTER;
+
+ dloc->type_offset = dloc->op->offset;
+
+ if (dwarf_tag(type_die) == DW_TAG_typedef)
+ die_get_real_type(type_die, &sized_type);
+ else
+ sized_type = *type_die;
+
+ /* Get the size of the actual type */
+ if (dwarf_aggregate_size(&sized_type, &size) < 0 ||
+ (unsigned)dloc->type_offset >= size)
+ return PERF_TMR_BAD_OFFSET;
+
+ return PERF_TMR_OK;
+ }
+
+ if (state->regs[reg].kind == TSR_KIND_POINTER) {
+ pr_debug_dtp("percpu ptr");
+
+ /*
+ * It's actaully pointer but the address was calculated using
+ * some arithmetic. So it points to the actual type already.
+ */
+ *type_die = state->regs[reg].type;
dloc->type_offset = dloc->op->offset;
/* Get the size of the actual type */
if (dwarf_aggregate_size(type_die, &size) < 0 ||
(unsigned)dloc->type_offset >= size)
- return -1;
+ return PERF_TMR_BAIL_OUT;
- return 1;
+ return PERF_TMR_OK;
}
+ if (state->regs[reg].kind == TSR_KIND_CANARY) {
+ pr_debug_dtp("stack canary");
+
+ /*
+ * This is a saved value of the stack canary which will be handled
+ * in the outer logic when it returns failure here. Pretend it's
+ * from the stack canary directly.
+ */
+ setup_stack_canary(dloc);
+
+ return PERF_TMR_BAIL_OUT;
+ }
+
+ if (state->regs[reg].kind == TSR_KIND_PERCPU_BASE) {
+ u64 var_addr = dloc->op->offset;
+ int var_offset;
+
+ pr_debug_dtp("percpu var");
+
+ if (dloc->op->multi_regs) {
+ int reg2 = dloc->op->reg2;
+
+ if (dloc->op->reg2 == reg)
+ reg2 = dloc->op->reg1;
+
+ if (has_reg_type(state, reg2) && state->regs[reg2].ok &&
+ state->regs[reg2].kind == TSR_KIND_CONST)
+ var_addr += state->regs[reg2].imm_value;
+ }
+
+ if (get_global_var_type(cu_die, dloc, dloc->ip, var_addr,
+ &var_offset, type_die)) {
+ dloc->type_offset = var_offset;
+ return PERF_TMR_OK;
+ }
+ /* No need to retry per-cpu (global) variables */
+ return PERF_TMR_BAIL_OUT;
+ }
+
+check_non_register:
if (reg == dloc->fbreg) {
struct type_state_stack *stack;
- pr_debug_dtp(" fbreg\n");
+ pr_debug_dtp("fbreg");
stack = find_stack_state(state, dloc->type_offset);
- if (stack == NULL)
- return 0;
+ if (stack == NULL) {
+ if (retry) {
+ pr_debug_dtp(" : retry\n");
+ retry = false;
+
+ /* update type info it's the first store to the stack */
+ update_insn_state(state, dloc, cu_die, dl);
+ goto again;
+ }
+ return PERF_TMR_NO_TYPE;
+ }
if (stack->kind == TSR_KIND_CANARY) {
setup_stack_canary(dloc);
- return -1;
+ return PERF_TMR_BAIL_OUT;
}
if (stack->kind != TSR_KIND_TYPE)
- return 0;
+ return PERF_TMR_NO_TYPE;
*type_die = stack->type;
/* Update the type offset from the start of slot */
dloc->type_offset -= stack->offset;
- return 1;
+ return PERF_TMR_OK;
}
if (dloc->fb_cfa) {
@@ -1330,109 +1165,59 @@ static int check_matching_type(struct type_state *state,
u64 pc = map__rip_2objdump(dloc->ms->map, dloc->ip);
int fbreg, fboff;
- pr_debug_dtp(" cfa\n");
+ pr_debug_dtp("cfa");
if (die_get_cfa(dloc->di->dbg, pc, &fbreg, &fboff) < 0)
fbreg = -1;
if (reg != fbreg)
- return 0;
+ return PERF_TMR_NO_TYPE;
stack = find_stack_state(state, dloc->type_offset - fboff);
- if (stack == NULL)
- return 0;
+ if (stack == NULL) {
+ if (retry) {
+ pr_debug_dtp(" : retry\n");
+ retry = false;
+
+ /* update type info it's the first store to the stack */
+ update_insn_state(state, dloc, cu_die, dl);
+ goto again;
+ }
+ return PERF_TMR_NO_TYPE;
+ }
if (stack->kind == TSR_KIND_CANARY) {
setup_stack_canary(dloc);
- return -1;
+ return PERF_TMR_BAIL_OUT;
}
if (stack->kind != TSR_KIND_TYPE)
- return 0;
+ return PERF_TMR_NO_TYPE;
*type_die = stack->type;
/* Update the type offset from the start of slot */
dloc->type_offset -= fboff + stack->offset;
- return 1;
- }
-
- if (state->regs[reg].kind == TSR_KIND_PERCPU_BASE) {
- u64 var_addr = dloc->op->offset;
- int var_offset;
-
- pr_debug_dtp(" percpu var\n");
-
- if (dloc->op->multi_regs) {
- int reg2 = dloc->op->reg2;
-
- if (dloc->op->reg2 == reg)
- reg2 = dloc->op->reg1;
-
- if (has_reg_type(state, reg2) && state->regs[reg2].ok &&
- state->regs[reg2].kind == TSR_KIND_CONST)
- var_addr += state->regs[reg2].imm_value;
- }
-
- if (get_global_var_type(cu_die, dloc, dloc->ip, var_addr,
- &var_offset, type_die)) {
- dloc->type_offset = var_offset;
- return 1;
- }
- /* No need to retry per-cpu (global) variables */
- return -1;
- }
-
- if (state->regs[reg].ok && state->regs[reg].kind == TSR_KIND_POINTER) {
- pr_debug_dtp(" percpu ptr\n");
-
- /*
- * It's actaully pointer but the address was calculated using
- * some arithmetic. So it points to the actual type already.
- */
- *type_die = state->regs[reg].type;
-
- dloc->type_offset = dloc->op->offset;
-
- /* Get the size of the actual type */
- if (dwarf_aggregate_size(type_die, &size) < 0 ||
- (unsigned)dloc->type_offset >= size)
- return -1;
-
- return 1;
- }
-
- if (state->regs[reg].ok && state->regs[reg].kind == TSR_KIND_CANARY) {
- pr_debug_dtp(" stack canary\n");
-
- /*
- * This is a saved value of the stack canary which will be handled
- * in the outer logic when it returns failure here. Pretend it's
- * from the stack canary directly.
- */
- setup_stack_canary(dloc);
-
- return -1;
+ return PERF_TMR_OK;
}
check_kernel:
if (dso__kernel(map__dso(dloc->ms->map))) {
u64 addr;
- int offset;
/* Direct this-cpu access like "%gs:0x34740" */
if (dloc->op->segment == INSN_SEG_X86_GS && dloc->op->imm &&
arch__is(dloc->arch, "x86")) {
- pr_debug_dtp(" this-cpu var\n");
+ pr_debug_dtp("this-cpu var");
addr = dloc->op->offset;
if (get_global_var_type(cu_die, dloc, dloc->ip, addr,
&offset, type_die)) {
dloc->type_offset = offset;
- return 1;
+ return PERF_TMR_OK;
}
- return -1;
+ return PERF_TMR_BAIL_OUT;
}
/* Access to global variable like "-0x7dcf0500(,%rdx,8)" */
@@ -1441,31 +1226,30 @@ check_kernel:
if (get_global_var_type(cu_die, dloc, dloc->ip, addr,
&offset, type_die)) {
- pr_debug_dtp(" global var\n");
+ pr_debug_dtp("global var");
dloc->type_offset = offset;
- return 1;
+ return PERF_TMR_OK;
}
- pr_debug_dtp(" negative offset\n");
- return -1;
+ return PERF_TMR_BAIL_OUT;
}
}
- pr_debug_dtp("\n");
- return 0;
+ return PERF_TMR_UNKNOWN;
}
/* Iterate instructions in basic blocks and update type table */
-static int find_data_type_insn(struct data_loc_info *dloc,
- struct list_head *basic_blocks,
- struct die_var_type *var_types,
- Dwarf_Die *cu_die, Dwarf_Die *type_die)
+static enum type_match_result find_data_type_insn(struct data_loc_info *dloc,
+ struct list_head *basic_blocks,
+ struct die_var_type *var_types,
+ Dwarf_Die *cu_die,
+ Dwarf_Die *type_die)
{
struct type_state state;
struct symbol *sym = dloc->ms->sym;
struct annotation *notes = symbol__annotation(sym);
struct annotated_basic_block *bb;
- int ret = 0;
+ enum type_match_result ret = PERF_TMR_UNKNOWN;
init_type_state(&state, dloc->arch);
@@ -1490,7 +1274,8 @@ static int find_data_type_insn(struct data_loc_info *dloc,
if (this_ip == dloc->ip) {
ret = check_matching_type(&state, dloc,
- cu_die, type_die);
+ cu_die, dl, type_die);
+ pr_debug_dtp(" : %s\n", match_result_str(ret));
goto out;
}
@@ -1506,34 +1291,43 @@ out:
return ret;
}
+static int arch_supports_insn_tracking(struct data_loc_info *dloc)
+{
+ if ((arch__is(dloc->arch, "x86")) || (arch__is(dloc->arch, "powerpc")))
+ return 1;
+ return 0;
+}
+
/*
* Construct a list of basic blocks for each scope with variables and try to find
* the data type by updating a type state table through instructions.
*/
-static int find_data_type_block(struct data_loc_info *dloc,
- Dwarf_Die *cu_die, Dwarf_Die *scopes,
- int nr_scopes, Dwarf_Die *type_die)
+static enum type_match_result find_data_type_block(struct data_loc_info *dloc,
+ Dwarf_Die *cu_die,
+ Dwarf_Die *scopes,
+ int nr_scopes,
+ Dwarf_Die *type_die)
{
LIST_HEAD(basic_blocks);
struct die_var_type *var_types = NULL;
u64 src_ip, dst_ip, prev_dst_ip;
- int ret = -1;
+ enum type_match_result ret = PERF_TMR_UNKNOWN;
/* TODO: other architecture support */
- if (!arch__is(dloc->arch, "x86"))
- return -1;
+ if (!arch_supports_insn_tracking(dloc))
+ return PERF_TMR_BAIL_OUT;
prev_dst_ip = dst_ip = dloc->ip;
for (int i = nr_scopes - 1; i >= 0; i--) {
Dwarf_Addr base, start, end;
LIST_HEAD(this_blocks);
- int found;
if (dwarf_ranges(&scopes[i], 0, &base, &start, &end) < 0)
break;
- pr_debug_dtp("scope: [%d/%d] (die:%lx)\n",
- i + 1, nr_scopes, (long)dwarf_dieoffset(&scopes[i]));
+ pr_debug_dtp("scope: [%d/%d] ", i + 1, nr_scopes);
+ pr_debug_scope(&scopes[i]);
+
src_ip = map__objdump_2rip(dloc->ms->map, start);
again:
@@ -1558,10 +1352,17 @@ again:
fixup_var_address(var_types, start);
/* Find from start of this scope to the target instruction */
- found = find_data_type_insn(dloc, &basic_blocks, var_types,
+ ret = find_data_type_insn(dloc, &basic_blocks, var_types,
cu_die, type_die);
- if (found > 0) {
+ if (ret == PERF_TMR_OK) {
char buf[64];
+ int offset = dloc->op->offset;
+ const char *offset_sign = "";
+
+ if (offset < 0) {
+ offset = -offset;
+ offset_sign = "-";
+ }
if (dloc->op->multi_regs)
snprintf(buf, sizeof(buf), "reg%d, reg%d",
@@ -1569,14 +1370,12 @@ again:
else
snprintf(buf, sizeof(buf), "reg%d", dloc->op->reg1);
- pr_debug_dtp("found by insn track: %#x(%s) type-offset=%#x\n",
- dloc->op->offset, buf, dloc->type_offset);
- pr_debug_type_name(type_die, TSR_KIND_TYPE);
- ret = 0;
+ pr_debug_dtp("found by insn track: %s%#x(%s) type-offset=%#x\n",
+ offset_sign, offset, buf, dloc->type_offset);
break;
}
- if (found < 0)
+ if (ret == PERF_TMR_BAIL_OUT)
break;
/* Go up to the next scope and find blocks to the start */
@@ -1595,14 +1394,17 @@ static int find_data_type_die(struct data_loc_info *dloc, Dwarf_Die *type_die)
struct annotated_op_loc *loc = dloc->op;
Dwarf_Die cu_die, var_die;
Dwarf_Die *scopes = NULL;
- int reg, offset;
+ int reg, offset = loc->offset;
int ret = -1;
int i, nr_scopes;
int fbreg = -1;
int fb_offset = 0;
bool is_fbreg = false;
+ bool found = false;
u64 pc;
char buf[64];
+ enum type_match_result result = PERF_TMR_UNKNOWN;
+ const char *offset_sign = "";
if (dloc->op->multi_regs)
snprintf(buf, sizeof(buf), "reg%d, reg%d", dloc->op->reg1, dloc->op->reg2);
@@ -1611,10 +1413,15 @@ static int find_data_type_die(struct data_loc_info *dloc, Dwarf_Die *type_die)
else
snprintf(buf, sizeof(buf), "reg%d", dloc->op->reg1);
+ if (offset < 0) {
+ offset = -offset;
+ offset_sign = "-";
+ }
+
pr_debug_dtp("-----------------------------------------------------------\n");
- pr_debug_dtp("find data type for %#x(%s) at %s+%#"PRIx64"\n",
- dloc->op->offset, buf, dloc->ms->sym->name,
- dloc->ip - dloc->ms->sym->start);
+ pr_debug_dtp("find data type for %s%#x(%s) at %s+%#"PRIx64"\n",
+ offset_sign, offset, buf,
+ dloc->ms->sym->name, dloc->ip - dloc->ms->sym->start);
/*
* IP is a relative instruction address from the start of the map, as
@@ -1644,7 +1451,7 @@ static int find_data_type_die(struct data_loc_info *dloc, Dwarf_Die *type_die)
pr_debug_dtp("found by addr=%#"PRIx64" type_offset=%#x\n",
dloc->var_addr, offset);
pr_debug_type_name(type_die, TSR_KIND_TYPE);
- ret = 0;
+ found = true;
goto out;
}
}
@@ -1685,65 +1492,95 @@ retry:
/* Search from the inner-most scope to the outer */
for (i = nr_scopes - 1; i >= 0; i--) {
+ Dwarf_Die mem_die;
+ int type_offset = offset;
+
if (reg == DWARF_REG_PC) {
if (!die_find_variable_by_addr(&scopes[i], dloc->var_addr,
- &var_die, &offset))
+ &var_die, &type_offset))
continue;
} else {
/* Look up variables/parameters in this scope */
if (!die_find_variable_by_reg(&scopes[i], pc, reg,
- &offset, is_fbreg, &var_die))
+ &type_offset, is_fbreg, &var_die))
continue;
}
+ pr_debug_dtp("found \"%s\" (die: %#lx) in scope=%d/%d (die: %#lx) ",
+ dwarf_diename(&var_die), (long)dwarf_dieoffset(&var_die),
+ i+1, nr_scopes, (long)dwarf_dieoffset(&scopes[i]));
+
/* Found a variable, see if it's correct */
- ret = check_variable(dloc, &var_die, type_die, reg, offset, is_fbreg);
- if (ret == 0) {
- pr_debug_dtp("found \"%s\" in scope=%d/%d (die: %#lx) ",
- dwarf_diename(&var_die), i+1, nr_scopes,
- (long)dwarf_dieoffset(&scopes[i]));
+ result = check_variable(dloc, &var_die, &mem_die, reg, type_offset, is_fbreg);
+ if (result == PERF_TMR_OK) {
if (reg == DWARF_REG_PC) {
pr_debug_dtp("addr=%#"PRIx64" type_offset=%#x\n",
- dloc->var_addr, offset);
+ dloc->var_addr, type_offset);
} else if (reg == DWARF_REG_FB || is_fbreg) {
pr_debug_dtp("stack_offset=%#x type_offset=%#x\n",
- fb_offset, offset);
+ fb_offset, type_offset);
} else {
- pr_debug_dtp("type_offset=%#x\n", offset);
+ pr_debug_dtp("type_offset=%#x\n", type_offset);
+ }
+
+ if (!found || is_better_type(type_die, &mem_die)) {
+ *type_die = mem_die;
+ dloc->type_offset = type_offset;
+ found = true;
}
- pr_debug_location(&var_die, pc, reg);
- pr_debug_type_name(type_die, TSR_KIND_TYPE);
} else {
- pr_debug_dtp("check variable \"%s\" failed (die: %#lx)\n",
- dwarf_diename(&var_die),
- (long)dwarf_dieoffset(&var_die));
- pr_debug_location(&var_die, pc, reg);
- pr_debug_type_name(type_die, TSR_KIND_TYPE);
+ pr_debug_dtp("failed: %s\n", match_result_str(result));
}
- dloc->type_offset = offset;
- goto out;
+
+ pr_debug_location(&var_die, pc, reg);
+ pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
}
- if (loc->multi_regs && reg == loc->reg1 && loc->reg1 != loc->reg2) {
+ if (!found && loc->multi_regs && reg == loc->reg1 && loc->reg1 != loc->reg2) {
reg = loc->reg2;
goto retry;
}
- if (reg != DWARF_REG_PC) {
- ret = find_data_type_block(dloc, &cu_die, scopes,
- nr_scopes, type_die);
- if (ret == 0) {
+ if (!found && reg != DWARF_REG_PC) {
+ result = find_data_type_block(dloc, &cu_die, scopes,
+ nr_scopes, type_die);
+ if (result == PERF_TMR_OK) {
ann_data_stat.insn_track++;
- goto out;
+ found = true;
}
}
- if (ret < 0) {
- pr_debug_dtp("no variable found\n");
- ann_data_stat.no_var++;
+out:
+ pr_debug_dtp("final result: ");
+ if (found) {
+ pr_debug_type_name(type_die, TSR_KIND_TYPE);
+ ret = 0;
+ } else {
+ switch (result) {
+ case PERF_TMR_NO_TYPE:
+ case PERF_TMR_NO_POINTER:
+ pr_debug_dtp("%s\n", match_result_str(result));
+ ann_data_stat.no_typeinfo++;
+ break;
+ case PERF_TMR_NO_SIZE:
+ pr_debug_dtp("%s\n", match_result_str(result));
+ ann_data_stat.invalid_size++;
+ break;
+ case PERF_TMR_BAD_OFFSET:
+ pr_debug_dtp("%s\n", match_result_str(result));
+ ann_data_stat.bad_offset++;
+ break;
+ case PERF_TMR_UNKNOWN:
+ case PERF_TMR_BAIL_OUT:
+ case PERF_TMR_OK: /* should not reach here */
+ default:
+ pr_debug_dtp("no variable found\n");
+ ann_data_stat.no_var++;
+ break;
+ }
+ ret = -1;
}
-out:
free(scopes);
return ret;
}
@@ -1764,16 +1601,9 @@ out:
*/
struct annotated_data_type *find_data_type(struct data_loc_info *dloc)
{
- struct annotated_data_type *result = NULL;
struct dso *dso = map__dso(dloc->ms->map);
Dwarf_Die type_die;
- dloc->di = debuginfo__new(dso__long_name(dso));
- if (dloc->di == NULL) {
- pr_debug_dtp("cannot get the debug info\n");
- return NULL;
- }
-
/*
* The type offset is the same as instruction offset by default.
* But when finding a global variable, the offset won't be valid.
@@ -1783,13 +1613,9 @@ struct annotated_data_type *find_data_type(struct data_loc_info *dloc)
dloc->fbreg = -1;
if (find_data_type_die(dloc, &type_die) < 0)
- goto out;
-
- result = dso__findnew_data_type(dso, &type_die);
+ return NULL;
-out:
- debuginfo__delete(dloc->di);
- return result;
+ return dso__findnew_data_type(dso, &type_die);
}
static int alloc_data_type_histograms(struct annotated_data_type *adt, int nr_entries)
@@ -1911,10 +1737,15 @@ static void print_annotated_data_header(struct hist_entry *he, struct evsel *evs
struct evsel *pos;
int i = 0;
- for_each_group_evsel(pos, evsel)
- printf(" event[%d] = %s\n", i++, pos->name);
+ nr_members = 0;
+ for_each_group_evsel(pos, evsel) {
+ if (symbol_conf.skip_empty &&
+ evsel__hists(pos)->stats.nr_samples == 0)
+ continue;
- nr_members = evsel->core.nr_members;
+ printf(" event[%d] = %s\n", i++, pos->name);
+ nr_members++;
+ }
}
if (symbol_conf.show_total_period) {
@@ -1949,34 +1780,29 @@ static void print_annotated_data_type(struct annotated_data_type *mem_type,
{
struct annotated_member *child;
struct type_hist *h = mem_type->histograms[evsel->core.idx];
- int i, nr_events = 1, samples = 0;
+ int i, nr_events = 0, samples = 0;
u64 period = 0;
int width = symbol_conf.show_total_period ? 11 : 7;
+ struct evsel *pos;
- for (i = 0; i < member->size; i++) {
- samples += h->addr[member->offset + i].nr_samples;
- period += h->addr[member->offset + i].period;
- }
- print_annotated_data_value(h, period, samples);
+ for_each_group_evsel(pos, evsel) {
+ h = mem_type->histograms[pos->core.idx];
- if (evsel__is_group_event(evsel)) {
- struct evsel *pos;
-
- for_each_group_member(pos, evsel) {
- h = mem_type->histograms[pos->core.idx];
+ if (symbol_conf.skip_empty &&
+ evsel__hists(pos)->stats.nr_samples == 0)
+ continue;
- samples = 0;
- period = 0;
- for (i = 0; i < member->size; i++) {
- samples += h->addr[member->offset + i].nr_samples;
- period += h->addr[member->offset + i].period;
- }
- print_annotated_data_value(h, period, samples);
+ samples = 0;
+ period = 0;
+ for (i = 0; i < member->size; i++) {
+ samples += h->addr[member->offset + i].nr_samples;
+ period += h->addr[member->offset + i].period;
}
- nr_events = evsel->core.nr_members;
+ print_annotated_data_value(h, period, samples);
+ nr_events++;
}
- printf(" %10d %10d %*s%s\t%s",
+ printf(" %#10x %#10x %*s%s\t%s",
member->offset, member->size, indent, "", member->type_name,
member->var_name ?: "");
diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h
index 0a57d9f5ee78..98c80b2268dd 100644
--- a/tools/perf/util/annotate-data.h
+++ b/tools/perf/util/annotate-data.h
@@ -6,6 +6,12 @@
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/types.h>
+#include "dwarf-regs.h"
+#include "annotate.h"
+
+#ifdef HAVE_LIBDW_SUPPORT
+#include "debuginfo.h"
+#endif
struct annotated_op_loc;
struct debuginfo;
@@ -15,6 +21,23 @@ struct hist_entry;
struct map_symbol;
struct thread;
+#define pr_debug_dtp(fmt, ...) \
+do { \
+ if (debug_type_profile) \
+ pr_info(fmt, ##__VA_ARGS__); \
+ else \
+ pr_debug3(fmt, ##__VA_ARGS__); \
+} while (0)
+
+enum type_state_kind {
+ TSR_KIND_INVALID = 0,
+ TSR_KIND_TYPE,
+ TSR_KIND_PERCPU_BASE,
+ TSR_KIND_CONST,
+ TSR_KIND_POINTER,
+ TSR_KIND_CANARY,
+};
+
/**
* struct annotated_member - Type of member field
* @node: List entry in the parent list
@@ -100,9 +123,9 @@ struct data_loc_info {
u64 var_addr;
u8 cpumode;
struct annotated_op_loc *op;
+ struct debuginfo *di;
/* These are used internally */
- struct debuginfo *di;
int fbreg;
bool fb_cfa;
@@ -142,7 +165,53 @@ struct annotated_data_stat {
};
extern struct annotated_data_stat ann_data_stat;
-#ifdef HAVE_DWARF_SUPPORT
+#ifdef HAVE_LIBDW_SUPPORT
+/*
+ * Type information in a register, valid when @ok is true.
+ * The @caller_saved registers are invalidated after a function call.
+ */
+struct type_state_reg {
+ Dwarf_Die type;
+ u32 imm_value;
+ bool ok;
+ bool caller_saved;
+ u8 kind;
+ u8 copied_from;
+};
+
+/* Type information in a stack location, dynamically allocated */
+struct type_state_stack {
+ struct list_head list;
+ Dwarf_Die type;
+ int offset;
+ int size;
+ bool compound;
+ u8 kind;
+};
+
+/* FIXME: This should be arch-dependent */
+#ifdef __powerpc__
+#define TYPE_STATE_MAX_REGS 32
+#else
+#define TYPE_STATE_MAX_REGS 16
+#endif
+
+/*
+ * State table to maintain type info in each register and stack location.
+ * It'll be updated when new variable is allocated or type info is moved
+ * to a new location (register or stack). As it'd be used with the
+ * shortest path of basic blocks, it only maintains a single table.
+ */
+struct type_state {
+ /* state of general purpose registers */
+ struct type_state_reg regs[TYPE_STATE_MAX_REGS];
+ /* state of stack location */
+ struct list_head stack_vars;
+ /* return value register */
+ int ret_reg;
+ /* stack pointer register */
+ int stack_reg;
+};
/* Returns data type at the location (ip, reg, offset) */
struct annotated_data_type *find_data_type(struct data_loc_info *dloc);
@@ -160,7 +229,22 @@ void global_var_type__tree_delete(struct rb_root *root);
int hist_entry__annotate_data_tty(struct hist_entry *he, struct evsel *evsel);
-#else /* HAVE_DWARF_SUPPORT */
+bool has_reg_type(struct type_state *state, int reg);
+struct type_state_stack *findnew_stack_state(struct type_state *state,
+ int offset, u8 kind,
+ Dwarf_Die *type_die);
+void set_stack_state(struct type_state_stack *stack, int offset, u8 kind,
+ Dwarf_Die *type_die);
+struct type_state_stack *find_stack_state(struct type_state *state,
+ int offset);
+bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
+ u64 ip, u64 var_addr, int *var_offset,
+ Dwarf_Die *type_die);
+bool get_global_var_info(struct data_loc_info *dloc, u64 addr,
+ const char **var_name, int *var_offset);
+void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind);
+
+#else /* HAVE_LIBDW_SUPPORT */
static inline struct annotated_data_type *
find_data_type(struct data_loc_info *dloc __maybe_unused)
@@ -192,7 +276,7 @@ static inline int hist_entry__annotate_data_tty(struct hist_entry *he __maybe_un
return -1;
}
-#endif /* HAVE_DWARF_SUPPORT */
+#endif /* HAVE_LIBDW_SUPPORT */
#ifdef HAVE_SLANG_SUPPORT
int hist_entry__annotate_data_tui(struct hist_entry *he, struct evsel *evsel,
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 1451caf25e77..32e15c9f53f3 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -25,6 +25,7 @@
#include "srcline.h"
#include "units.h"
#include "debug.h"
+#include "debuginfo.h"
#include "annotate.h"
#include "annotate-data.h"
#include "evsel.h"
@@ -40,6 +41,7 @@
#include "namespaces.h"
#include "thread.h"
#include "hashmap.h"
+#include "strbuf.h"
#include <regex.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
@@ -47,6 +49,7 @@
#include <linux/zalloc.h>
#include <subcmd/parse-options.h>
#include <subcmd/run-command.h>
+#include <math.h>
/* FIXME: For the HE_COLORSET */
#include "ui/browser.h"
@@ -265,22 +268,30 @@ struct annotated_branch *annotation__get_branch(struct annotation *notes)
return notes->branch;
}
-static struct cyc_hist *symbol__cycles_hist(struct symbol *sym)
+static struct annotated_branch *symbol__find_branch_hist(struct symbol *sym,
+ unsigned int br_cntr_nr)
{
struct annotation *notes = symbol__annotation(sym);
struct annotated_branch *branch;
+ const size_t size = symbol__size(sym);
branch = annotation__get_branch(notes);
if (branch == NULL)
return NULL;
if (branch->cycles_hist == NULL) {
- const size_t size = symbol__size(sym);
-
branch->cycles_hist = calloc(size, sizeof(struct cyc_hist));
+ if (!branch->cycles_hist)
+ return NULL;
+ }
+
+ if (br_cntr_nr && branch->br_cntr == NULL) {
+ branch->br_cntr = calloc(br_cntr_nr * size, sizeof(u64));
+ if (!branch->br_cntr)
+ return NULL;
}
- return branch->cycles_hist;
+ return branch;
}
struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists)
@@ -315,16 +326,45 @@ static int symbol__inc_addr_samples(struct map_symbol *ms,
return src ? __symbol__inc_addr_samples(ms, src, evsel->core.idx, addr, sample) : 0;
}
-static int symbol__account_cycles(u64 addr, u64 start,
- struct symbol *sym, unsigned cycles)
+static int symbol__account_br_cntr(struct annotated_branch *branch,
+ struct evsel *evsel,
+ unsigned offset,
+ u64 br_cntr)
+{
+ unsigned int br_cntr_nr = evsel__leader(evsel)->br_cntr_nr;
+ unsigned int base = evsel__leader(evsel)->br_cntr_idx;
+ unsigned int off = offset * evsel->evlist->nr_br_cntr;
+ u64 *branch_br_cntr = branch->br_cntr;
+ unsigned int i, mask, width;
+
+ if (!br_cntr || !branch_br_cntr)
+ return 0;
+
+ perf_env__find_br_cntr_info(evsel__env(evsel), NULL, &width);
+ mask = (1L << width) - 1;
+ for (i = 0; i < br_cntr_nr; i++) {
+ u64 cntr = (br_cntr >> i * width) & mask;
+
+ branch_br_cntr[off + i + base] += cntr;
+ if (cntr == mask)
+ branch_br_cntr[off + i + base] |= ANNOTATION__BR_CNTR_SATURATED_FLAG;
+ }
+
+ return 0;
+}
+
+static int symbol__account_cycles(u64 addr, u64 start, struct symbol *sym,
+ unsigned cycles, struct evsel *evsel,
+ u64 br_cntr)
{
- struct cyc_hist *cycles_hist;
+ struct annotated_branch *branch;
unsigned offset;
+ int ret;
if (sym == NULL)
return 0;
- cycles_hist = symbol__cycles_hist(sym);
- if (cycles_hist == NULL)
+ branch = symbol__find_branch_hist(sym, evsel->evlist->nr_br_cntr);
+ if (!branch)
return -ENOMEM;
if (addr < sym->start || addr >= sym->end)
return -ERANGE;
@@ -336,15 +376,22 @@ static int symbol__account_cycles(u64 addr, u64 start,
start = 0;
}
offset = addr - sym->start;
- return __symbol__account_cycles(cycles_hist,
+ ret = __symbol__account_cycles(branch->cycles_hist,
start ? start - sym->start : 0,
offset, cycles,
!!start);
+
+ if (ret)
+ return ret;
+
+ return symbol__account_br_cntr(branch, evsel, offset, br_cntr);
}
int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
struct addr_map_symbol *start,
- unsigned cycles)
+ unsigned cycles,
+ struct evsel *evsel,
+ u64 br_cntr)
{
u64 saddr = 0;
int err;
@@ -370,7 +417,7 @@ int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
start ? start->addr : 0,
ams->ms.sym ? ams->ms.sym->start + map__start(ams->ms.map) : 0,
saddr);
- err = symbol__account_cycles(ams->al_addr, saddr, ams->ms.sym, cycles);
+ err = symbol__account_cycles(ams->al_addr, saddr, ams->ms.sym, cycles, evsel, br_cntr);
if (err)
pr_debug2("account_cycles failed %d\n", err);
return err;
@@ -411,6 +458,7 @@ static void annotated_branch__delete(struct annotated_branch *branch)
{
if (branch) {
zfree(&branch->cycles_hist);
+ free(branch->br_cntr);
free(branch);
}
}
@@ -454,8 +502,10 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64
}
}
-static int annotation__compute_ipc(struct annotation *notes, size_t size)
+static int annotation__compute_ipc(struct annotation *notes, size_t size,
+ struct evsel *evsel)
{
+ unsigned int br_cntr_nr = evsel->evlist->nr_br_cntr;
int err = 0;
s64 offset;
@@ -490,6 +540,20 @@ static int annotation__compute_ipc(struct annotation *notes, size_t size)
al->cycles->max = ch->cycles_max;
al->cycles->min = ch->cycles_min;
}
+ if (al && notes->branch->br_cntr) {
+ if (!al->br_cntr) {
+ al->br_cntr = calloc(br_cntr_nr, sizeof(u64));
+ if (!al->br_cntr) {
+ err = ENOMEM;
+ break;
+ }
+ }
+ al->num_aggr = ch->num_aggr;
+ al->br_cntr_nr = br_cntr_nr;
+ al->evsel = evsel;
+ memcpy(al->br_cntr, &notes->branch->br_cntr[offset * br_cntr_nr],
+ br_cntr_nr * sizeof(u64));
+ }
}
}
@@ -501,8 +565,10 @@ static int annotation__compute_ipc(struct annotation *notes, size_t size)
struct annotation_line *al;
al = annotated_source__get_line(notes->src, offset);
- if (al)
+ if (al) {
zfree(&al->cycles);
+ zfree(&al->br_cntr);
+ }
}
}
}
@@ -699,13 +765,13 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
int percent_type)
{
struct disasm_line *dl = container_of(al, struct disasm_line, al);
+ struct annotation *notes = symbol__annotation(sym);
static const char *prev_line;
if (al->offset != -1) {
double max_percent = 0.0;
int i, nr_percent = 1;
const char *color;
- struct annotation *notes = symbol__annotation(sym);
for (i = 0; i < al->data_nr; i++) {
double percent;
@@ -775,14 +841,11 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
} else if (max_lines && printed >= max_lines)
return 1;
else {
- int width = symbol_conf.show_total_period ? 12 : 8;
+ int width = annotation__pcnt_width(notes);
if (queue)
return -1;
- if (evsel__is_group_event(evsel))
- width *= evsel->core.nr_members;
-
if (!*al->line)
printf(" %*s:\n", width, " ");
else
@@ -851,6 +914,10 @@ static void annotation__calc_percent(struct annotation *notes,
BUG_ON(i >= al->data_nr);
+ if (symbol_conf.skip_empty &&
+ evsel__hists(evsel)->stats.nr_samples == 0)
+ continue;
+
data = &al->data[i++];
calc_percent(notes, evsel, data, al->offset, end);
@@ -904,7 +971,7 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
.options = &annotate_opts,
};
struct arch *arch = NULL;
- int err;
+ int err, nr;
err = evsel__get_arch(evsel, &arch);
if (err < 0)
@@ -925,6 +992,19 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
return -1;
}
+ nr = 0;
+ if (evsel__is_group_event(evsel)) {
+ struct evsel *pos;
+
+ for_each_group_evsel(pos, evsel) {
+ if (symbol_conf.skip_empty &&
+ evsel__hists(pos)->stats.nr_samples == 0)
+ continue;
+ nr++;
+ }
+ }
+ notes->src->nr_events = nr ? nr : 1;
+
if (annotate_opts.full_addr)
notes->src->start = map__objdump_2mem(ms->map, ms->sym->start);
else
@@ -1106,7 +1186,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
int more = 0;
bool context = opts->context;
u64 len;
- int width = symbol_conf.show_total_period ? 12 : 8;
+ int width = annotation__pcnt_width(notes);
int graph_dotted_len;
char buf[512];
@@ -1122,7 +1202,6 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
len = symbol__size(sym);
if (evsel__is_group_event(evsel)) {
- width *= evsel->core.nr_members;
evsel__group_desc(evsel, buf, sizeof(buf));
evsel_name = buf;
}
@@ -1594,13 +1673,12 @@ bool ui__has_annotation(void)
static double annotation_line__max_percent(struct annotation_line *al,
- struct annotation *notes,
unsigned int percent_type)
{
double percent_max = 0.0;
int i;
- for (i = 0; i < notes->src->nr_events; i++) {
+ for (i = 0; i < al->data_nr; i++) {
double percent;
percent = annotation_data__percent(&al->data[i],
@@ -1662,6 +1740,149 @@ static void ipc_coverage_string(char *bf, int size, struct annotation *notes)
ipc, coverage);
}
+/*
+ * Build the legend that maps each branch counter event to its abbreviated
+ * name, followed by the explanation of the '-' and '+' markers.
+ *
+ * @str:    out parameter; on success receives the buffer detached from the
+ *          string buffer (presumably owned and freed by the caller).
+ * @evsel:  an evsel whose evlist is walked for branch counter events.
+ * @header: when true a title line is emitted first and every following line
+ *          is prefixed with '#'.
+ *
+ * Returns 0 on success, -ENOTSUP when the evlist has no branch counters and
+ * -ENOMEM when any append to the string buffer reports a failure.
+ */
+int annotation_br_cntr_abbr_list(char **str, struct evsel *evsel, bool header)
+{
+	struct strbuf sb;
+	struct evsel *pos;
+
+	if (evsel->evlist->nr_br_cntr <= 0)
+		return -ENOTSUP;
+
+	strbuf_init(&sb, /*hint=*/ 0);
+
+	if (header && strbuf_addf(&sb, "# Branch counter abbr list:\n"))
+		goto err;
+
+	evlist__for_each_entry(evsel->evlist, pos) {
+		/* Only events sampled with branch counters get a legend line. */
+		if (pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) {
+			if ((header && strbuf_addf(&sb, "#")) ||
+			    strbuf_addf(&sb, " %s = %s\n", pos->name, pos->abbr_name))
+				goto err;
+		}
+	}
+
+	if ((header && strbuf_addf(&sb, "#")) ||
+	    strbuf_addf(&sb, " '-' No event occurs\n"))
+		goto err;
+
+	if ((header && strbuf_addf(&sb, "#")) ||
+	    strbuf_addf(&sb, " '+' Event occurrences may be lost due to branch counter saturated\n"))
+		goto err;
+
+	*str = strbuf_detach(&sb, NULL);
+
+	return 0;
+err:
+	strbuf_release(&sb);
+	return -ENOMEM;
+}
+
+/* Assume the branch counter saturated at 3 */
+#define ANNOTATION_BR_CNTR_SATURATION 3
+
+/*
+ * Format the branch counter column of one annotation line.
+ *
+ * @str:        out parameter; on success receives the buffer detached from
+ *              the string buffer (the caller in this file free()s it).
+ * @br_cntr_nr: number of entries in @br_cntr.
+ * @br_cntr:    accumulated occurrences per counter; the
+ *              ANNOTATION__BR_CNTR_SATURATED_FLAG bit marks a counter that
+ *              saturated at least once.
+ * @num_aggr:   number of aggregated samples used to average each counter.
+ * @evsel:      an evsel whose evlist is searched for the event owning each
+ *              counter index; may be NULL only if no lookup is needed.
+ *
+ * By default every counter is drawn as a '|' separated histogram of the
+ * event's abbreviated name ('-' for no event, '+' when saturated or when
+ * the average exceeds ANNOTATION_BR_CNTR_SATURATION). With verbose set,
+ * "abbr=avg" pairs are emitted instead.
+ *
+ * Returns 0 on success, -EINVAL when counters must be printed but @br_cntr
+ * or @evsel is missing, and -ENOMEM when appending to the buffer fails.
+ */
+int annotation_br_cntr_entry(char **str, int br_cntr_nr,
+			     u64 *br_cntr, int num_aggr,
+			     struct evsel *evsel)
+{
+	struct evsel *pos = evsel ? evlist__first(evsel->evlist) : NULL;
+	int i, j, avg, used;
+	int ret = -ENOMEM;
+	struct strbuf sb;
+
+	if (br_cntr_nr > 0 && !br_cntr)
+		return -EINVAL;
+
+	strbuf_init(&sb, /*hint=*/ 0);
+	for (i = 0; i < br_cntr_nr; i++) {
+		u64 count = br_cntr[i] & ~ANNOTATION__BR_CNTR_SATURATED_FLAG;
+		/*
+		 * Saturation is a property of a single counter: start every
+		 * counter from a clean state so that one saturated counter
+		 * does not add a '+' to all the counters that follow it.
+		 */
+		bool saturated = false;
+
+		used = 0;
+		/* Avoid dividing by zero when nothing was aggregated. */
+		avg = num_aggr > 0 ? ceil((double)count / (double)num_aggr) : 0;
+
+		/*
+		 * A histogram with the abbr name is displayed by default.
+		 * With -v, the exact number of branch counter is displayed.
+		 */
+		if (verbose) {
+			if (!evsel)
+				goto err_inval;
+
+			/*
+			 * NOTE(review): the lookup assumes every counter index
+			 * has an owning event in the evlist; if none matches,
+			 * pos does not refer to a valid evsel afterwards.
+			 */
+			evlist__for_each_entry_from(evsel->evlist, pos) {
+				if ((pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) &&
+				    (pos->br_cntr_idx == i))
+					break;
+			}
+			if (strbuf_addstr(&sb, pos->abbr_name))
+				goto err;
+
+			if (!br_cntr[i]) {
+				if (strbuf_addstr(&sb, "=-"))
+					goto err;
+			} else {
+				if (strbuf_addf(&sb, "=%d", avg))
+					goto err;
+			}
+			if (br_cntr[i] & ANNOTATION__BR_CNTR_SATURATED_FLAG) {
+				if (strbuf_addch(&sb, '+'))
+					goto err;
+			} else {
+				if (strbuf_addch(&sb, ' '))
+					goto err;
+			}
+
+			if ((i < br_cntr_nr - 1) && strbuf_addch(&sb, ','))
+				goto err;
+			continue;
+		}
+
+		if (strbuf_addch(&sb, '|'))
+			goto err;
+
+		if (!br_cntr[i]) {
+			if (strbuf_addch(&sb, '-'))
+				goto err;
+			used++;
+		} else {
+			if (!evsel)
+				goto err_inval;
+
+			evlist__for_each_entry_from(evsel->evlist, pos) {
+				if ((pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) &&
+				    (pos->br_cntr_idx == i))
+					break;
+			}
+			if (br_cntr[i] & ANNOTATION__BR_CNTR_SATURATED_FLAG)
+				saturated = true;
+
+			for (j = 0; j < avg; j++, used++) {
+				/* Print + if the number of logged events > 3 */
+				if (j >= ANNOTATION_BR_CNTR_SATURATION) {
+					saturated = true;
+					break;
+				}
+				if (strbuf_addstr(&sb, pos->abbr_name))
+					goto err;
+			}
+
+			if (saturated) {
+				if (strbuf_addch(&sb, '+'))
+					goto err;
+				used++;
+			}
+			/* Continue the next search after the event just used. */
+			pos = list_next_entry(pos, core.node);
+		}
+
+		/* Pad the cell to a fixed width of SATURATION + 1 characters. */
+		for (j = used; j < ANNOTATION_BR_CNTR_SATURATION + 1; j++) {
+			if (strbuf_addch(&sb, ' '))
+				goto err;
+		}
+	}
+
+	if (!verbose && strbuf_addch(&sb, br_cntr_nr ? '|' : ' '))
+		goto err;
+
+	*str = strbuf_detach(&sb, NULL);
+
+	return 0;
+err_inval:
+	ret = -EINVAL;
+err:
+	strbuf_release(&sb);
+	return ret;
+}
+
static void __annotation_line__write(struct annotation_line *al, struct annotation *notes,
bool first_line, bool current_entry, bool change_color, int width,
void *obj, unsigned int percent_type,
@@ -1672,7 +1893,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
void (*obj__write_graph)(void *obj, int graph))
{
- double percent_max = annotation_line__max_percent(al, notes, percent_type);
+ double percent_max = annotation_line__max_percent(al, percent_type);
int pcnt_width = annotation__pcnt_width(notes),
cycles_width = annotation__cycles_width(notes);
bool show_title = false;
@@ -1690,7 +1911,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
if (al->offset != -1 && percent_max != 0.0) {
int i;
- for (i = 0; i < notes->src->nr_events; i++) {
+ for (i = 0; i < al->data_nr; i++) {
double percent;
percent = annotation_data__percent(&al->data[i], percent_type);
@@ -1699,10 +1920,10 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
if (symbol_conf.show_total_period) {
obj__printf(obj, "%11" PRIu64 " ", al->data[i].he.period);
} else if (symbol_conf.show_nr_samples) {
- obj__printf(obj, "%6" PRIu64 " ",
+ obj__printf(obj, "%7" PRIu64 " ",
al->data[i].he.nr_samples);
} else {
- obj__printf(obj, "%6.2f ", percent);
+ obj__printf(obj, "%7.2f ", percent);
}
}
} else {
@@ -1758,6 +1979,22 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
"Cycle(min/max)");
}
+ if (annotate_opts.show_br_cntr) {
+ if (show_title) {
+ obj__printf(obj, "%*s ",
+ ANNOTATION__BR_CNTR_WIDTH,
+ "Branch Counter");
+ } else {
+ char *buf;
+
+ if (!annotation_br_cntr_entry(&buf, al->br_cntr_nr, al->br_cntr,
+ al->num_aggr, al->evsel)) {
+ obj__printf(obj, "%*s ", ANNOTATION__BR_CNTR_WIDTH, buf);
+ free(buf);
+ }
+ }
+ }
+
if (show_title && !*al->line) {
ipc_coverage_string(bf, sizeof(bf), notes);
obj__printf(obj, "%*s", ANNOTATION__AVG_IPC_WIDTH, bf);
@@ -1843,10 +2080,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
struct symbol *sym = ms->sym;
struct annotation *notes = symbol__annotation(sym);
size_t size = symbol__size(sym);
- int nr_pcnt = 1, err;
-
- if (evsel__is_group_event(evsel))
- nr_pcnt = evsel->core.nr_members;
+ int err;
err = symbol__annotate(ms, evsel, parch);
if (err)
@@ -1857,13 +2091,11 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
annotation__set_index(notes);
annotation__mark_jump_targets(notes, sym);
- err = annotation__compute_ipc(notes, size);
+ err = annotation__compute_ipc(notes, size, evsel);
if (err)
return err;
annotation__init_column_widths(notes, sym);
- notes->src->nr_events = nr_pcnt;
-
annotation__update_column_widths(notes);
sym->annotate2 = 1;
@@ -1884,6 +2116,12 @@ static int annotation__config(const char *var, const char *value, void *data)
opt->offset_level = ANNOTATION__MAX_OFFSET_LEVEL;
else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL)
opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
+ } else if (!strcmp(var, "annotate.disassemblers")) {
+ opt->disassemblers_str = strdup(value);
+ if (!opt->disassemblers_str) {
+ pr_err("Not enough memory for annotate.disassemblers\n");
+ return -1;
+ }
} else if (!strcmp(var, "annotate.hide_src_code")) {
opt->hide_src_code = perf_config_bool("hide_src_code", value);
} else if (!strcmp(var, "annotate.jump_arrows")) {
@@ -2060,7 +2298,7 @@ static int extract_reg_offset(struct arch *arch, const char *str,
if (regname == NULL)
return -1;
- op_loc->reg1 = get_dwarf_regnum(regname, 0);
+ op_loc->reg1 = get_dwarf_regnum(regname, arch->e_machine, arch->e_flags);
free(regname);
/* Get the second register */
@@ -2073,7 +2311,7 @@ static int extract_reg_offset(struct arch *arch, const char *str,
if (regname == NULL)
return -1;
- op_loc->reg2 = get_dwarf_regnum(regname, 0);
+ op_loc->reg2 = get_dwarf_regnum(regname, arch->e_machine, arch->e_flags);
free(regname);
}
return 0;
@@ -2123,20 +2361,33 @@ int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl,
for_each_insn_op_loc(loc, i, op_loc) {
const char *insn_str = ops->source.raw;
bool multi_regs = ops->source.multi_regs;
+ bool mem_ref = ops->source.mem_ref;
if (i == INSN_OP_TARGET) {
insn_str = ops->target.raw;
multi_regs = ops->target.multi_regs;
+ mem_ref = ops->target.mem_ref;
}
/* Invalidate the register by default */
op_loc->reg1 = -1;
op_loc->reg2 = -1;
- if (insn_str == NULL)
- continue;
+ if (insn_str == NULL) {
+ if (!arch__is(arch, "powerpc"))
+ continue;
+ }
- if (strchr(insn_str, arch->objdump.memory_ref_char)) {
+ /*
+ * For powerpc, call get_powerpc_regs function which extracts the
+ * required fields for op_loc, ie reg1, reg2, offset from the
+ * raw instruction.
+ */
+ if (arch__is(arch, "powerpc")) {
+ op_loc->mem_ref = mem_ref;
+ op_loc->multi_regs = multi_regs;
+ get_powerpc_regs(dl->raw.raw_insn, !i, op_loc);
+ } else if (strchr(insn_str, arch->objdump.memory_ref_char)) {
op_loc->mem_ref = true;
op_loc->multi_regs = multi_regs;
extract_reg_offset(arch, insn_str, op_loc);
@@ -2160,7 +2411,7 @@ int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl,
return -1;
if (*s == arch->objdump.register_char)
- op_loc->reg1 = get_dwarf_regnum(s, 0);
+ op_loc->reg1 = get_dwarf_regnum(s, arch->e_machine, arch->e_flags);
else if (*s == arch->objdump.imm_char) {
op_loc->offset = strtol(s + 1, &p, 0);
if (p && p != s + 1)
@@ -2216,7 +2467,7 @@ static struct annotated_item_stat *annotate_data_stat(struct list_head *head,
return NULL;
istat->name = strdup(name);
- if (istat->name == NULL) {
+ if ((istat->name == NULL) || (!strlen(istat->name))) {
free(istat);
return NULL;
}
@@ -2230,6 +2481,7 @@ static bool is_stack_operation(struct arch *arch, struct disasm_line *dl)
if (arch__is(arch, "x86")) {
if (!strncmp(dl->ins.name, "push", 4) ||
!strncmp(dl->ins.name, "pop", 3) ||
+ !strncmp(dl->ins.name, "call", 4) ||
!strncmp(dl->ins.name, "ret", 3))
return true;
}
@@ -2313,6 +2565,20 @@ u64 annotate_calc_pcrel(struct map_symbol *ms, u64 ip, int offset,
return map__rip_2objdump(ms->map, addr);
}
+/*
+ * File-scope cache pairing a dso with the debuginfo handle created for it.
+ * hist_entry__get_data_type() refreshes both members whenever the dso of
+ * the entry being processed differs from the cached one.
+ */
+static struct debuginfo_cache {
+ struct dso *dso;
+ struct debuginfo *dbg;
+} di_cache;
+
+/*
+ * Release whatever the cache currently holds: drop the dso reference with
+ * dso__put(), delete the debuginfo handle, and reset both members to NULL
+ * so the cache can be refilled or deleted again.
+ */
+void debuginfo_cache__delete(void)
+{
+ dso__put(di_cache.dso);
+ di_cache.dso = NULL;
+
+ debuginfo__delete(di_cache.dbg);
+ di_cache.dbg = NULL;
+}
+
/**
* hist_entry__get_data_type - find data type for given hist entry
* @he: hist entry
@@ -2347,6 +2613,27 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
return NULL;
}
+ /*
+ * di_cache holds a pair of values, but code below assumes
+ * di_cache.dso can be compared/updated and di_cache.dbg can be
+ * read/updated independently from each other. That assumption only
+ * holds in single threaded code.
+ */
+ assert(perf_singlethreaded);
+
+ if (map__dso(ms->map) != di_cache.dso) {
+ dso__put(di_cache.dso);
+ di_cache.dso = dso__get(map__dso(ms->map));
+
+ debuginfo__delete(di_cache.dbg);
+ di_cache.dbg = debuginfo__new(dso__long_name(di_cache.dso));
+ }
+
+ if (di_cache.dbg == NULL) {
+ ann_data_stat.no_dbginfo++;
+ return NULL;
+ }
+
/* Make sure it has the disasm of the function */
if (symbol__annotate(ms, evsel, &arch) < 0) {
ann_data_stat.no_insn++;
@@ -2391,6 +2678,7 @@ retry:
.ip = ms->sym->start + dl->al.offset,
.cpumode = he->cpumode,
.op = op_loc,
+ .di = di_cache.dbg,
};
if (!op_loc->mem_ref && op_loc->segment == INSN_SEG_NONE)
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index d5c821c22f79..194a05cbc506 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -14,6 +14,7 @@
#include "spark.h"
#include "hashmap.h"
#include "disasm.h"
+#include "branch.h"
struct hist_browser_timer;
struct hist_entry;
@@ -30,8 +31,12 @@ struct annotated_data_type;
#define ANNOTATION__CYCLES_WIDTH 6
#define ANNOTATION__MINMAX_CYCLES_WIDTH 19
#define ANNOTATION__AVG_IPC_WIDTH 36
+#define ANNOTATION__BR_CNTR_WIDTH 30
#define ANNOTATION_DUMMY_LEN 256
+// llvm, capstone, objdump
+#define MAX_DISASSEMBLERS 3
+
struct annotation_options {
bool hide_src_code,
use_offset,
@@ -43,14 +48,18 @@ struct annotation_options {
show_nr_jumps,
show_minmax_cycle,
show_asm_raw,
+ show_br_cntr,
annotate_src,
full_addr;
u8 offset_level;
+ u8 nr_disassemblers;
int min_pcnt;
int max_lines;
int context;
char *objdump_path;
char *disassembler_style;
+ const char *disassemblers_str;
+ const char *disassemblers[MAX_DISASSEMBLERS];
const char *prefix;
const char *prefix_strip;
unsigned int percent_type;
@@ -103,6 +112,10 @@ struct annotation_line {
char *fileloc;
char *path;
struct cycles_info *cycles;
+ int num_aggr;
+ int br_cntr_nr;
+ u64 *br_cntr;
+ struct evsel *evsel;
int jump_sources;
u32 idx;
int idx_asm;
@@ -113,7 +126,10 @@ struct annotation_line {
struct disasm_line {
struct ins ins;
struct ins_operands ops;
-
+ union {
+ u8 bytes[4];
+ u32 raw_insn;
+ } raw;
/* This needs to be at the end. */
struct annotation_line al;
};
@@ -285,6 +301,9 @@ struct annotated_source {
struct annotation_line *annotated_source__get_line(struct annotated_source *src,
s64 offset);
+/* A branch counter once saturated */
+#define ANNOTATION__BR_CNTR_SATURATED_FLAG (1ULL << 63)
+
/**
* struct annotated_branch - basic block and IPC information for a symbol.
*
@@ -294,6 +313,7 @@ struct annotation_line *annotated_source__get_line(struct annotated_source *src,
* @cover_insn: Number of distinct, actually executed instructions.
* @cycles_hist: Array of cyc_hist for each instruction.
* @max_coverage: Maximum number of covered basic block (used for block-range).
+ * @br_cntr: Array of the occurrences of events (branch counters) during a block.
*
* This struct is used by two different codes when the sample has branch stack
* and cycles information. annotation__compute_ipc() calculates average IPC
@@ -310,6 +330,7 @@ struct annotated_branch {
unsigned int cover_insn;
struct cyc_hist *cycles_hist;
u64 max_coverage;
+ u64 *br_cntr;
};
struct LOCKABLE annotation {
@@ -336,7 +357,7 @@ static inline int annotation__cycles_width(struct annotation *notes)
static inline int annotation__pcnt_width(struct annotation *notes)
{
- return (symbol_conf.show_total_period ? 12 : 7) * notes->src->nr_events;
+ return (symbol_conf.show_total_period ? 12 : 8) * notes->src->nr_events;
}
static inline bool annotation_line__filter(struct annotation_line *al)
@@ -344,6 +365,11 @@ static inline bool annotation_line__filter(struct annotation_line *al)
return annotate_opts.hide_src_code && al->offset == -1;
}
+/*
+ * Width of the branch counter column: ANNOTATION__BR_CNTR_WIDTH when the
+ * show_br_cntr annotate option is set, otherwise 0.
+ */
+static inline u8 annotation__br_cntr_width(void)
+{
+	if (!annotate_opts.show_br_cntr)
+		return 0;
+
+	return ANNOTATION__BR_CNTR_WIDTH;
+}
+
void annotation__update_column_widths(struct annotation *notes);
void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms);
@@ -380,7 +406,9 @@ struct annotated_branch *annotation__get_branch(struct annotation *notes);
int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
struct addr_map_symbol *start,
- unsigned cycles);
+ unsigned cycles,
+ struct evsel *evsel,
+ u64 br_cntr);
int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample,
struct evsel *evsel, u64 addr);
@@ -540,4 +568,9 @@ struct annotated_basic_block {
int annotate_get_basic_blocks(struct symbol *sym, s64 src, s64 dst,
struct list_head *head);
+void debuginfo_cache__delete(void);
+
+int annotation_br_cntr_entry(char **str, int br_cntr_nr, u64 *br_cntr,
+ int num_aggr, struct evsel *evsel);
+int annotation_br_cntr_abbr_list(char **str, struct evsel *evsel, bool header);
#endif /* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 1443c28545a9..358c611eeddb 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -56,15 +56,15 @@ enum arm_spe_op_type {
ARM_SPE_OP_BR_INDIRECT = 1 << 17,
};
-enum arm_spe_neoverse_data_source {
- ARM_SPE_NV_L1D = 0x0,
- ARM_SPE_NV_L2 = 0x8,
- ARM_SPE_NV_PEER_CORE = 0x9,
- ARM_SPE_NV_LOCAL_CLUSTER = 0xa,
- ARM_SPE_NV_SYS_CACHE = 0xb,
- ARM_SPE_NV_PEER_CLUSTER = 0xc,
- ARM_SPE_NV_REMOTE = 0xd,
- ARM_SPE_NV_DRAM = 0xe,
+enum arm_spe_common_data_source {
+ ARM_SPE_COMMON_DS_L1D = 0x0,
+ ARM_SPE_COMMON_DS_L2 = 0x8,
+ ARM_SPE_COMMON_DS_PEER_CORE = 0x9,
+ ARM_SPE_COMMON_DS_LOCAL_CLUSTER = 0xa,
+ ARM_SPE_COMMON_DS_SYS_CACHE = 0xb,
+ ARM_SPE_COMMON_DS_PEER_CLUSTER = 0xc,
+ ARM_SPE_COMMON_DS_REMOTE = 0xd,
+ ARM_SPE_COMMON_DS_DRAM = 0xe,
};
struct arm_spe_record {
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 7bf607d0f6d8..4cef10a83962 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -11,7 +11,7 @@
#include <linux/bitops.h>
#include <stdarg.h>
#include <linux/kernel.h>
-#include <asm-generic/unaligned.h>
+#include <linux/unaligned.h>
#include "arm-spe-pkt-decoder.h"
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index afbd5869f6bf..dbf13f47879c 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -46,7 +46,6 @@ struct arm_spe {
struct perf_session *session;
struct machine *machine;
u32 pmu_type;
- u64 midr;
struct perf_tsc_conversion tc;
@@ -69,7 +68,7 @@ struct arm_spe {
u64 llc_access_id;
u64 tlb_miss_id;
u64 tlb_access_id;
- u64 branch_miss_id;
+ u64 branch_id;
u64 remote_access_id;
u64 memory_id;
u64 instructions_id;
@@ -78,6 +77,11 @@ struct arm_spe {
unsigned long num_events;
u8 use_ctx_pkt_for_pid;
+
+ u64 **metadata;
+ u64 metadata_ver;
+ u64 metadata_nr_cpu;
+ bool is_homogeneous;
};
struct arm_spe_queue {
@@ -96,6 +100,7 @@ struct arm_spe_queue {
u64 timestamp;
struct thread *thread;
u64 period_instructions;
+ u32 flags;
};
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
@@ -118,7 +123,7 @@ static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
else
pkt_len = 1;
printf(".");
- color_fprintf(stdout, color, " %08x: ", pos);
+ color_fprintf(stdout, color, " %08zx: ", pos);
for (i = 0; i < pkt_len; i++)
color_fprintf(stdout, color, " %02x", buf[i]);
for (; i < 16; i++)
@@ -273,6 +278,20 @@ static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
return 0;
}
+/*
+ * Look up the per CPU metadata array whose ARM_SPE_CPU entry equals @cpu.
+ *
+ * Returns the matching array, or NULL when no per CPU metadata is attached
+ * to @spe or no entry matches.
+ */
+static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, u64 cpu)
+{
+	u64 idx = 0;
+
+	if (spe->metadata == NULL)
+		return NULL;
+
+	while (idx < spe->metadata_nr_cpu) {
+		u64 *entry = spe->metadata[idx++];
+
+		if (entry[ARM_SPE_CPU] == cpu)
+			return entry;
+	}
+
+	return NULL;
+}
+
static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
{
struct simd_flags simd_flags = {};
@@ -376,6 +395,7 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
sample.stream_id = spe_events_id;
sample.addr = record->to_ip;
sample.weight = record->latency;
+ sample.flags = speq->flags;
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
@@ -400,24 +420,44 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
sample.id = spe_events_id;
sample.stream_id = spe_events_id;
- sample.addr = record->virt_addr;
+ sample.addr = record->to_ip;
sample.phys_addr = record->phys_addr;
sample.data_src = data_src;
sample.period = spe->instructions_sample_period;
sample.weight = record->latency;
+ sample.flags = speq->flags;
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
-static const struct midr_range neoverse_spe[] = {
+static const struct midr_range common_ds_encoding_cpus[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
{},
};
-static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record,
- union perf_mem_data_src *data_src)
+/*
+ * Derive the sample flags of the queue from the record just decoded:
+ * records with ARM_SPE_OP_BRANCH_ERET set are flagged as branches, and
+ * additionally as missed branches when ARM_SPE_BRANCH_MISS is set in the
+ * record type. Every other record gets no flags.
+ */
+static void arm_spe__sample_flags(struct arm_spe_queue *speq)
+{
+	const struct arm_spe_record *rec = &speq->decoder->record;
+	u32 flags = 0;
+
+	if (rec->op & ARM_SPE_OP_BRANCH_ERET) {
+		flags = PERF_IP_FLAG_BRANCH;
+		if (rec->type & ARM_SPE_BRANCH_MISS)
+			flags |= PERF_IP_FLAG_BRANCH_MISS;
+	}
+
+	speq->flags = flags;
+}
+
+static void arm_spe__synth_data_source_common(const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src)
{
/*
* Even though four levels of cache hierarchy are possible, no known
@@ -439,17 +479,17 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec
}
switch (record->source) {
- case ARM_SPE_NV_L1D:
+ case ARM_SPE_COMMON_DS_L1D:
data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
break;
- case ARM_SPE_NV_L2:
+ case ARM_SPE_COMMON_DS_L2:
data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
break;
- case ARM_SPE_NV_PEER_CORE:
+ case ARM_SPE_COMMON_DS_PEER_CORE:
data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
@@ -458,8 +498,8 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec
* We don't know if this is L1, L2 but we do know it was a cache-2-cache
* transfer, so set SNOOPX_PEER
*/
- case ARM_SPE_NV_LOCAL_CLUSTER:
- case ARM_SPE_NV_PEER_CLUSTER:
+ case ARM_SPE_COMMON_DS_LOCAL_CLUSTER:
+ case ARM_SPE_COMMON_DS_PEER_CLUSTER:
data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
@@ -467,7 +507,7 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec
/*
* System cache is assumed to be L3
*/
- case ARM_SPE_NV_SYS_CACHE:
+ case ARM_SPE_COMMON_DS_SYS_CACHE:
data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
@@ -476,13 +516,13 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec
* We don't know what level it hit in, except it came from the other
* socket
*/
- case ARM_SPE_NV_REMOTE:
+ case ARM_SPE_COMMON_DS_REMOTE:
data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
break;
- case ARM_SPE_NV_DRAM:
+ case ARM_SPE_COMMON_DS_DRAM:
data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
@@ -492,8 +532,8 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec
}
}
-static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record,
- union perf_mem_data_src *data_src)
+static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src)
{
if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
data_src->mem_lvl = PERF_MEM_LVL_L3;
@@ -515,10 +555,55 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
}
-static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
+/*
+ * Tell whether the CPU that produced the records of @speq is listed in
+ * common_ds_encoding_cpus, i.e. whether its data source packets can be
+ * decoded with the common encoding.
+ *
+ * The MIDR is taken from the session's CPUID string for version 1 metadata
+ * and from the per CPU metadata otherwise. Returns false whenever the MIDR
+ * cannot be determined.
+ */
+static bool arm_spe__is_common_ds_encoding(struct arm_spe_queue *speq)
+{
+	struct arm_spe *spe = speq->spe;
+	u64 *metadata = NULL;
+	u64 midr;
+
+	/* Metadata version 1 assumes all CPUs are the same (old behavior) */
+	if (spe->metadata_ver == 1) {
+		const char *cpuid;
+
+		pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n");
+		cpuid = perf_env__cpuid(spe->session->evlist->env);
+		/* No CPUID recorded: nothing to parse, so no match is possible. */
+		if (!cpuid)
+			return false;
+
+		midr = strtol(cpuid, NULL, 16);
+		return is_midr_in_range_list(midr, common_ds_encoding_cpus);
+	}
+
+	/* CPU ID is -1 for per-thread mode */
+	if (speq->cpu < 0) {
+		/*
+		 * On a heterogeneous system the CPU cannot be identified
+		 * when the CPU ID is -1, so it is not possible to confirm
+		 * that the data source packet is supported.
+		 */
+		if (!spe->is_homogeneous)
+			return false;
+
+		/* In homogeneous system, simply use CPU0's metadata */
+		if (spe->metadata)
+			metadata = spe->metadata[0];
+	} else {
+		metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
+	}
+
+	if (!metadata)
+		return false;
+
+	midr = metadata[ARM_SPE_CPU_MIDR];
+	return is_midr_in_range_list(midr, common_ds_encoding_cpus);
+}
+
+static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
+ const struct arm_spe_record *record)
{
union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
- bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
+ bool is_common = arm_spe__is_common_ds_encoding(speq);
if (record->op & ARM_SPE_OP_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;
@@ -527,10 +612,10 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m
else
return 0;
- if (is_neoverse)
- arm_spe__synth_data_source_neoverse(record, &data_src);
+ if (is_common)
+ arm_spe__synth_data_source_common(record, &data_src);
else
- arm_spe__synth_data_source_generic(record, &data_src);
+ arm_spe__synth_memory_level(record, &data_src);
if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
data_src.mem_dtlb = PERF_MEM_TLB_WK;
@@ -551,7 +636,8 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
u64 data_src;
int err;
- data_src = arm_spe__synth_data_source(record, spe->midr);
+ arm_spe__sample_flags(speq);
+ data_src = arm_spe__synth_data_source(speq, record);
if (spe->sample_flc) {
if (record->type & ARM_SPE_L1D_MISS) {
@@ -601,8 +687,8 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
}
}
- if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
- err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
+ if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) {
+ err = arm_spe__synth_branch_sample(speq, spe->branch_id);
if (err)
return err;
}
@@ -899,7 +985,7 @@ static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
static int arm_spe_process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
- struct perf_tool *tool)
+ const struct perf_tool *tool)
{
int err = 0;
u64 timestamp;
@@ -947,7 +1033,7 @@ static int arm_spe_process_event(struct perf_session *session,
static int arm_spe_process_auxtrace_event(struct perf_session *session,
union perf_event *event,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
auxtrace);
@@ -985,7 +1071,7 @@ static int arm_spe_process_auxtrace_event(struct perf_session *session,
}
static int arm_spe_flush(struct perf_session *session __maybe_unused,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
auxtrace);
@@ -1016,6 +1102,73 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused,
return 0;
}
+/*
+ * Duplicate @per_cpu_size bytes of per CPU metadata starting at @buf into a
+ * newly allocated buffer. Returns the copy, or NULL if allocation fails.
+ */
+static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size)
+{
+	u64 *copy = zalloc(per_cpu_size);
+
+	if (copy)
+		memcpy(copy, buf, per_cpu_size);
+
+	return copy;
+}
+
+/*
+ * Free the @nr_cpu per CPU metadata arrays and then the pointer table
+ * itself. A NULL @metadata is accepted and ignored, whatever @nr_cpu says,
+ * so error paths can call this unconditionally.
+ */
+static void arm_spe__free_metadata(u64 **metadata, int nr_cpu)
+{
+	int i;
+
+	if (!metadata)
+		return;
+
+	for (i = 0; i < nr_cpu; i++)
+		zfree(&metadata[i]);
+	free(metadata);
+}
+
+/*
+ * Parse the private area of an auxtrace info event into a table holding one
+ * metadata array per CPU.
+ *
+ * @info:   auxtrace info event carrying the metadata in its priv area.
+ * @ver:    out parameter; receives the metadata version (1 for the old
+ *          fixed-size layout).
+ * @nr_cpu: out parameter; receives the number of per CPU entries, or 0 when
+ *          no table is returned.
+ *
+ * Returns the allocated table, or NULL when the metadata is version 1 (which
+ * has no per CPU part), when the header fields are inconsistent with the
+ * event size, or when an allocation fails. Callers can tell version 1 apart
+ * from a failure through @ver.
+ */
+static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info,
+				     u64 *ver, int *nr_cpu)
+{
+	u64 *ptr = (u64 *)info->priv;
+	u64 metadata_size, hdr_bytes, per_cpu_bytes;
+	u64 **metadata = NULL;
+	int hdr_sz, per_cpu_sz, i;
+
+	metadata_size = info->header.size -
+		sizeof(struct perf_record_auxtrace_info);
+
+	/* Metadata version 1 */
+	if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) {
+		*ver = 1;
+		*nr_cpu = 0;
+		/* No per CPU metadata */
+		return NULL;
+	}
+
+	*ver = ptr[ARM_SPE_HEADER_VERSION];
+	hdr_sz = ptr[ARM_SPE_HEADER_SIZE];
+	*nr_cpu = ptr[ARM_SPE_CPUS_NUM];
+
+	/*
+	 * The header fields come from the recorded file: reject values that
+	 * would divide by zero, underflow the size computation below or leave
+	 * no room for per CPU data.
+	 */
+	if (hdr_sz <= 0 || *nr_cpu <= 0)
+		goto err_invalid;
+
+	hdr_bytes = (u64)hdr_sz * sizeof(u64);
+	if (hdr_bytes >= metadata_size)
+		goto err_invalid;
+
+	per_cpu_bytes = (metadata_size - hdr_bytes) / *nr_cpu;
+	if (per_cpu_bytes < sizeof(u64))
+		goto err_invalid;
+	per_cpu_sz = per_cpu_bytes;
+
+	metadata = calloc(*nr_cpu, sizeof(*metadata));
+	if (!metadata)
+		goto err_invalid;
+
+	/* Locate the start address of per CPU metadata */
+	ptr += hdr_sz;
+
+	for (i = 0; i < *nr_cpu; i++) {
+		metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz);
+		if (!metadata[i])
+			goto err_per_cpu_metadata;
+
+		ptr += per_cpu_sz / sizeof(u64);
+	}
+
+	return metadata;
+
+err_per_cpu_metadata:
+	/* Entries not yet allocated are NULL thanks to calloc(). */
+	arm_spe__free_metadata(metadata, *nr_cpu);
+err_invalid:
+	*nr_cpu = 0;
+	return NULL;
+}
+
static void arm_spe_free_queue(void *priv)
{
struct arm_spe_queue *speq = priv;
@@ -1050,6 +1203,7 @@ static void arm_spe_free(struct perf_session *session)
auxtrace_heap__free(&spe->heap);
arm_spe_free_events(session);
session->auxtrace = NULL;
+ arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu);
free(spe);
}
@@ -1061,45 +1215,60 @@ static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
return evsel->core.attr.type == spe->pmu_type;
}
-static const char * const arm_spe_info_fmts[] = {
- [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n",
+static const char * const metadata_hdr_v1_fmts[] = {
+ [ARM_SPE_PMU_TYPE] = " PMU Type :%"PRId64"\n",
+ [ARM_SPE_PER_CPU_MMAPS] = " Per CPU mmaps :%"PRId64"\n",
};
-static void arm_spe_print_info(__u64 *arr)
-{
- if (!dump_trace)
- return;
-
- fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
-}
+static const char * const metadata_hdr_fmts[] = {
+ [ARM_SPE_HEADER_VERSION] = " Header version :%"PRId64"\n",
+ [ARM_SPE_HEADER_SIZE] = " Header size :%"PRId64"\n",
+ [ARM_SPE_PMU_TYPE_V2] = " PMU type v2 :%"PRId64"\n",
+ [ARM_SPE_CPUS_NUM] = " CPU number :%"PRId64"\n",
+};
-struct arm_spe_synth {
- struct perf_tool dummy_tool;
- struct perf_session *session;
+static const char * const metadata_per_cpu_fmts[] = {
+ [ARM_SPE_MAGIC] = " Magic :0x%"PRIx64"\n",
+ [ARM_SPE_CPU] = " CPU # :%"PRId64"\n",
+ [ARM_SPE_CPU_NR_PARAMS] = " Num of params :%"PRId64"\n",
+ [ARM_SPE_CPU_MIDR] = " MIDR :0x%"PRIx64"\n",
+ [ARM_SPE_CPU_PMU_TYPE] = " PMU Type :%"PRId64"\n",
+ [ARM_SPE_CAP_MIN_IVAL] = " Min Interval :%"PRId64"\n",
};
-static int arm_spe_event_synth(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample __maybe_unused,
- struct machine *machine __maybe_unused)
+static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr)
{
- struct arm_spe_synth *arm_spe_synth =
- container_of(tool, struct arm_spe_synth, dummy_tool);
+ unsigned int i, cpu, hdr_size, cpu_num, cpu_size;
+ const char * const *hdr_fmts;
- return perf_session__deliver_synth_event(arm_spe_synth->session,
- event, NULL);
-}
+ if (!dump_trace)
+ return;
-static int arm_spe_synth_event(struct perf_session *session,
- struct perf_event_attr *attr, u64 id)
-{
- struct arm_spe_synth arm_spe_synth;
+ if (spe->metadata_ver == 1) {
+ cpu_num = 0;
+ hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX;
+ hdr_fmts = metadata_hdr_v1_fmts;
+ } else {
+ cpu_num = arr[ARM_SPE_CPUS_NUM];
+ hdr_size = arr[ARM_SPE_HEADER_SIZE];
+ hdr_fmts = metadata_hdr_fmts;
+ }
- memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
- arm_spe_synth.session = session;
+ for (i = 0; i < hdr_size; i++)
+ fprintf(stdout, hdr_fmts[i], arr[i]);
- return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
- &id, arm_spe_event_synth);
+ arr += hdr_size;
+ for (cpu = 0; cpu < cpu_num; cpu++) {
+ /*
+ * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS
+ * are fixed. The sequential parameter size is decided by the
+ * field 'ARM_SPE_CPU_NR_PARAMS'.
+ */
+ cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS];
+ for (i = 0; i < cpu_size; i++)
+ fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]);
+ arr += cpu_size;
+ }
}
static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
@@ -1172,7 +1341,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
spe->sample_flc = true;
/* Level 1 data cache miss */
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->l1d_miss_id = id;
@@ -1180,7 +1349,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
id += 1;
/* Level 1 data cache access */
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->l1d_access_id = id;
@@ -1192,7 +1361,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
spe->sample_llc = true;
/* Last level cache miss */
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->llc_miss_id = id;
@@ -1200,7 +1369,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
id += 1;
/* Last level cache access */
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->llc_access_id = id;
@@ -1212,7 +1381,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
spe->sample_tlb = true;
/* TLB miss */
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->tlb_miss_id = id;
@@ -1220,7 +1389,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
id += 1;
/* TLB access */
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->tlb_access_id = id;
@@ -1231,12 +1400,12 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
if (spe->synth_opts.branches) {
spe->sample_branch = true;
- /* Branch miss */
- err = arm_spe_synth_event(session, &attr, id);
+ /* Branch */
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
- spe->branch_miss_id = id;
- arm_spe_set_event_name(evlist, id, "branch-miss");
+ spe->branch_id = id;
+ arm_spe_set_event_name(evlist, id, "branch");
id += 1;
}
@@ -1244,7 +1413,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
spe->sample_remote_access = true;
/* Remote access */
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->remote_access_id = id;
@@ -1255,7 +1424,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
if (spe->synth_opts.mem) {
spe->sample_memory = true;
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->memory_id = id;
@@ -1276,7 +1445,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
attr.config = PERF_COUNT_HW_INSTRUCTIONS;
attr.sample_period = spe->synth_opts.period;
spe->instructions_sample_period = attr.sample_period;
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->instructions_id = id;
@@ -1287,24 +1456,57 @@ synth_instructions_out:
return 0;
}
+/*
+ * Report whether every CPU recorded in the metadata has the same MIDR.
+ *
+ * Returns false when there is no per CPU metadata (nr_cpu == 0), when any
+ * entry is missing, or when an entry's ARM_SPE_CPU_MIDR word differs from
+ * that of the first entry; returns true otherwise.
+ */
+static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu)
+{
+ u64 midr;
+ int i;
+
+ if (!nr_cpu)
+ return false;
+
+ for (i = 0; i < nr_cpu; i++) {
+ if (!metadata[i])
+ return false;
+
+ /* The first entry supplies the reference MIDR. */
+ if (i == 0) {
+ midr = metadata[i][ARM_SPE_CPU_MIDR];
+ continue;
+ }
+
+ if (midr != metadata[i][ARM_SPE_CPU_MIDR])
+ return false;
+ }
+
+ return true;
+}
+
int arm_spe_process_auxtrace_info(union perf_event *event,
struct perf_session *session)
{
struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
- size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
+ size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE;
struct perf_record_time_conv *tc = &session->time_conv;
- const char *cpuid = perf_env__cpuid(session->evlist->env);
- u64 midr = strtol(cpuid, NULL, 16);
struct arm_spe *spe;
- int err;
+ u64 **metadata = NULL;
+ u64 metadata_ver;
+ int nr_cpu, err;
if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
min_sz)
return -EINVAL;
+ metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver,
+ &nr_cpu);
+ if (!metadata && metadata_ver != 1) {
+ pr_err("Failed to parse Arm SPE metadata.\n");
+ return -EINVAL;
+ }
+
spe = zalloc(sizeof(struct arm_spe));
- if (!spe)
- return -ENOMEM;
+ if (!spe) {
+ err = -ENOMEM;
+ goto err_free_metadata;
+ }
err = auxtrace_queues__init(&spe->queues);
if (err)
@@ -1313,8 +1515,14 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
spe->session = session;
spe->machine = &session->machines.host; /* No kvm support */
spe->auxtrace_type = auxtrace_info->type;
- spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
- spe->midr = midr;
+ if (metadata_ver == 1)
+ spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
+ else
+ spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2];
+ spe->metadata = metadata;
+ spe->metadata_ver = metadata_ver;
+ spe->metadata_nr_cpu = nr_cpu;
+ spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu);
spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
@@ -1347,7 +1555,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
session->auxtrace = &spe->auxtrace;
- arm_spe_print_info(&auxtrace_info->priv[0]);
+ arm_spe_print_info(spe, &auxtrace_info->priv[0]);
if (dump_trace)
return 0;
@@ -1375,5 +1583,7 @@ err_free_queues:
session->auxtrace = NULL;
err_free:
free(spe);
+err_free_metadata:
+ arm_spe__free_metadata(metadata, nr_cpu);
return err;
}
diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h
index 4f4900c18f3e..390679a4af2f 100644
--- a/tools/perf/util/arm-spe.h
+++ b/tools/perf/util/arm-spe.h
@@ -12,10 +12,46 @@
enum {
ARM_SPE_PMU_TYPE,
ARM_SPE_PER_CPU_MMAPS,
+ ARM_SPE_AUXTRACE_V1_PRIV_MAX,
+};
+
+#define ARM_SPE_AUXTRACE_V1_PRIV_SIZE \
+ (ARM_SPE_AUXTRACE_V1_PRIV_MAX * sizeof(u64))
+
+enum {
+ /*
+ * The old metadata format (defined above) does not include a
+ * field for version number; it is treated as version 1, so the
+ * versions stored in this field start from 2.
+ */
+ ARM_SPE_HEADER_VERSION,
+ /* Header size, counted in u64 words */
+ ARM_SPE_HEADER_SIZE,
+ /* PMU type shared by CPUs */
+ ARM_SPE_PMU_TYPE_V2,
+ /* Number of CPUs */
+ ARM_SPE_CPUS_NUM,
ARM_SPE_AUXTRACE_PRIV_MAX,
};
-#define ARM_SPE_AUXTRACE_PRIV_SIZE (ARM_SPE_AUXTRACE_PRIV_MAX * sizeof(u64))
+/* Word indices into one CPU's metadata block; every word is a u64. */
+enum {
+ /* Magic number */
+ ARM_SPE_MAGIC,
+ /* CPU logical number in system */
+ ARM_SPE_CPU,
+ /* Number of parameters that follow this word */
+ ARM_SPE_CPU_NR_PARAMS,
+ /* CPU MIDR */
+ ARM_SPE_CPU_MIDR,
+ /* Associated PMU type */
+ ARM_SPE_CPU_PMU_TYPE,
+ /* Minimal interval */
+ ARM_SPE_CAP_MIN_IVAL,
+ ARM_SPE_CPU_PRIV_MAX,
+};
+
+#define ARM_SPE_HEADER_CURRENT_VERSION 2
+
union perf_event;
struct perf_session;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index e2f317063eec..ca8682966fae 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -671,11 +671,11 @@ int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx)
{
struct evsel *evsel;
- if (!itr->evlist || !itr->pmu)
+ if (!itr->evlist)
return -EINVAL;
evlist__for_each_entry(itr->evlist, evsel) {
- if (evsel->core.attr.type == itr->pmu->type) {
+ if (evsel__is_aux_event(evsel)) {
if (evsel->disabled)
return 0;
return evlist__enable_event_idx(itr->evlist, evsel, idx);
@@ -1240,7 +1240,7 @@ void auxtrace_synth_error(struct perf_record_auxtrace_error *auxtrace_error, int
}
int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
- struct perf_tool *tool,
+ const struct perf_tool *tool,
struct perf_session *session,
perf_event__handler_t process)
{
@@ -1831,7 +1831,7 @@ int __weak compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail)
static int __auxtrace_mmap__read(struct mmap *map,
struct auxtrace_record *itr,
- struct perf_tool *tool, process_auxtrace_t fn,
+ const struct perf_tool *tool, process_auxtrace_t fn,
bool snapshot, size_t snapshot_size)
{
struct auxtrace_mmap *mm = &map->auxtrace_mmap;
@@ -1942,14 +1942,14 @@ static int __auxtrace_mmap__read(struct mmap *map,
}
int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr,
- struct perf_tool *tool, process_auxtrace_t fn)
+ const struct perf_tool *tool, process_auxtrace_t fn)
{
return __auxtrace_mmap__read(map, itr, tool, fn, false, 0);
}
int auxtrace_mmap__read_snapshot(struct mmap *map,
struct auxtrace_record *itr,
- struct perf_tool *tool, process_auxtrace_t fn,
+ const struct perf_tool *tool, process_auxtrace_t fn,
size_t snapshot_size)
{
return __auxtrace_mmap__read(map, itr, tool, fn, true, snapshot_size);
@@ -2829,7 +2829,7 @@ int auxtrace_parse_filters(struct evlist *evlist)
}
int auxtrace__process_event(struct perf_session *session, union perf_event *event,
- struct perf_sample *sample, struct perf_tool *tool)
+ struct perf_sample *sample, const struct perf_tool *tool)
{
if (!session->auxtrace)
return 0;
@@ -2847,7 +2847,7 @@ void auxtrace__dump_auxtrace_sample(struct perf_session *session,
session->auxtrace->dump_auxtrace_sample(session, sample);
}
-int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool)
+int auxtrace__flush_events(struct perf_session *session, const struct perf_tool *tool)
{
if (!session->auxtrace)
return 0;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 8a6ec9565835..dddaf4f3ffed 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -75,7 +75,6 @@ enum itrace_period_type {
* (not fully accurate, since CYC packets are only emitted
* together with other events, such as branches)
* @branches: whether to synthesize 'branches' events
- * (branch misses only for Arm SPE)
* @transactions: whether to synthesize events for transactions
* @ptwrites: whether to synthesize events for ptwrites
* @pwr_events: whether to synthesize power events
@@ -208,17 +207,17 @@ struct auxtrace {
int (*process_event)(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
- struct perf_tool *tool);
+ const struct perf_tool *tool);
int (*process_auxtrace_event)(struct perf_session *session,
union perf_event *event,
- struct perf_tool *tool);
+ const struct perf_tool *tool);
int (*queue_data)(struct perf_session *session,
struct perf_sample *sample, union perf_event *event,
u64 data_offset);
void (*dump_auxtrace_sample)(struct perf_session *session,
struct perf_sample *sample);
int (*flush_events)(struct perf_session *session,
- struct perf_tool *tool);
+ const struct perf_tool *tool);
void (*free_events)(struct perf_session *session);
void (*free)(struct perf_session *session);
bool (*evsel_is_auxtrace)(struct perf_session *session,
@@ -411,7 +410,6 @@ struct auxtrace_record {
int (*read_finish)(struct auxtrace_record *itr, int idx);
unsigned int alignment;
unsigned int default_aux_sample_size;
- struct perf_pmu *pmu;
struct evlist *evlist;
};
@@ -508,17 +506,17 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
struct evlist *evlist,
struct evsel *evsel, int idx);
-typedef int (*process_auxtrace_t)(struct perf_tool *tool,
+typedef int (*process_auxtrace_t)(const struct perf_tool *tool,
struct mmap *map,
union perf_event *event, void *data1,
size_t len1, void *data2, size_t len2);
int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr,
- struct perf_tool *tool, process_auxtrace_t fn);
+ const struct perf_tool *tool, process_auxtrace_t fn);
int auxtrace_mmap__read_snapshot(struct mmap *map,
struct auxtrace_record *itr,
- struct perf_tool *tool, process_auxtrace_t fn,
+ const struct perf_tool *tool, process_auxtrace_t fn,
size_t snapshot_size);
int auxtrace_queues__init_nr(struct auxtrace_queues *queues, int nr_queues);
@@ -639,10 +637,10 @@ int addr_filters__parse_bare_filter(struct addr_filters *filts,
int auxtrace_parse_filters(struct evlist *evlist);
int auxtrace__process_event(struct perf_session *session, union perf_event *event,
- struct perf_sample *sample, struct perf_tool *tool);
+ struct perf_sample *sample, const struct perf_tool *tool);
void auxtrace__dump_auxtrace_sample(struct perf_session *session,
struct perf_sample *sample);
-int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool);
+int auxtrace__flush_events(struct perf_session *session, const struct perf_tool *tool);
void auxtrace__free_events(struct perf_session *session);
void auxtrace__free(struct perf_session *session);
bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
@@ -651,7 +649,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
#define ITRACE_HELP \
" i[period]: synthesize instructions events\n" \
" y[period]: synthesize cycles events (same period as i)\n" \
-" b: synthesize branches events (branch misses for Arm SPE)\n" \
+" b: synthesize branches events\n" \
" c: synthesize branches events (calls only)\n" \
" r: synthesize branches events (returns only)\n" \
" x: synthesize transactions events\n" \
@@ -809,7 +807,7 @@ static inline
int auxtrace__process_event(struct perf_session *session __maybe_unused,
union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
return 0;
}
@@ -822,7 +820,7 @@ void auxtrace__dump_auxtrace_sample(struct perf_session *session __maybe_unused,
static inline
int auxtrace__flush_events(struct perf_session *session __maybe_unused,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c
index 04068d48683f..649392bee7ed 100644
--- a/tools/perf/util/block-info.c
+++ b/tools/perf/util/block-info.c
@@ -40,16 +40,32 @@ static struct block_header_column {
[PERF_HPP_REPORT__BLOCK_DSO] = {
.name = "Shared Object",
.width = 20,
+ },
+ [PERF_HPP_REPORT__BLOCK_BRANCH_COUNTER] = {
+ .name = "Branch Counter",
+ .width = 30,
}
};
-struct block_info *block_info__new(void)
+static struct block_info *block_info__new(unsigned int br_cntr_nr)
{
- return zalloc(sizeof(struct block_info));
+ struct block_info *bi = zalloc(sizeof(struct block_info));
+
+ if (bi && br_cntr_nr) {
+ bi->br_cntr = calloc(br_cntr_nr, sizeof(u64));
+ if (!bi->br_cntr) {
+ free(bi);
+ return NULL;
+ }
+ }
+
+ return bi;
}
void block_info__delete(struct block_info *bi)
{
+ if (bi)
+ free(bi->br_cntr);
free(bi);
}
@@ -86,7 +102,8 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
static void init_block_info(struct block_info *bi, struct symbol *sym,
struct cyc_hist *ch, int offset,
- u64 total_cycles)
+ u64 total_cycles, unsigned int br_cntr_nr,
+ u64 *br_cntr, struct evsel *evsel)
{
bi->sym = sym;
bi->start = ch->start;
@@ -99,10 +116,18 @@ static void init_block_info(struct block_info *bi, struct symbol *sym,
memcpy(bi->cycles_spark, ch->cycles_spark,
NUM_SPARKS * sizeof(u64));
+
+ if (br_cntr && br_cntr_nr) {
+ bi->br_cntr_nr = br_cntr_nr;
+ memcpy(bi->br_cntr, &br_cntr[offset * br_cntr_nr],
+ br_cntr_nr * sizeof(u64));
+ }
+ bi->evsel = evsel;
}
int block_info__process_sym(struct hist_entry *he, struct block_hist *bh,
- u64 *block_cycles_aggr, u64 total_cycles)
+ u64 *block_cycles_aggr, u64 total_cycles,
+ unsigned int br_cntr_nr)
{
struct annotation *notes;
struct cyc_hist *ch;
@@ -125,12 +150,14 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh,
struct block_info *bi;
struct hist_entry *he_block;
- bi = block_info__new();
+ bi = block_info__new(br_cntr_nr);
if (!bi)
return -1;
init_block_info(bi, he->ms.sym, &ch[i], i,
- total_cycles);
+ total_cycles, br_cntr_nr,
+ notes->branch->br_cntr,
+ hists_to_evsel(he->hists));
cycles += bi->cycles_aggr / bi->num_aggr;
he_block = hists__add_entry_block(&bh->block_hists,
@@ -327,6 +354,24 @@ static void init_block_header(struct block_fmt *block_fmt)
fmt->width = block_column_width;
}
+/*
+ * hpp ->entry callback for the "Branch Counter" block column.
+ *
+ * Asks annotation_br_cntr_entry() for a string describing the block's branch
+ * counters and prints it into hpp->buf, right-aligned to the column width.
+ * Returns the scnprintf() result, or 0 when no string could be produced.
+ */
+static int block_branch_counter_entry(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct block_info *bi = he->block_info;
+ char *buf;
+ int ret;
+
+ /* A non-zero return is treated as "nothing to print". */
+ if (annotation_br_cntr_entry(&buf, bi->br_cntr_nr, bi->br_cntr,
+ bi->num_aggr, bi->evsel))
+ return 0;
+
+ ret = scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+ /* buf is released here, so it is presumably heap-allocated by the callee. */
+ free(buf);
+ return ret;
+}
+
static void hpp_register(struct block_fmt *block_fmt, int idx,
struct perf_hpp_list *hpp_list)
{
@@ -357,6 +402,9 @@ static void hpp_register(struct block_fmt *block_fmt, int idx,
case PERF_HPP_REPORT__BLOCK_DSO:
fmt->entry = block_dso_entry;
break;
+ case PERF_HPP_REPORT__BLOCK_BRANCH_COUNTER:
+ fmt->entry = block_branch_counter_entry;
+ break;
default:
return;
}
@@ -390,7 +438,7 @@ static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts,
static int process_block_report(struct hists *hists,
struct block_report *block_report,
u64 total_cycles, int *block_hpps,
- int nr_hpps)
+ int nr_hpps, unsigned int br_cntr_nr)
{
struct rb_node *next = rb_first_cached(&hists->entries);
struct block_hist *bh = &block_report->hist;
@@ -405,7 +453,7 @@ static int process_block_report(struct hists *hists,
while (next) {
he = rb_entry(next, struct hist_entry, rb_node);
block_info__process_sym(he, bh, &block_report->cycles,
- total_cycles);
+ total_cycles, br_cntr_nr);
next = rb_next(&he->rb_node);
}
@@ -435,7 +483,7 @@ struct block_report *block_info__create_report(struct evlist *evlist,
struct hists *hists = evsel__hists(pos);
process_block_report(hists, &block_reports[i], total_cycles,
- block_hpps, nr_hpps);
+ block_hpps, nr_hpps, evlist->nr_br_cntr);
i++;
}
diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h
index 0b9e1aad4c55..b9329dc3ab59 100644
--- a/tools/perf/util/block-info.h
+++ b/tools/perf/util/block-info.h
@@ -18,6 +18,9 @@ struct block_info {
u64 total_cycles;
int num;
int num_aggr;
+ int br_cntr_nr;
+ u64 *br_cntr;
+ struct evsel *evsel;
};
struct block_fmt {
@@ -36,6 +39,7 @@ enum {
PERF_HPP_REPORT__BLOCK_AVG_CYCLES,
PERF_HPP_REPORT__BLOCK_RANGE,
PERF_HPP_REPORT__BLOCK_DSO,
+ PERF_HPP_REPORT__BLOCK_BRANCH_COUNTER,
PERF_HPP_REPORT__BLOCK_MAX_INDEX
};
@@ -46,7 +50,6 @@ struct block_report {
int nr_fmts;
};
-struct block_info *block_info__new(void);
void block_info__delete(struct block_info *bi);
int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right);
@@ -55,7 +58,8 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *left, struct hist_entry *right);
int block_info__process_sym(struct hist_entry *he, struct block_hist *bh,
- u64 *block_cycles_aggr, u64 total_cycles);
+ u64 *block_cycles_aggr, u64 total_cycles,
+ unsigned int br_cntr_nr);
struct block_report *block_info__create_report(struct evlist *evlist,
u64 total_cycles,
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 827695cd0408..13608237c50e 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -170,7 +170,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
{
struct perf_record_ksymbol *ksymbol_event = &event->ksymbol;
struct perf_record_bpf_event *bpf_event = &event->bpf;
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
struct bpf_prog_info_node *info_node;
struct perf_bpil *info_linear;
struct bpf_prog_info *info;
@@ -310,7 +310,7 @@ struct kallsyms_parse {
union perf_event *event;
perf_event__handler_t process;
struct machine *machine;
- struct perf_tool *tool;
+ const struct perf_tool *tool;
};
static int
diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c
index 04f98b6bb291..a4fdf6911ec1 100644
--- a/tools/perf/util/bpf-filter.c
+++ b/tools/perf/util/bpf-filter.c
@@ -1,12 +1,60 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/**
+ * Generic event filter for sampling events in BPF.
+ *
+ * The BPF program is fixed: it just reads filter expressions from the 'filters'
+ * map and compares them with the sample data in order to reject samples that
+ * don't match.  Each filter expression contains a sample flag (term) to
+ * compare, an operation (==, >=, and so on) and a value.
+ *
+ * Note that each entry has an array of filter expressions and it only succeeds
+ * when all of the expressions are satisfied.  Logical OR is supported through a
+ * GROUP operation, which is satisfied when any of its member expressions
+ * evaluates to true.  Nested GROUP operations are not allowed for now.
+ *
+ * To support non-root users, the filters map can be loaded and pinned in the BPF
+ * filesystem by root (perf record --setup-filter pin). Then each user will get
+ * a new entry in the shared filters map to fill the filter expressions. And the
+ * BPF program will find the filter using (task-id, event-id) as a key.
+ *
+ * The pinned BPF object (shared for regular users) has:
+ *
+ * event_hash |
+ * | | |
+ * event->id ---> | id | ---+ idx_hash | filters
+ * | | | | | | | |
+ * | .... | +-> | idx | --+--> | exprs | ---> perf_bpf_filter_entry[]
+ * | | | | | | .op
+ * task id (tgid) --------------+ | .... | | | ... | .term (+ part)
+ * | .value
+ * |
+ * ======= (root would skip this part) ======== (compares it in a loop)
+ *
+ * This is used for per-task use cases while system-wide profiling (normally from
+ * root user) uses a separate copy of the program and the maps for its own so that
+ * it can proceed even if a lot of non-root users are using the filters at the
+ * same time. In this case the filters map has a single entry and no need to use
+ * the hash maps to get the index (key) of the filters map (IOW it's always 0).
+ *
+ * The BPF program returns 1 to accept the sample or 0 to drop it.
+ * The 'dropped' map is to keep how many samples it dropped by the filter and
+ * it will be reported as lost samples.
+ */
#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
#include <bpf/bpf.h>
#include <linux/err.h>
+#include <linux/list.h>
+#include <api/fs/fs.h>
#include <internal/xyarray.h>
+#include <perf/threadmap.h>
#include "util/debug.h"
#include "util/evsel.h"
+#include "util/target.h"
#include "util/bpf-filter.h"
#include <util/bpf-filter-flex.h>
@@ -20,6 +68,16 @@
#define __PERF_SAMPLE_TYPE(tt, st, opt) { tt, #st, opt }
#define PERF_SAMPLE_TYPE(_st, opt) __PERF_SAMPLE_TYPE(PBF_TERM_##_st, PERF_SAMPLE_##_st, opt)
+/* Index in the pinned 'filters' map. Should be released after use. */
+struct pinned_filter_idx {
+ struct list_head list;
+ struct evsel *evsel;
+ u64 event_id;
+ int hash_idx;
+};
+
+static LIST_HEAD(pinned_filters);
+
static const struct perf_sample_info {
enum perf_bpf_filter_term type;
const char *name;
@@ -42,8 +100,11 @@ static const struct perf_sample_info {
PERF_SAMPLE_TYPE(TRANSACTION, "--transaction"),
PERF_SAMPLE_TYPE(CODE_PAGE_SIZE, "--code-page-size"),
PERF_SAMPLE_TYPE(DATA_PAGE_SIZE, "--data-page-size"),
+ PERF_SAMPLE_TYPE(CGROUP, "--all-cgroups"),
};
+static int get_pinned_fd(const char *name);
+
static const struct perf_sample_info *get_sample_info(enum perf_bpf_filter_term type)
{
size_t i;
@@ -91,92 +152,452 @@ static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr *
return -1;
}
-int perf_bpf_filter__prepare(struct evsel *evsel)
+static int get_filter_entries(struct evsel *evsel, struct perf_bpf_filter_entry *entry)
{
- int i, x, y, fd;
- struct sample_filter_bpf *skel;
- struct bpf_program *prog;
- struct bpf_link *link;
+ int i = 0;
struct perf_bpf_filter_expr *expr;
- skel = sample_filter_bpf__open_and_load();
- if (!skel) {
- pr_err("Failed to load perf sample-filter BPF skeleton\n");
- return -1;
- }
-
- i = 0;
- fd = bpf_map__fd(skel->maps.filters);
list_for_each_entry(expr, &evsel->bpf_filters, list) {
- struct perf_bpf_filter_entry entry = {
- .op = expr->op,
- .part = expr->part,
- .term = expr->term,
- .value = expr->val,
- };
-
if (check_sample_flags(evsel, expr) < 0)
- return -1;
+ return -EINVAL;
+
+ if (i == MAX_FILTERS)
+ return -E2BIG;
- bpf_map_update_elem(fd, &i, &entry, BPF_ANY);
+ entry[i].op = expr->op;
+ entry[i].part = expr->part;
+ entry[i].term = expr->term;
+ entry[i].value = expr->val;
i++;
if (expr->op == PBF_OP_GROUP_BEGIN) {
struct perf_bpf_filter_expr *group;
list_for_each_entry(group, &expr->groups, list) {
- struct perf_bpf_filter_entry group_entry = {
- .op = group->op,
- .part = group->part,
- .term = group->term,
- .value = group->val,
- };
- bpf_map_update_elem(fd, &i, &group_entry, BPF_ANY);
+ if (i == MAX_FILTERS)
+ return -E2BIG;
+
+ entry[i].op = group->op;
+ entry[i].part = group->part;
+ entry[i].term = group->term;
+ entry[i].value = group->val;
i++;
}
- memset(&entry, 0, sizeof(entry));
- entry.op = PBF_OP_GROUP_END;
- bpf_map_update_elem(fd, &i, &entry, BPF_ANY);
+ if (i == MAX_FILTERS)
+ return -E2BIG;
+
+ entry[i].op = PBF_OP_GROUP_END;
i++;
}
}
- if (i > MAX_FILTERS) {
- pr_err("Too many filters: %d (max = %d)\n", i, MAX_FILTERS);
+ if (i < MAX_FILTERS) {
+ /* to terminate the loop early */
+ entry[i].op = PBF_OP_DONE;
+ i++;
+ }
+ return 0;
+}
+
+/*
+ * Look up the thread-group id of @tid by parsing the "Tgid:" line of the
+ * procfs "<tid>/status" file.
+ *
+ * Returns the tgid, or -1 if the file cannot be read or the line cannot be
+ * parsed.
+ */
+static int convert_to_tgid(int tid)
+{
+	char path[128];
+	char *buf, *p, *q;
+	int tgid = -1;
+	size_t len;
+
+	scnprintf(path, sizeof(path), "%d/status", tid);
+	if (procfs__read_str(path, &buf, &len) < 0)
+		return -1;
+
+	p = strstr(buf, "Tgid:");
+	if (p != NULL) {
+		/* skip "Tgid:" and the separator that follows it */
+		long val = strtol(p + 6, &q, 0);
+
+		/*
+		 * 'q' points into 'buf', so the terminator must be checked
+		 * before the buffer is released.
+		 */
+		if (*q == '\n')
+			tgid = val;
+	}
+
+	free(buf);
+	return tgid;
+}
+
+/*
+ * The event might be closed already so we cannot get the list of ids using FD
+ * like in create_event_hash() below, let's iterate the event_hash map and
+ * delete all entries that have the event id as a key.
+ */
+static void destroy_event_hash(u64 event_id)
+{
+ int fd;
+ u64 key, *prev_key = NULL;
+ int num = 0, alloced = 32;
+ /* growable array of the keys to delete; starts with room for 32 */
+ u64 *ids = calloc(alloced, sizeof(*ids));
+
+ if (ids == NULL)
+ return;
+
+ fd = get_pinned_fd("event_hash");
+ if (fd < 0) {
+ pr_debug("cannot get fd for 'event_hash' map\n");
+ free(ids);
+ return;
+ }
+
+ /* Iterate the whole map to collect keys for the event id. */
+ /*
+ * prev_key starts as NULL for the first call; afterwards it aliases
+ * 'key', so each call continues from the key returned by the last one.
+ */
+ while (!bpf_map_get_next_key(fd, prev_key, &key)) {
+ u64 id;
+
+ /* the map value is the representative id the key belongs to */
+ if (bpf_map_lookup_elem(fd, &key, &id) == 0 && id == event_id) {
+ if (num == alloced) {
+ void *tmp;
+
+ alloced *= 2;
+ tmp = realloc(ids, alloced * sizeof(*ids));
+ /* out of memory: stop collecting, delete what we have */
+ if (tmp == NULL)
+ break;
+
+ ids = tmp;
+ }
+ ids[num++] = key;
+ }
+
+ prev_key = &key;
+ }
+
+ /*
+ * Deletion happens in a second pass, after the iteration is over
+ * (presumably so that removing entries cannot disturb the walk).
+ */
+ for (int i = 0; i < num; i++)
+ bpf_map_delete_elem(fd, &ids[i]);
+
+ free(ids);
+ close(fd);
+}
+
+/*
+ * Return a representative id if ok, or 0 for failures.
+ *
+ * The perf_event->id is good for this, but an evsel would have multiple
+ * instances for CPUs and tasks. So pick the first id and set up a hash
+ * from the id of each instance to the representative id (the first one).
+ */
+static u64 create_event_hash(struct evsel *evsel)
+{
+	int x, y, fd;
+	u64 the_id = 0, id;
+
+	fd = get_pinned_fd("event_hash");
+	if (fd < 0) {
+		pr_err("cannot get fd for 'event_hash' map\n");
+		return 0;
+	}
+
+	for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) {
+		for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) {
+			int ret = ioctl(FD(evsel, x, y), PERF_EVENT_IOC_ID, &id);
+
+			if (ret < 0) {
+				pr_err("Failed to get the event id\n");
+				/* undo the mappings added so far */
+				if (the_id)
+					destroy_event_hash(the_id);
+				the_id = 0;
+				/* leave through 'out' so the map fd is closed */
+				goto out;
+			}
+
+			/* the first id seen becomes the representative one */
+			if (the_id == 0)
+				the_id = id;
+
+			bpf_map_update_elem(fd, &id, &the_id, BPF_ANY);
+		}
+	}
+
+out:
+	close(fd);
+	return the_id;
+}
+
+/*
+ * Release what was registered for @pfi in the pinned maps: its slot in
+ * 'filters', the 'event_hash' entries of its event id and the 'idx_hash'
+ * entries of the event's threads.
+ *
+ * This is best effort: a map that cannot be opened is skipped, and the
+ * results of the individual deletions are not checked.
+ */
+static void destroy_idx_hash(struct pinned_filter_idx *pfi)
+{
+	int fd, nr;
+	struct perf_thread_map *threads;
+
+	fd = get_pinned_fd("filters");
+	if (fd >= 0) {
+		bpf_map_delete_elem(fd, &pfi->hash_idx);
+		close(fd);
+	} else {
+		pr_debug("cannot get fd for 'filters' map\n");
+	}
+
+	if (pfi->event_id)
+		destroy_event_hash(pfi->event_id);
+
+	threads = perf_evsel__threads(&pfi->evsel->core);
+	if (threads == NULL)
+		return;
+
+	fd = get_pinned_fd("idx_hash");
+	if (fd < 0) {
+		pr_debug("cannot get fd for 'idx_hash' map\n");
+		return;
+	}
+
+	nr = perf_thread_map__nr(threads);
+	for (int i = 0; i < nr; i++) {
+		/* The target task might be dead already, just try the pid */
+		struct idx_hash_key key = {
+			.evt_id = pfi->event_id,
+			.tgid = perf_thread_map__pid(threads, i),
+		};
+
+		bpf_map_delete_elem(fd, &key);
+	}
+	close(fd);
+}
+
+/*
+ * Maintain a hashmap from (tgid, event-id) to filter index.
+ *
+ * Claims the first free slot of the pinned 'filters' map for @entry, records
+ * the event ids of @evsel in 'event_hash' and maps the tgid of each target
+ * thread to the slot in 'idx_hash'.  On success the slot is remembered in the
+ * 'pinned_filters' list so that it can be released later.
+ *
+ * Returns the filter index, or a negative value on failure.
+ */
+static int create_idx_hash(struct evsel *evsel, struct perf_bpf_filter_entry *entry)
+{
+	int filter_idx;
+	int fd, nr, last;
+	u64 event_id = 0;
+	struct pinned_filter_idx *pfi = NULL;
+	struct perf_thread_map *threads;
+
+	fd = get_pinned_fd("filters");
+	if (fd < 0) {
+		pr_err("cannot get fd for 'filters' map\n");
+		return fd;
+	}
+
+	/*
+	 * Allocate the bookkeeping node before claiming a slot: the map is
+	 * pinned and shared, so a slot taken without a node to track it would
+	 * never be released.
+	 */
+	pfi = zalloc(sizeof(*pfi));
+	if (pfi == NULL) {
+		pr_err("Cannot save pinned filter index\n");
+		close(fd);
+		return -ENOMEM;
+	}
+
+	/* Find the first available entry in the filters map */
+	for (filter_idx = 0; filter_idx < MAX_FILTERS; filter_idx++) {
+		if (bpf_map_update_elem(fd, &filter_idx, entry, BPF_NOEXIST) == 0)
+			break;
+	}
+	close(fd);
+
+	if (filter_idx == MAX_FILTERS) {
+		pr_err("Too many users for the filter map\n");
+		free(pfi);
+		return -EBUSY;
+	}
+
+	pfi->evsel = evsel;
+	pfi->hash_idx = filter_idx;
+
+	event_id = create_event_hash(evsel);
+	if (event_id == 0) {
+		pr_err("Cannot update the event hash\n");
+		goto err;
+	}
+
+	pfi->event_id = event_id;
+
+	threads = perf_evsel__threads(&evsel->core);
+	if (threads == NULL) {
+		pr_err("Cannot get the thread list of the event\n");
+		goto err;
+	}
+
+	/* save the index to a hash map */
+	fd = get_pinned_fd("idx_hash");
+	if (fd < 0) {
+		pr_err("cannot get fd for 'idx_hash' map\n");
+		goto err;
+	}
+
+	last = -1;
+	nr = perf_thread_map__nr(threads);
+	for (int i = 0; i < nr; i++) {
+		int pid = perf_thread_map__pid(threads, i);
+		int tgid;
+		struct idx_hash_key key = {
+			.evt_id = event_id,
+		};
+
+		/* it actually needs tgid, let's get tgid from /proc. */
+		tgid = convert_to_tgid(pid);
+		if (tgid < 0) {
+			/* the thread may be dead, ignore. */
+			continue;
+		}
+
+		/* skip consecutive threads that share the previous tgid */
+		if (tgid == last)
+			continue;
+		last = tgid;
+		key.tgid = tgid;
+
+		if (bpf_map_update_elem(fd, &key, &filter_idx, BPF_ANY) < 0) {
+			pr_err("Failed to update the idx_hash\n");
+			close(fd);
+			goto err;
+		}
+		pr_debug("bpf-filter: idx_hash (task=%d,%s) -> %d\n",
+			 tgid, evsel__name(evsel), filter_idx);
+	}
+
+	list_add(&pfi->list, &pinned_filters);
+	close(fd);
+	return filter_idx;
+
+err:
+	/* releases the claimed slot and whatever was added to the hashes */
+	destroy_idx_hash(pfi);
+	free(pfi);
+	return -1;
+}
+
+int perf_bpf_filter__prepare(struct evsel *evsel, struct target *target)
+{
+ int i, x, y, fd, ret;
+ struct sample_filter_bpf *skel = NULL;
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ struct perf_bpf_filter_entry *entry;
+ bool needs_idx_hash = !target__has_cpu(target) && !target->uid_str;
+
+ entry = calloc(MAX_FILTERS, sizeof(*entry));
+ if (entry == NULL)
return -1;
+
+ ret = get_filter_entries(evsel, entry);
+ if (ret < 0) {
+ pr_err("Failed to process filter entries\n");
+ goto err;
}
+
+ if (needs_idx_hash && geteuid() != 0) {
+ int zero = 0;
+
+ /* The filters map is shared among other processes */
+ ret = create_idx_hash(evsel, entry);
+ if (ret < 0)
+ goto err;
+
+ fd = get_pinned_fd("dropped");
+ if (fd < 0) {
+ ret = fd;
+ goto err;
+ }
+
+ /* Reset the lost count */
+ bpf_map_update_elem(fd, &ret, &zero, BPF_ANY);
+ close(fd);
+
+ fd = get_pinned_fd("perf_sample_filter");
+ if (fd < 0) {
+ ret = fd;
+ goto err;
+ }
+
+ for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) {
+ for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) {
+ ret = ioctl(FD(evsel, x, y), PERF_EVENT_IOC_SET_BPF, fd);
+ if (ret < 0) {
+ pr_err("Failed to attach perf sample-filter\n");
+ close(fd);
+ goto err;
+ }
+ }
+ }
+
+ close(fd);
+ free(entry);
+ return 0;
+ }
+
+ skel = sample_filter_bpf__open_and_load();
+ if (!skel) {
+ ret = -errno;
+ pr_err("Failed to load perf sample-filter BPF skeleton\n");
+ goto err;
+ }
+
+ i = 0;
+ fd = bpf_map__fd(skel->maps.filters);
+
+ /* The filters map has only one entry in this case */
+ if (bpf_map_update_elem(fd, &i, entry, BPF_ANY) < 0) {
+ ret = -errno;
+ pr_err("Failed to update the filter map\n");
+ goto err;
+ }
+
prog = skel->progs.perf_sample_filter;
for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) {
for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) {
link = bpf_program__attach_perf_event(prog, FD(evsel, x, y));
if (IS_ERR(link)) {
pr_err("Failed to attach perf sample-filter program\n");
- return PTR_ERR(link);
+ ret = PTR_ERR(link);
+ goto err;
}
}
}
+ free(entry);
evsel->bpf_skel = skel;
return 0;
+
+err:
+ free(entry);
+ if (!list_empty(&pinned_filters)) {
+ struct pinned_filter_idx *pfi, *tmp;
+
+ list_for_each_entry_safe(pfi, tmp, &pinned_filters, list) {
+ destroy_idx_hash(pfi);
+ list_del(&pfi->list);
+ free(pfi);
+ }
+ }
+ sample_filter_bpf__destroy(skel);
+ return ret;
}
int perf_bpf_filter__destroy(struct evsel *evsel)
{
struct perf_bpf_filter_expr *expr, *tmp;
+ struct pinned_filter_idx *pfi, *pos;
list_for_each_entry_safe(expr, tmp, &evsel->bpf_filters, list) {
list_del(&expr->list);
free(expr);
}
sample_filter_bpf__destroy(evsel->bpf_skel);
+
+ list_for_each_entry_safe(pfi, pos, &pinned_filters, list) {
+ destroy_idx_hash(pfi);
+ list_del(&pfi->list);
+ free(pfi);
+ }
return 0;
}
u64 perf_bpf_filter__lost_count(struct evsel *evsel)
{
- struct sample_filter_bpf *skel = evsel->bpf_skel;
+ int count = 0;
+
+ if (list_empty(&evsel->bpf_filters))
+ return 0;
+
+ if (!list_empty(&pinned_filters)) {
+ int fd = get_pinned_fd("dropped");
+ struct pinned_filter_idx *pfi;
+
+ if (fd < 0)
+ return 0;
- return skel ? skel->bss->dropped : 0;
+ list_for_each_entry(pfi, &pinned_filters, list) {
+ if (pfi->evsel != evsel)
+ continue;
+
+ bpf_map_lookup_elem(fd, &pfi->hash_idx, &count);
+ break;
+ }
+ close(fd);
+ } else if (evsel->bpf_skel) {
+ struct sample_filter_bpf *skel = evsel->bpf_skel;
+ int fd = bpf_map__fd(skel->maps.dropped);
+ int idx = 0;
+
+ bpf_map_lookup_elem(fd, &idx, &count);
+ }
+
+ return count;
}
struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term term,
@@ -212,3 +633,139 @@ int perf_bpf_filter__parse(struct list_head *expr_head, const char *str)
return ret;
}
+
+/*
+ * Load the sample-filter BPF skeleton with its maps sized for shared use,
+ * pin its objects under "<sysfs>/fs/bpf/" PERF_BPF_FILTER_PIN_PATH and set
+ * the access modes of the pinned directory, program and maps.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int perf_bpf_filter__pin(void)
+{
+	static const char * const map_names[] = {
+		"filters", "event_hash", "idx_hash", "dropped",
+	};
+	struct sample_filter_bpf *skel;
+	char *path = NULL;
+	int dir_fd, ret = -1;
+
+	skel = sample_filter_bpf__open();
+	if (!skel) {
+		ret = -errno;
+		pr_err("Failed to open perf sample-filter BPF skeleton\n");
+		goto err;
+	}
+
+	/* pinned program will use pid-hash */
+	bpf_map__set_max_entries(skel->maps.filters, MAX_FILTERS);
+	bpf_map__set_max_entries(skel->maps.event_hash, MAX_EVT_HASH);
+	bpf_map__set_max_entries(skel->maps.idx_hash, MAX_IDX_HASH);
+	bpf_map__set_max_entries(skel->maps.dropped, MAX_FILTERS);
+	skel->rodata->use_idx_hash = 1;
+
+	if (sample_filter_bpf__load(skel) < 0) {
+		ret = -errno;
+		pr_err("Failed to load perf sample-filter BPF skeleton\n");
+		goto err;
+	}
+
+	if (asprintf(&path, "%s/fs/bpf/%s", sysfs__mountpoint(),
+		     PERF_BPF_FILTER_PIN_PATH) < 0) {
+		ret = -errno;
+		/* asprintf() leaves 'path' undefined on failure; keep free() safe */
+		path = NULL;
+		pr_err("Failed to allocate pathname in the BPF-fs\n");
+		goto err;
+	}
+
+	ret = bpf_object__pin(skel->obj, path);
+	if (ret < 0) {
+		pr_err("Failed to pin BPF filter objects\n");
+		goto err;
+	}
+
+	/* setup access permissions for the pinned objects */
+	dir_fd = open(path, O_PATH);
+	if (dir_fd < 0) {
+		/* save errno before the unpin below can overwrite it */
+		ret = -errno;
+		bpf_object__unpin(skel->obj, path);
+		goto err;
+	}
+
+	/*
+	 * In the chmod error paths errno is saved before logging, because
+	 * the logging call may change it.
+	 */
+
+	/* BPF-fs root has the sticky bit */
+	if (fchmodat(dir_fd, "..", 01755, 0) < 0) {
+		ret = -errno;
+		pr_debug("chmod for BPF-fs failed\n");
+		goto err_close;
+	}
+
+	/* perf_filter directory */
+	if (fchmodat(dir_fd, ".", 0755, 0) < 0) {
+		ret = -errno;
+		pr_debug("chmod for perf_filter directory failed?\n");
+		goto err_close;
+	}
+
+	/* programs need write permission for some reason */
+	if (fchmodat(dir_fd, "perf_sample_filter", 0777, 0) < 0) {
+		ret = -errno;
+		pr_debug("chmod for perf_sample_filter failed\n");
+	}
+
+	/* maps: a failure is reported but the remaining maps are still tried */
+	for (size_t i = 0; i < sizeof(map_names) / sizeof(map_names[0]); i++) {
+		if (fchmodat(dir_fd, map_names[i], 0666, 0) < 0) {
+			ret = -errno;
+			pr_debug("chmod for %s failed\n", map_names[i]);
+		}
+	}
+
+err_close:
+	close(dir_fd);
+
+err:
+	free(path);
+	sample_filter_bpf__destroy(skel);
+	return ret;
+}
+
+/*
+ * Remove the objects pinned by perf_bpf_filter__pin() from
+ * "<sysfs>/fs/bpf/" PERF_BPF_FILTER_PIN_PATH.  The skeleton is opened and
+ * loaded first and its object is then passed to bpf_object__unpin().
+ *
+ * Returns the result of bpf_object__unpin(), or a negative error code if
+ * the skeleton or the pathname cannot be set up.
+ */
+int perf_bpf_filter__unpin(void)
+{
+	struct sample_filter_bpf *skel;
+	char *path = NULL;
+	int ret = -1;
+
+	skel = sample_filter_bpf__open_and_load();
+	if (!skel) {
+		ret = -errno;
+		pr_err("Failed to open perf sample-filter BPF skeleton\n");
+		goto err;
+	}
+
+	if (asprintf(&path, "%s/fs/bpf/%s", sysfs__mountpoint(),
+		     PERF_BPF_FILTER_PIN_PATH) < 0) {
+		ret = -errno;
+		/* asprintf() leaves 'path' undefined on failure; keep free() safe */
+		path = NULL;
+		pr_err("Failed to allocate pathname in the BPF-fs\n");
+		goto err;
+	}
+
+	ret = bpf_object__unpin(skel->obj, path);
+
+err:
+	free(path);
+	sample_filter_bpf__destroy(skel);
+	return ret;
+}
+
+/*
+ * Open the pinned BPF object @name that lives under
+ * "<sysfs>/fs/bpf/" PERF_BPF_FILTER_PIN_PATH.
+ *
+ * Returns what bpf_obj_get() returns for that path, or -1 if the pathname
+ * cannot be built.
+ */
+static int get_pinned_fd(const char *name)
+{
+	char *pinned_path = NULL;
+	int obj_fd = -1;
+
+	if (asprintf(&pinned_path, "%s/fs/bpf/%s/%s", sysfs__mountpoint(),
+		     PERF_BPF_FILTER_PIN_PATH, name) >= 0) {
+		obj_fd = bpf_obj_get(pinned_path);
+		free(pinned_path);
+	}
+
+	return obj_fd;
+}
diff --git a/tools/perf/util/bpf-filter.h b/tools/perf/util/bpf-filter.h
index cd6764442c16..916ed7770b73 100644
--- a/tools/perf/util/bpf-filter.h
+++ b/tools/perf/util/bpf-filter.h
@@ -16,6 +16,10 @@ struct perf_bpf_filter_expr {
};
struct evsel;
+struct target;
+
+/* path in BPF-fs for the pinned program and maps */
+#define PERF_BPF_FILTER_PIN_PATH "perf_filter"
#ifdef HAVE_BPF_SKEL
struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term term,
@@ -23,9 +27,11 @@ struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term
enum perf_bpf_filter_op op,
unsigned long val);
int perf_bpf_filter__parse(struct list_head *expr_head, const char *str);
-int perf_bpf_filter__prepare(struct evsel *evsel);
+int perf_bpf_filter__prepare(struct evsel *evsel, struct target *target);
int perf_bpf_filter__destroy(struct evsel *evsel);
u64 perf_bpf_filter__lost_count(struct evsel *evsel);
+int perf_bpf_filter__pin(void);
+int perf_bpf_filter__unpin(void);
#else /* !HAVE_BPF_SKEL */
@@ -34,7 +40,8 @@ static inline int perf_bpf_filter__parse(struct list_head *expr_head __maybe_unu
{
return -EOPNOTSUPP;
}
-static inline int perf_bpf_filter__prepare(struct evsel *evsel __maybe_unused)
+static inline int perf_bpf_filter__prepare(struct evsel *evsel __maybe_unused,
+ struct target *target __maybe_unused)
{
return -EOPNOTSUPP;
}
@@ -46,5 +53,13 @@ static inline u64 perf_bpf_filter__lost_count(struct evsel *evsel __maybe_unused
{
return 0;
}
+/* Stub for builds without HAVE_BPF_SKEL: pinning is not supported. */
+static inline int perf_bpf_filter__pin(void)
+{
+ return -EOPNOTSUPP;
+}
+/* Stub for builds without HAVE_BPF_SKEL: unpinning is not supported. */
+static inline int perf_bpf_filter__unpin(void)
+{
+ return -EOPNOTSUPP;
+}
#endif /* HAVE_BPF_SKEL*/
#endif /* PERF_UTIL_BPF_FILTER_H */
diff --git a/tools/perf/util/bpf-filter.l b/tools/perf/util/bpf-filter.l
index 2a7c839f3fae..f313404f95a9 100644
--- a/tools/perf/util/bpf-filter.l
+++ b/tools/perf/util/bpf-filter.l
@@ -9,8 +9,11 @@
#include "bpf-filter.h"
#include "bpf-filter-bison.h"
+extern int perf_bpf_filter_needs_path;
+
static int sample(enum perf_bpf_filter_term term)
{
+ perf_bpf_filter_needs_path = 0;
perf_bpf_filter_lval.sample.term = term;
perf_bpf_filter_lval.sample.part = 0;
return BFT_SAMPLE;
@@ -18,11 +21,20 @@ static int sample(enum perf_bpf_filter_term term)
static int sample_part(enum perf_bpf_filter_term term, int part)
{
+ perf_bpf_filter_needs_path = 0;
perf_bpf_filter_lval.sample.term = term;
perf_bpf_filter_lval.sample.part = part;
return BFT_SAMPLE;
}
+/*
+ * Lexer action for a sample term that is compared against a pathname.
+ * Stores @term (with part 0) in the token value and sets
+ * perf_bpf_filter_needs_path, which path_or_error() checks to decide
+ * whether an otherwise unmatched word is a BFT_PATH or an error.
+ */
+static int sample_path(enum perf_bpf_filter_term term)
+{
+ perf_bpf_filter_needs_path = 1;
+ perf_bpf_filter_lval.sample.term = term;
+ perf_bpf_filter_lval.sample.part = 0;
+ return BFT_SAMPLE_PATH;
+}
+
static int operator(enum perf_bpf_filter_op op)
{
perf_bpf_filter_lval.op = op;
@@ -48,10 +60,15 @@ static int constant(int val)
return BFT_NUM;
}
-static int error(const char *str)
+static int path_or_error(void)
{
- printf("perf_bpf_filter: Unexpected filter %s: %s\n", str, perf_bpf_filter_text);
- return BFT_ERROR;
+ if (!perf_bpf_filter_needs_path) {
+ printf("perf_bpf_filter: Error: Unexpected item: %s\n",
+ perf_bpf_filter_text);
+ return BFT_ERROR;
+ }
+ perf_bpf_filter_lval.path = perf_bpf_filter_text;
+ return BFT_PATH;
}
%}
@@ -59,6 +76,7 @@ static int error(const char *str)
num_dec [0-9]+
num_hex 0[Xx][0-9a-fA-F]+
space [ \t]+
+path [^ \t\n]+
ident [_a-zA-Z][_a-zA-Z0-9]+
%%
@@ -97,6 +115,7 @@ mem_blk { return sample_part(PBF_TERM_DATA_SRC, 7); }
mem_hops { return sample_part(PBF_TERM_DATA_SRC, 8); }
uid { return sample(PBF_TERM_UID); }
gid { return sample(PBF_TERM_GID); }
+cgroup { return sample_path(PBF_TERM_CGROUP); }
"==" { return operator(PBF_OP_EQ); }
"!=" { return operator(PBF_OP_NEQ); }
@@ -155,7 +174,6 @@ hops3 { return constant(PERF_MEM_HOPS_3); }
"," { return ','; }
"||" { return BFT_LOGICAL_OR; }
-{ident} { return error("ident"); }
-. { return error("input"); }
+{path} { return path_or_error(); }
%%
diff --git a/tools/perf/util/bpf-filter.y b/tools/perf/util/bpf-filter.y
index 0c56fccb8874..5a79a8e7a45b 100644
--- a/tools/perf/util/bpf-filter.y
+++ b/tools/perf/util/bpf-filter.y
@@ -12,9 +12,13 @@
#include <linux/compiler.h>
#include <linux/list.h>
#include "bpf-filter.h"
+#include "cgroup.h"
int perf_bpf_filter_lex(void);
+/* To indicate if the current term needs a pathname or not */
+int perf_bpf_filter_needs_path;
+
static void perf_bpf_filter_error(struct list_head *expr __maybe_unused,
char const *msg)
{
@@ -26,6 +30,7 @@ static void perf_bpf_filter_error(struct list_head *expr __maybe_unused,
%union
{
unsigned long num;
+ char *path;
struct {
enum perf_bpf_filter_term term;
int part;
@@ -34,12 +39,13 @@ static void perf_bpf_filter_error(struct list_head *expr __maybe_unused,
struct perf_bpf_filter_expr *expr;
}
-%token BFT_SAMPLE BFT_OP BFT_ERROR BFT_NUM BFT_LOGICAL_OR
+%token BFT_SAMPLE BFT_SAMPLE_PATH BFT_OP BFT_ERROR BFT_NUM BFT_LOGICAL_OR BFT_PATH
%type <expr> filter_term filter_expr
%destructor { free ($$); } <expr>
-%type <sample> BFT_SAMPLE
+%type <sample> BFT_SAMPLE BFT_SAMPLE_PATH
%type <op> BFT_OP
%type <num> BFT_NUM
+%type <path> BFT_PATH
%%
@@ -81,5 +87,23 @@ BFT_SAMPLE BFT_OP BFT_NUM
{
$$ = perf_bpf_filter_expr__new($1.term, $1.part, $2, $3);
}
+|
+BFT_SAMPLE_PATH BFT_OP BFT_PATH
+{
+ struct cgroup *cgrp;
+ unsigned long cgroup_id = 0;
+
+ if ($2 != PBF_OP_EQ && $2 != PBF_OP_NEQ) {
+ printf("perf_bpf_filter: cgroup accepts '==' or '!=' only\n");
+ YYERROR;
+ }
+
+ cgrp = cgroup__new($3, /*do_open=*/false);
+ if (cgrp && read_cgroup_id(cgrp) == 0)
+ cgroup_id = cgrp->id;
+
+ $$ = perf_bpf_filter_expr__new($1.term, $1.part, $2, cgroup_id);
+ cgroup__put(cgrp);
+}
%%
diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h
deleted file mode 100644
index 66dcf751ef65..000000000000
--- a/tools/perf/util/bpf-prologue.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2015, He Kuang <hekuang@huawei.com>
- * Copyright (C) 2015, Huawei Inc.
- */
-#ifndef __BPF_PROLOGUE_H
-#define __BPF_PROLOGUE_H
-
-struct probe_trace_arg;
-struct bpf_insn;
-
-#define BPF_PROLOGUE_MAX_ARGS 3
-#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3
-#define BPF_PROLOGUE_FETCH_RESULT_REG BPF_REG_2
-
-#ifdef HAVE_BPF_PROLOGUE
-int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
- struct bpf_insn *new_prog, size_t *new_cnt,
- size_t cnt_space);
-#else
-#include <linux/compiler.h>
-#include <errno.h>
-
-static inline int
-bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused,
- int nargs __maybe_unused,
- struct bpf_insn *new_prog __maybe_unused,
- size_t *new_cnt,
- size_t cnt_space __maybe_unused)
-{
- if (!new_cnt)
- return -EINVAL;
- *new_cnt = 0;
- return -ENOTSUP;
-}
-#endif
-#endif /* __BPF_PROLOGUE_H */
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index 7a8af60e0f51..73fcafbffc6a 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -394,6 +394,7 @@ static int bperf_check_target(struct evsel *evsel,
}
static struct perf_cpu_map *all_cpu_map;
+static __u32 filter_entry_cnt;
static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
struct perf_event_attr_map_entry *entry)
@@ -444,12 +445,32 @@ out:
return err;
}
+/*
+ * Attach the follower skeleton.  For PID/TGID filters with inheritance
+ * enabled the whole skeleton is attached; otherwise only the fexit_XXX
+ * program is.
+ *
+ * Returns 0 on success or the error reported by the attach call.
+ */
+static int bperf_attach_follower_program(struct bperf_follower_bpf *skel,
+					 enum bperf_filter_type filter_type,
+					 bool inherit)
+{
+	bool per_task = filter_type == BPERF_FILTER_PID ||
+			filter_type == BPERF_FILTER_TGID;
+	struct bpf_link *link;
+
+	/* attach all follower bpf progs to enable event inheritance */
+	if (per_task && inherit)
+		return bperf_follower_bpf__attach(skel);
+
+	link = bpf_program__attach(skel->progs.fexit_XXX);
+	if (IS_ERR(link))
+		return PTR_ERR(link);
+
+	return 0;
+}
+
static int bperf__load(struct evsel *evsel, struct target *target)
{
struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff};
int attr_map_fd, diff_map_fd = -1, err;
enum bperf_filter_type filter_type;
- __u32 filter_entry_cnt, i;
+ __u32 i;
if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt))
return -1;
@@ -529,9 +550,6 @@ static int bperf__load(struct evsel *evsel, struct target *target)
/* set up reading map */
bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings,
filter_entry_cnt);
- /* set up follower filter based on target */
- bpf_map__set_max_entries(evsel->follower_skel->maps.filter,
- filter_entry_cnt);
err = bperf_follower_bpf__load(evsel->follower_skel);
if (err) {
pr_err("Failed to load follower skeleton\n");
@@ -543,6 +561,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
for (i = 0; i < filter_entry_cnt; i++) {
int filter_map_fd;
__u32 key;
+ struct bperf_filter_value fval = { i, 0 };
if (filter_type == BPERF_FILTER_PID ||
filter_type == BPERF_FILTER_TGID)
@@ -553,12 +572,14 @@ static int bperf__load(struct evsel *evsel, struct target *target)
break;
filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter);
- bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY);
+ bpf_map_update_elem(filter_map_fd, &key, &fval, BPF_ANY);
}
evsel->follower_skel->bss->type = filter_type;
+ evsel->follower_skel->bss->inherit = target->inherit;
- err = bperf_follower_bpf__attach(evsel->follower_skel);
+ err = bperf_attach_follower_program(evsel->follower_skel, filter_type,
+ target->inherit);
out:
if (err && evsel->bperf_leader_link_fd >= 0)
@@ -623,7 +644,7 @@ static int bperf__read(struct evsel *evsel)
bperf_sync_counters(evsel);
reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
- for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) {
+ for (i = 0; i < filter_entry_cnt; i++) {
struct perf_cpu entry;
__u32 cpu;
diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c
index ea29c372f339..6ff42619de12 100644
--- a/tools/perf/util/bpf_counter_cgroup.c
+++ b/tools/perf/util/bpf_counter_cgroup.c
@@ -61,6 +61,9 @@ static int bperf_load_program(struct evlist *evlist)
skel->rodata->num_cpus = total_cpus;
skel->rodata->num_events = evlist->core.nr_entries / nr_cgroups;
+ if (cgroup_is_v2("perf_event") > 0)
+ skel->rodata->use_cgroup_v2 = 1;
+
BUG_ON(evlist->core.nr_entries % nr_cgroups != 0);
/* we need one copy of events per cpu for reading */
@@ -82,9 +85,6 @@ static int bperf_load_program(struct evlist *evlist)
goto out;
}
- if (cgroup_is_v2("perf_event") > 0)
- skel->bss->use_cgroup_v2 = 1;
-
err = -1;
cgrp_switch = evsel__new(&cgrp_switch_attr);
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index 7a4297d8fd2c..06d1c4018407 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -40,13 +40,17 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
if (ftrace->target.cpu_list) {
ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+ skel->rodata->has_cpu = 1;
}
if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ skel->rodata->has_task = 1;
}
+ skel->rodata->use_nsec = ftrace->use_nsec;
+
set_max_rlimit();
err = func_latency_bpf__load(skel);
@@ -59,7 +63,6 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
u32 cpu;
u8 val = 1;
- skel->bss->has_cpu = 1;
fd = bpf_map__fd(skel->maps.cpu_filter);
for (i = 0; i < ncpus; i++) {
@@ -72,7 +75,6 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
u32 pid;
u8 val = 1;
- skel->bss->has_task = 1;
fd = bpf_map__fd(skel->maps.task_filter);
for (i = 0; i < ntasks; i++) {
@@ -81,8 +83,6 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
}
}
- skel->bss->use_nsec = ftrace->use_nsec;
-
skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
false, func->name);
if (IS_ERR(skel->links.func_begin)) {
diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c
index 44f0f708a15d..6c7126b7670d 100644
--- a/tools/perf/util/bpf_kwork.c
+++ b/tools/perf/util/bpf_kwork.c
@@ -176,8 +176,6 @@ static int setup_filters(struct perf_kwork *kwork)
bpf_map_update_elem(fd, &cpu.cpu, &val, BPF_ANY);
}
perf_cpu_map__put(map);
-
- skel->bss->has_cpu_filter = 1;
}
if (kwork->profile_name != NULL) {
@@ -197,8 +195,6 @@ static int setup_filters(struct perf_kwork *kwork)
key = 0;
bpf_map_update_elem(fd, &key, kwork->profile_name, BPF_ANY);
-
- skel->bss->has_name_filter = 1;
}
return 0;
@@ -239,6 +235,11 @@ int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork)
class_bpf->load_prepare(kwork);
}
+ if (kwork->cpu_list != NULL)
+ skel->rodata->has_cpu_filter = 1;
+ if (kwork->profile_name != NULL)
+ skel->rodata->has_name_filter = 1;
+
if (kwork_trace_bpf__load(skel)) {
pr_debug("Failed to load kwork trace skeleton\n");
goto out;
diff --git a/tools/perf/util/bpf_kwork_top.c b/tools/perf/util/bpf_kwork_top.c
index 22a3b00a1e23..7261cad43468 100644
--- a/tools/perf/util/bpf_kwork_top.c
+++ b/tools/perf/util/bpf_kwork_top.c
@@ -151,14 +151,12 @@ static int setup_filters(struct perf_kwork *kwork)
bpf_map_update_elem(fd, &cpu.cpu, &val, BPF_ANY);
}
perf_cpu_map__put(map);
-
- skel->bss->has_cpu_filter = 1;
}
return 0;
}
-int perf_kwork__top_prepare_bpf(struct perf_kwork *kwork __maybe_unused)
+int perf_kwork__top_prepare_bpf(struct perf_kwork *kwork)
{
struct bpf_program *prog;
struct kwork_class *class;
@@ -193,6 +191,9 @@ int perf_kwork__top_prepare_bpf(struct perf_kwork *kwork __maybe_unused)
class_bpf->load_prepare();
}
+ if (kwork->cpu_list)
+ skel->rodata->has_cpu_filter = 1;
+
if (kwork_top_bpf__load(skel)) {
pr_debug("Failed to load kwork top skeleton\n");
goto out;
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index b4cb3fe5cc25..41a1ad087895 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -46,14 +46,22 @@ int lock_contention_prepare(struct lock_contention *con)
else
bpf_map__set_max_entries(skel->maps.stacks, 1);
- if (target__has_cpu(target))
+ if (target__has_cpu(target)) {
+ skel->rodata->has_cpu = 1;
ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
- if (target__has_task(target))
+ }
+ if (target__has_task(target)) {
+ skel->rodata->has_task = 1;
ntasks = perf_thread_map__nr(evlist->core.threads);
- if (con->filters->nr_types)
+ }
+ if (con->filters->nr_types) {
+ skel->rodata->has_type = 1;
ntypes = con->filters->nr_types;
- if (con->filters->nr_cgrps)
+ }
+ if (con->filters->nr_cgrps) {
+ skel->rodata->has_cgroup = 1;
ncgrps = con->filters->nr_cgrps;
+ }
/* resolve lock name filters to addr */
if (con->filters->nr_syms) {
@@ -82,6 +90,7 @@ int lock_contention_prepare(struct lock_contention *con)
con->filters->addrs = addrs;
}
naddrs = con->filters->nr_addrs;
+ skel->rodata->has_addr = 1;
}
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
@@ -90,6 +99,16 @@ int lock_contention_prepare(struct lock_contention *con)
bpf_map__set_max_entries(skel->maps.addr_filter, naddrs);
bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);
+ skel->rodata->stack_skip = con->stack_skip;
+ skel->rodata->aggr_mode = con->aggr_mode;
+ skel->rodata->needs_callstack = con->save_callstack;
+ skel->rodata->lock_owner = con->owner;
+
+ if (con->aggr_mode == LOCK_AGGR_CGROUP || con->filters->nr_cgrps) {
+ if (cgroup_is_v2("perf_event"))
+ skel->rodata->use_cgroup_v2 = 1;
+ }
+
if (lock_contention_bpf__load(skel) < 0) {
pr_err("Failed to load lock-contention BPF skeleton\n");
return -1;
@@ -99,7 +118,6 @@ int lock_contention_prepare(struct lock_contention *con)
u32 cpu;
u8 val = 1;
- skel->bss->has_cpu = 1;
fd = bpf_map__fd(skel->maps.cpu_filter);
for (i = 0; i < ncpus; i++) {
@@ -112,7 +130,6 @@ int lock_contention_prepare(struct lock_contention *con)
u32 pid;
u8 val = 1;
- skel->bss->has_task = 1;
fd = bpf_map__fd(skel->maps.task_filter);
for (i = 0; i < ntasks; i++) {
@@ -125,7 +142,6 @@ int lock_contention_prepare(struct lock_contention *con)
u32 pid = evlist->workload.pid;
u8 val = 1;
- skel->bss->has_task = 1;
fd = bpf_map__fd(skel->maps.task_filter);
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
}
@@ -133,7 +149,6 @@ int lock_contention_prepare(struct lock_contention *con)
if (con->filters->nr_types) {
u8 val = 1;
- skel->bss->has_type = 1;
fd = bpf_map__fd(skel->maps.type_filter);
for (i = 0; i < con->filters->nr_types; i++)
@@ -143,7 +158,6 @@ int lock_contention_prepare(struct lock_contention *con)
if (con->filters->nr_addrs) {
u8 val = 1;
- skel->bss->has_addr = 1;
fd = bpf_map__fd(skel->maps.addr_filter);
for (i = 0; i < con->filters->nr_addrs; i++)
@@ -153,25 +167,14 @@ int lock_contention_prepare(struct lock_contention *con)
if (con->filters->nr_cgrps) {
u8 val = 1;
- skel->bss->has_cgroup = 1;
fd = bpf_map__fd(skel->maps.cgroup_filter);
for (i = 0; i < con->filters->nr_cgrps; i++)
bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY);
}
- /* these don't work well if in the rodata section */
- skel->bss->stack_skip = con->stack_skip;
- skel->bss->aggr_mode = con->aggr_mode;
- skel->bss->needs_callstack = con->save_callstack;
- skel->bss->lock_owner = con->owner;
-
- if (con->aggr_mode == LOCK_AGGR_CGROUP) {
- if (cgroup_is_v2("perf_event"))
- skel->bss->use_cgroup_v2 = 1;
-
+ if (con->aggr_mode == LOCK_AGGR_CGROUP)
read_all_cgroups(&con->cgroups);
- }
bpf_program__set_autoload(skel->progs.collect_lock_syms, false);
@@ -286,6 +289,9 @@ static void account_end_timestamp(struct lock_contention *con)
goto next;
for (int i = 0; i < total_cpus; i++) {
+ if (cpu_data[i].lock == 0)
+ continue;
+
update_lock_stat(stat_fd, -1, end_ts, aggr_mode,
&cpu_data[i]);
}
diff --git a/tools/perf/util/bpf_map.c b/tools/perf/util/bpf_map.c
index c863ae0c5cb5..578f27d2d6b4 100644
--- a/tools/perf/util/bpf_map.c
+++ b/tools/perf/util/bpf_map.c
@@ -35,9 +35,6 @@ int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
if (fd < 0)
return fd;
- if (!map)
- return PTR_ERR(map);
-
err = -ENOMEM;
key = malloc(bpf_map__key_size(map));
if (key == NULL)
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
index 6af36142dc5a..a590a8ac1f9d 100644
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -73,14 +73,12 @@ static void off_cpu_start(void *arg)
struct evlist *evlist = arg;
/* update task filter for the given workload */
- if (!skel->bss->has_cpu && !skel->bss->has_task &&
+ if (skel->rodata->has_task && skel->rodata->uses_tgid &&
perf_thread_map__pid(evlist->core.threads, 0) != -1) {
int fd;
u32 pid;
u8 val = 1;
- skel->bss->has_task = 1;
- skel->bss->uses_tgid = 1;
fd = bpf_map__fd(skel->maps.task_filter);
pid = perf_thread_map__pid(evlist->core.threads, 0);
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
@@ -148,6 +146,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
if (target->cpu_list) {
ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+ skel->rodata->has_cpu = 1;
}
if (target->pid) {
@@ -173,11 +172,16 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
ntasks = MAX_PROC;
bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ skel->rodata->has_task = 1;
+ skel->rodata->uses_tgid = 1;
} else if (target__has_task(target)) {
ntasks = perf_thread_map__nr(evlist->core.threads);
bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ skel->rodata->has_task = 1;
} else if (target__none(target)) {
bpf_map__set_max_entries(skel->maps.task_filter, MAX_PROC);
+ skel->rodata->has_task = 1;
+ skel->rodata->uses_tgid = 1;
}
if (evlist__first(evlist)->cgrp) {
@@ -186,6 +190,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
if (!cgroup_is_v2("perf_event"))
skel->rodata->uses_cgroup_v1 = true;
+ skel->rodata->has_cgroup = 1;
}
if (opts->record_cgroup) {
@@ -208,7 +213,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
u32 cpu;
u8 val = 1;
- skel->bss->has_cpu = 1;
fd = bpf_map__fd(skel->maps.cpu_filter);
for (i = 0; i < ncpus; i++) {
@@ -220,8 +224,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
if (target->pid) {
u8 val = 1;
- skel->bss->has_task = 1;
- skel->bss->uses_tgid = 1;
fd = bpf_map__fd(skel->maps.task_filter);
strlist__for_each_entry(pos, pid_slist) {
@@ -240,7 +242,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
u32 pid;
u8 val = 1;
- skel->bss->has_task = 1;
fd = bpf_map__fd(skel->maps.task_filter);
for (i = 0; i < ntasks; i++) {
@@ -253,7 +254,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
struct evsel *evsel;
u8 val = 1;
- skel->bss->has_cgroup = 1;
fd = bpf_map__fd(skel->maps.cgroup_filter);
evlist__for_each_entry(evlist, evsel) {
diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index 0acbd74e8c76..4a62ed593e84 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -7,9 +7,14 @@
*/
#include "vmlinux.h"
+#include "../trace_augment.h"
+
#include <bpf/bpf_helpers.h>
#include <linux/limits.h>
+#define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1)
+#define __PERF_ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
+
/**
* is_power_of_2() - check if a value is a power of two
* @n: the value to check
@@ -66,19 +71,6 @@ struct syscall_exit_args {
long ret;
};
-struct augmented_arg {
- unsigned int size;
- int err;
- char value[PATH_MAX];
-};
-
-struct pids_filtered {
- __uint(type, BPF_MAP_TYPE_HASH);
- __type(key, pid_t);
- __type(value, bool);
- __uint(max_entries, 64);
-} pids_filtered SEC(".maps");
-
/*
* Desired design of maximum size and alignment (see RFC2553)
*/
@@ -105,17 +97,27 @@ struct sockaddr_storage {
};
};
-struct augmented_args_payload {
- struct syscall_enter_args args;
- union {
- struct {
- struct augmented_arg arg, arg2;
- };
+struct augmented_arg {
+ unsigned int size;
+ int err;
+ union {
+ char value[PATH_MAX];
struct sockaddr_storage saddr;
- char __data[sizeof(struct augmented_arg)];
};
};
+struct pids_filtered {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, pid_t);
+ __type(value, bool);
+ __uint(max_entries, 64);
+} pids_filtered SEC(".maps");
+
+struct augmented_args_payload {
+ struct syscall_enter_args args;
+ struct augmented_arg arg, arg2; // We have to reserve space for two arguments (rename, etc)
+};
+
// We need more tmp space than the BPF stack can give us
struct augmented_args_tmp {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
@@ -124,6 +126,25 @@ struct augmented_args_tmp {
__uint(max_entries, 1);
} augmented_args_tmp SEC(".maps");
+struct beauty_map_enter {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, __u32[6]);
+ __uint(max_entries, 512);
+} beauty_map_enter SEC(".maps");
+
+struct beauty_payload_enter {
+ struct syscall_enter_args args;
+ struct augmented_arg aug_args[6];
+};
+
+struct beauty_payload_enter_map {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, int);
+ __type(value, struct beauty_payload_enter);
+ __uint(max_entries, 1);
+} beauty_payload_enter_map SEC(".maps");
+
static inline struct augmented_args_payload *augmented_args_payload(void)
{
int key = 0;
@@ -136,6 +157,11 @@ static inline int augmented__output(void *ctx, struct augmented_args_payload *ar
return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
}
+static inline int augmented__beauty_output(void *ctx, void *data, int len)
+{
+ return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, data, len);
+}
+
static inline
unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len)
{
@@ -182,15 +208,17 @@ int sys_enter_connect(struct syscall_enter_args *args)
struct augmented_args_payload *augmented_args = augmented_args_payload();
const void *sockaddr_arg = (const void *)args->args[1];
unsigned int socklen = args->args[2];
- unsigned int len = sizeof(augmented_args->args);
+ unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
if (augmented_args == NULL)
return 1; /* Failure: don't filter */
- _Static_assert(is_power_of_2(sizeof(augmented_args->saddr)), "sizeof(augmented_args->saddr) needs to be a power of two");
- socklen &= sizeof(augmented_args->saddr) - 1;
+ _Static_assert(is_power_of_2(sizeof(augmented_args->arg.saddr)), "sizeof(augmented_args->arg.saddr) needs to be a power of two");
+ socklen &= sizeof(augmented_args->arg.saddr) - 1;
- bpf_probe_read_user(&augmented_args->saddr, socklen, sockaddr_arg);
+ bpf_probe_read_user(&augmented_args->arg.saddr, socklen, sockaddr_arg);
+ augmented_args->arg.size = socklen;
+ augmented_args->arg.err = 0;
return augmented__output(args, augmented_args, len + socklen);
}
@@ -201,14 +229,14 @@ int sys_enter_sendto(struct syscall_enter_args *args)
struct augmented_args_payload *augmented_args = augmented_args_payload();
const void *sockaddr_arg = (const void *)args->args[4];
unsigned int socklen = args->args[5];
- unsigned int len = sizeof(augmented_args->args);
+ unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
if (augmented_args == NULL)
return 1; /* Failure: don't filter */
- socklen &= sizeof(augmented_args->saddr) - 1;
+ socklen &= sizeof(augmented_args->arg.saddr) - 1;
- bpf_probe_read_user(&augmented_args->saddr, socklen, sockaddr_arg);
+ bpf_probe_read_user(&augmented_args->arg.saddr, socklen, sockaddr_arg);
return augmented__output(args, augmented_args, len + socklen);
}
@@ -249,30 +277,58 @@ int sys_enter_rename(struct syscall_enter_args *args)
struct augmented_args_payload *augmented_args = augmented_args_payload();
const void *oldpath_arg = (const void *)args->args[0],
*newpath_arg = (const void *)args->args[1];
- unsigned int len = sizeof(augmented_args->args), oldpath_len;
+ unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len;
if (augmented_args == NULL)
return 1; /* Failure: don't filter */
+ len += 2 * sizeof(u64); // The overhead of size and err, just before the payload...
+
oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
- len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value));
+ augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
+ len += augmented_args->arg.size;
+
+ /* Every read from userspace is limited to value size */
+ if (augmented_args->arg.size > sizeof(augmented_args->arg.value))
+ return 1; /* Failure: don't filter */
+
+ struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size;
+
+ newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value));
+ arg2->size = newpath_len;
+
+ len += newpath_len;
return augmented__output(args, augmented_args, len);
}
-SEC("tp/syscalls/sys_enter_renameat")
-int sys_enter_renameat(struct syscall_enter_args *args)
+SEC("tp/syscalls/sys_enter_renameat2")
+int sys_enter_renameat2(struct syscall_enter_args *args)
{
struct augmented_args_payload *augmented_args = augmented_args_payload();
const void *oldpath_arg = (const void *)args->args[1],
*newpath_arg = (const void *)args->args[3];
- unsigned int len = sizeof(augmented_args->args), oldpath_len;
+ unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len;
if (augmented_args == NULL)
return 1; /* Failure: don't filter */
+ len += 2 * sizeof(u64); // The overhead of size and err, just before the payload...
+
oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
- len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value));
+ augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
+ len += augmented_args->arg.size;
+
+ /* Every read from userspace is limited to value size */
+ if (augmented_args->arg.size > sizeof(augmented_args->arg.value))
+ return 1; /* Failure: don't filter */
+
+ struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size;
+
+ newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value));
+ arg2->size = newpath_len;
+
+ len += newpath_len;
return augmented__output(args, augmented_args, len);
}
@@ -293,26 +349,26 @@ int sys_enter_perf_event_open(struct syscall_enter_args *args)
{
struct augmented_args_payload *augmented_args = augmented_args_payload();
const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read;
- unsigned int len = sizeof(augmented_args->args);
+ unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
if (augmented_args == NULL)
goto failure;
- if (bpf_probe_read_user(&augmented_args->__data, sizeof(*attr), attr) < 0)
+ if (bpf_probe_read_user(&augmented_args->arg.value, sizeof(*attr), attr) < 0)
goto failure;
- attr_read = (const struct perf_event_attr_size *)augmented_args->__data;
+ attr_read = (const struct perf_event_attr_size *)augmented_args->arg.value;
__u32 size = attr_read->size;
if (!size)
size = PERF_ATTR_SIZE_VER0;
- if (size > sizeof(augmented_args->__data))
+ if (size > sizeof(augmented_args->arg.value))
goto failure;
// Now that we read attr->size and tested it against the size limits, read it completely
- if (bpf_probe_read_user(&augmented_args->__data, size, attr) < 0)
+ if (bpf_probe_read_user(&augmented_args->arg.value, size, attr) < 0)
goto failure;
return augmented__output(args, augmented_args, len + size);
@@ -325,16 +381,16 @@ int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
{
struct augmented_args_payload *augmented_args = augmented_args_payload();
const void *rqtp_arg = (const void *)args->args[2];
- unsigned int len = sizeof(augmented_args->args);
+ unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
__u32 size = sizeof(struct timespec64);
if (augmented_args == NULL)
goto failure;
- if (size > sizeof(augmented_args->__data))
+ if (size > sizeof(augmented_args->arg.value))
goto failure;
- bpf_probe_read_user(&augmented_args->__data, size, rqtp_arg);
+ bpf_probe_read_user(&augmented_args->arg.value, size, rqtp_arg);
return augmented__output(args, augmented_args, len + size);
failure:
@@ -352,10 +408,10 @@ int sys_enter_nanosleep(struct syscall_enter_args *args)
if (augmented_args == NULL)
goto failure;
- if (size > sizeof(augmented_args->__data))
+ if (size > sizeof(augmented_args->arg.value))
goto failure;
- bpf_probe_read_user(&augmented_args->__data, size, req_arg);
+ bpf_probe_read_user(&augmented_args->arg.value, size, req_arg);
return augmented__output(args, augmented_args, len + size);
failure:
@@ -372,6 +428,101 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
return bpf_map_lookup_elem(pids, &pid) != NULL;
}
+static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
+{
+ bool augmented, do_output = false;
+ int zero = 0, size, aug_size, index,
+ value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value);
+ u64 output = 0; /* has to be u64, otherwise it won't pass the verifier */
+ unsigned int nr, *beauty_map;
+ struct beauty_payload_enter *payload;
+ void *arg, *payload_offset;
+
+ /* fall back to the predefined tail call */
+ if (args == NULL)
+ return 1;
+
+ /* use syscall number to get beauty_map entry */
+ nr = (__u32)args->syscall_nr;
+ beauty_map = bpf_map_lookup_elem(&beauty_map_enter, &nr);
+
+ /* set up payload for output */
+ payload = bpf_map_lookup_elem(&beauty_payload_enter_map, &zero);
+ payload_offset = (void *)&payload->aug_args;
+
+ if (beauty_map == NULL || payload == NULL)
+ return 1;
+
+ /* copy the sys_enter header, which has the syscall_nr */
+ __builtin_memcpy(&payload->args, args, sizeof(struct syscall_enter_args));
+
+ /*
+ * Determine what type of argument and how many bytes to read from user space, using the
+ * value in the beauty_map. This is the relation of parameter type and its corresponding
+ * value in the beauty map, and how many bytes we read eventually:
+ *
+ * string: 1 -> size of string
+ * struct: size of struct -> size of struct
+ * buffer: -1 * (index of paired len) -> value of paired len (maximum: TRACE_AUG_MAX_BUF)
+ */
+ for (int i = 0; i < 6; i++) {
+ arg = (void *)args->args[i];
+ augmented = false;
+ size = beauty_map[i];
+ aug_size = size; /* size of the augmented data read from user space */
+
+ if (size == 0 || arg == NULL)
+ continue;
+
+ if (size == 1) { /* string */
+ aug_size = bpf_probe_read_user_str(((struct augmented_arg *)payload_offset)->value, value_size, arg);
+ /* minimum of 0 to pass the verifier */
+ if (aug_size < 0)
+ aug_size = 0;
+
+ augmented = true;
+ } else if (size > 0 && size <= value_size) { /* struct */
+ if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg))
+ augmented = true;
+ } else if (size < 0 && size >= -6) { /* buffer */
+ index = -(size + 1);
+ barrier_var(index); // Prevent clang (noticed with v18) from removing the &= 7 trick.
+ index &= 7; // Satisfy the bounds checking with the verifier in some kernels.
+ aug_size = args->args[index];
+
+ if (aug_size > TRACE_AUG_MAX_BUF)
+ aug_size = TRACE_AUG_MAX_BUF;
+
+ if (aug_size > 0) {
+ if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg))
+ augmented = true;
+ }
+ }
+
+ /* Augmented data size is limited to sizeof(augmented_arg->unnamed union with value field) */
+ if (aug_size > value_size)
+ aug_size = value_size;
+
+ /* write data to payload */
+ if (augmented) {
+ int written = offsetof(struct augmented_arg, value) + aug_size;
+
+ if (written < 0 || written > sizeof(struct augmented_arg))
+ return 1;
+
+ ((struct augmented_arg *)payload_offset)->size = aug_size;
+ output += written;
+ payload_offset += written;
+ do_output = true;
+ }
+ }
+
+ if (!do_output || (sizeof(struct syscall_enter_args) + output) > sizeof(struct beauty_payload_enter))
+ return 1;
+
+ return augmented__beauty_output(ctx, payload, sizeof(struct syscall_enter_args) + output);
+}
+
SEC("tp/raw_syscalls/sys_enter")
int sys_enter(struct syscall_enter_args *args)
{
@@ -400,7 +551,8 @@ int sys_enter(struct syscall_enter_args *args)
* "!raw_syscalls:unaugmented" that will just return 1 to return the
* unaugmented tracepoint payload.
*/
- bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
+ if (augment_sys_enter(args, &augmented_args->args))
+ bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
// If not found on the PROG_ARRAY syscalls map, then we're filtering it:
return 0;
diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
index 6a438e0102c5..57cab7647a9a 100644
--- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
+++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
@@ -57,9 +57,9 @@ struct cgroup___old {
const volatile __u32 num_events = 1;
const volatile __u32 num_cpus = 1;
+const volatile int use_cgroup_v2 = 0;
int enabled = 0;
-int use_cgroup_v2 = 0;
int perf_subsys_id = -1;
static inline __u64 get_cgroup_v1_ancestor_id(struct cgroup *cgrp, int level)
diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c
index f193998530d4..0595063139a3 100644
--- a/tools/perf/util/bpf_skel/bperf_follower.bpf.c
+++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c
@@ -5,6 +5,8 @@
#include <bpf/bpf_tracing.h>
#include "bperf_u.h"
+#define MAX_ENTRIES 102400
+
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(key_size, sizeof(__u32));
@@ -22,25 +24,29 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(__u32));
- __uint(value_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bperf_filter_value));
+ __uint(max_entries, MAX_ENTRIES);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
} filter SEC(".maps");
enum bperf_filter_type type = 0;
int enabled = 0;
+int inherit;
SEC("fexit/XXX")
int BPF_PROG(fexit_XXX)
{
struct bpf_perf_event_value *diff_val, *accum_val;
__u32 filter_key, zero = 0;
- __u32 *accum_key;
+ __u32 accum_key;
+ struct bperf_filter_value *fval;
if (!enabled)
return 0;
switch (type) {
case BPERF_FILTER_GLOBAL:
- accum_key = &zero;
+ accum_key = zero;
goto do_add;
case BPERF_FILTER_CPU:
filter_key = bpf_get_smp_processor_id();
@@ -49,22 +55,34 @@ int BPF_PROG(fexit_XXX)
filter_key = bpf_get_current_pid_tgid() & 0xffffffff;
break;
case BPERF_FILTER_TGID:
- filter_key = bpf_get_current_pid_tgid() >> 32;
+ /* Use pid as the filter_key to exclude new task counts
+ * when inherit is disabled. Don't worry about the existing
+ * children in TGID losing their counts, bpf_counter has
+ * already added them to the filter map via perf_thread_map
+ * before this bpf prog runs.
+ */
+ filter_key = inherit ?
+ bpf_get_current_pid_tgid() >> 32 :
+ bpf_get_current_pid_tgid() & 0xffffffff;
break;
default:
return 0;
}
- accum_key = bpf_map_lookup_elem(&filter, &filter_key);
- if (!accum_key)
+ fval = bpf_map_lookup_elem(&filter, &filter_key);
+ if (!fval)
return 0;
+ accum_key = fval->accum_key;
+ if (fval->exited)
+ bpf_map_delete_elem(&filter, &filter_key);
+
do_add:
diff_val = bpf_map_lookup_elem(&diff_readings, &zero);
if (!diff_val)
return 0;
- accum_val = bpf_map_lookup_elem(&accum_readings, accum_key);
+ accum_val = bpf_map_lookup_elem(&accum_readings, &accum_key);
if (!accum_val)
return 0;
@@ -75,4 +93,70 @@ do_add:
return 0;
}
+/* The program is only used for PID or TGID filter types. */
+SEC("tp_btf/task_newtask")
+int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags)
+{
+ __u32 parent_key, child_key;
+ struct bperf_filter_value *parent_fval;
+ struct bperf_filter_value child_fval = { 0 };
+
+ if (!enabled)
+ return 0;
+
+ switch (type) {
+ case BPERF_FILTER_PID:
+ parent_key = bpf_get_current_pid_tgid() & 0xffffffff;
+ child_key = task->pid;
+ break;
+ case BPERF_FILTER_TGID:
+ parent_key = bpf_get_current_pid_tgid() >> 32;
+ child_key = task->tgid;
+ if (child_key == parent_key)
+ return 0;
+ break;
+ default:
+ return 0;
+ }
+
+ /* Check if the current task is one of the target tasks to be counted */
+ parent_fval = bpf_map_lookup_elem(&filter, &parent_key);
+ if (!parent_fval)
+ return 0;
+
+ /* Start counting for the new task by adding it to the filter map;
+ * it inherits the accum key of its parent task so that they can be
+ * counted together.
+ */
+ child_fval.accum_key = parent_fval->accum_key;
+ child_fval.exited = 0;
+ bpf_map_update_elem(&filter, &child_key, &child_fval, BPF_NOEXIST);
+
+ return 0;
+}
+
+/* The program is only used for PID or TGID filter types. */
+SEC("tp_btf/sched_process_exit")
+int BPF_PROG(on_exittask, struct task_struct *task)
+{
+ __u32 pid;
+ struct bperf_filter_value *fval;
+
+ if (!enabled)
+ return 0;
+
+ /* Stop counting for this task by marking its filter map entry as
+ * exited; fexit_XXX deletes the entry on its next lookup. For TGID
+ * type, if the pid can be found in the map, it means that this pid
+ * belongs to the leader task. After the task exits, the tgid of its
+ * child tasks (if any) will be 1, so the pid can be safely removed.
+ */
+ pid = task->pid;
+ fval = bpf_map_lookup_elem(&filter, &pid);
+ if (fval)
+ fval->exited = 1;
+
+ return 0;
+}
+
char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/bperf_u.h b/tools/perf/util/bpf_skel/bperf_u.h
index 1ce0c2c905c1..4a4a753980be 100644
--- a/tools/perf/util/bpf_skel/bperf_u.h
+++ b/tools/perf/util/bpf_skel/bperf_u.h
@@ -11,4 +11,9 @@ enum bperf_filter_type {
BPERF_FILTER_TGID,
};
+struct bperf_filter_value {
+ __u32 accum_key;
+ __u8 exited;
+};
+
#endif /* __BPERF_STAT_U_H */
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index 9d01e3af7479..f613dc9cb123 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -37,9 +37,10 @@ struct {
int enabled = 0;
-int has_cpu = 0;
-int has_task = 0;
-int use_nsec = 0;
+
+const volatile int has_cpu = 0;
+const volatile int has_task = 0;
+const volatile int use_nsec = 0;
SEC("kprobe/func")
int BPF_PROG(func_begin)
diff --git a/tools/perf/util/bpf_skel/kwork_top.bpf.c b/tools/perf/util/bpf_skel/kwork_top.bpf.c
index 84c15ccbab44..594da91965a2 100644
--- a/tools/perf/util/bpf_skel/kwork_top.bpf.c
+++ b/tools/perf/util/bpf_skel/kwork_top.bpf.c
@@ -84,7 +84,7 @@ struct {
int enabled = 0;
-int has_cpu_filter = 0;
+const volatile int has_cpu_filter = 0;
__u64 from_timestamp = 0;
__u64 to_timestamp = 0;
diff --git a/tools/perf/util/bpf_skel/kwork_trace.bpf.c b/tools/perf/util/bpf_skel/kwork_trace.bpf.c
index 063c124e0999..cbd79bc4b330 100644
--- a/tools/perf/util/bpf_skel/kwork_trace.bpf.c
+++ b/tools/perf/util/bpf_skel/kwork_trace.bpf.c
@@ -68,8 +68,9 @@ struct {
} perf_kwork_name_filter SEC(".maps");
int enabled = 0;
-int has_cpu_filter = 0;
-int has_name_filter = 0;
+
+const volatile int has_cpu_filter = 0;
+const volatile int has_name_filter = 0;
static __always_inline int local_strncmp(const char *s1,
unsigned int sz, const char *s2)
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index d931a898c434..1069bda5d733 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -117,21 +117,22 @@ struct mm_struct___new {
} __attribute__((preserve_access_index));
/* control flags */
-int enabled;
-int has_cpu;
-int has_task;
-int has_type;
-int has_addr;
-int has_cgroup;
-int needs_callstack;
-int stack_skip;
-int lock_owner;
-
-int use_cgroup_v2;
-int perf_subsys_id = -1;
+const volatile int has_cpu;
+const volatile int has_task;
+const volatile int has_type;
+const volatile int has_addr;
+const volatile int has_cgroup;
+const volatile int needs_callstack;
+const volatile int stack_skip;
+const volatile int lock_owner;
+const volatile int use_cgroup_v2;
/* determine the key of lock stat */
-int aggr_mode;
+const volatile int aggr_mode;
+
+int enabled;
+
+int perf_subsys_id = -1;
__u64 end_ts;
@@ -323,8 +324,7 @@ static inline struct tstamp_data *get_tstamp_elem(__u32 flags)
struct tstamp_data *pelem;
/* Use per-cpu array map for spinlock and rwlock */
- if (flags == (LCB_F_SPIN | LCB_F_READ) || flags == LCB_F_SPIN ||
- flags == (LCB_F_SPIN | LCB_F_WRITE)) {
+ if ((flags & (LCB_F_SPIN | LCB_F_MUTEX)) == LCB_F_SPIN) {
__u32 idx = 0;
pelem = bpf_map_lookup_elem(&tstamp_cpu, &idx);
@@ -439,11 +439,8 @@ int contention_end(u64 *ctx)
duration = bpf_ktime_get_ns() - pelem->timestamp;
if ((__s64)duration < 0) {
- pelem->lock = 0;
- if (need_delete)
- bpf_map_delete_elem(&tstamp, &pid);
__sync_fetch_and_add(&time_fail, 1);
- return 0;
+ goto out;
}
switch (aggr_mode) {
@@ -477,11 +474,8 @@ int contention_end(u64 *ctx)
data = bpf_map_lookup_elem(&lock_stat, &key);
if (!data) {
if (data_map_full) {
- pelem->lock = 0;
- if (need_delete)
- bpf_map_delete_elem(&tstamp, &pid);
__sync_fetch_and_add(&data_fail, 1);
- return 0;
+ goto out;
}
struct contention_data first = {
@@ -498,16 +492,20 @@ int contention_end(u64 *ctx)
err = bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
if (err < 0) {
+ if (err == -EEXIST) {
+ /* it lost the race, try to get it again */
+ data = bpf_map_lookup_elem(&lock_stat, &key);
+ if (data != NULL)
+ goto found;
+ }
if (err == -E2BIG)
data_map_full = 1;
__sync_fetch_and_add(&data_fail, 1);
}
- pelem->lock = 0;
- if (need_delete)
- bpf_map_delete_elem(&tstamp, &pid);
- return 0;
+ goto out;
}
+found:
__sync_fetch_and_add(&data->total_time, duration);
__sync_fetch_and_add(&data->count, 1);
@@ -517,6 +515,7 @@ int contention_end(u64 *ctx)
if (data->min_time > duration)
data->min_time = duration;
+out:
pelem->lock = 0;
if (need_delete)
bpf_map_delete_elem(&tstamp, &pid);
diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
index 36af11faad03..de12892f992f 100644
--- a/tools/perf/util/bpf_skel/lock_data.h
+++ b/tools/perf/util/bpf_skel/lock_data.h
@@ -7,11 +7,11 @@ struct tstamp_data {
u64 timestamp;
u64 lock;
u32 flags;
- u32 stack_id;
+ s32 stack_id;
};
struct contention_key {
- u32 stack_id;
+ s32 stack_id;
u32 pid;
u64 lock_addr_or_cgroup;
};
diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c
index d877a0a9731f..c152116df72f 100644
--- a/tools/perf/util/bpf_skel/off_cpu.bpf.c
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c
@@ -85,10 +85,11 @@ struct task_struct___old {
} __attribute__((preserve_access_index));
int enabled = 0;
-int has_cpu = 0;
-int has_task = 0;
-int has_cgroup = 0;
-int uses_tgid = 0;
+
+const volatile int has_cpu = 0;
+const volatile int has_task = 0;
+const volatile int has_cgroup = 0;
+const volatile int uses_tgid = 0;
const volatile bool has_prev_state = false;
const volatile bool needs_cgroup = false;
diff --git a/tools/perf/util/bpf_skel/sample-filter.h b/tools/perf/util/bpf_skel/sample-filter.h
index 350efa121026..683fec85e71e 100644
--- a/tools/perf/util/bpf_skel/sample-filter.h
+++ b/tools/perf/util/bpf_skel/sample-filter.h
@@ -1,7 +1,9 @@
#ifndef PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H
#define PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H
-#define MAX_FILTERS 64
+#define MAX_FILTERS 64
+#define MAX_IDX_HASH (16 * 1024)
+#define MAX_EVT_HASH (1024 * 1024)
/* supported filter operations */
enum perf_bpf_filter_op {
@@ -14,6 +16,7 @@ enum perf_bpf_filter_op {
PBF_OP_AND,
PBF_OP_GROUP_BEGIN,
PBF_OP_GROUP_END,
+ PBF_OP_DONE,
};
enum perf_bpf_filter_term {
@@ -42,7 +45,7 @@ enum perf_bpf_filter_term {
__PBF_UNUSED_TERM18 = PBF_TERM_SAMPLE_START + 18, /* SAMPLE_REGS_INTR = 1U << 18 */
PBF_TERM_PHYS_ADDR = PBF_TERM_SAMPLE_START + 19, /* SAMPLE_PHYS_ADDR = 1U << 19 */
__PBF_UNUSED_TERM20 = PBF_TERM_SAMPLE_START + 20, /* SAMPLE_AUX = 1U << 20 */
- __PBF_UNUSED_TERM21 = PBF_TERM_SAMPLE_START + 21, /* SAMPLE_CGROUP = 1U << 21 */
+ PBF_TERM_CGROUP = PBF_TERM_SAMPLE_START + 21, /* SAMPLE_CGROUP = 1U << 21 */
PBF_TERM_DATA_PAGE_SIZE = PBF_TERM_SAMPLE_START + 22, /* SAMPLE_DATA_PAGE_SIZE = 1U << 22 */
PBF_TERM_CODE_PAGE_SIZE = PBF_TERM_SAMPLE_START + 23, /* SAMPLE_CODE_PAGE_SIZE = 1U << 23 */
PBF_TERM_WEIGHT_STRUCT = PBF_TERM_SAMPLE_START + 24, /* SAMPLE_WEIGHT_STRUCT = 1U << 24 */
@@ -60,4 +63,10 @@ struct perf_bpf_filter_entry {
__u64 value;
};
+struct idx_hash_key {
+ __u64 evt_id;
+ __u32 tgid;
+ __u32 reserved;
+};
+
#endif /* PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H */
diff --git a/tools/perf/util/bpf_skel/sample_filter.bpf.c b/tools/perf/util/bpf_skel/sample_filter.bpf.c
index f59985101973..b195e6efeb8b 100644
--- a/tools/perf/util/bpf_skel/sample_filter.bpf.c
+++ b/tools/perf/util/bpf_skel/sample_filter.bpf.c
@@ -9,13 +9,41 @@
/* BPF map that will be filled by user space */
struct filters {
- __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(type, BPF_MAP_TYPE_HASH);
__type(key, int);
- __type(value, struct perf_bpf_filter_entry);
- __uint(max_entries, MAX_FILTERS);
+ __type(value, struct perf_bpf_filter_entry[MAX_FILTERS]);
+ __uint(max_entries, 1);
} filters SEC(".maps");
-int dropped;
+/*
+ * An evsel has multiple instances for each CPU or task but we need a single
+ * id to be used as a key for the idx_hash. This hashmap would translate the
+ * instance's ID to a representative ID.
+ */
+struct event_hash {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u64);
+ __type(value, __u64);
+ __uint(max_entries, 1);
+} event_hash SEC(".maps");
+
+/* tgid/evtid to filter index */
+struct idx_hash {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct idx_hash_key);
+ __type(value, int);
+ __uint(max_entries, 1);
+} idx_hash SEC(".maps");
+
+/* filter index to number of dropped samples */
+struct lost_count {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+} dropped SEC(".maps");
+
+volatile const int use_idx_hash;
void *bpf_cast_to_kern_ctx(void *) __ksym;
@@ -65,6 +93,7 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx,
BUILD_CHECK_SAMPLE(DATA_SRC);
BUILD_CHECK_SAMPLE(TRANSACTION);
BUILD_CHECK_SAMPLE(PHYS_ADDR);
+ BUILD_CHECK_SAMPLE(CGROUP);
BUILD_CHECK_SAMPLE(DATA_PAGE_SIZE);
BUILD_CHECK_SAMPLE(CODE_PAGE_SIZE);
BUILD_CHECK_SAMPLE(WEIGHT_STRUCT);
@@ -107,6 +136,8 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx,
return kctx->data->weight.full;
case PBF_TERM_PHYS_ADDR:
return kctx->data->phys_addr;
+ case PBF_TERM_CGROUP:
+ return kctx->data->cgroup;
case PBF_TERM_CODE_PAGE_SIZE:
return kctx->data->code_page_size;
case PBF_TERM_DATA_PAGE_SIZE:
@@ -155,7 +186,6 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx,
case __PBF_UNUSED_TERM16:
case __PBF_UNUSED_TERM18:
case __PBF_UNUSED_TERM20:
- case __PBF_UNUSED_TERM21:
default:
break;
}
@@ -179,39 +209,66 @@ int perf_sample_filter(void *ctx)
__u64 sample_data;
int in_group = 0;
int group_result = 0;
- int i;
+ int i, k;
+ int *losts;
kctx = bpf_cast_to_kern_ctx(ctx);
- for (i = 0; i < MAX_FILTERS; i++) {
- int key = i; /* needed for verifier :( */
+ k = 0;
- entry = bpf_map_lookup_elem(&filters, &key);
- if (entry == NULL)
- break;
- sample_data = perf_get_sample(kctx, entry);
+ if (use_idx_hash) {
+ struct idx_hash_key key = {
+ .tgid = bpf_get_current_pid_tgid() >> 32,
+ };
+ __u64 eid = kctx->event->id;
+ __u64 *key_id;
+ int *idx;
+
+ /* get primary_event_id */
+ if (kctx->event->parent)
+ eid = kctx->event->parent->id;
- switch (entry->op) {
+ key_id = bpf_map_lookup_elem(&event_hash, &eid);
+ if (key_id == NULL)
+ goto drop;
+
+ key.evt_id = *key_id;
+
+ idx = bpf_map_lookup_elem(&idx_hash, &key);
+ if (idx)
+ k = *idx;
+ else
+ goto drop;
+ }
+
+ entry = bpf_map_lookup_elem(&filters, &k);
+ if (entry == NULL)
+ goto drop;
+
+ for (i = 0; i < MAX_FILTERS; i++) {
+ sample_data = perf_get_sample(kctx, &entry[i]);
+
+ switch (entry[i].op) {
case PBF_OP_EQ:
- CHECK_RESULT(sample_data, ==, entry->value)
+ CHECK_RESULT(sample_data, ==, entry[i].value)
break;
case PBF_OP_NEQ:
- CHECK_RESULT(sample_data, !=, entry->value)
+ CHECK_RESULT(sample_data, !=, entry[i].value)
break;
case PBF_OP_GT:
- CHECK_RESULT(sample_data, >, entry->value)
+ CHECK_RESULT(sample_data, >, entry[i].value)
break;
case PBF_OP_GE:
- CHECK_RESULT(sample_data, >=, entry->value)
+ CHECK_RESULT(sample_data, >=, entry[i].value)
break;
case PBF_OP_LT:
- CHECK_RESULT(sample_data, <, entry->value)
+ CHECK_RESULT(sample_data, <, entry[i].value)
break;
case PBF_OP_LE:
- CHECK_RESULT(sample_data, <=, entry->value)
+ CHECK_RESULT(sample_data, <=, entry[i].value)
break;
case PBF_OP_AND:
- CHECK_RESULT(sample_data, &, entry->value)
+ CHECK_RESULT(sample_data, &, entry[i].value)
break;
case PBF_OP_GROUP_BEGIN:
in_group = 1;
@@ -222,13 +279,19 @@ int perf_sample_filter(void *ctx)
goto drop;
in_group = 0;
break;
+ case PBF_OP_DONE:
+ /* no failures so far, accept it */
+ return 1;
}
}
/* generate sample data */
return 1;
drop:
- __sync_fetch_and_add(&dropped, 1);
+ losts = bpf_map_lookup_elem(&dropped, &k);
+ if (losts != NULL)
+ __sync_fetch_and_add(losts, 1);
+
return 0;
}
diff --git a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
index e9028235d771..4dcad7b682bd 100644
--- a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
+++ b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
@@ -15,6 +15,7 @@
typedef __u8 u8;
typedef __u32 u32;
+typedef __s32 s32;
typedef __u64 u64;
typedef __s64 s64;
@@ -170,10 +171,16 @@ struct perf_sample_data {
u32 cpu;
} cpu_entry;
u64 phys_addr;
+ u64 cgroup;
u64 data_page_size;
u64 code_page_size;
} __attribute__((__aligned__(64))) __attribute__((preserve_access_index));
+struct perf_event {
+ struct perf_event *parent;
+ u64 id;
+} __attribute__((preserve_access_index));
+
struct bpf_perf_event_data_kern {
struct perf_sample_data *data;
struct perf_event *event;
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 87704d713ff6..b80c12c74bbb 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -34,6 +34,7 @@ struct branch_info {
struct addr_map_symbol from;
struct addr_map_symbol to;
struct branch_flags flags;
+ u64 branch_stack_cntr;
char *srcline_from;
char *srcline_to;
};
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 83a1581e8cf1..e763e8d99a43 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -42,7 +42,7 @@
static bool no_buildid_cache;
-int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
+int build_id__mark_dso_hit(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel __maybe_unused,
@@ -67,38 +67,6 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
return 0;
}
-static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused,
- union perf_event *event,
- struct perf_sample *sample
- __maybe_unused,
- struct machine *machine)
-{
- struct thread *thread = machine__findnew_thread(machine,
- event->fork.pid,
- event->fork.tid);
-
- dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
- event->fork.ppid, event->fork.ptid);
-
- if (thread) {
- machine__remove_thread(machine, thread);
- thread__put(thread);
- }
-
- return 0;
-}
-
-struct perf_tool build_id__mark_dso_hit_ops = {
- .sample = build_id__mark_dso_hit,
- .mmap = perf_event__process_mmap,
- .mmap2 = perf_event__process_mmap2,
- .fork = perf_event__process_fork,
- .exit = perf_event__exit_del_thread,
- .attr = perf_event__process_attr,
- .build_id = perf_event__process_build_id,
- .ordered_events = true,
-};
-
int build_id__sprintf(const struct build_id *build_id, char *bf)
{
char *bid = bf;
@@ -310,7 +278,7 @@ static int write_buildid(const char *name, size_t name_len, struct build_id *bid
size_t len;
len = name_len + 1;
- len = PERF_ALIGN(len, NAME_ALIGN);
+ len = PERF_ALIGN(len, sizeof(u64));
memset(&b, 0, sizeof(b));
memcpy(&b.data, bid->data, bid->size);
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index 3fa8bffb07ca..a212497bfdb0 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -16,11 +16,9 @@ struct build_id {
size_t size;
};
-struct nsinfo;
-
-extern struct perf_tool build_id__mark_dso_hit_ops;
struct dso;
struct feat_fd;
+struct nsinfo;
void build_id__init(struct build_id *bid, const u8 *data, size_t size);
int build_id__sprintf(const struct build_id *build_id, char *bf);
@@ -35,11 +33,11 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
bool is_debug, bool is_kallsyms);
-int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
+int build_id__mark_dso_hit(const struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample, struct evsel *evsel,
struct machine *machine);
-int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
+int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample, struct evsel *evsel,
struct machine *machine);
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 6d075648d2cc..0c7564747a14 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1797,3 +1797,38 @@ s64 callchain_avg_cycles(struct callchain_node *cnode)
return cycles;
}
+
+/*
+ * Resolve the callchain of @sample into the thread-local callchain cursor
+ * and invoke @cb on every node of it, passing @data through unchanged.
+ *
+ * Returns -ENOMEM when no cursor is available, the non-zero result of the
+ * resolve step or of the first @cb call that returns non-zero, else 0.
+ */
+int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
+				    struct perf_sample *sample, int max_stack,
+				    bool symbols, callchain_iter_fn cb, void *data)
+{
+	struct callchain_cursor *cursor = get_tls_callchain_cursor();
+	struct callchain_cursor_node *node;
+	int err;
+
+	if (cursor == NULL)
+		return -ENOMEM;
+
+	/* Populate the cursor with the callchain of this sample. */
+	err = __thread__resolve_callchain(thread, cursor, evsel, sample,
+					  /*parent=*/NULL, /*root_al=*/NULL,
+					  max_stack, symbols);
+	if (err != 0)
+		return err;
+
+	/* Done writing the callchain, from here on the cursor is read. */
+	callchain_cursor_commit(cursor);
+
+	/* A non-zero result from the callback ends the walk early. */
+	while ((node = callchain_cursor_current(cursor)) != NULL) {
+		err = cb(node, data);
+		if (err != 0)
+			return err;
+
+		callchain_cursor_advance(cursor);
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index d5c66345ae31..86ed9e4d04f9 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -311,4 +311,10 @@ u64 callchain_total_hits(struct hists *hists);
s64 callchain_avg_cycles(struct callchain_node *cnode);
+typedef int (*callchain_iter_fn)(struct callchain_cursor_node *node, void *data);
+
+int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
+ struct perf_sample *sample, int max_stack,
+ bool symbols, callchain_iter_fn cb, void *data);
+
#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cap.c b/tools/perf/util/cap.c
index c3ba841bbf37..69d9a2bcd40b 100644
--- a/tools/perf/util/cap.c
+++ b/tools/perf/util/cap.c
@@ -3,27 +3,48 @@
* Capability utilities
*/
-#ifdef HAVE_LIBCAP_SUPPORT
-
#include "cap.h"
-#include <stdbool.h>
-#include <sys/capability.h>
-
-bool perf_cap__capable(cap_value_t cap)
-{
- cap_flag_value_t val;
- cap_t caps = cap_get_proc();
-
- if (!caps)
- return false;
+#include "debug.h"
+#include <errno.h>
+#include <string.h>
+#include <linux/capability.h>
+#include <sys/syscall.h>
+#include <unistd.h>
- if (cap_get_flag(caps, cap, CAP_EFFECTIVE, &val) != 0)
- val = CAP_CLEAR;
+#define MAX_LINUX_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3
- if (cap_free(caps) != 0)
- return false;
-
- return val == CAP_SET;
+/*
+ * Test whether capability number @cap is set in the effective capability
+ * mask that the capget syscall reports for header.pid == 0.
+ *
+ * @used_root is always written: true when capget failed and the result is
+ * the geteuid() == 0 fallback, false when the result comes from capget.
+ */
+bool perf_cap__capable(int cap, bool *used_root)
+{
+	struct __user_cap_header_struct header = {
+		.version = _LINUX_CAPABILITY_VERSION_3,
+		.pid = 0,
+	};
+	struct __user_cap_data_struct data[MAX_LINUX_CAPABILITY_U32S] = {};
+	__u32 cap_val;
+
+	*used_root = false;
+	while (syscall(SYS_capget, &header, &data[0]) == -1) {
+		/*
+		 * Retry, first attempt has set the header.version correctly.
+		 * Only a downgrade to version 1 is retried; being version 1
+		 * already implies not being version 3.
+		 */
+		if (errno == EINVAL && header.version == _LINUX_CAPABILITY_VERSION_1)
+			continue;
+
+		pr_debug2("capget syscall failed (%s - %d) fall back on root check\n",
+			  strerror(errno), errno);
+		*used_root = true;
+		return geteuid() == 0;
+	}
+
+	/* Extract the relevant capability bit. */
+	if (cap >= 32) {
+		if (header.version == _LINUX_CAPABILITY_VERSION_3) {
+			cap_val = data[1].effective;
+		} else {
+			/* Capability beyond 32 is requested but only 32 are supported. */
+			return false;
+		}
+	} else {
+		cap_val = data[0].effective;
+	}
+	/*
+	 * Build the mask from an unsigned constant: the bit index can be 31,
+	 * and shifting a signed 1 into the sign bit is undefined behaviour.
+	 */
+	return (cap_val & (1U << (cap & 0x1f))) != 0;
}
-
-#endif /* HAVE_LIBCAP_SUPPORT */
diff --git a/tools/perf/util/cap.h b/tools/perf/util/cap.h
index ae52878c0b2e..0c6a1ff55f07 100644
--- a/tools/perf/util/cap.h
+++ b/tools/perf/util/cap.h
@@ -3,26 +3,6 @@
#define __PERF_CAP_H
#include <stdbool.h>
-#include <linux/capability.h>
-#include <linux/compiler.h>
-
-#ifdef HAVE_LIBCAP_SUPPORT
-
-#include <sys/capability.h>
-
-bool perf_cap__capable(cap_value_t cap);
-
-#else
-
-#include <unistd.h>
-#include <sys/types.h>
-
-static inline bool perf_cap__capable(int cap __maybe_unused)
-{
- return geteuid() == 0;
-}
-
-#endif /* HAVE_LIBCAP_SUPPORT */
/* For older systems */
#ifndef CAP_SYSLOG
@@ -33,4 +13,7 @@ static inline bool perf_cap__capable(int cap __maybe_unused)
#define CAP_PERFMON 38
#endif
+/* Query if a capability is supported, used_root is set if the fallback root check was used. */
+bool perf_cap__capable(int cap, bool *used_root);
+
#endif /* __PERF_CAP_H */
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
index bffbdd216a6a..e51f0a676a22 100644
--- a/tools/perf/util/color.c
+++ b/tools/perf/util/color.c
@@ -93,34 +93,6 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...)
return r;
}
-/*
- * This function splits the buffer by newlines and colors the lines individually.
- *
- * Returns 0 on success.
- */
-int color_fwrite_lines(FILE *fp, const char *color,
- size_t count, const char *buf)
-{
- if (!*color)
- return fwrite(buf, count, 1, fp) != 1;
-
- while (count) {
- char *p = memchr(buf, '\n', count);
-
- if (p != buf && (fputs(color, fp) < 0 ||
- fwrite(buf, p ? (size_t)(p - buf) : count, 1, fp) != 1 ||
- fputs(PERF_COLOR_RESET, fp) < 0))
- return -1;
- if (!p)
- return 0;
- if (fputc('\n', fp) < 0)
- return -1;
- count -= p + 1 - buf;
- buf = p + 1;
- }
- return 0;
-}
-
const char *get_percent_color(double percent)
{
const char *color = PERF_COLOR_NORMAL;
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
index 01f7bed21c9b..9a7248dbe2d7 100644
--- a/tools/perf/util/color.h
+++ b/tools/perf/util/color.h
@@ -2,6 +2,7 @@
#ifndef __PERF_COLOR_H
#define __PERF_COLOR_H
+#include <linux/compiler.h>
#include <stdio.h>
#include <stdarg.h>
@@ -22,6 +23,7 @@
#define MIN_GREEN 0.5
#define MIN_RED 5.0
+#define PERF_COLOR_DELETE_LINE "\033[A\33[2K\r"
/*
* This variable stores the value of color.ui
*/
@@ -37,12 +39,11 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
int color_vsnprintf(char *bf, size_t size, const char *color,
const char *fmt, va_list args);
int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args);
-int color_fprintf(FILE *fp, const char *color, const char *fmt, ...);
-int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...);
-int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf);
+int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) __printf(3, 4);
+int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...) __printf(4, 5);
int value_color_snprintf(char *bf, size_t size, const char *fmt, double value);
-int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...);
-int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...);
+int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...) __printf(3, 4);
+int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...) __printf(3, 4);
int percent_color_fprintf(FILE *fp, const char *fmt, double percent);
const char *get_percent_color(double percent);
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 7a650de0db83..68f9407ca74b 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -912,6 +912,7 @@ void set_buildid_dir(const char *dir)
struct perf_config_scan_data {
const char *name;
const char *fmt;
+ const char *value;
va_list args;
int ret;
};
@@ -939,3 +940,24 @@ int perf_config_scan(const char *name, const char *fmt, ...)
return d.ret;
}
+
+/*
+ * perf_config() callback: when @var is the variable named in the
+ * perf_config_scan_data passed as @data, store @value in it.
+ * Always returns 0.
+ */
+static int perf_config_get_cb(const char *var, const char *value, void *data)
+{
+	struct perf_config_scan_data *scan = data;
+
+	if (strcmp(var, scan->name) == 0)
+		scan->value = value;
+
+	return 0;
+}
+
+/*
+ * Look up the config variable @name by running perf_config() with
+ * perf_config_get_cb. Returns the value the callback stored, or NULL when
+ * it stored none.
+ */
+const char *perf_config_get(const char *name)
+{
+	struct perf_config_scan_data scan = { .name = name, .value = NULL };
+
+	/* The result of perf_config() itself is not looked at, only scan.value. */
+	perf_config(perf_config_get_cb, &scan);
+
+	return scan.value;
+}
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index 2e5e808928a5..9971313d61c1 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -30,6 +30,7 @@ typedef int (*config_fn_t)(const char *, const char *, void *);
int perf_default_config(const char *, const char *, void *);
int perf_config(config_fn_t fn, void *);
int perf_config_scan(const char *name, const char *fmt, ...) __scanf(2, 3);
+const char *perf_config_get(const char *name);
int perf_config_set(struct perf_config_set *set,
config_fn_t fn, void *data);
int perf_config_int(int *dest, const char *, const char *);
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index e917985bbbe6..b85a8837bddc 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -41,7 +41,7 @@ const u32 INSTR_PER_NS = 10;
struct cs_etm_decoder {
void *data;
- void (*packet_printer)(const char *msg);
+ void (*packet_printer)(const char *msg, void *data);
bool suppress_printing;
dcd_tree_handle_t dcd_tree;
cs_etm_mem_cb_type mem_access;
@@ -202,7 +202,7 @@ static void cs_etm_decoder__print_str_cb(const void *p_context,
const struct cs_etm_decoder *decoder = p_context;
if (p_context && str_len && !decoder->suppress_printing)
- decoder->packet_printer(msg);
+ decoder->packet_printer(msg, decoder->data);
}
static int
@@ -388,7 +388,8 @@ cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue)
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue,
+cs_etm_decoder__buffer_packet(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *packet_queue,
const u8 trace_chan_id,
enum cs_etm_sample_type sample_type)
{
@@ -398,7 +399,7 @@ cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue,
if (packet_queue->packet_count >= CS_ETM_PACKET_MAX_BUFFER - 1)
return OCSD_RESP_FATAL_SYS_ERR;
- if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
+ if (cs_etm__get_cpu(etmq, trace_chan_id, &cpu) < 0)
return OCSD_RESP_FATAL_SYS_ERR;
et = packet_queue->tail;
@@ -436,7 +437,7 @@ cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq,
int ret = 0;
struct cs_etm_packet *packet;
- ret = cs_etm_decoder__buffer_packet(packet_queue, trace_chan_id,
+ ret = cs_etm_decoder__buffer_packet(etmq, packet_queue, trace_chan_id,
CS_ETM_RANGE);
if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
return ret;
@@ -496,7 +497,8 @@ out:
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue,
+cs_etm_decoder__buffer_discontinuity(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *queue,
const uint8_t trace_chan_id)
{
/*
@@ -504,18 +506,19 @@ cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue,
* reset time statistics.
*/
cs_etm_decoder__reset_timestamp(queue);
- return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+ return cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
CS_ETM_DISCONTINUITY);
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue,
+cs_etm_decoder__buffer_exception(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *queue,
const ocsd_generic_trace_elem *elem,
const uint8_t trace_chan_id)
{ int ret = 0;
struct cs_etm_packet *packet;
- ret = cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+ ret = cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
CS_ETM_EXCEPTION);
if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
return ret;
@@ -527,10 +530,11 @@ cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue,
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_exception_ret(struct cs_etm_packet_queue *queue,
+cs_etm_decoder__buffer_exception_ret(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *queue,
const uint8_t trace_chan_id)
{
- return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+ return cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
CS_ETM_EXCEPTION_RET);
}
@@ -599,7 +603,7 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
case OCSD_GEN_TRC_ELEM_EO_TRACE:
case OCSD_GEN_TRC_ELEM_NO_SYNC:
case OCSD_GEN_TRC_ELEM_TRACE_ON:
- resp = cs_etm_decoder__buffer_discontinuity(packet_queue,
+ resp = cs_etm_decoder__buffer_discontinuity(etmq, packet_queue,
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
@@ -607,11 +611,11 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_EXCEPTION:
- resp = cs_etm_decoder__buffer_exception(packet_queue, elem,
+ resp = cs_etm_decoder__buffer_exception(etmq, packet_queue, elem,
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
- resp = cs_etm_decoder__buffer_exception_ret(packet_queue,
+ resp = cs_etm_decoder__buffer_exception_ret(etmq, packet_queue,
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_TIMESTAMP:
@@ -680,14 +684,15 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
return -1;
}
- /* if the CPU has no trace ID associated, no decoder needed */
- if (csid == CORESIGHT_TRACE_ID_UNUSED_VAL)
- return 0;
-
if (d_params->operation == CS_ETM_OPERATION_DECODE) {
+ int decode_flags = OCSD_CREATE_FLG_FULL_DECODER;
+#ifdef OCSD_OPFLG_N_UNCOND_DIR_BR_CHK
+ decode_flags |= OCSD_OPFLG_N_UNCOND_DIR_BR_CHK | OCSD_OPFLG_CHK_RANGE_CONTINUE |
+ ETM4_OPFLG_PKTDEC_AA64_OPCODE_CHK;
+#endif
if (ocsd_dt_create_decoder(decoder->dcd_tree,
decoder->decoder_name,
- OCSD_CREATE_FLG_FULL_DECODER,
+ decode_flags,
trace_config, &csid))
return -1;
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 272c2efe78ee..12c782fa6db2 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -60,7 +60,7 @@ struct cs_etm_trace_params {
struct cs_etm_decoder_params {
int operation;
- void (*packet_printer)(const char *msg);
+ void (*packet_printer)(const char *msg, void *data);
cs_etm_mem_cb_type mem_acc_cb;
bool formatted;
bool fsyncs;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 5e9fbcfad7d4..0bf9e5c27b59 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -97,28 +97,43 @@ struct cs_etm_traceid_queue {
struct cs_etm_packet_queue packet_queue;
};
+enum cs_etm_format {
+ UNSET,
+ FORMATTED,
+ UNFORMATTED
+};
+
struct cs_etm_queue {
struct cs_etm_auxtrace *etm;
struct cs_etm_decoder *decoder;
struct auxtrace_buffer *buffer;
unsigned int queue_nr;
u8 pending_timestamp_chan_id;
+ enum cs_etm_format format;
u64 offset;
const unsigned char *buf;
size_t buf_len, buf_used;
/* Conversion between traceID and index in traceid_queues array */
struct intlist *traceid_queues_list;
struct cs_etm_traceid_queue **traceid_queues;
+ /* Conversion between traceID and metadata pointers */
+ struct intlist *traceid_list;
+ /*
+ * Same as traceid_list, except that traceid_list may instead refer to the
+ * list of another queue which has a matching sink ID.
+ */
+ struct intlist *own_traceid_list;
+ u32 sink_id;
};
-/* RB tree for quick conversion between traceID and metadata pointers */
-static struct intlist *traceid_list;
-
static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
pid_t tid);
static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
+static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata);
+static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu);
+static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300
@@ -133,6 +148,7 @@ static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
(queue_nr << 16 | trace_chan_id)
#define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
+#define SINK_UNSET ((u32) -1)
static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
{
@@ -144,12 +160,12 @@ static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
return CS_ETM_PROTO_ETMV3;
}
-static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
+static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic)
{
struct int_node *inode;
u64 *metadata;
- inode = intlist__find(traceid_list, trace_chan_id);
+ inode = intlist__find(etmq->traceid_list, trace_chan_id);
if (!inode)
return -EINVAL;
@@ -158,12 +174,12 @@ static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
return 0;
}
-int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
+int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu)
{
struct int_node *inode;
u64 *metadata;
- inode = intlist__find(traceid_list, trace_chan_id);
+ inode = intlist__find(etmq->traceid_list, trace_chan_id);
if (!inode)
return -EINVAL;
@@ -215,26 +231,171 @@ enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
return etmq->etm->pid_fmt;
}
-static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
+static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
+ u8 trace_chan_id, u64 *cpu_metadata)
{
- struct int_node *inode;
-
/* Get an RB node for this CPU */
- inode = intlist__findnew(traceid_list, trace_chan_id);
+ struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id);
/* Something went wrong, no need to continue */
if (!inode)
return -ENOMEM;
+ /* Disallow re-mapping a different traceID to metadata pair. */
+ if (inode->priv) {
+ u64 *curr_cpu_data = inode->priv;
+ u8 curr_chan_id;
+ int err;
+
+ if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
+ /*
+ * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
+ * are expected (but not supported) in per-thread mode,
+ * rather than signifying an error.
+ */
+ if (etmq->etm->per_thread_decoding)
+ pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
+ else
+ pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
+
+ return -EINVAL;
+ }
+
+ /* check that the mapped ID matches */
+ err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data);
+ if (err)
+ return err;
+
+ if (curr_chan_id != trace_chan_id) {
+ pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
+ return -EINVAL;
+ }
+
+ /* Skip re-adding the same mappings if everything matched */
+ return 0;
+ }
+
+ /* Not one we've seen before, associate the traceID with the metadata pointer */
+ inode->priv = cpu_metadata;
+
+ return 0;
+}
+
+/*
+ * Return the cs_etm_queue stored as priv of the auxtrace queue to use for
+ * @cpu: entry 0 when decoding per-thread, entry @cpu otherwise.
+ */
+static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu)
+{
+	int idx = etm->per_thread_decoding ? 0 : cpu;
+
+	return etm->queues.queue_array[idx].priv;
+}
+
+static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id,
+ u64 *cpu_metadata)
+{
+ struct cs_etm_queue *etmq;
+
/*
- * The node for that CPU should not be taken.
- * Back out if that's the case.
+ * If the queue is unformatted then only save one mapping in the
+ * queue associated with that CPU so only one decoder is made.
*/
- if (inode->priv)
+ etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]);
+ if (etmq->format == UNFORMATTED)
+ return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
+ cpu_metadata);
+
+ /*
+ * Otherwise, version 0 trace IDs are global so save them into every
+ * queue.
+ */
+ for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
+ int ret;
+
+ etmq = etm->queues.queue_array[i].priv;
+ ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id,
+ cpu_metadata);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
+ u64 hw_id)
+{
+ int err;
+ u64 *cpu_data;
+ u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
+
+ cpu_data = get_cpu_data(etm, cpu);
+ if (cpu_data == NULL)
return -EINVAL;
- /* All good, associate the traceID with the metadata pointer */
- inode->priv = cpu_metadata;
+ err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data);
+ if (err)
+ return err;
+
+ /*
+ * if we are picking up the association from the packet, need to plug
+ * the correct trace ID into the metadata for setting up decoders later.
+ */
+ return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
+}
+
+/*
+ * Process a HW_ID value that carries a sink ID in addition to the trace ID:
+ * record the sink on the queue used for @cpu, share the trace ID list with
+ * any other queue that has the same sink ID, then map the trace ID to the
+ * metadata of @cpu and store the trace ID in that metadata.
+ *
+ * Returns 0 on success, -EINVAL on a sink mismatch, on an attempt to link an
+ * already populated list or when @cpu has no metadata, or the error of the
+ * insert/set helpers.
+ */
+static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu,
+					 u64 hw_id)
+{
+	struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
+	int ret;
+	u64 *cpu_data;
+	u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
+	u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
+
+	/*
+	 * Check sink id hasn't changed in per-cpu mode. In per-thread mode,
+	 * let it pass for now until an actual overlapping trace ID is hit. In
+	 * most cases IDs won't overlap even if the sink changes.
+	 */
+	if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET &&
+	    etmq->sink_id != sink_id) {
+		pr_err("CS_ETM: mismatch between sink IDs\n");
+		return -EINVAL;
+	}
+
+	etmq->sink_id = sink_id;
+
+	/* Find which other queues use this sink and link their ID maps */
+	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
+		struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv;
+
+		/* Different sinks, skip */
+		if (other_etmq->sink_id != etmq->sink_id)
+			continue;
+
+		/* Already linked, skip */
+		if (other_etmq->traceid_list == etmq->traceid_list)
+			continue;
+
+		/* At the point of first linking, this one should be empty */
+		if (!intlist__empty(etmq->traceid_list)) {
+			pr_err("CS_ETM: Can't link populated trace ID lists\n");
+			return -EINVAL;
+		}
+
+		etmq->own_traceid_list = NULL;
+		intlist__delete(etmq->traceid_list);
+		etmq->traceid_list = other_etmq->traceid_list;
+		break;
+	}
+
+	cpu_data = get_cpu_data(etm, cpu);
+	/*
+	 * Without metadata for this CPU there is nothing to map the trace ID
+	 * to; fail here instead of handing a NULL pointer to the helpers.
+	 */
+	if (cpu_data == NULL)
+		return -EINVAL;
+
+	ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
+	if (ret)
+		return ret;
+
+	ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
+	if (ret)
+		return ret;
return 0;
}
@@ -261,7 +422,6 @@ static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
/*
* update metadata trace ID from the value found in the AUX_HW_INFO packet.
- * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
*/
static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
{
@@ -322,20 +482,16 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
{
struct cs_etm_auxtrace *etm;
struct perf_sample sample;
- struct int_node *inode;
struct evsel *evsel;
- u64 *cpu_data;
u64 hw_id;
int cpu, version, err;
- u8 trace_chan_id, curr_chan_id;
/* extract and parse the HW ID */
hw_id = event->aux_output_hw_id.hw_id;
- version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
- trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
+ version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);
/* check that we can handle this version */
- if (version > CS_AUX_HW_ID_CURR_VERSION) {
+ if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
version);
return -EINVAL;
@@ -360,43 +516,10 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
return -EINVAL;
}
- /* See if the ID is mapped to a CPU, and it matches the current CPU */
- inode = intlist__find(traceid_list, trace_chan_id);
- if (inode) {
- cpu_data = inode->priv;
- if ((int)cpu_data[CS_ETM_CPU] != cpu) {
- pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
- return -EINVAL;
- }
+ if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0)
+ return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
- /* check that the mapped ID matches */
- err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
- if (err)
- return err;
- if (curr_chan_id != trace_chan_id) {
- pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
- return -EINVAL;
- }
-
- /* mapped and matched - return OK */
- return 0;
- }
-
- cpu_data = get_cpu_data(etm, cpu);
- if (cpu_data == NULL)
- return err;
-
- /* not one we've seen before - lets map it */
- err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
- if (err)
- return err;
-
- /*
- * if we are picking up the association from the packet, need to plug
- * the correct trace ID into the metadata for setting up decoders later.
- */
- err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
- return err;
+ return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
}
void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
@@ -639,94 +762,79 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
}
}
-static void cs_etm__packet_dump(const char *pkt_string)
+static void cs_etm__packet_dump(const char *pkt_string, void *data)
{
const char *color = PERF_COLOR_BLUE;
int len = strlen(pkt_string);
+ struct cs_etm_queue *etmq = data;
+ char queue_nr[64];
+
+ /* queue_nr is an unsigned int, so it is formatted with %u. */
+ if (verbose)
+ snprintf(queue_nr, sizeof(queue_nr), "Qnr:%u; ", etmq->queue_nr);
+ else
+ queue_nr[0] = '\0';
if (len && (pkt_string[len-1] == '\n'))
- color_fprintf(stdout, color, " %s", pkt_string);
+ color_fprintf(stdout, color, " %s%s", queue_nr, pkt_string);
else
- color_fprintf(stdout, color, " %s\n", pkt_string);
+ color_fprintf(stdout, color, " %s%s\n", queue_nr, pkt_string);
fflush(stdout);
}
static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
- struct cs_etm_auxtrace *etm, int t_idx,
- int m_idx, u32 etmidr)
+ u64 *metadata, u32 etmidr)
{
- u64 **metadata = etm->metadata;
-
- t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
- t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR];
- t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR];
+ t_params->protocol = cs_etm__get_v7_protocol_version(etmidr);
+ t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR];
+ t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR];
}
static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
- struct cs_etm_auxtrace *etm, int t_idx,
- int m_idx)
+ u64 *metadata)
{
- u64 **metadata = etm->metadata;
-
- t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i;
- t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0];
- t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1];
- t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2];
- t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8];
- t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR];
- t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR];
+ t_params->protocol = CS_ETM_PROTO_ETMV4i;
+ t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0];
+ t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1];
+ t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2];
+ t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8];
+ t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR];
+ t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR];
}
static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
- struct cs_etm_auxtrace *etm, int t_idx,
- int m_idx)
+ u64 *metadata)
{
- u64 **metadata = etm->metadata;
-
- t_params[t_idx].protocol = CS_ETM_PROTO_ETE;
- t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0];
- t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1];
- t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2];
- t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8];
- t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR];
- t_params[t_idx].ete.reg_traceidr = metadata[m_idx][CS_ETE_TRCTRACEIDR];
- t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH];
+ t_params->protocol = CS_ETM_PROTO_ETE;
+ t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0];
+ t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1];
+ t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2];
+ t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8];
+ t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR];
+ t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR];
+ t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH];
}
static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
- struct cs_etm_auxtrace *etm,
- bool formatted,
- int sample_cpu,
- int decoders)
-{
- int t_idx, m_idx;
- u32 etmidr;
- u64 architecture;
-
- for (t_idx = 0; t_idx < decoders; t_idx++) {
- if (formatted)
- m_idx = t_idx;
- else {
- m_idx = get_cpu_data_idx(etm, sample_cpu);
- if (m_idx == -1) {
- pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n");
- m_idx = 0;
- }
- }
+ struct cs_etm_queue *etmq)
+{
+ struct int_node *inode;
- architecture = etm->metadata[m_idx][CS_ETM_MAGIC];
+ intlist__for_each_entry(inode, etmq->traceid_list) {
+ u64 *metadata = inode->priv;
+ u64 architecture = metadata[CS_ETM_MAGIC];
+ u32 etmidr;
switch (architecture) {
case __perf_cs_etmv3_magic:
- etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR];
- cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr);
+ etmidr = metadata[CS_ETM_ETMIDR];
+ cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr);
break;
case __perf_cs_etmv4_magic:
- cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx);
+ cs_etm__set_trace_param_etmv4(t_params++, metadata);
break;
case __perf_cs_ete_magic:
- cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx);
+ cs_etm__set_trace_param_ete(t_params++, metadata);
break;
default:
return -EINVAL;
@@ -738,8 +846,7 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
struct cs_etm_queue *etmq,
- enum cs_etm_decoder_operation mode,
- bool formatted)
+ enum cs_etm_decoder_operation mode)
{
int ret = -EINVAL;
@@ -749,7 +856,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
d_params->packet_printer = cs_etm__packet_dump;
d_params->operation = mode;
d_params->data = etmq;
- d_params->formatted = formatted;
+ d_params->formatted = etmq->format == FORMATTED;
d_params->fsyncs = false;
d_params->hsyncs = false;
d_params->frame_aligned = true;
@@ -788,7 +895,7 @@ static void cs_etm__dump_event(struct cs_etm_queue *etmq,
}
static int cs_etm__flush_events(struct perf_session *session,
- struct perf_tool *tool)
+ const struct perf_tool *tool)
{
struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
struct cs_etm_auxtrace,
@@ -850,6 +957,7 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
static void cs_etm__free_queue(void *priv)
{
+ struct int_node *inode, *tmp;
struct cs_etm_queue *etmq = priv;
if (!etmq)
@@ -857,6 +965,16 @@ static void cs_etm__free_queue(void *priv)
cs_etm_decoder__free(etmq->decoder);
cs_etm__free_traceid_queues(etmq);
+
+ if (etmq->own_traceid_list) {
+ /* First remove all traceID/metadata nodes for the RB tree */
+ intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list)
+ intlist__remove(etmq->own_traceid_list, inode);
+
+ /* Then the RB tree itself */
+ intlist__delete(etmq->own_traceid_list);
+ }
+
free(etmq);
}
@@ -879,19 +997,12 @@ static void cs_etm__free_events(struct perf_session *session)
static void cs_etm__free(struct perf_session *session)
{
int i;
- struct int_node *inode, *tmp;
struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
struct cs_etm_auxtrace,
auxtrace);
cs_etm__free_events(session);
session->auxtrace = NULL;
- /* First remove all traceID/metadata nodes for the RB tree */
- intlist__for_each_entry_safe(inode, tmp, traceid_list)
- intlist__remove(traceid_list, inode);
- /* Then the RB tree itself */
- intlist__delete(traceid_list);
-
for (i = 0; i < aux->num_cpu; i++)
zfree(&aux->metadata[i]);
@@ -1041,19 +1152,9 @@ out:
return ret;
}
-static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
- bool formatted, int sample_cpu)
+static struct cs_etm_queue *cs_etm__alloc_queue(void)
{
- struct cs_etm_decoder_params d_params;
- struct cs_etm_trace_params *t_params = NULL;
- struct cs_etm_queue *etmq;
- /*
- * Each queue can only contain data from one CPU when unformatted, so only one decoder is
- * needed.
- */
- int decoders = formatted ? etm->num_cpu : 1;
-
- etmq = zalloc(sizeof(*etmq));
+ struct cs_etm_queue *etmq = zalloc(sizeof(*etmq));
if (!etmq)
return NULL;
@@ -1061,42 +1162,17 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
if (!etmq->traceid_queues_list)
goto out_free;
- /* Use metadata to fill in trace parameters for trace decoder */
- t_params = zalloc(sizeof(*t_params) * decoders);
-
- if (!t_params)
- goto out_free;
-
- if (cs_etm__init_trace_params(t_params, etm, formatted, sample_cpu, decoders))
- goto out_free;
-
- /* Set decoder parameters to decode trace packets */
- if (cs_etm__init_decoder_params(&d_params, etmq,
- dump_trace ? CS_ETM_OPERATION_PRINT :
- CS_ETM_OPERATION_DECODE,
- formatted))
- goto out_free;
-
- etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
- t_params);
-
- if (!etmq->decoder)
- goto out_free;
-
/*
- * Register a function to handle all memory accesses required by
- * the trace decoder library.
+ * Create an RB tree for traceID-metadata tuple. Since the conversion
+ * has to be made for each packet that gets decoded, optimizing access
+ * in anything other than a sequential array is worth doing.
*/
- if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
- 0x0L, ((u64) -1L),
- cs_etm__mem_access))
- goto out_free_decoder;
+ etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL);
+ if (!etmq->traceid_list)
+ goto out_free;
- zfree(&t_params);
return etmq;
-out_free_decoder:
- cs_etm_decoder__free(etmq->decoder);
out_free:
intlist__delete(etmq->traceid_queues_list);
free(etmq);
@@ -1106,16 +1182,14 @@ out_free:
static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
struct auxtrace_queue *queue,
- unsigned int queue_nr,
- bool formatted,
- int sample_cpu)
+ unsigned int queue_nr)
{
struct cs_etm_queue *etmq = queue->priv;
- if (list_empty(&queue->head) || etmq)
+ if (etmq)
return 0;
- etmq = cs_etm__alloc_queue(etm, formatted, sample_cpu);
+ etmq = cs_etm__alloc_queue();
if (!etmq)
return -ENOMEM;
@@ -1123,7 +1197,9 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
queue->priv = etmq;
etmq->etm = etm;
etmq->queue_nr = queue_nr;
+ queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
etmq->offset = 0;
+ etmq->sink_id = SINK_UNSET;
return 0;
}
@@ -1267,8 +1343,12 @@ static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
{
- /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
- if (packet->sample_type == CS_ETM_DISCONTINUITY)
+ /*
+ * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't
+ * appear in samples.
+ */
+ if (packet->sample_type == CS_ETM_DISCONTINUITY ||
+ packet->sample_type == CS_ETM_EXCEPTION)
return 0;
return packet->start_addr;
@@ -1595,35 +1675,6 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
return ret;
}
-struct cs_etm_synth {
- struct perf_tool dummy_tool;
- struct perf_session *session;
-};
-
-static int cs_etm__event_synth(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample __maybe_unused,
- struct machine *machine __maybe_unused)
-{
- struct cs_etm_synth *cs_etm_synth =
- container_of(tool, struct cs_etm_synth, dummy_tool);
-
- return perf_session__deliver_synth_event(cs_etm_synth->session,
- event, NULL);
-}
-
-static int cs_etm__synth_event(struct perf_session *session,
- struct perf_event_attr *attr, u64 id)
-{
- struct cs_etm_synth cs_etm_synth;
-
- memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
- cs_etm_synth.session = session;
-
- return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
- &id, cs_etm__event_synth);
-}
-
static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
struct perf_session *session)
{
@@ -1675,7 +1726,7 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
attr.sample_period = 1;
attr.sample_type |= PERF_SAMPLE_ADDR;
- err = cs_etm__synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
etm->branches_sample_type = attr.sample_type;
@@ -1698,7 +1749,7 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
attr.config = PERF_COUNT_HW_INSTRUCTIONS;
attr.sample_period = etm->synth_opts.period;
etm->instructions_sample_period = attr.sample_period;
- err = cs_etm__synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
etm->instructions_sample_type = attr.sample_type;
@@ -2252,7 +2303,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
PERF_IP_FLAG_TRACE_END;
break;
case CS_ETM_EXCEPTION:
- ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
+ ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic);
if (ret)
return ret;
@@ -2439,12 +2490,6 @@ static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
/* Ignore return value */
cs_etm__process_traceid_queue(etmq, tidq);
-
- /*
- * Generate an instruction sample with the remaining
- * branchstack entries.
- */
- cs_etm__flush(etmq, tidq);
}
}
@@ -2587,7 +2632,7 @@ static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
while (1) {
if (!etm->heap.heap_cnt)
- goto out;
+ break;
/* Take the entry at the top of the min heap */
cs_queue_nr = etm->heap.heap_array[0].queue_nr;
@@ -2670,6 +2715,23 @@ refetch:
ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
}
+ for (i = 0; i < etm->queues.nr_queues; i++) {
+ struct int_node *inode;
+
+ etmq = etm->queues.queue_array[i].priv;
+ if (!etmq)
+ continue;
+
+ intlist__for_each_entry(inode, etmq->traceid_queues_list) {
+ int idx = (int)(intptr_t)inode->priv;
+
+ /* Flush any remaining branch stack entries */
+ tidq = etmq->traceid_queues[idx];
+ ret = cs_etm__end_block(etmq, tidq);
+ if (ret)
+ return ret;
+ }
+ }
out:
return ret;
}
@@ -2740,7 +2802,7 @@ static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
static int cs_etm__process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
- struct perf_tool *tool)
+ const struct perf_tool *tool)
{
struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
struct cs_etm_auxtrace,
@@ -2810,7 +2872,7 @@ static void dump_queued_data(struct cs_etm_auxtrace *etm,
static int cs_etm__process_auxtrace_event(struct perf_session *session,
union perf_event *event,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
struct cs_etm_auxtrace,
@@ -2836,17 +2898,6 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
if (err)
return err;
- /*
- * Knowing if the trace is formatted or not requires a lookup of
- * the aux record so only works in non-piped mode where data is
- * queued in cs_etm__queue_aux_records(). Always assume
- * formatted in piped mode (true).
- */
- err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
- idx, true, -1);
- if (err)
- return err;
-
if (dump_trace)
if (auxtrace_buffer__get_data(buffer, fd)) {
cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
@@ -2963,8 +3014,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
struct perf_record_auxtrace *auxtrace_event;
union perf_event auxtrace_fragment;
__u64 aux_offset, aux_size;
- __u32 idx;
- bool formatted;
+ enum cs_etm_format format;
struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
struct cs_etm_auxtrace,
@@ -3030,6 +3080,8 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
if (aux_offset >= auxtrace_event->offset &&
aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
+ struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv;
+
/*
* If this AUX event was inside this buffer somewhere, create a new auxtrace event
* based on the sizes of the aux event, and queue that fragment.
@@ -3046,10 +3098,14 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
if (err)
return err;
- idx = auxtrace_event->idx;
- formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
- return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
- idx, formatted, sample->cpu);
+ format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ?
+ UNFORMATTED : FORMATTED;
+ if (etmq->format != UNSET && format != etmq->format) {
+ pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
+ return -EINVAL;
+ }
+ etmq->format = format;
+ return 0;
}
/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
@@ -3175,7 +3231,8 @@ static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
}
/* map trace ids to correct metadata block, from information in metadata */
-static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
+static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu,
+ u64 **metadata)
{
u64 cs_etm_magic;
u8 trace_chan_id;
@@ -3197,7 +3254,7 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
/* unknown magic number */
return -EINVAL;
}
- err = cs_etm__map_trace_id(trace_chan_id, metadata[i]);
+ err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]);
if (err)
return err;
}
@@ -3205,30 +3262,85 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
}
/*
- * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
- * unused value to reduce the number of unneeded decoders created.
+ * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
+ * (formatted or not) packets to create the decoders.
*/
-static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
+static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
{
- u64 cs_etm_magic;
- int i;
+ struct cs_etm_decoder_params d_params;
+ struct cs_etm_trace_params *t_params;
+ int decoders = intlist__nr_entries(etmq->traceid_list);
- for (i = 0; i < num_cpu; i++) {
- cs_etm_magic = metadata[i][CS_ETM_MAGIC];
- switch (cs_etm_magic) {
- case __perf_cs_etmv3_magic:
- if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
- metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
- break;
- case __perf_cs_etmv4_magic:
- case __perf_cs_ete_magic:
- if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
- metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
- break;
- default:
- /* unknown magic number */
- return -EINVAL;
- }
+ if (decoders == 0)
+ return 0;
+
+ /*
+ * Each queue can only contain data from one CPU when unformatted, so only one decoder is
+ * needed.
+ */
+ if (etmq->format == UNFORMATTED)
+ assert(decoders == 1);
+
+ /* Use metadata to fill in trace parameters for trace decoder */
+ t_params = zalloc(sizeof(*t_params) * decoders);
+
+ if (!t_params)
+ goto out_free;
+
+ if (cs_etm__init_trace_params(t_params, etmq))
+ goto out_free;
+
+ /* Set decoder parameters to decode trace packets */
+ if (cs_etm__init_decoder_params(&d_params, etmq,
+ dump_trace ? CS_ETM_OPERATION_PRINT :
+ CS_ETM_OPERATION_DECODE))
+ goto out_free;
+
+ etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
+ t_params);
+
+ if (!etmq->decoder)
+ goto out_free;
+
+ /*
+ * Register a function to handle all memory accesses required by
+ * the trace decoder library.
+ */
+ if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
+ 0x0L, ((u64) -1L),
+ cs_etm__mem_access))
+ goto out_free_decoder;
+
+ zfree(&t_params);
+ return 0;
+
+out_free_decoder:
+ cs_etm_decoder__free(etmq->decoder);
+out_free:
+ zfree(&t_params);
+ return -EINVAL;
+}
+
+static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
+{
+ struct auxtrace_queues *queues = &etm->queues;
+
+ for (unsigned int i = 0; i < queues->nr_queues; i++) {
+ bool empty = list_empty(&queues->queue_array[i].head);
+ struct cs_etm_queue *etmq = queues->queue_array[i].priv;
+ int ret;
+
+ /*
+ * Don't create decoders for empty queues, mainly because
+ * etmq->format is unknown for empty queues.
+ */
+ assert(empty || etmq->format != UNSET);
+ if (empty)
+ continue;
+
+ ret = cs_etm__create_queue_decoders(etmq);
+ if (ret)
+ return ret;
}
return 0;
}
@@ -3242,30 +3354,19 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
int event_header_size = sizeof(struct perf_event_header);
int total_size = auxtrace_info->header.size;
int priv_size = 0;
- int num_cpu;
+ int num_cpu, max_cpu = 0;
int err = 0;
int aux_hw_id_found;
- int i, j;
+ int i;
u64 *ptr = NULL;
u64 **metadata = NULL;
- /*
- * Create an RB tree for traceID-metadata tuple. Since the conversion
- * has to be made for each packet that gets decoded, optimizing access
- * in anything other than a sequential array is worth doing.
- */
- traceid_list = intlist__new(NULL);
- if (!traceid_list)
- return -ENOMEM;
-
/* First the global part */
ptr = (u64 *) auxtrace_info->priv;
num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
metadata = zalloc(sizeof(*metadata) * num_cpu);
- if (!metadata) {
- err = -ENOMEM;
- goto err_free_traceid_list;
- }
+ if (!metadata)
+ return -ENOMEM;
/* Start parsing after the common part of the header */
i = CS_HEADER_VERSION_MAX;
@@ -3276,7 +3377,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
* required by the trace decoder to properly decode the trace due
* to its highly compressed nature.
*/
- for (j = 0; j < num_cpu; j++) {
+ for (int j = 0; j < num_cpu; j++) {
if (ptr[i] == __perf_cs_etmv3_magic) {
metadata[j] =
cs_etm__create_meta_blk(ptr, &i,
@@ -3300,6 +3401,9 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
err = -ENOMEM;
goto err_free_metadata;
}
+
+ if ((int) metadata[j][CS_ETM_CPU] > max_cpu)
+ max_cpu = metadata[j][CS_ETM_CPU];
}
/*
@@ -3329,10 +3433,16 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
*/
etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
- err = auxtrace_queues__init(&etm->queues);
+ err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1);
if (err)
goto err_free_etm;
+ for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) {
+ err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j);
+ if (err)
+ goto err_free_queues;
+ }
+
if (session->itrace_synth_opts->set) {
etm->synth_opts = *session->itrace_synth_opts;
} else {
@@ -3396,12 +3506,16 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
if (err)
goto err_free_queues;
+ err = cs_etm__queue_aux_records(session);
+ if (err)
+ goto err_free_queues;
+
/*
* Map Trace ID values to CPU metadata.
*
- * Trace metadata will always contain Trace ID values from the legacy algorithm. If the
- * files has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
- * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
+ * Trace metadata will always contain Trace ID values from the legacy algorithm
+ * in case it's read by a version of Perf that doesn't know about HW_ID packets
+ * or the kernel doesn't emit them.
*
* The updated kernel drivers that use AUX_HW_ID to send Trace IDs will attempt to use
* the same IDs as the old algorithm as far as is possible, unless there are clashes
@@ -3410,15 +3524,14 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
*
* For a perf able to interpret AUX_HW_ID packets we first check for the presence of
* those packets. If they are there then the values will be mapped and plugged into
- * the metadata. We then set any remaining metadata values with the used flag to a
- * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
+ * the metadata and decoders are only created for each mapping received.
*
* If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
- * then we map Trace ID values to CPU directly from the metadata - clearing any unused
- * flags if present.
+ * then we map Trace ID values to CPU directly from the metadata and create decoders
+ * for all mappings.
*/
- /* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
+ /* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
aux_hw_id_found = 0;
err = perf_session__peek_events(session, session->header.data_offset,
session->header.data_size,
@@ -3426,17 +3539,14 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
if (err)
goto err_free_queues;
- /* if HW ID found then clear any unused metadata ID values */
- if (aux_hw_id_found)
- err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
- /* otherwise, this is a file with metadata values only, map from metadata */
- else
- err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
-
- if (err)
- goto err_free_queues;
+ /* if no HW ID found this is a file with metadata values only, map from metadata */
+ if (!aux_hw_id_found) {
+ err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
+ if (err)
+ goto err_free_queues;
+ }
- err = cs_etm__queue_aux_records(session);
+ err = cs_etm__create_decoders(etm);
if (err)
goto err_free_queues;
@@ -3450,10 +3560,8 @@ err_free_etm:
zfree(&etm);
err_free_metadata:
/* No need to check @metadata[j], free(NULL) is supported */
- for (j = 0; j < num_cpu; j++)
+ for (int j = 0; j < num_cpu; j++)
zfree(&metadata[j]);
zfree(&metadata);
-err_free_traceid_list:
- intlist__delete(traceid_list);
return err;
}
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 4696267a32f0..a8caeea720aa 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -230,16 +230,6 @@ struct cs_etm_packet_queue {
/* CoreSight trace ID is currently the bottom 7 bits of the value */
#define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0)
-/*
- * perf record will set the legacy meta data values as unused initially.
- * This allows perf report to manage the decoders created when dynamic
- * allocation in operation.
- */
-#define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31)
-
-/* Value to set for unused trace ID values */
-#define CORESIGHT_TRACE_ID_UNUSED_VAL 0x7F
-
int cs_etm__process_auxtrace_info(union perf_event *event,
struct perf_session *session);
void cs_etm_get_default_config(const struct perf_pmu *pmu, struct perf_event_attr *attr);
@@ -252,7 +242,7 @@ enum cs_etm_pid_fmt {
#ifdef HAVE_CSTRACE_SUPPORT
#include <opencsd/ocsd_if_types.h>
-int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
+int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu);
enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq);
int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
u8 trace_chan_id, ocsd_ex_level el);
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 2b732bccabad..f0599c61fab4 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -36,7 +36,7 @@
#include "util/sample.h"
#ifdef HAVE_LIBTRACEEVENT
-#include <traceevent/event-parse.h>
+#include <event-parse.h>
#endif
#define pr_N(n, fmt, ...) \
@@ -792,7 +792,7 @@ static bool is_flush_needed(struct ctf_stream *cs)
return cs->count >= STREAM_FLUSH_COUNT;
}
-static int process_sample_event(struct perf_tool *tool,
+static int process_sample_event(const struct perf_tool *tool,
union perf_event *_event,
struct perf_sample *sample,
struct evsel *evsel,
@@ -871,7 +871,7 @@ do { \
} while(0)
#define __FUNC_PROCESS_NON_SAMPLE(_name, body) \
-static int process_##_name##_event(struct perf_tool *tool, \
+static int process_##_name##_event(const struct perf_tool *tool, \
union perf_event *_event, \
struct perf_sample *sample, \
struct machine *machine) \
@@ -1607,25 +1607,23 @@ int bt_convert__perf2ctf(const char *input, const char *path,
.mode = PERF_DATA_MODE_READ,
.force = opts->force,
};
- struct convert c = {
- .tool = {
- .sample = process_sample_event,
- .mmap = perf_event__process_mmap,
- .mmap2 = perf_event__process_mmap2,
- .comm = perf_event__process_comm,
- .exit = perf_event__process_exit,
- .fork = perf_event__process_fork,
- .lost = perf_event__process_lost,
- .tracing_data = perf_event__process_tracing_data,
- .build_id = perf_event__process_build_id,
- .namespaces = perf_event__process_namespaces,
- .ordered_events = true,
- .ordering_requires_timestamps = true,
- },
- };
+ struct convert c = {};
struct ctf_writer *cw = &c.writer;
int err;
+ perf_tool__init(&c.tool, /*ordered_events=*/true);
+ c.tool.sample = process_sample_event;
+ c.tool.mmap = perf_event__process_mmap;
+ c.tool.mmap2 = perf_event__process_mmap2;
+ c.tool.comm = perf_event__process_comm;
+ c.tool.exit = perf_event__process_exit;
+ c.tool.fork = perf_event__process_fork;
+ c.tool.lost = perf_event__process_lost;
+ c.tool.tracing_data = perf_event__process_tracing_data;
+ c.tool.build_id = perf_event__process_build_id;
+ c.tool.namespaces = perf_event__process_namespaces;
+ c.tool.ordering_requires_timestamps = true;
+
if (opts->all) {
c.tool.comm = process_comm_event;
c.tool.exit = process_exit_event;
diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c
index 3cf64f5b23ee..8304cd2d4a9c 100644
--- a/tools/perf/util/data-convert-json.c
+++ b/tools/perf/util/data-convert-json.c
@@ -28,7 +28,7 @@
#include "util/tool.h"
#ifdef HAVE_LIBTRACEEVENT
-#include <traceevent/event-parse.h>
+#include <event-parse.h>
#endif
struct convert_json {
@@ -118,7 +118,7 @@ static void output_json_key_format(FILE *out, bool comma, int depth,
va_end(args);
}
-static void output_sample_callchain_entry(struct perf_tool *tool,
+static void output_sample_callchain_entry(const struct perf_tool *tool,
u64 ip, struct addr_location *al)
{
struct convert_json *c = container_of(tool, struct convert_json, tool);
@@ -146,7 +146,7 @@ static void output_sample_callchain_entry(struct perf_tool *tool,
output_json_format(out, false, 4, "}");
}
-static int process_sample_event(struct perf_tool *tool,
+static int process_sample_event(const struct perf_tool *tool,
union perf_event *event __maybe_unused,
struct perf_sample *sample,
struct evsel *evsel __maybe_unused,
@@ -316,39 +316,36 @@ int bt_convert__perf2json(const char *input_name, const char *output_name,
struct perf_session *session;
int fd;
int ret = -1;
-
struct convert_json c = {
- .tool = {
- .sample = process_sample_event,
- .mmap = perf_event__process_mmap,
- .mmap2 = perf_event__process_mmap2,
- .comm = perf_event__process_comm,
- .namespaces = perf_event__process_namespaces,
- .cgroup = perf_event__process_cgroup,
- .exit = perf_event__process_exit,
- .fork = perf_event__process_fork,
- .lost = perf_event__process_lost,
-#ifdef HAVE_LIBTRACEEVENT
- .tracing_data = perf_event__process_tracing_data,
-#endif
- .build_id = perf_event__process_build_id,
- .id_index = perf_event__process_id_index,
- .auxtrace_info = perf_event__process_auxtrace_info,
- .auxtrace = perf_event__process_auxtrace,
- .event_update = perf_event__process_event_update,
- .ordered_events = true,
- .ordering_requires_timestamps = true,
- },
.first = true,
.events_count = 0,
};
-
struct perf_data data = {
.mode = PERF_DATA_MODE_READ,
.path = input_name,
.force = opts->force,
};
+ perf_tool__init(&c.tool, /*ordered_events=*/true);
+ c.tool.sample = process_sample_event;
+ c.tool.mmap = perf_event__process_mmap;
+ c.tool.mmap2 = perf_event__process_mmap2;
+ c.tool.comm = perf_event__process_comm;
+ c.tool.namespaces = perf_event__process_namespaces;
+ c.tool.cgroup = perf_event__process_cgroup;
+ c.tool.exit = perf_event__process_exit;
+ c.tool.fork = perf_event__process_fork;
+ c.tool.lost = perf_event__process_lost;
+#ifdef HAVE_LIBTRACEEVENT
+ c.tool.tracing_data = perf_event__process_tracing_data;
+#endif
+ c.tool.build_id = perf_event__process_build_id;
+ c.tool.id_index = perf_event__process_id_index;
+ c.tool.auxtrace_info = perf_event__process_auxtrace_info;
+ c.tool.auxtrace = perf_event__process_auxtrace;
+ c.tool.event_update = perf_event__process_event_update;
+ c.tool.ordering_requires_timestamps = true;
+
if (opts->all) {
pr_err("--all is currently unsupported for JSON output.\n");
goto err;
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index 08c4bfbd817f..98661ede2a73 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -204,7 +204,12 @@ static bool check_pipe(struct perf_data *data)
data->file.fd = fd;
data->use_stdio = false;
}
- } else {
+
+ /*
+ * When is_pipe and data->file.fd is given, use given fd
+ * instead of STDIN_FILENO or STDOUT_FILENO
+ */
+ } else if (data->file.fd <= 0) {
data->file.fd = fd;
}
}
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index d633d15329fa..995f6bb05b5f 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -27,7 +27,7 @@
#include <linux/ctype.h>
#ifdef HAVE_LIBTRACEEVENT
-#include <traceevent/event-parse.h>
+#include <event-parse.h>
#else
#define LIBTRACEEVENT_VERSION 0
#endif
diff --git a/tools/perf/util/debuginfo.h b/tools/perf/util/debuginfo.h
index 4d65b8c605fc..a52d69932815 100644
--- a/tools/perf/util/debuginfo.h
+++ b/tools/perf/util/debuginfo.h
@@ -5,7 +5,7 @@
#include <errno.h>
#include <linux/compiler.h>
-#ifdef HAVE_DWARF_SUPPORT
+#ifdef HAVE_LIBDW_SUPPORT
#include "dwarf-aux.h"
@@ -25,7 +25,7 @@ void debuginfo__delete(struct debuginfo *dbg);
int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs,
bool adjust_offset);
-#else /* HAVE_DWARF_SUPPORT */
+#else /* HAVE_LIBDW_SUPPORT */
/* dummy debug information structure */
struct debuginfo {
@@ -40,6 +40,8 @@ static inline void debuginfo__delete(struct debuginfo *dbg __maybe_unused)
{
}
+typedef void Dwarf_Addr;
+
static inline int debuginfo__get_text_offset(struct debuginfo *dbg __maybe_unused,
Dwarf_Addr *offs __maybe_unused,
bool adjust_offset __maybe_unused)
@@ -47,7 +49,7 @@ static inline int debuginfo__get_text_offset(struct debuginfo *dbg __maybe_unuse
return -EINVAL;
}
-#endif /* HAVE_DWARF_SUPPORT */
+#endif /* HAVE_LIBDW_SUPPORT */
#ifdef HAVE_DEBUGINFOD_SUPPORT
int get_source_from_debuginfod(const char *raw_path, const char *sbuild_id,
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
index e10558b79504..41a2b08670dc 100644
--- a/tools/perf/util/disasm.c
+++ b/tools/perf/util/disasm.c
@@ -12,10 +12,13 @@
#include <subcmd/run-command.h>
#include "annotate.h"
+#include "annotate-data.h"
#include "build-id.h"
#include "debug.h"
#include "disasm.h"
+#include "disasm_bpf.h"
#include "dso.h"
+#include "dwarf-regs.h"
#include "env.h"
#include "evsel.h"
#include "map.h"
@@ -35,6 +38,8 @@ static struct ins_ops mov_ops;
static struct ins_ops nop_ops;
static struct ins_ops lock_ops;
static struct ins_ops ret_ops;
+static struct ins_ops load_store_ops;
+static struct ins_ops arithmetic_ops;
static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
struct ins_operands *ops, int max_ins_name);
@@ -43,6 +48,8 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size,
static void ins__sort(struct arch *arch);
static int disasm_line__parse(char *line, const char **namep, char **rawp);
+static int disasm_line__parse_powerpc(struct disasm_line *dl);
+static char *expand_tabs(char *line, char **storage, size_t *storage_len);
static __attribute__((constructor)) void symbol__init_regexpr(void)
{
@@ -145,10 +152,16 @@ static struct arch architectures[] = {
.memory_ref_char = '(',
.imm_char = '$',
},
+#ifdef HAVE_LIBDW_SUPPORT
+ .update_insn_state = update_insn_state_x86,
+#endif
},
{
.name = "powerpc",
.init = powerpc__annotate_init,
+#ifdef HAVE_LIBDW_SUPPORT
+ .update_insn_state = update_insn_state_powerpc,
+#endif
},
{
.name = "riscv64",
@@ -250,7 +263,8 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2)
return arch->ins_is_fused(arch, ins1, ins2);
}
-static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
+ struct disasm_line *dl __maybe_unused)
{
char *endptr, *tok, *name;
struct map *map = ms->map;
@@ -345,7 +359,8 @@ static inline const char *validate_comma(const char *c, struct ins_operands *ops
return c;
}
-static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
+ struct disasm_line *dl __maybe_unused)
{
struct map *map = ms->map;
struct symbol *sym = ms->sym;
@@ -504,7 +519,8 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
return 0;
}
-static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
+ struct disasm_line *dl __maybe_unused)
{
ops->locked.ops = zalloc(sizeof(*ops->locked.ops));
if (ops->locked.ops == NULL)
@@ -513,13 +529,13 @@ static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_s
if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0)
goto out_free_ops;
- ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name);
+ ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name, 0);
if (ops->locked.ins.ops == NULL)
goto out_free_ops;
if (ops->locked.ins.ops->parse &&
- ops->locked.ins.ops->parse(arch, ops->locked.ops, ms) < 0)
+ ops->locked.ins.ops->parse(arch, ops->locked.ops, ms, NULL) < 0)
goto out_free_ops;
return 0;
@@ -552,6 +568,7 @@ static void lock__delete(struct ins_operands *ops)
ins_ops__delete(ops->locked.ops);
zfree(&ops->locked.ops);
+ zfree(&ops->locked.ins.name);
zfree(&ops->target.raw);
zfree(&ops->target.name);
}
@@ -590,7 +607,8 @@ static bool check_multi_regs(struct arch *arch, const char *op)
return count > 1;
}
-static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused)
+static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused,
+ struct disasm_line *dl __maybe_unused)
{
char *s = strchr(ops->raw, ','), *target, *comment, prev;
@@ -668,7 +686,92 @@ static struct ins_ops mov_ops = {
.scnprintf = mov__scnprintf,
};
-static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused)
+#define PPC_22_30(R) (((R) >> 1) & 0x1ff)
+#define MINUS_EXT_XO_FORM 234
+#define SUB_EXT_XO_FORM 232
+#define ADD_ZERO_EXT_XO_FORM 202
+#define SUB_ZERO_EXT_XO_FORM 200
+
+/*
+ * Print an arithmetic instruction into bf: the instruction name,
+ * left-justified in a field of max_ins_name characters, followed by a
+ * space and the raw operand string. Returns what scnprintf() returns.
+ */
+static int arithmetic__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name,
+ ops->raw);
+}
+
+/*
+ * Sets the fields: multi_regs and "mem_ref".
+ * "mem_ref" is set for ops->source which is later used to
+ * fill the objdump->memory_ref-char field. This ops is currently
+ * used by powerpc and since binary instruction code is used to
+ * extract opcode, regs and offset, no other parsing is needed here.
+ *
+ * Don't set multi regs for 4 cases since it has only one operand
+ * for source:
+ * - Add to Minus One Extended XO-form ( Ex: addme, addmeo )
+ * - Subtract From Minus One Extended XO-form ( Ex: subfme )
+ * - Add to Zero Extended XO-form ( Ex: addze, addzeo )
+ * - Subtract From Zero Extended XO-form ( Ex: subfze )
+ *
+ * All of these share primary opcode 31, so they can only be told apart
+ * by the extended opcode, which PPC_22_30() extracts from the raw
+ * instruction word.
+ *
+ * Always returns 0.
+ */
+static int arithmetic__parse(struct arch *arch __maybe_unused, struct ins_operands *ops,
+		struct map_symbol *ms __maybe_unused, struct disasm_line *dl)
+{
+	int opcode = PPC_OP(dl->raw.raw_insn);
+
+	ops->source.mem_ref = false;
+	ops->source.multi_regs = false;
+	if (opcode == 31) {
+		/*
+		 * Compare the extended opcode, not the primary one: the
+		 * primary opcode is known to be 31 here and can never match
+		 * any of the *_XO_FORM values.
+		 */
+		int ext_opcode = PPC_22_30(dl->raw.raw_insn);
+
+		if ((ext_opcode != MINUS_EXT_XO_FORM) &&
+		    (ext_opcode != SUB_EXT_XO_FORM) &&
+		    (ext_opcode != ADD_ZERO_EXT_XO_FORM) &&
+		    (ext_opcode != SUB_ZERO_EXT_XO_FORM))
+			ops->source.multi_regs = true;
+	}
+
+	ops->target.mem_ref = false;
+	ops->target.multi_regs = false;
+
+	return 0;
+}
+
+/* Instruction ops pairing arithmetic__parse with arithmetic__scnprintf. */
+static struct ins_ops arithmetic_ops = {
+ .parse = arithmetic__parse,
+ .scnprintf = arithmetic__scnprintf,
+};
+
+/*
+ * Print a load/store instruction into bf: the instruction name,
+ * left-justified in a field of max_ins_name characters, followed by a
+ * space and the raw operand string. Returns what scnprintf() returns.
+ */
+static int load_store__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name,
+ ops->raw);
+}
+
+/*
+ * Sets the fields: multi_regs and "mem_ref".
+ * "mem_ref" is set for ops->source which is later used to
+ * fill the objdump->memory_ref-char field. This ops is currently
+ * used by powerpc and since binary instruction code is used to
+ * extract opcode, regs and offset, no other parsing is needed here.
+ *
+ * dl is dereferenced unconditionally to read the raw instruction word,
+ * so it must not be NULL (hence no __maybe_unused on it).
+ *
+ * Always returns 0.
+ */
+static int load_store__parse(struct arch *arch __maybe_unused, struct ins_operands *ops,
+		struct map_symbol *ms __maybe_unused, struct disasm_line *dl)
+{
+	ops->source.mem_ref = true;
+	/* opcode 31 is of X form */
+	ops->source.multi_regs = PPC_OP(dl->raw.raw_insn) == 31;
+
+	ops->target.mem_ref = false;
+	ops->target.multi_regs = false;
+
+	return 0;
+}
+
+/* Instruction ops pairing load_store__parse with load_store__scnprintf. */
+static struct ins_ops load_store_ops = {
+ .parse = load_store__parse,
+ .scnprintf = load_store__scnprintf,
+};
+
+static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused,
+ struct disasm_line *dl __maybe_unused)
{
char *target, *comment, *s, prev;
@@ -758,11 +861,23 @@ static void ins__sort(struct arch *arch)
qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp);
}
-static struct ins_ops *__ins__find(struct arch *arch, const char *name)
+static struct ins_ops *__ins__find(struct arch *arch, const char *name, struct disasm_line *dl)
{
struct ins *ins;
const int nmemb = arch->nr_instructions;
+ if (arch__is(arch, "powerpc")) {
+ /*
+ * For powerpc, identify the instruction ops
+ * from the opcode using raw_insn.
+ */
+ struct ins_ops *ops;
+
+ ops = check_ppc_insn(dl);
+ if (ops)
+ return ops;
+ }
+
if (!arch->sorted_instructions) {
ins__sort(arch);
arch->sorted_instructions = true;
@@ -792,9 +907,9 @@ static struct ins_ops *__ins__find(struct arch *arch, const char *name)
return ins ? ins->ops : NULL;
}
-struct ins_ops *ins__find(struct arch *arch, const char *name)
+struct ins_ops *ins__find(struct arch *arch, const char *name, struct disasm_line *dl)
{
- struct ins_ops *ops = __ins__find(arch, name);
+ struct ins_ops *ops = __ins__find(arch, name, dl);
if (!ops && arch->associate_instruction_ops)
ops = arch->associate_instruction_ops(arch, name);
@@ -804,12 +919,12 @@ struct ins_ops *ins__find(struct arch *arch, const char *name)
static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms)
{
- dl->ins.ops = ins__find(arch, dl->ins.name);
+ dl->ins.ops = ins__find(arch, dl->ins.name, dl);
if (!dl->ins.ops)
return;
- if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms) < 0)
+ if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms, dl) < 0)
dl->ins.ops = NULL;
}
@@ -841,6 +956,51 @@ out:
return -1;
}
+/*
+ * Parses the result captured from symbol__disassemble_*
+ * Example, line read from DSO file in powerpc:
+ * line: 38 01 81 e8
+ * opcode: fetched from arch specific get_opcode_insn
+ * rawp_insn: e8810138
+ *
+ * rawp_insn is used later to extract the reg/offset fields
+ *
+ * A line longer than RAW_BYTES is treated as objdump output: the text
+ * after the first RAW_BYTES characters is handed to disasm_line__parse()
+ * and the parsed word goes through be32_to_cpu(). A shorter line holds
+ * only the hexadecimal instruction word and gets an empty name.
+ *
+ * Returns 0 on success, -1 if the line is blank or memory runs out.
+ */
+#define PPC_OP(op) (((op) >> 26) & 0x3F)
+#define RAW_BYTES 11
+
+static int disasm_line__parse_powerpc(struct disasm_line *dl)
+{
+	char *line = dl->al.line;
+	const char **namep = &dl->ins.name;
+	char **rawp = &dl->ops.raw;
+	char *tmp_raw_insn, *name_raw_insn = skip_spaces(line);
+	size_t raw_len = strlen(name_raw_insn);
+	int objdump = 0;
+
+	if (strlen(line) > RAW_BYTES)
+		objdump = 1;
+
+	if (name_raw_insn[0] == '\0')
+		return -1;
+
+	if (objdump) {
+		/*
+		 * Leading blanks count towards strlen(line), so the text
+		 * left after skipping them can be shorter than RAW_BYTES.
+		 * Clamp the skip so it never steps past the terminating NUL.
+		 */
+		char *name = skip_spaces(name_raw_insn +
+					 (raw_len < RAW_BYTES ? raw_len : RAW_BYTES));
+
+		/* Best effort, as before: a failure here is not fatal. */
+		disasm_line__parse(name, namep, rawp);
+	} else {
+		/*
+		 * NOTE(review): this stores a string literal in dl->ins.name;
+		 * confirm that no later cleanup passes it to free().
+		 */
+		*namep = "";
+	}
+
+	tmp_raw_insn = strndup(name_raw_insn, RAW_BYTES);
+	if (tmp_raw_insn == NULL)
+		return -1;
+
+	remove_spaces(tmp_raw_insn);
+
+	sscanf(tmp_raw_insn, "%x", &dl->raw.raw_insn);
+	if (objdump)
+		dl->raw.raw_insn = be32_to_cpu(dl->raw.raw_insn);
+
+	/* The scratch copy was only needed for sscanf() */
+	free(tmp_raw_insn);
+
+	return 0;
+}
+
static void annotation_line__init(struct annotation_line *al,
struct annotate_args *args,
int nr)
@@ -857,6 +1017,7 @@ static void annotation_line__exit(struct annotation_line *al)
zfree_srcline(&al->path);
zfree(&al->line);
zfree(&al->cycles);
+ zfree(&al->br_cntr);
}
static size_t disasm_line_size(int nr)
@@ -880,10 +1041,8 @@ static size_t disasm_line_size(int nr)
struct disasm_line *disasm_line__new(struct annotate_args *args)
{
struct disasm_line *dl = NULL;
- int nr = 1;
-
- if (evsel__is_group_event(args->evsel))
- nr = args->evsel->core.nr_members;
+ struct annotation *notes = symbol__annotation(args->ms.sym);
+ int nr = notes->src->nr_events;
dl = zalloc(disasm_line_size(nr));
if (!dl)
@@ -894,7 +1053,10 @@ struct disasm_line *disasm_line__new(struct annotate_args *args)
goto out_delete;
if (args->offset != -1) {
- if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
+ if (arch__is(args->arch, "powerpc")) {
+ if (disasm_line__parse_powerpc(dl) < 0)
+ goto out_free_line;
+ } else if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
goto out_free_line;
disasm_line__init_ins(dl, args->arch, &args->ms);
@@ -1164,195 +1326,11 @@ fallback:
return 0;
}
-#if defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
-#define PACKAGE "perf"
-#include <bfd.h>
-#include <dis-asm.h>
-#include <bpf/bpf.h>
-#include <bpf/btf.h>
-#include <bpf/libbpf.h>
-#include <linux/btf.h>
-#include <tools/dis-asm-compat.h>
-
-#include "bpf-event.h"
-#include "bpf-utils.h"
-
-static int symbol__disassemble_bpf(struct symbol *sym,
- struct annotate_args *args)
-{
- struct annotation *notes = symbol__annotation(sym);
- struct bpf_prog_linfo *prog_linfo = NULL;
- struct bpf_prog_info_node *info_node;
- int len = sym->end - sym->start;
- disassembler_ftype disassemble;
- struct map *map = args->ms.map;
- struct perf_bpil *info_linear;
- struct disassemble_info info;
- struct dso *dso = map__dso(map);
- int pc = 0, count, sub_id;
- struct btf *btf = NULL;
- char tpath[PATH_MAX];
- size_t buf_size;
- int nr_skip = 0;
- char *buf;
- bfd *bfdf;
- int ret;
- FILE *s;
-
- if (dso__binary_type(dso) != DSO_BINARY_TYPE__BPF_PROG_INFO)
- return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE;
-
- pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__,
- sym->name, sym->start, sym->end - sym->start);
-
- memset(tpath, 0, sizeof(tpath));
- perf_exe(tpath, sizeof(tpath));
-
- bfdf = bfd_openr(tpath, NULL);
- if (bfdf == NULL)
- abort();
-
- if (!bfd_check_format(bfdf, bfd_object))
- abort();
-
- s = open_memstream(&buf, &buf_size);
- if (!s) {
- ret = errno;
- goto out;
- }
- init_disassemble_info_compat(&info, s,
- (fprintf_ftype) fprintf,
- fprintf_styled);
- info.arch = bfd_get_arch(bfdf);
- info.mach = bfd_get_mach(bfdf);
-
- info_node = perf_env__find_bpf_prog_info(dso__bpf_prog(dso)->env,
- dso__bpf_prog(dso)->id);
- if (!info_node) {
- ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF;
- goto out;
- }
- info_linear = info_node->info_linear;
- sub_id = dso__bpf_prog(dso)->sub_id;
-
- info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns);
- info.buffer_length = info_linear->info.jited_prog_len;
-
- if (info_linear->info.nr_line_info)
- prog_linfo = bpf_prog_linfo__new(&info_linear->info);
-
- if (info_linear->info.btf_id) {
- struct btf_node *node;
-
- node = perf_env__find_btf(dso__bpf_prog(dso)->env,
- info_linear->info.btf_id);
- if (node)
- btf = btf__new((__u8 *)(node->data),
- node->data_size);
- }
-
- disassemble_init_for_target(&info);
-
-#ifdef DISASM_FOUR_ARGS_SIGNATURE
- disassemble = disassembler(info.arch,
- bfd_big_endian(bfdf),
- info.mach,
- bfdf);
-#else
- disassemble = disassembler(bfdf);
-#endif
- if (disassemble == NULL)
- abort();
-
- fflush(s);
- do {
- const struct bpf_line_info *linfo = NULL;
- struct disasm_line *dl;
- size_t prev_buf_size;
- const char *srcline;
- u64 addr;
-
- addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id];
- count = disassemble(pc, &info);
-
- if (prog_linfo)
- linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo,
- addr, sub_id,
- nr_skip);
-
- if (linfo && btf) {
- srcline = btf__name_by_offset(btf, linfo->line_off);
- nr_skip++;
- } else
- srcline = NULL;
-
- fprintf(s, "\n");
- prev_buf_size = buf_size;
- fflush(s);
-
- if (!annotate_opts.hide_src_code && srcline) {
- args->offset = -1;
- args->line = strdup(srcline);
- args->line_nr = 0;
- args->fileloc = NULL;
- args->ms.sym = sym;
- dl = disasm_line__new(args);
- if (dl) {
- annotation_line__add(&dl->al,
- &notes->src->source);
- }
- }
-
- args->offset = pc;
- args->line = buf + prev_buf_size;
- args->line_nr = 0;
- args->fileloc = NULL;
- args->ms.sym = sym;
- dl = disasm_line__new(args);
- if (dl)
- annotation_line__add(&dl->al, &notes->src->source);
-
- pc += count;
- } while (count > 0 && pc < len);
-
- ret = 0;
-out:
- free(prog_linfo);
- btf__free(btf);
- fclose(s);
- bfd_close(bfdf);
- return ret;
-}
-#else // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
-static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused,
- struct annotate_args *args __maybe_unused)
-{
- return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF;
-}
-#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
-
-static int
-symbol__disassemble_bpf_image(struct symbol *sym,
- struct annotate_args *args)
-{
- struct annotation *notes = symbol__annotation(sym);
- struct disasm_line *dl;
-
- args->offset = -1;
- args->line = strdup("to be implemented");
- args->line_nr = 0;
- args->fileloc = NULL;
- dl = disasm_line__new(args);
- if (dl)
- annotation_line__add(&dl->al, &notes->src->source);
-
- zfree(&args->line);
- return 0;
-}
-
#ifdef HAVE_LIBCAPSTONE_SUPPORT
#include <capstone/capstone.h>
+int capstone_init(struct machine *machine, csh *cs_handle, bool is64, bool disassembler_style);
+
static int open_capstone_handle(struct annotate_args *args, bool is_64bit,
csh *handle)
{
@@ -1378,7 +1356,9 @@ static int open_capstone_handle(struct annotate_args *args, bool is_64bit,
return 0;
}
+#endif
+#if defined(HAVE_LIBCAPSTONE_SUPPORT) || defined(HAVE_LIBLLVM_SUPPORT)
struct find_file_offset_data {
u64 ip;
u64 offset;
@@ -1396,6 +1376,64 @@ static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg)
return 0;
}
+/*
+ * Read the bytes of sym from filename into a freshly allocated buffer.
+ *
+ * The file is opened inside the mount namespace of the map's dso, and
+ * file__read_maps() is used with find_file_offset to fill in the file
+ * offset to read from and *is_64bit. On success *len holds the number of
+ * bytes read and the caller owns (and must free) the returned buffer.
+ * Returns NULL on any failure.
+ */
+static u8 *
+read_symbol(const char *filename, struct map *map, struct symbol *sym,
+	    u64 *len, bool *is_64bit)
+{
+	struct dso *dso = map__dso(map);
+	u64 start = map__rip_2objdump(map, sym->start);
+	u64 end = map__rip_2objdump(map, sym->end);
+	struct find_file_offset_data data = {
+		.ip = start,
+	};
+	struct nscookie nsc;
+	u8 *code = NULL;
+	int nread;
+	int fd;
+
+	*is_64bit = false;
+
+	nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
+	fd = open(filename, O_RDONLY);
+	nsinfo__mountns_exit(&nsc);
+	if (fd < 0)
+		return NULL;
+
+	if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data,
+			    is_64bit) == 0)
+		goto out_close;
+
+	*len = end - start;
+	code = malloc(*len);
+	if (code == NULL)
+		goto out_close;
+
+	nread = pread(fd, code, *len, data.offset);
+	if ((u64)nread != *len) {
+		/* Short read or error: hand nothing back */
+		free(code);
+		code = NULL;
+	}
+
+out_close:
+	close(fd);
+	return code;
+}
+#endif
+
+#if !defined(HAVE_LIBCAPSTONE_SUPPORT) || !defined(HAVE_LIBLLVM_SUPPORT)
+/*
+ * Emit a debug message saying that the named disassembler isn't linked
+ * in, mentioning the symbol name and the file it lives in.
+ */
+static void symbol__disassembler_missing(const char *disassembler, const char *filename,
+ struct symbol *sym)
+{
+ pr_debug("The %s disassembler isn't linked in for %s in %s\n",
+ disassembler, sym->name, filename);
+}
+#endif
+
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
static void print_capstone_detail(cs_insn *insn, char *buf, size_t len,
struct annotate_args *args, u64 addr)
{
@@ -1453,7 +1491,7 @@ static void print_capstone_detail(cs_insn *insn, char *buf, size_t len,
}
}
-static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
+static int symbol__disassemble_capstone_powerpc(char *filename, struct symbol *sym,
struct annotate_args *args)
{
struct annotation *notes = symbol__annotation(sym);
@@ -1472,9 +1510,10 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
.ip = start,
};
csh handle;
- cs_insn *insn;
char disasm_buf[512];
struct disasm_line *dl;
+ u32 *line;
+ bool disassembler_style = false;
if (args->options->objdump_path)
return -1;
@@ -1489,7 +1528,11 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
&is_64bit) == 0)
goto err;
- if (open_capstone_handle(args, is_64bit, &handle) < 0)
+ if (!args->options->disassembler_style ||
+ !strcmp(args->options->disassembler_style, "att"))
+ disassembler_style = true;
+
+ if (capstone_init(maps__machine(args->ms.maps), &handle, is_64bit, disassembler_style) < 0)
goto err;
needs_cs_close = true;
@@ -1505,6 +1548,99 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
if ((u64)count != len)
goto err;
+ line = (u32 *)buf;
+
+ /* add the function address and name */
+ scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
+ start, sym->name);
+
+ args->offset = -1;
+ args->line = disasm_buf;
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ goto err;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ /*
+ * TODO: enable disassm for powerpc
+ * count = cs_disasm(handle, buf, len, start, len, &insn);
+ *
+ * For now, only binary code is saved in disassembled line
+ * to be used in "type" and "typeoff" sort keys. Each raw code
+ * is 32 bit instruction. So use "len/4" to get the number of
+ * entries.
+ */
+ count = len/4;
+
+ for (i = 0, offset = 0; i < count; i++) {
+ args->offset = offset;
+ sprintf(args->line, "%x", line[i]);
+
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ break;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ offset += 4;
+ }
+
+ /* It failed in the middle */
+ if (offset != len) {
+ struct list_head *list = &notes->src->source;
+
+ /* Discard all lines and fallback to objdump */
+ while (!list_empty(list)) {
+ dl = list_first_entry(list, struct disasm_line, al.node);
+
+ list_del_init(&dl->al.node);
+ disasm_line__free(dl);
+ }
+ count = -1;
+ }
+
+out:
+ if (needs_cs_close)
+ cs_close(&handle);
+ free(buf);
+ return count < 0 ? count : 0;
+
+err:
+ if (fd >= 0)
+ close(fd);
+ count = -1;
+ goto out;
+}
+
+static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
+ struct annotate_args *args)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct map *map = args->ms.map;
+ u64 start = map__rip_2objdump(map, sym->start);
+ u64 len;
+ u64 offset;
+ int i, count, free_count;
+ bool is_64bit = false;
+ bool needs_cs_close = false;
+ u8 *buf = NULL;
+ csh handle;
+ cs_insn *insn = NULL;
+ char disasm_buf[512];
+ struct disasm_line *dl;
+
+ if (args->options->objdump_path)
+ return -1;
+
+ buf = read_symbol(filename, map, sym, &len, &is_64bit);
+ if (buf == NULL)
+ return -1;
+
/* add the function address and name */
scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
start, sym->name);
@@ -1521,7 +1657,12 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
annotation_line__add(&dl->al, &notes->src->source);
- count = cs_disasm(handle, buf, len, start, len, &insn);
+ if (open_capstone_handle(args, is_64bit, &handle) < 0)
+ goto err;
+
+ needs_cs_close = true;
+
+ free_count = count = cs_disasm(handle, buf, len, start, len, &insn);
for (i = 0, offset = 0; i < count; i++) {
int printed;
@@ -1559,14 +1700,15 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
}
out:
- if (needs_cs_close)
+ if (needs_cs_close) {
cs_close(&handle);
+ if (free_count > 0)
+ cs_free(insn, free_count);
+ }
free(buf);
return count < 0 ? count : 0;
err:
- if (fd >= 0)
- close(fd);
if (needs_cs_close) {
struct disasm_line *tmp;
@@ -1576,13 +1718,302 @@ err:
*/
list_for_each_entry_safe(dl, tmp, &notes->src->source, al.node) {
list_del(&dl->al.node);
- free(dl);
+ disasm_line__free(dl);
}
}
count = -1;
goto out;
}
-#endif
+#else // HAVE_LIBCAPSTONE_SUPPORT
+/*
+ * Stub for builds without capstone: reports the missing disassembler
+ * through symbol__disassembler_missing() and returns -1.
+ */
+static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
+ struct annotate_args *args __maybe_unused)
+{
+ symbol__disassembler_missing("capstone", filename, sym);
+ return -1;
+}
+
+/*
+ * Stub for builds without capstone: reports the missing disassembler
+ * through symbol__disassembler_missing() and returns -1.
+ */
+static int symbol__disassemble_capstone_powerpc(char *filename, struct symbol *sym,
+ struct annotate_args *args __maybe_unused)
+{
+ symbol__disassembler_missing("capstone powerpc", filename, sym);
+ return -1;
+}
+#endif // HAVE_LIBCAPSTONE_SUPPORT
+
+/*
+ * Build annotation lines from the raw instruction words of sym without
+ * running a disassembler: the symbol's bytes are read through
+ * dso__data_read_offset() and every 32-bit word becomes one line holding
+ * its value printed in hexadecimal. A "<address> <name>:" line is added
+ * first.
+ *
+ * Returns 0 on success. Returns -1 when an objdump path was given
+ * explicitly, when the buffer cannot be allocated or fully read, when the
+ * header line cannot be created, or when the per-word loop does not
+ * cover the whole symbol; in that last case every line on
+ * notes->src->source is discarded before returning.
+ */
+static int symbol__disassemble_raw(char *filename, struct symbol *sym,
+ struct annotate_args *args)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct map *map = args->ms.map;
+ struct dso *dso = map__dso(map);
+ u64 start = map__rip_2objdump(map, sym->start);
+ u64 end = map__rip_2objdump(map, sym->end);
+ u64 len = end - start;
+ u64 offset;
+ int i, count;
+ u8 *buf = NULL;
+ char disasm_buf[512];
+ struct disasm_line *dl;
+ u32 *line;
+
+ /* Return if objdump is specified explicitly */
+ if (args->options->objdump_path)
+ return -1;
+
+ pr_debug("Reading raw instruction from : %s using dso__data_read_offset\n", filename);
+
+ buf = malloc(len);
+ if (buf == NULL)
+ goto err;
+
+ count = dso__data_read_offset(dso, NULL, sym->start, buf, len);
+
+ /* View the bytes just read as an array of 32-bit words */
+ line = (u32 *)buf;
+
+ if ((u64)count != len)
+ goto err;
+
+ /* add the function address and name */
+ scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
+ start, sym->name);
+
+ args->offset = -1;
+ /* args->line aliases disasm_buf; the sprintf() in the loop below reuses it */
+ args->line = disasm_buf;
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ goto err;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ /* Each raw instruction is 4 byte */
+ count = len/4;
+
+ for (i = 0, offset = 0; i < count; i++) {
+ args->offset = offset;
+ sprintf(args->line, "%x", line[i]);
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ break;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+ offset += 4;
+ }
+
+ /* It failed in the middle */
+ if (offset != len) {
+ struct list_head *list = &notes->src->source;
+
+ /* Discard all lines and fallback to objdump */
+ while (!list_empty(list)) {
+ dl = list_first_entry(list, struct disasm_line, al.node);
+
+ list_del_init(&dl->al.node);
+ disasm_line__free(dl);
+ }
+ count = -1;
+ }
+
+out:
+ free(buf);
+ /* count is the number of words on success, so collapse it to 0 */
+ return count < 0 ? count : 0;
+
+err:
+ count = -1;
+ goto out;
+}
+
+#ifdef HAVE_LIBLLVM_SUPPORT
+#include <llvm-c/Disassembler.h>
+#include <llvm-c/Target.h>
+#include "util/llvm-c-helpers.h"
+
+/*
+ * Scratch area filled in by symbol_lookup_callback() and read back by
+ * symbol__disassemble_llvm(), which zeroes both fields before each
+ * instruction and treats a non-zero value as "an address was reported".
+ */
+struct symbol_lookup_storage {
+ /* value reported with reference type In_Branch */
+ u64 branch_addr;
+ /* value reported with reference type In_PCrel_Load */
+ u64 pcrel_load_addr;
+};
+
+/*
+ * LLVM invokes this callback whenever it would like an address turned
+ * into a symbol. Nothing is ever handed back (whatever we returned would
+ * be wrapped in quotation marks); the call is only used as a hint that
+ * the expression contains a branch target or a PC-relative load address.
+ * The value is stashed in the symbol_lookup_storage passed as disinfo so
+ * that the caller can append its own textual annotation after the
+ * instruction.
+ */
+static const char *
+symbol_lookup_callback(void *disinfo, uint64_t value,
+		       uint64_t *ref_type,
+		       uint64_t address __maybe_unused,
+		       const char **ref __maybe_unused)
+{
+	struct symbol_lookup_storage *found = disinfo;
+
+	switch (*ref_type) {
+	case LLVMDisassembler_ReferenceType_In_Branch:
+		found->branch_addr = value;
+		break;
+	case LLVMDisassembler_ReferenceType_In_PCrel_Load:
+		found->pcrel_load_addr = value;
+		break;
+	default:
+		break;
+	}
+
+	*ref_type = LLVMDisassembler_ReferenceType_InOut_None;
+	return NULL;
+}
+
+/*
+ * Disassemble sym with the LLVM disassembler and append one annotation
+ * line per instruction (preceded by an "<address> <name>:" line) to
+ * notes->src->source.
+ *
+ * Branch targets and PC-relative load addresses reported through
+ * symbol_lookup_callback() are appended to the instruction text, and
+ * llvm_addr2line() supplies the file location and line number of every
+ * instruction.
+ *
+ * Returns 0 on success. Returns -1 when an objdump path was given
+ * explicitly, when the symbol cannot be read, when the disassembler
+ * cannot be created, when an instruction cannot be decoded or when a line
+ * cannot be allocated. On failure every line on notes->src->source is
+ * discarded, as the other built-in disassemblers do, so that a fallback
+ * does not end up with partial output.
+ */
+static int symbol__disassemble_llvm(char *filename, struct symbol *sym,
+				    struct annotate_args *args)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct map *map = args->ms.map;
+	struct dso *dso = map__dso(map);
+	u64 start = map__rip_2objdump(map, sym->start);
+	u8 *buf;
+	u64 len;
+	u64 pc;
+	bool is_64bit;
+	char triplet[64];
+	char disasm_buf[2048];
+	size_t disasm_len;
+	struct disasm_line *dl;
+	LLVMDisasmContextRef disasm = NULL;
+	struct symbol_lookup_storage storage;
+	char *line_storage = NULL;
+	size_t line_storage_len = 0;
+	int ret = -1;
+
+	if (args->options->objdump_path)
+		return -1;
+
+	LLVMInitializeAllTargetInfos();
+	LLVMInitializeAllTargetMCs();
+	LLVMInitializeAllDisassemblers();
+
+	buf = read_symbol(filename, map, sym, &len, &is_64bit);
+	if (buf == NULL)
+		return -1;
+
+	if (arch__is(args->arch, "x86")) {
+		if (is_64bit)
+			scnprintf(triplet, sizeof(triplet), "x86_64-pc-linux");
+		else
+			scnprintf(triplet, sizeof(triplet), "i686-pc-linux");
+	} else {
+		scnprintf(triplet, sizeof(triplet), "%s-linux-gnu",
+			  args->arch->name);
+	}
+
+	disasm = LLVMCreateDisasm(triplet, &storage, 0, NULL,
+				  symbol_lookup_callback);
+	if (disasm == NULL)
+		goto err;
+
+	if (args->options->disassembler_style &&
+	    !strcmp(args->options->disassembler_style, "intel"))
+		LLVMSetDisasmOptions(disasm,
+				     LLVMDisassembler_Option_AsmPrinterVariant);
+
+	/*
+	 * This needs to be set after AsmPrinterVariant, due to a bug in LLVM;
+	 * setting AsmPrinterVariant makes a new instruction printer, making it
+	 * forget about the PrintImmHex flag (which is applied before if both
+	 * are given to the same call).
+	 */
+	LLVMSetDisasmOptions(disasm, LLVMDisassembler_Option_PrintImmHex);
+
+	/* add the function address and name */
+	scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
+		  start, sym->name);
+
+	args->offset = -1;
+	args->line = disasm_buf;
+	args->line_nr = 0;
+	args->fileloc = NULL;
+	args->ms.sym = sym;
+
+	dl = disasm_line__new(args);
+	if (dl == NULL)
+		goto err;
+
+	annotation_line__add(&dl->al, &notes->src->source);
+
+	pc = start;
+	for (u64 offset = 0; offset < len; ) {
+		unsigned int ins_len;
+
+		/* Reset the hints; the callback sets them while decoding */
+		storage.branch_addr = 0;
+		storage.pcrel_load_addr = 0;
+
+		ins_len = LLVMDisasmInstruction(disasm, buf + offset,
+						len - offset, pc,
+						disasm_buf, sizeof(disasm_buf));
+		if (ins_len == 0)
+			goto err;
+		disasm_len = strlen(disasm_buf);
+
+		if (storage.branch_addr != 0) {
+			char *name = llvm_name_for_code(dso, filename,
+							storage.branch_addr);
+			if (name != NULL) {
+				disasm_len += scnprintf(disasm_buf + disasm_len,
+							sizeof(disasm_buf) -
+								disasm_len,
+							" <%s>", name);
+				free(name);
+			}
+		}
+		if (storage.pcrel_load_addr != 0) {
+			char *name = llvm_name_for_data(dso, filename,
+							storage.pcrel_load_addr);
+			disasm_len += scnprintf(disasm_buf + disasm_len,
+						sizeof(disasm_buf) - disasm_len,
+						" # %#"PRIx64,
+						storage.pcrel_load_addr);
+			if (name) {
+				disasm_len += scnprintf(disasm_buf + disasm_len,
+							sizeof(disasm_buf) -
+							disasm_len,
+							" <%s>", name);
+				free(name);
+			}
+		}
+
+		args->offset = offset;
+		args->line = expand_tabs(disasm_buf, &line_storage,
+					 &line_storage_len);
+		args->line_nr = 0;
+		args->fileloc = NULL;
+		args->ms.sym = sym;
+
+		llvm_addr2line(filename, pc, &args->fileloc,
+			       (unsigned int *)&args->line_nr, false, NULL);
+
+		dl = disasm_line__new(args);
+		if (dl == NULL) {
+			/* Don't leak the location string on the error path */
+			zfree(&args->fileloc);
+			goto err;
+		}
+
+		annotation_line__add(&dl->al, &notes->src->source);
+
+		free(args->fileloc);
+		pc += ins_len;
+		offset += ins_len;
+	}
+
+	ret = 0;
+
+err:
+	if (ret != 0) {
+		struct list_head *list = &notes->src->source;
+
+		/* Discard all lines so a fallback does not see partial output */
+		while (!list_empty(list)) {
+			dl = list_first_entry(list, struct disasm_line, al.node);
+
+			list_del_init(&dl->al.node);
+			disasm_line__free(dl);
+		}
+	}
+	LLVMDisasmDispose(disasm);
+	free(buf);
+	free(line_storage);
+	return ret;
+}
+#else // HAVE_LIBLLVM_SUPPORT
+/*
+ * Stub for builds without LLVM: reports the missing disassembler
+ * through symbol__disassembler_missing() and returns -1.
+ */
+static int symbol__disassemble_llvm(char *filename, struct symbol *sym,
+ struct annotate_args *args __maybe_unused)
+{
+ symbol__disassembler_missing("LLVM", filename, sym);
+ return -1;
+}
+#endif // HAVE_LIBLLVM_SUPPORT
/*
* Possibly create a new version of line with tabs expanded. Returns the
@@ -1644,17 +2075,14 @@ static char *expand_tabs(char *line, char **storage, size_t *storage_len)
return new_line;
}
-int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
+static int symbol__disassemble_objdump(const char *filename, struct symbol *sym,
+ struct annotate_args *args)
{
struct annotation_options *opts = &annotate_opts;
struct map *map = args->ms.map;
struct dso *dso = map__dso(map);
char *command;
FILE *file;
- char symfs_filename[PATH_MAX];
- struct kcore_extract kce;
- bool delete_extract = false;
- bool decomp = false;
int lineno = 0;
char *fileloc = NULL;
int nline;
@@ -1669,50 +2097,7 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
NULL,
};
struct child_process objdump_process;
- int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
-
- if (err)
- return err;
-
- pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
- symfs_filename, sym->name, map__unmap_ip(map, sym->start),
- map__unmap_ip(map, sym->end));
-
- pr_debug("annotating [%p] %30s : [%p] %30s\n",
- dso, dso__long_name(dso), sym, sym->name);
-
- if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) {
- return symbol__disassemble_bpf(sym, args);
- } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) {
- return symbol__disassemble_bpf_image(sym, args);
- } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
- return -1;
- } else if (dso__is_kcore(dso)) {
- kce.kcore_filename = symfs_filename;
- kce.addr = map__rip_2objdump(map, sym->start);
- kce.offs = sym->start;
- kce.len = sym->end - sym->start;
- if (!kcore_extract__create(&kce)) {
- delete_extract = true;
- strlcpy(symfs_filename, kce.extract_filename,
- sizeof(symfs_filename));
- }
- } else if (dso__needs_decompress(dso)) {
- char tmp[KMOD_DECOMP_LEN];
-
- if (dso__decompress_kmodule_path(dso, symfs_filename,
- tmp, sizeof(tmp)) < 0)
- return -1;
-
- decomp = true;
- strcpy(symfs_filename, tmp);
- }
-
-#ifdef HAVE_LIBCAPSTONE_SUPPORT
- err = symbol__disassemble_capstone(symfs_filename, sym, args);
- if (err == 0)
- goto out_remove_tmp;
-#endif
+ int err;
err = asprintf(&command,
"%s %s%s --start-address=0x%016" PRIx64
@@ -1735,13 +2120,13 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
if (err < 0) {
pr_err("Failure allocating memory for the command to run\n");
- goto out_remove_tmp;
+ return err;
}
pr_debug("Executing: %s\n", command);
objdump_argv[2] = command;
- objdump_argv[4] = symfs_filename;
+ objdump_argv[4] = filename;
/* Create a pipe to read from for stdout */
memset(&objdump_process, 0, sizeof(objdump_process));
@@ -1779,8 +2164,8 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
break;
/* Skip lines containing "filename:" */
- match = strstr(line, symfs_filename);
- if (match && match[strlen(symfs_filename)] == ':')
+ match = strstr(line, filename);
+ if (match && match[strlen(filename)] == ':')
continue;
expanded_line = strim(line);
@@ -1825,7 +2210,150 @@ out_close_stdout:
out_free_command:
free(command);
+ return err;
+}
+
+/*
+ * Split options->disassemblers_str (a comma separated list, defaulting to the
+ * disassemblers this build supports) into options->disassemblers[].
+ *
+ * Returns 0 on success and -1 when memory for the list cannot be allocated.
+ */
+static int annotation_options__init_disassemblers(struct annotation_options *options)
+{
+	char *disassembler;
+
+	/*
+	 * symbol__disassemble() calls this on every invocation.  Parse the
+	 * list only once: parsing it again would keep appending entries past
+	 * the end of options->disassemblers[] and leak the previous copy of
+	 * the string.
+	 */
+	if (options->nr_disassemblers != 0)
+		return 0;
+
+	if (options->disassemblers_str == NULL) {
+		const char *default_disassemblers_str =
+#ifdef HAVE_LIBLLVM_SUPPORT
+				"llvm,"
+#endif
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+				"capstone,"
+#endif
+				"objdump";
+
+		options->disassemblers_str = strdup(default_disassemblers_str);
+		if (!options->disassemblers_str)
+			goto out_enomem;
+	}
+
+	/* The entries stored in options->disassemblers[] point into this copy. */
+	disassembler = strdup(options->disassemblers_str);
+	if (disassembler == NULL)
+		goto out_enomem;
+
+	while (1) {
+		char *comma = strchr(disassembler, ',');
+
+		if (comma != NULL)
+			*comma = '\0';
+
+		options->disassemblers[options->nr_disassemblers++] = strim(disassembler);
+
+		if (comma == NULL)
+			break;
+
+		disassembler = comma + 1;
+
+		/* Stop before the next store would run past the array. */
+		if (options->nr_disassemblers >= MAX_DISASSEMBLERS) {
+			pr_debug("annotate.disassemblers can have at most %d entries, ignoring \"%s\"\n",
+				 MAX_DISASSEMBLERS, disassembler);
+			break;
+		}
+	}
+
+	return 0;
+
+out_enomem:
+	pr_err("Not enough memory for annotate.disassemblers\n");
+	return -1;
+}
+
+int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
+{
+ struct annotation_options *options = args->options;
+ struct map *map = args->ms.map;
+ struct dso *dso = map__dso(map);
+ char symfs_filename[PATH_MAX];
+ bool delete_extract = false;
+ struct kcore_extract kce;
+ const char *disassembler;
+ bool decomp = false;
+ int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
+
+ if (err)
+ return err;
+
+ pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
+ symfs_filename, sym->name, map__unmap_ip(map, sym->start),
+ map__unmap_ip(map, sym->end));
+
+ pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso__long_name(dso), sym, sym->name);
+
+ if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) {
+ return symbol__disassemble_bpf(sym, args);
+ } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) {
+ return symbol__disassemble_bpf_image(sym, args);
+ } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
+ return -1;
+ } else if (dso__is_kcore(dso)) {
+ kce.addr = map__rip_2objdump(map, sym->start);
+ kce.kcore_filename = symfs_filename;
+ kce.len = sym->end - sym->start;
+ kce.offs = sym->start;
+
+ if (!kcore_extract__create(&kce)) {
+ delete_extract = true;
+ strlcpy(symfs_filename, kce.extract_filename, sizeof(symfs_filename));
+ }
+ } else if (dso__needs_decompress(dso)) {
+ char tmp[KMOD_DECOMP_LEN];
+
+ if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0)
+ return -1;
+
+ decomp = true;
+ strcpy(symfs_filename, tmp);
+ }
+ /*
+ * For powerpc data type profiling, use the dso__data_read_offset to
+ * read raw instruction directly and interpret the binary code to
+ * understand instructions and register fields. For sort keys as type
+ * and typeoff, disassemble to mnemonic notation is not required in
+ * case of powerpc.
+ */
+ if (arch__is(args->arch, "powerpc")) {
+ extern const char *sort_order;
+
+ if (sort_order && !strstr(sort_order, "sym")) {
+ err = symbol__disassemble_raw(symfs_filename, sym, args);
+ if (err == 0)
+ goto out_remove_tmp;
+
+ err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args);
+ if (err == 0)
+ goto out_remove_tmp;
+ }
+ }
+
+ err = annotation_options__init_disassemblers(options);
+ if (err)
+ goto out_remove_tmp;
+
+ err = -1;
+
+ for (int i = 0; i < options->nr_disassemblers && err != 0; ++i) {
+ disassembler = options->disassemblers[i];
+
+ if (!strcmp(disassembler, "llvm"))
+ err = symbol__disassemble_llvm(symfs_filename, sym, args);
+ else if (!strcmp(disassembler, "capstone"))
+ err = symbol__disassemble_capstone(symfs_filename, sym, args);
+ else if (!strcmp(disassembler, "objdump"))
+ err = symbol__disassemble_objdump(symfs_filename, sym, args);
+ else
+ pr_debug("Unknown disassembler %s, skipping...\n", disassembler);
+ }
+
+ if (err == 0) {
+ pr_debug("Disassembled with %s\nannotate.disassemblers=%s\n",
+ disassembler, options->disassemblers_str);
+ }
out_remove_tmp:
if (decomp)
unlink(symfs_filename);
diff --git a/tools/perf/util/disasm.h b/tools/perf/util/disasm.h
index 3d381a043520..c135db2416b5 100644
--- a/tools/perf/util/disasm.h
+++ b/tools/perf/util/disasm.h
@@ -4,11 +4,18 @@
#include "map_symbol.h"
+#ifdef HAVE_LIBDW_SUPPORT
+#include "dwarf-aux.h"
+#endif
+
struct annotation_options;
struct disasm_line;
struct ins;
struct evsel;
struct symbol;
+struct data_loc_info;
+struct type_state;
+struct disasm_line;
struct arch {
const char *name;
@@ -32,6 +39,15 @@ struct arch {
char memory_ref_char;
char imm_char;
} objdump;
+#ifdef HAVE_LIBDW_SUPPORT
+ void (*update_insn_state)(struct type_state *state,
+ struct data_loc_info *dloc, Dwarf_Die *cu_die,
+ struct disasm_line *dl);
+#endif
+ /** @e_machine: ELF machine associated with arch. */
+ unsigned int e_machine;
+ /** @e_flags: Optional ELF flags associated with arch. */
+ unsigned int e_flags;
};
struct ins {
@@ -50,6 +66,7 @@ struct ins_operands {
bool offset_avail;
bool outside;
bool multi_regs;
+ bool mem_ref;
} target;
union {
struct {
@@ -57,6 +74,7 @@ struct ins_operands {
char *name;
u64 addr;
bool multi_regs;
+ bool mem_ref;
} source;
struct {
struct ins ins;
@@ -71,7 +89,8 @@ struct ins_operands {
struct ins_ops {
void (*free)(struct ins_operands *ops);
- int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms);
+ int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
+ struct disasm_line *dl);
int (*scnprintf)(struct ins *ins, char *bf, size_t size,
struct ins_operands *ops, int max_ins_name);
};
@@ -90,7 +109,7 @@ struct annotate_args {
struct arch *arch__find(const char *name);
bool arch__is(struct arch *arch, const char *name);
-struct ins_ops *ins__find(struct arch *arch, const char *name);
+struct ins_ops *ins__find(struct arch *arch, const char *name, struct disasm_line *dl);
int ins__scnprintf(struct ins *ins, char *bf, size_t size,
struct ins_operands *ops, int max_ins_name);
diff --git a/tools/perf/util/disasm_bpf.c b/tools/perf/util/disasm_bpf.c
new file mode 100644
index 000000000000..1fee71c79b62
--- /dev/null
+++ b/tools/perf/util/disasm_bpf.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "util/annotate.h"
+#include "util/disasm_bpf.h"
+#include "util/symbol.h"
+#include <linux/zalloc.h>
+#include <string.h>
+
+#if defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+#define PACKAGE "perf"
+#include <bfd.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <dis-asm.h>
+#include <errno.h>
+#include <linux/btf.h>
+#include <tools/dis-asm-compat.h>
+
+#include "util/bpf-event.h"
+#include "util/bpf-utils.h"
+#include "util/debug.h"
+#include "util/dso.h"
+#include "util/map.h"
+#include "util/env.h"
+#include "util/util.h"
+
+/*
+ * Disassemble the JITed image of the BPF program behind @sym with
+ * libbfd/libopcodes and add one annotation line per instruction, optionally
+ * preceded by the matching source line taken from BTF line info.
+ *
+ * Returns 0 on success, SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE when the dso
+ * is not a BPF program, SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF when no
+ * program info is found, or the errno value when the memory stream cannot be
+ * created.  Failures while setting up libbfd abort().
+ */
+int symbol__disassemble_bpf(struct symbol *sym, struct annotate_args *args)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct bpf_prog_linfo *prog_linfo = NULL;
+	struct bpf_prog_info_node *info_node;
+	int len = sym->end - sym->start;
+	disassembler_ftype disassemble;
+	struct map *map = args->ms.map;
+	struct perf_bpil *info_linear;
+	struct disassemble_info info;
+	struct dso *dso = map__dso(map);
+	int pc = 0, count, sub_id;
+	struct btf *btf = NULL;
+	char tpath[PATH_MAX];
+	size_t buf_size;
+	int nr_skip = 0;
+	char *buf = NULL;
+	bfd *bfdf;
+	int ret;
+	FILE *s;
+
+	if (dso__binary_type(dso) != DSO_BINARY_TYPE__BPF_PROG_INFO)
+		return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE;
+
+	pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__,
+		  sym->name, sym->start, sym->end - sym->start);
+
+	/* The perf binary itself is opened to learn the host arch/mach. */
+	memset(tpath, 0, sizeof(tpath));
+	perf_exe(tpath, sizeof(tpath));
+
+	bfdf = bfd_openr(tpath, NULL);
+	if (bfdf == NULL)
+		abort();
+
+	if (!bfd_check_format(bfdf, bfd_object))
+		abort();
+
+	/* The disassembler prints into this in-memory stream. */
+	s = open_memstream(&buf, &buf_size);
+	if (!s) {
+		ret = errno;
+		goto out;
+	}
+	init_disassemble_info_compat(&info, s,
+				     (fprintf_ftype) fprintf,
+				     fprintf_styled);
+	info.arch = bfd_get_arch(bfdf);
+	info.mach = bfd_get_mach(bfdf);
+
+	info_node = perf_env__find_bpf_prog_info(dso__bpf_prog(dso)->env,
+						 dso__bpf_prog(dso)->id);
+	if (!info_node) {
+		ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF;
+		goto out;
+	}
+	info_linear = info_node->info_linear;
+	sub_id = dso__bpf_prog(dso)->sub_id;
+
+	info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns);
+	info.buffer_length = info_linear->info.jited_prog_len;
+
+	if (info_linear->info.nr_line_info)
+		prog_linfo = bpf_prog_linfo__new(&info_linear->info);
+
+	if (info_linear->info.btf_id) {
+		struct btf_node *node;
+
+		node = perf_env__find_btf(dso__bpf_prog(dso)->env,
+					  info_linear->info.btf_id);
+		if (node)
+			btf = btf__new((__u8 *)(node->data),
+				       node->data_size);
+	}
+
+	disassemble_init_for_target(&info);
+
+#ifdef DISASM_FOUR_ARGS_SIGNATURE
+	disassemble = disassembler(info.arch,
+				   bfd_big_endian(bfdf),
+				   info.mach,
+				   bfdf);
+#else
+	disassemble = disassembler(bfdf);
+#endif
+	if (disassemble == NULL)
+		abort();
+
+	/* Flushing makes buf/buf_size reflect what was written so far. */
+	fflush(s);
+	do {
+		const struct bpf_line_info *linfo = NULL;
+		struct disasm_line *dl;
+		size_t prev_buf_size;
+		const char *srcline;
+		u64 addr;
+
+		addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id];
+		count = disassemble(pc, &info);
+
+		if (prog_linfo)
+			linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo,
+								addr, sub_id,
+								nr_skip);
+
+		if (linfo && btf) {
+			srcline = btf__name_by_offset(btf, linfo->line_off);
+			nr_skip++;
+		} else
+			srcline = NULL;
+
+		/*
+		 * buf_size still holds the offset recorded by the previous
+		 * flush, i.e. where the text of this instruction starts.
+		 */
+		fprintf(s, "\n");
+		prev_buf_size = buf_size;
+		fflush(s);
+
+		if (!annotate_opts.hide_src_code && srcline) {
+			args->offset = -1;
+			args->line = strdup(srcline);
+			args->line_nr = 0;
+			args->fileloc = NULL;
+			args->ms.sym = sym;
+			dl = disasm_line__new(args);
+			if (dl) {
+				annotation_line__add(&dl->al,
+						     &notes->src->source);
+			}
+			/*
+			 * Release the temporary copy, as
+			 * symbol__disassemble_bpf_image() does after
+			 * disasm_line__new(); it used to be leaked here.
+			 */
+			zfree(&args->line);
+		}
+
+		args->offset = pc;
+		args->line = buf + prev_buf_size;
+		args->line_nr = 0;
+		args->fileloc = NULL;
+		args->ms.sym = sym;
+		dl = disasm_line__new(args);
+		if (dl)
+			annotation_line__add(&dl->al, &notes->src->source);
+
+		pc += count;
+	} while (count > 0 && pc < len);
+
+	/* args->line pointed into buf, which is released below. */
+	args->line = NULL;
+	ret = 0;
+out:
+	/* bpf_prog_linfo__new() allocates internal arrays; plain free() leaks them. */
+	bpf_prog_linfo__free(prog_linfo);
+	btf__free(btf);
+	/* s is NULL when open_memstream() failed. */
+	if (s)
+		fclose(s);
+	/* The open_memstream() buffer must be freed by the caller after fclose(). */
+	free(buf);
+	bfd_close(bfdf);
+	return ret;
+}
+#else // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+/*
+ * Fallback used unless both HAVE_LIBBFD_SUPPORT and HAVE_LIBBPF_SUPPORT are
+ * defined: BPF programs cannot be disassembled, so always report
+ * SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF.
+ */
+int symbol__disassemble_bpf(struct symbol *sym __maybe_unused, struct annotate_args *args __maybe_unused)
+{
+ return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF;
+}
+#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+
+/*
+ * Disassembly of BPF images is not implemented: add a single annotation line
+ * saying so to @sym's source list and report success.
+ */
+int symbol__disassemble_bpf_image(struct symbol *sym, struct annotate_args *args)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct disasm_line *placeholder;
+
+	args->fileloc = NULL;
+	args->line_nr = 0;
+	args->offset = -1;
+	args->line = strdup("to be implemented");
+
+	placeholder = disasm_line__new(args);
+	if (placeholder != NULL)
+		annotation_line__add(&placeholder->al, &notes->src->source);
+
+	/* The temporary text is released whether or not a line was created. */
+	zfree(&args->line);
+	return 0;
+}
diff --git a/tools/perf/util/disasm_bpf.h b/tools/perf/util/disasm_bpf.h
new file mode 100644
index 000000000000..2ecb19545388
--- /dev/null
+++ b/tools/perf/util/disasm_bpf.h
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#ifndef __PERF_DISASM_BPF_H
+#define __PERF_DISASM_BPF_H
+
+struct symbol;
+struct annotate_args;
+
+/*
+ * Disassemble a JITed BPF program symbol.  Returns 0 on success; returns
+ * SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF when perf was built without
+ * both libbfd and libbpf support.
+ */
+int symbol__disassemble_bpf(struct symbol *sym, struct annotate_args *args);
+/* Adds a single "to be implemented" annotation line and returns 0. */
+int symbol__disassemble_bpf_image(struct symbol *sym, struct annotate_args *args);
+
+#endif /* __PERF_DISASM_BPF_H */
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 67414944f245..5c6e85fdae0d 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -1327,7 +1327,7 @@ bool dso_id__empty(const struct dso_id *id)
return !id->maj && !id->min && !id->ino && !id->ino_generation;
}
-void __dso__inject_id(struct dso *dso, struct dso_id *id)
+void __dso__inject_id(struct dso *dso, const struct dso_id *id)
{
struct dsos *dsos = dso__dsos(dso);
struct dso_id *dso_id = dso__id(dso);
@@ -1417,7 +1417,7 @@ void dso__set_sorted_by_name(struct dso *dso)
RC_CHK_ACCESS(dso)->sorted_by_name = true;
}
-struct dso *dso__new_id(const char *name, struct dso_id *id)
+struct dso *dso__new_id(const char *name, const struct dso_id *id)
{
RC_STRUCT(dso) *dso = zalloc(sizeof(*dso) + strlen(name) + 1);
struct dso *res;
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index ed0068251c65..bb8e8f444054 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -640,14 +640,14 @@ static inline void dso__set_text_offset(struct dso *dso, u64 val)
int dso_id__cmp(const struct dso_id *a, const struct dso_id *b);
bool dso_id__empty(const struct dso_id *id);
-struct dso *dso__new_id(const char *name, struct dso_id *id);
+struct dso *dso__new_id(const char *name, const struct dso_id *id);
struct dso *dso__new(const char *name);
void dso__delete(struct dso *dso);
int dso__cmp_id(struct dso *a, struct dso *b);
void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated);
void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated);
-void __dso__inject_id(struct dso *dso, struct dso_id *id);
+void __dso__inject_id(struct dso *dso, const struct dso_id *id);
int dso__name_len(const struct dso *dso);
diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c
index d4acdb37f046..e0998e2a7c4e 100644
--- a/tools/perf/util/dsos.c
+++ b/tools/perf/util/dsos.c
@@ -155,7 +155,7 @@ static int dsos__cmp_key_long_name_id(const void *vkey, const void *vdso)
*/
static struct dso *__dsos__find_by_longname_id(struct dsos *dsos,
const char *name,
- struct dso_id *id,
+ const struct dso_id *id,
bool write_locked)
{
struct dsos__key key = {
@@ -244,7 +244,7 @@ int dsos__add(struct dsos *dsos, struct dso *dso)
struct dsos__find_id_cb_args {
const char *name;
- struct dso_id *id;
+ const struct dso_id *id;
struct dso *res;
};
@@ -260,7 +260,7 @@ static int dsos__find_id_cb(struct dso *dso, void *data)
}
-static struct dso *__dsos__find_id(struct dsos *dsos, const char *name, struct dso_id *id,
+static struct dso *__dsos__find_id(struct dsos *dsos, const char *name, const struct dso_id *id,
bool cmp_short, bool write_locked)
{
struct dso *res;
@@ -321,7 +321,7 @@ static void dso__set_basename(struct dso *dso)
dso__set_short_name(dso, base, true);
}
-static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, struct dso_id *id)
+static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, const struct dso_id *id)
{
struct dso *dso = dso__new_id(name, id);
@@ -337,7 +337,7 @@ static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, struct
return dso;
}
-static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id)
+static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, const struct dso_id *id)
{
struct dso *dso = __dsos__find_id(dsos, name, id, false, /*write_locked=*/true);
@@ -347,7 +347,7 @@ static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struc
return dso ? dso : __dsos__addnew_id(dsos, name, id);
}
-struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id)
+struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, const struct dso_id *id)
{
struct dso *dso;
down_write(&dsos->lock);
diff --git a/tools/perf/util/dsos.h b/tools/perf/util/dsos.h
index 6c13b65648bc..a26774950866 100644
--- a/tools/perf/util/dsos.h
+++ b/tools/perf/util/dsos.h
@@ -32,7 +32,7 @@ int __dsos__add(struct dsos *dsos, struct dso *dso);
int dsos__add(struct dsos *dsos, struct dso *dso);
struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
-struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id);
+struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, const struct dso_id *id);
bool dsos__read_build_ids(struct dsos *dsos, bool with_hits);
diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c
index 2bd8585db93c..c1cc0ade48d0 100644
--- a/tools/perf/util/dump-insn.c
+++ b/tools/perf/util/dump-insn.c
@@ -15,7 +15,7 @@ const char *dump_insn(struct perf_insn *x __maybe_unused,
}
__weak
-int arch_is_branch(const unsigned char *buf __maybe_unused,
+int arch_is_uncond_branch(const unsigned char *buf __maybe_unused,
size_t len __maybe_unused,
int x86_64 __maybe_unused)
{
diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h
index 4a7797dd6d09..20d4d7bb5275 100644
--- a/tools/perf/util/dump-insn.h
+++ b/tools/perf/util/dump-insn.h
@@ -21,6 +21,6 @@ struct perf_insn {
const char *dump_insn(struct perf_insn *x, u64 ip,
u8 *inbuf, int inlen, int *lenp);
-int arch_is_branch(const unsigned char *buf, size_t len, int x86_64);
+int arch_is_uncond_branch(const unsigned char *buf, size_t len, int x86_64);
#endif
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 44ef968a7ad3..559c953ca172 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -267,7 +267,7 @@ Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
}
/* Get a type die, but skip qualifiers */
-static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
{
int tag;
@@ -1182,7 +1182,6 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
}
-#if defined(HAVE_DWARF_GETLOCATIONS_SUPPORT) || defined(HAVE_DWARF_CFI_SUPPORT)
static int reg_from_dwarf_op(Dwarf_Op *op)
{
switch (op->atom) {
@@ -1245,9 +1244,7 @@ static bool check_allowed_ops(Dwarf_Op *ops, size_t nops)
}
return true;
}
-#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT || HAVE_DWARF_CFI_SUPPORT */
-#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT
/**
* die_get_var_innermost_scope - Get innermost scope range of given variable DIE
* @sp_die: a subprogram DIE
@@ -1444,7 +1441,7 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) {
/* Assuming the location list is sorted by address */
- if (end < data->pc)
+ if (end <= data->pc)
continue;
if (start > data->pc)
break;
@@ -1598,6 +1595,9 @@ static int __die_collect_vars_cb(Dwarf_Die *die_mem, void *arg)
if (dwarf_getlocations(&attr, 0, &base, &start, &end, &ops, &nops) <= 0)
return DIE_FIND_CB_SIBLING;
+ if (!check_allowed_ops(ops, nops))
+ return DIE_FIND_CB_SIBLING;
+
if (die_get_real_type(die_mem, &type_die) == NULL)
return DIE_FIND_CB_SIBLING;
@@ -1694,9 +1694,7 @@ void die_collect_global_vars(Dwarf_Die *cu_die, struct die_var_type **var_types)
die_find_child(cu_die, __die_collect_global_vars_cb, (void *)var_types, &die_mem);
}
-#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */
-#ifdef HAVE_DWARF_CFI_SUPPORT
/**
* die_get_cfa - Get frame base information
* @dwarf: a Dwarf info
@@ -1729,7 +1727,6 @@ int die_get_cfa(Dwarf *dwarf, u64 pc, int *preg, int *poffset)
}
return -1;
}
-#endif /* HAVE_DWARF_CFI_SUPPORT */
/*
* die_has_loclist - Check if DW_AT_location of @vr_die is a location list
@@ -1974,8 +1971,15 @@ static int __die_find_member_offset_cb(Dwarf_Die *die_mem, void *arg)
return DIE_FIND_CB_SIBLING;
/* Unions might not have location */
- if (die_get_data_member_location(die_mem, &loc) < 0)
- loc = 0;
+ if (die_get_data_member_location(die_mem, &loc) < 0) {
+ Dwarf_Attribute attr;
+
+ if (dwarf_attr_integrate(die_mem, DW_AT_data_bit_offset, &attr) &&
+ dwarf_formudata(&attr, &loc) == 0)
+ loc /= 8;
+ else
+ loc = 0;
+ }
if (offset == loc)
return DIE_FIND_CB_END;
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index 24446412b869..892c8c5c23fc 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -56,6 +56,8 @@ const char *die_get_decl_file(Dwarf_Die *dw_die);
/* Get type die */
Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+/* Get a type die, but skip qualifiers */
+Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
/* Get a type die, but skip qualifiers and typedef */
Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
@@ -154,8 +156,6 @@ Dwarf_Die *die_get_member_type(Dwarf_Die *type_die, int offset, Dwarf_Die *die_m
/* Return type info where the pointer and offset point to */
Dwarf_Die *die_deref_ptr_type(Dwarf_Die *ptr_die, int offset, Dwarf_Die *die_mem);
-#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT
-
/* Get byte offset range of given variable DIE */
int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf);
@@ -174,58 +174,7 @@ void die_collect_vars(Dwarf_Die *sc_die, struct die_var_type **var_types);
/* Save all global variables in this CU */
void die_collect_global_vars(Dwarf_Die *cu_die, struct die_var_type **var_types);
-#else /* HAVE_DWARF_GETLOCATIONS_SUPPORT */
-
-static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused,
- Dwarf_Die *vr_die __maybe_unused,
- struct strbuf *buf __maybe_unused)
-{
- return -ENOTSUP;
-}
-
-static inline Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die __maybe_unused,
- Dwarf_Addr pc __maybe_unused,
- int reg __maybe_unused,
- int *poffset __maybe_unused,
- bool is_fbreg __maybe_unused,
- Dwarf_Die *die_mem __maybe_unused)
-{
- return NULL;
-}
-
-static inline Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die __maybe_unused,
- Dwarf_Addr addr __maybe_unused,
- Dwarf_Die *die_mem __maybe_unused,
- int *offset __maybe_unused)
-{
- return NULL;
-}
-
-static inline void die_collect_vars(Dwarf_Die *sc_die __maybe_unused,
- struct die_var_type **var_types __maybe_unused)
-{
-}
-
-static inline void die_collect_global_vars(Dwarf_Die *cu_die __maybe_unused,
- struct die_var_type **var_types __maybe_unused)
-{
-}
-
-#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */
-
-#ifdef HAVE_DWARF_CFI_SUPPORT
-
/* Get the frame base information from CFA */
int die_get_cfa(Dwarf *dwarf, u64 pc, int *preg, int *poffset);
-#else /* HAVE_DWARF_CFI_SUPPORT */
-
-static inline int die_get_cfa(Dwarf *dwarf __maybe_unused, u64 pc __maybe_unused,
- int *preg __maybe_unused, int *poffset __maybe_unused)
-{
- return -1;
-}
-
-#endif /* HAVE_DWARF_CFI_SUPPORT */
-
#endif /* _DWARF_AUX_H */
diff --git a/tools/perf/util/dwarf-regs-csky.c b/tools/perf/util/dwarf-regs-csky.c
new file mode 100644
index 000000000000..d38ef1f07f3e
--- /dev/null
+++ b/tools/perf/util/dwarf-regs-csky.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
+// Mapping of DWARF debug register numbers into register names.
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+/* Number of entries in the ABIv2 table; index n is DWARF register number n. */
+#define CSKY_ABIV2_MAX_REGS 73
+const char *csky_dwarf_regs_table_abiv2[CSKY_ABIV2_MAX_REGS] = {
+ /* r0 ~ r7 */
+ "%a0", "%a1", "%a2", "%a3", "%regs0", "%regs1", "%regs2", "%regs3",
+ /* r8 ~ r15 */
+ "%regs4", "%regs5", "%regs6", "%regs7", "%regs8", "%regs9", "%sp",
+ "%lr",
+ /* r16 ~ r23 */
+ "%exregs0", "%exregs1", "%exregs2", "%exregs3", "%exregs4",
+ "%exregs5", "%exregs6", "%exregs7",
+ /* r24 ~ r31 */
+ "%exregs8", "%exregs9", "%exregs10", "%exregs11", "%exregs12",
+ "%exregs13", "%exregs14", "%tls",
+ /* r32 ~ r39 */
+ "%pc", NULL, NULL, NULL, "%hi", "%lo", NULL, NULL,
+ /* r40 ~ r71: no register name */
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ /* r72 */
+ "%epc",
+};
+
+/* Number of entries in the ABIv1 table; index n is DWARF register number n. */
+#define CSKY_ABIV1_MAX_REGS 57
+const char *csky_dwarf_regs_table_abiv1[CSKY_ABIV1_MAX_REGS] = {
+ /* r0 ~ r7 */
+ "%sp", "%regs9", "%a0", "%a1", "%a2", "%a3", "%regs0", "%regs1",
+ /* r8 ~ r15 */
+ "%regs2", "%regs3", "%regs4", "%regs5", "%regs6", "%regs7", "%regs8",
+ "%lr",
+ /* r16 ~ r55: no register name */
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ /* r56 */
+ "%epc",
+};
+
+/*
+ * Return the register name for DWARF register number @n, using the ABIv2
+ * table when @flags has EF_CSKY_ABIV2 set and the ABIv1 table otherwise.
+ * Returns NULL when @n is outside the selected table or has no name.
+ */
+const char *get_csky_regstr(unsigned int n, unsigned int flags)
+{
+	if (flags & EF_CSKY_ABIV2) {
+		if (n >= CSKY_ABIV2_MAX_REGS)
+			return NULL;
+		return csky_dwarf_regs_table_abiv2[n];
+	}
+
+	if (n >= CSKY_ABIV1_MAX_REGS)
+		return NULL;
+	return csky_dwarf_regs_table_abiv1[n];
+}
diff --git a/tools/perf/util/dwarf-regs-powerpc.c b/tools/perf/util/dwarf-regs-powerpc.c
new file mode 100644
index 000000000000..caf77a234c78
--- /dev/null
+++ b/tools/perf/util/dwarf-regs-powerpc.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 Ian Munsie, IBM Corporation.
+ */
+
+#include <dwarf-regs.h>
+
+/*
+ * Field extractors for a raw 32-bit instruction word.  Bit positions below
+ * are counted from the least significant bit.
+ *   PPC_OP: bits 26..31 (6 bits)
+ *   PPC_RT: bits 21..25, PPC_RA: bits 16..20, PPC_RB: bits 11..15 (5 bits each)
+ *   PPC_D:  low 16 bits with bit 0 cleared
+ *   PPC_DS: low 16 bits with bits 0..1 cleared
+ * NOTE(review): PPC_D masks with 0xfffe rather than 0xffff, and neither
+ * offset macro sign-extends - confirm this is intended.
+ */
+#define PPC_OP(op) (((op) >> 26) & 0x3F)
+#define PPC_RA(a) (((a) >> 16) & 0x1f)
+#define PPC_RT(t) (((t) >> 21) & 0x1f)
+#define PPC_RB(b) (((b) >> 11) & 0x1f)
+#define PPC_D(D) ((D) & 0xfffe)
+#define PPC_DS(DS) ((DS) & 0xfffc)
+/* PPC_OP() values that get_offset_opcode() decodes with PPC_DS(). */
+#define OP_LD 58
+#define OP_STD 62
+
+/* Source register number: the RA field of @raw_insn. */
+static int get_source_reg(u32 raw_insn)
+{
+	int ra = PPC_RA(raw_insn);
+
+	return ra;
+}
+
+/* Target register number: the RT field of @raw_insn. */
+static int get_target_reg(u32 raw_insn)
+{
+	int rt = PPC_RT(raw_insn);
+
+	return rt;
+}
+
+/*
+ * Offset field of @raw_insn: opcodes OP_LD and OP_STD are decoded as DS form,
+ * every other opcode as D form.
+ */
+static int get_offset_opcode(u32 raw_insn)
+{
+	switch (PPC_OP(raw_insn)) {
+	case OP_LD:
+	case OP_STD:
+		/* DS- form */
+		return PPC_DS(raw_insn);
+	default:
+		return PPC_D(raw_insn);
+	}
+}
+
+/*
+ * Fill in @op_loc from the raw instruction word, depending on whether the
+ * operand is a source or a target.
+ *  D form:  ins RT,D(RA)  -> src_reg1 = RA, offset = D, dst_reg1 = RT
+ *  DS form: ins RT,DS(RA) -> src_reg1 = RA, offset = DS, dst_reg1 = RT
+ *  X form:  ins RT,RA,RB  -> src_reg1 = RA, src_reg2 = RB, dst_reg1 = RT
+ */
+void get_powerpc_regs(u32 raw_insn, int is_source,
+		struct annotated_op_loc *op_loc)
+{
+	op_loc->reg1 = is_source ? get_source_reg(raw_insn) : get_target_reg(raw_insn);
+
+	if (op_loc->multi_regs)
+		op_loc->reg2 = PPC_RB(raw_insn);
+
+	/* TODO: Implement offset handling for X Form */
+	if (!op_loc->mem_ref || PPC_OP(raw_insn) == 31)
+		return;
+
+	op_loc->offset = get_offset_opcode(raw_insn);
+}
diff --git a/tools/perf/util/dwarf-regs-x86.c b/tools/perf/util/dwarf-regs-x86.c
new file mode 100644
index 000000000000..7a55c65e8da6
--- /dev/null
+++ b/tools/perf/util/dwarf-regs-x86.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * dwarf-regs.c : Mapping of DWARF debug register numbers into register names.
+ * Extracted from probe-finder.c
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ */
+
+#include <errno.h> /* for EINVAL */
+#include <string.h> /* for strcmp */
+#include <linux/kernel.h> /* for ARRAY_SIZE */
+#include <dwarf-regs.h>
+
+/* One register-name spelling and the DWARF register number it maps to. */
+struct dwarf_regs_idx {
+ const char *name;
+ int idx;
+};
+
+/*
+ * Register names (without the leading '%') and their DWARF register numbers.
+ * Each row lists the 64, 32, 16 and 8 bit spellings of one register.
+ */
+static const struct dwarf_regs_idx x86_regidx_table[] = {
+	{ "rax", 0 }, { "eax", 0 }, { "ax", 0 }, { "al", 0 },
+	{ "rdx", 1 }, { "edx", 1 }, { "dx", 1 }, { "dl", 1 },
+	{ "rcx", 2 }, { "ecx", 2 }, { "cx", 2 }, { "cl", 2 },
+	/* The 32-bit name was misspelled "edx", so "%ebx" was never found. */
+	{ "rbx", 3 }, { "ebx", 3 }, { "bx", 3 }, { "bl", 3 },
+	{ "rsi", 4 }, { "esi", 4 }, { "si", 4 }, { "sil", 4 },
+	{ "rdi", 5 }, { "edi", 5 }, { "di", 5 }, { "dil", 5 },
+	{ "rbp", 6 }, { "ebp", 6 }, { "bp", 6 }, { "bpl", 6 },
+	{ "rsp", 7 }, { "esp", 7 }, { "sp", 7 }, { "spl", 7 },
+	{ "r8", 8 }, { "r8d", 8 }, { "r8w", 8 }, { "r8b", 8 },
+	{ "r9", 9 }, { "r9d", 9 }, { "r9w", 9 }, { "r9b", 9 },
+	{ "r10", 10 }, { "r10d", 10 }, { "r10w", 10 }, { "r10b", 10 },
+	{ "r11", 11 }, { "r11d", 11 }, { "r11w", 11 }, { "r11b", 11 },
+	{ "r12", 12 }, { "r12d", 12 }, { "r12w", 12 }, { "r12b", 12 },
+	{ "r13", 13 }, { "r13d", 13 }, { "r13w", 13 }, { "r13b", 13 },
+	{ "r14", 14 }, { "r14d", 14 }, { "r14w", 14 }, { "r14b", 14 },
+	{ "r15", 15 }, { "r15d", 15 }, { "r15w", 15 }, { "r15b", 15 },
+	{ "rip", DWARF_REG_PC },
+};
+
+/*
+ * Look up the DWARF register number for a '%'-prefixed x86 register name.
+ * Returns -EINVAL when @name does not start with '%' and -ENOENT when the
+ * name is not in x86_regidx_table.
+ */
+int get_x86_regnum(const char *name)
+{
+	const struct dwarf_regs_idx *entry = x86_regidx_table;
+	const struct dwarf_regs_idx *end = entry + ARRAY_SIZE(x86_regidx_table);
+	const char *bare;
+
+	if (name[0] != '%')
+		return -EINVAL;
+
+	/* Table entries are stored without the '%' prefix. */
+	bare = name + 1;
+	for (; entry != end; entry++) {
+		if (strcmp(entry->name, bare) == 0)
+			return entry->idx;
+	}
+
+	return -ENOENT;
+}
diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c
index 5b7f86c0063f..28a1cfdf26d4 100644
--- a/tools/perf/util/dwarf-regs.c
+++ b/tools/perf/util/dwarf-regs.c
@@ -13,14 +13,6 @@
#include <errno.h>
#include <linux/kernel.h>
-#ifndef EM_AARCH64
-#define EM_AARCH64 183 /* ARM 64 bit */
-#endif
-
-#ifndef EM_LOONGARCH
-#define EM_LOONGARCH 258 /* LoongArch */
-#endif
-
/* Define const char * {arch}_register_tbl[] */
#define DEFINE_DWARF_REGSTR_TABLE
#include "../arch/x86/include/dwarf-regs-table.h"
@@ -28,6 +20,7 @@
#include "../arch/arm64/include/dwarf-regs-table.h"
#include "../arch/sh/include/dwarf-regs-table.h"
#include "../arch/powerpc/include/dwarf-regs-table.h"
+#include "../arch/riscv/include/dwarf-regs-table.h"
#include "../arch/s390/include/dwarf-regs-table.h"
#include "../arch/sparc/include/dwarf-regs-table.h"
#include "../arch/xtensa/include/dwarf-regs-table.h"
@@ -37,11 +30,13 @@
#define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL)
/* Return architecture dependent register string (for kprobe-tracer) */
-const char *get_dwarf_regstr(unsigned int n, unsigned int machine)
+const char *get_dwarf_regstr(unsigned int n, unsigned int machine, unsigned int flags)
{
+ if (machine == EM_NONE) {
+ /* Generic arch - use host arch */
+ machine = EM_HOST;
+ }
switch (machine) {
- case EM_NONE: /* Generic arch - use host arch */
- return get_arch_regstr(n);
case EM_386:
return __get_dwarf_regstr(x86_32_regstr_tbl, n);
case EM_X86_64:
@@ -50,6 +45,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine)
return __get_dwarf_regstr(arm_regstr_tbl, n);
case EM_AARCH64:
return __get_dwarf_regstr(aarch64_regstr_tbl, n);
+ case EM_CSKY:
+ return get_csky_regstr(n, flags);
case EM_SH:
return __get_dwarf_regstr(sh_regstr_tbl, n);
case EM_S390:
@@ -57,6 +54,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine)
case EM_PPC:
case EM_PPC64:
return __get_dwarf_regstr(powerpc_regstr_tbl, n);
+ case EM_RISCV:
+ return __get_dwarf_regstr(riscv_regstr_tbl, n);
case EM_SPARC:
case EM_SPARCV9:
return __get_dwarf_regstr(sparc_regstr_tbl, n);
@@ -72,13 +71,15 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine)
return NULL;
}
+#if EM_HOST != EM_X86_64 && EM_HOST != EM_386
__weak int get_arch_regnum(const char *name __maybe_unused)
{
return -ENOTSUP;
}
+#endif
/* Return DWARF register number from architecture register name */
-int get_dwarf_regnum(const char *name, unsigned int machine)
+int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags __maybe_unused)
{
char *regname = strdup(name);
int reg = -1;
@@ -92,10 +93,21 @@ int get_dwarf_regnum(const char *name, unsigned int machine)
if (p)
*p = '\0';
+ if (machine == EM_NONE) {
+ /* Generic arch - use host arch */
+ machine = EM_HOST;
+ }
switch (machine) {
- case EM_NONE: /* Generic arch - use host arch */
+#if EM_HOST != EM_X86_64 && EM_HOST != EM_386
+ case EM_HOST:
reg = get_arch_regnum(regname);
break;
+#endif
+ case EM_X86_64:
+ fallthrough;
+ case EM_386:
+ reg = get_x86_regnum(regname);
+ break;
default:
pr_err("ELF MACHINE %x is not supported.\n", machine);
}
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index a459374d0a1a..e2843ca2edd9 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -5,12 +5,14 @@
#include "util/header.h"
#include "linux/compiler.h"
#include <linux/ctype.h>
+#include <linux/string.h>
#include <linux/zalloc.h>
#include "cgroup.h"
#include <errno.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <string.h>
+#include "pmu.h"
#include "pmus.h"
#include "strbuf.h"
#include "trace/beauty/beauty.h"
@@ -372,7 +374,8 @@ error:
int perf_env__read_cpuid(struct perf_env *env)
{
char cpuid[128];
- int err = get_cpuid(cpuid, sizeof(cpuid));
+ struct perf_cpu cpu = {-1};
+ int err = get_cpuid(cpuid, sizeof(cpuid), cpu);
if (err)
return err;
@@ -624,3 +627,40 @@ out:
free(cap_eq);
return NULL;
}
+
+/*
+ * Report the branch counter count and width recorded in @env.
+ *
+ * When env->cpu_pmu_caps is set, the values stored directly in @env
+ * (env->br_cntr_nr / env->br_cntr_width) are used; otherwise they are taken
+ * from env->pmu_caps.  Either output pointer may be NULL when the caller is
+ * not interested in that value.
+ *
+ * If neither capability table is present the requested outputs are set to 0
+ * rather than dereferencing a NULL env->pmu_caps.
+ */
+void perf_env__find_br_cntr_info(struct perf_env *env,
+				 unsigned int *nr,
+				 unsigned int *width)
+{
+	unsigned int cntr_nr = 0, cntr_width = 0;
+
+	if (env->cpu_pmu_caps) {
+		cntr_nr = env->br_cntr_nr;
+		cntr_width = env->br_cntr_width;
+	} else if (env->pmu_caps) {
+		cntr_nr = env->pmu_caps->br_cntr_nr;
+		cntr_width = env->pmu_caps->br_cntr_width;
+	}
+
+	if (nr)
+		*nr = cntr_nr;
+
+	if (width)
+		*width = cntr_width;
+}
+
+/*
+ * Tell whether env->cpuid starts with "AuthenticAMD".
+ *
+ * The verdict is computed on the first call and kept in a function-local
+ * static, so later calls return the cached answer whatever @env they pass.
+ */
+bool perf_env__is_x86_amd_cpu(struct perf_env *env)
+{
+	/* 0: not probed yet, 1: AMD, -1: not AMD */
+	static int amd_state;
+
+	if (amd_state == 0) {
+		if (env->cpuid && strstarts(env->cpuid, "AuthenticAMD"))
+			amd_state = 1;
+		else
+			amd_state = -1;
+	}
+
+	return amd_state >= 1;
+}
+
+/*
+ * Convenience wrapper around perf_env__is_x86_amd_cpu() for callers that have
+ * no perf_env of their own: a temporary one is set up on the stack, queried
+ * and torn down again before returning.
+ */
+bool x86__is_amd_cpu(void)
+{
+	struct perf_env env = { .total_mem = 0, };
+	bool amd;
+
+	/* NOTE(review): presumably fills in env.cpuid - confirm in perf_env__cpuid() */
+	perf_env__cpuid(&env);
+	amd = perf_env__is_x86_amd_cpu(&env);
+	perf_env__exit(&env);
+
+	return amd;
+}
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 2a2c37cc40b7..ae604c4edbb7 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -192,4 +192,11 @@ char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
const char *cap);
bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name);
+void perf_env__find_br_cntr_info(struct perf_env *env,
+ unsigned int *nr,
+ unsigned int *width);
+
+bool x86__is_amd_cpu(void);
+bool perf_env__is_x86_amd_cpu(struct perf_env *env);
+
#endif /* __PERF_ENV_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index f32f9abf6344..aac96d5d1917 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -216,7 +216,7 @@ size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp)
event->cgroup.id, event->cgroup.path);
}
-int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
+int perf_event__process_comm(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -224,7 +224,7 @@ int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
return machine__process_comm_event(machine, event, sample);
}
-int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused,
+int perf_event__process_namespaces(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -232,7 +232,7 @@ int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused,
return machine__process_namespaces_event(machine, event, sample);
}
-int perf_event__process_cgroup(struct perf_tool *tool __maybe_unused,
+int perf_event__process_cgroup(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -240,7 +240,7 @@ int perf_event__process_cgroup(struct perf_tool *tool __maybe_unused,
return machine__process_cgroup_event(machine, event, sample);
}
-int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
+int perf_event__process_lost(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -248,7 +248,7 @@ int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
return machine__process_lost_event(machine, event, sample);
}
-int perf_event__process_aux(struct perf_tool *tool __maybe_unused,
+int perf_event__process_aux(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine)
@@ -256,7 +256,7 @@ int perf_event__process_aux(struct perf_tool *tool __maybe_unused,
return machine__process_aux_event(machine, event);
}
-int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
+int perf_event__process_itrace_start(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine)
@@ -264,7 +264,7 @@ int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
return machine__process_itrace_start_event(machine, event);
}
-int perf_event__process_aux_output_hw_id(struct perf_tool *tool __maybe_unused,
+int perf_event__process_aux_output_hw_id(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine)
@@ -272,7 +272,7 @@ int perf_event__process_aux_output_hw_id(struct perf_tool *tool __maybe_unused,
return machine__process_aux_output_hw_id_event(machine, event);
}
-int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
+int perf_event__process_lost_samples(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -280,7 +280,7 @@ int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
return machine__process_lost_samples_event(machine, event, sample);
}
-int perf_event__process_switch(struct perf_tool *tool __maybe_unused,
+int perf_event__process_switch(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine)
@@ -288,7 +288,7 @@ int perf_event__process_switch(struct perf_tool *tool __maybe_unused,
return machine__process_switch_event(machine, event);
}
-int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused,
+int perf_event__process_ksymbol(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine)
@@ -296,7 +296,7 @@ int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused,
return machine__process_ksymbol(machine, event, sample);
}
-int perf_event__process_bpf(struct perf_tool *tool __maybe_unused,
+int perf_event__process_bpf(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -304,7 +304,7 @@ int perf_event__process_bpf(struct perf_tool *tool __maybe_unused,
return machine__process_bpf(machine, event, sample);
}
-int perf_event__process_text_poke(struct perf_tool *tool __maybe_unused,
+int perf_event__process_text_poke(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -387,7 +387,7 @@ size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp)
return ret;
}
-int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
+int perf_event__process_mmap(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -395,7 +395,7 @@ int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
return machine__process_mmap_event(machine, event, sample);
}
-int perf_event__process_mmap2(struct perf_tool *tool __maybe_unused,
+int perf_event__process_mmap2(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -410,7 +410,7 @@ size_t perf_event__fprintf_task(union perf_event *event, FILE *fp)
event->fork.ppid, event->fork.ptid);
}
-int perf_event__process_fork(struct perf_tool *tool __maybe_unused,
+int perf_event__process_fork(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -418,7 +418,7 @@ int perf_event__process_fork(struct perf_tool *tool __maybe_unused,
return machine__process_fork_event(machine, event, sample);
}
-int perf_event__process_exit(struct perf_tool *tool __maybe_unused,
+int perf_event__process_exit(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -426,6 +426,26 @@ int perf_event__process_exit(struct perf_tool *tool __maybe_unused,
return machine__process_exit_event(machine, event, sample);
}
+/*
+ * Exit-event handler that removes the exiting thread from @machine.
+ *
+ * The thread is looked up (or created) from the pid/tid carried in
+ * event->fork, the event is echoed through dump_printf(), and, if a thread
+ * was obtained, it is removed from the machine and the reference taken by the
+ * lookup is dropped.  Always returns 0.
+ */
+int perf_event__exit_del_thread(const struct perf_tool *tool __maybe_unused,
+				union perf_event *event,
+				struct perf_sample *sample __maybe_unused,
+				struct machine *machine)
+{
+	struct thread *th;
+
+	th = machine__findnew_thread(machine, event->fork.pid, event->fork.tid);
+
+	dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
+		    event->fork.ppid, event->fork.ptid);
+
+	if (th == NULL)
+		return 0;
+
+	machine__remove_thread(machine, th);
+	thread__put(th);
+
+	return 0;
+}
+
size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp)
{
return fprintf(fp, " offset: %#"PRI_lx64" size: %#"PRI_lx64" flags: %#"PRI_lx64" [%s%s%s]\n",
@@ -587,7 +607,7 @@ size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FIL
return ret;
}
-int perf_event__process(struct perf_tool *tool __maybe_unused,
+int perf_event__process(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index d8bcee2e9b93..2744c54f404e 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -66,6 +66,7 @@ enum {
PERF_IP_FLAG_VMEXIT = 1ULL << 12,
PERF_IP_FLAG_INTR_DISABLE = 1ULL << 13,
PERF_IP_FLAG_INTR_TOGGLE = 1ULL << 14,
+ PERF_IP_FLAG_BRANCH_MISS = 1ULL << 15,
};
#define PERF_IP_FLAG_CHARS "bcrosyiABExghDt"
@@ -267,71 +268,75 @@ struct perf_tool;
void perf_event__read_stat_config(struct perf_stat_config *config,
struct perf_record_stat_config *event);
-int perf_event__process_comm(struct perf_tool *tool,
+int perf_event__process_comm(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_lost(struct perf_tool *tool,
+int perf_event__process_lost(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_lost_samples(struct perf_tool *tool,
+int perf_event__process_lost_samples(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_aux(struct perf_tool *tool,
+int perf_event__process_aux(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_itrace_start(struct perf_tool *tool,
+int perf_event__process_itrace_start(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_aux_output_hw_id(struct perf_tool *tool,
+int perf_event__process_aux_output_hw_id(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_switch(struct perf_tool *tool,
+int perf_event__process_switch(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_namespaces(struct perf_tool *tool,
+int perf_event__process_namespaces(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_cgroup(struct perf_tool *tool,
+int perf_event__process_cgroup(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_mmap(struct perf_tool *tool,
+int perf_event__process_mmap(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_mmap2(struct perf_tool *tool,
+int perf_event__process_mmap2(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_fork(struct perf_tool *tool,
+int perf_event__process_fork(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_exit(struct perf_tool *tool,
+int perf_event__process_exit(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_ksymbol(struct perf_tool *tool,
+int perf_event__exit_del_thread(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_bpf(struct perf_tool *tool,
+int perf_event__process_ksymbol(const struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_bpf(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_text_poke(struct perf_tool *tool,
+int perf_event__process_text_poke(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process(struct perf_tool *tool,
+int perf_event__process(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h
index f43e5b1a366a..eabd7913c309 100644
--- a/tools/perf/util/events_stats.h
+++ b/tools/perf/util/events_stats.h
@@ -18,7 +18,18 @@
* PERF_RECORD_LOST_SAMPLES event. The number of lost-samples events is stored
* in .nr_events[PERF_RECORD_LOST_SAMPLES] while total_lost_samples tells
* exactly how many samples the kernel in fact dropped, i.e. it is the sum of
- * all struct perf_record_lost_samples.lost fields reported.
+ * all struct perf_record_lost_samples.lost fields reported without setting the
+ * misc field in the header.
+ *
+ * The BPF program can discard samples according to the filter expressions given
+ * by the user. This number is kept in a BPF map and dumped at the end of perf
+ * record in a PERF_RECORD_LOST_SAMPLES event. To differentiate it from other
+ * lost samples, perf tools set the PERF_RECORD_MISC_LOST_SAMPLES_BPF flag in
+ * the header.misc field. The number of dropped-samples events is stored in
+ * .nr_events[PERF_RECORD_LOST_SAMPLES] while total_dropped_samples tells
+ * exactly how many samples the BPF program in fact dropped, i.e. it is the sum
+ * of all struct perf_record_lost_samples.lost fields reported with the misc
+ * field set in the header.
*
* The total_period is needed because by default auto-freq is used, so
* multiplying nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
@@ -28,6 +39,7 @@
struct events_stats {
u64 total_lost;
u64 total_lost_samples;
+ u64 total_dropped_samples;
u64 total_aux_lost;
u64 total_aux_partial;
u64 total_aux_collision;
@@ -48,6 +60,7 @@ struct hists_stats {
u32 nr_samples;
u32 nr_non_filtered_samples;
u32 nr_lost_samples;
+ u32 nr_dropped_samples;
};
void events_stats__inc(struct events_stats *stats, u32 type);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 3a719edafc7a..f0dd174e2deb 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -33,6 +33,8 @@
#include "util/bpf-filter.h"
#include "util/stat.h"
#include "util/util.h"
+#include "util/env.h"
+#include "util/intel-tpebs.h"
#include <signal.h>
#include <unistd.h>
#include <sched.h>
@@ -46,6 +48,7 @@
#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/timerfd.h>
+#include <sys/wait.h>
#include <linux/bitops.h>
#include <linux/hash.h>
@@ -78,6 +81,7 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
evlist->ctl_fd.fd = -1;
evlist->ctl_fd.ack = -1;
evlist->ctl_fd.pos = -1;
+ evlist->nr_br_cntr = -1;
}
struct evlist *evlist__new(void)
@@ -179,6 +183,7 @@ void evlist__delete(struct evlist *evlist)
if (evlist == NULL)
return;
+ tpebs_delete();
evlist__free_stats(evlist);
evlist__munmap(evlist);
evlist__close(evlist);
@@ -315,62 +320,6 @@ struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide)
}
#endif
-int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
-{
- struct evsel *evsel, *n;
- LIST_HEAD(head);
- size_t i;
-
- for (i = 0; i < nr_attrs; i++) {
- evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
- if (evsel == NULL)
- goto out_delete_partial_list;
- list_add_tail(&evsel->core.node, &head);
- }
-
- evlist__splice_list_tail(evlist, &head);
-
- return 0;
-
-out_delete_partial_list:
- __evlist__for_each_entry_safe(&head, n, evsel)
- evsel__delete(evsel);
- return -1;
-}
-
-int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
-{
- size_t i;
-
- for (i = 0; i < nr_attrs; i++)
- event_attr_init(attrs + i);
-
- return evlist__add_attrs(evlist, attrs, nr_attrs);
-}
-
-__weak int arch_evlist__add_default_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs,
- size_t nr_attrs)
-{
- if (!nr_attrs)
- return 0;
-
- return __evlist__add_default_attrs(evlist, attrs, nr_attrs);
-}
-
-struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
-{
- struct evsel *evsel;
-
- evlist__for_each_entry(evlist, evsel) {
- if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
- (int)evsel->core.attr.config == id)
- return evsel;
- }
-
- return NULL;
-}
-
struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name)
{
struct evsel *evsel;
@@ -1063,7 +1012,7 @@ int evlist__create_maps(struct evlist *evlist, struct target *target)
if (!threads)
return -1;
- if (target__uses_dummy_map(target))
+ if (target__uses_dummy_map(target) && !evlist__has_bpf_output(evlist))
cpus = perf_cpu_map__new_any_cpu();
else
cpus = perf_cpu_map__new(target->cpu_list);
@@ -1086,7 +1035,8 @@ out_delete_threads:
return -1;
}
-int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
+int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel,
+ struct target *target)
{
struct evsel *evsel;
int err = 0;
@@ -1108,7 +1058,7 @@ int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
* non-tracepoint events can have BPF filters.
*/
if (!list_empty(&evsel->bpf_filters)) {
- err = perf_bpf_filter__prepare(evsel);
+ err = perf_bpf_filter__prepare(evsel, target);
if (err) {
*err_evsel = evsel;
break;
@@ -1194,11 +1144,6 @@ int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
return ret;
}
-int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
-{
- return evlist__set_tp_filter_pids(evlist, 1, &pid);
-}
-
int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
char *filter = asprintf__tp_filter_pids(npids, pids);
@@ -1261,6 +1206,72 @@ u64 evlist__combined_branch_type(struct evlist *evlist)
return branch_type;
}
+/*
+ * Search the entries that precede @event in @evlist for one that has
+ * PERF_SAMPLE_BRANCH_COUNTERS set in its branch_sample_type and carries the
+ * same name as @event.  Returns that entry, or NULL when @event is reached
+ * (or the list ends) without a match.
+ */
+static struct evsel *
+evlist__find_dup_event_from_prev(struct evlist *evlist, struct evsel *event)
+{
+	struct evsel *cur;
+
+	evlist__for_each_entry(evlist, cur) {
+		/* Only entries in front of @event are candidates. */
+		if (cur == event)
+			return NULL;
+
+		if (!(cur->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS))
+			continue;
+
+		if (strcmp(cur->name, event->name) == 0)
+			return cur;
+	}
+
+	return NULL;
+}
+
+#define MAX_NR_ABBR_NAME	(26 * 11)
+
+/*
+ * Hand out the next abbreviated name for an event that requires a branch
+ * counter.  Names run A..Z, then A0..Z0 and so on up to A9..Z9, which gives
+ * MAX_NR_ABBR_NAME names in total.  Once they are used up every further call
+ * yields "NA".  @name needs room for at least three bytes.
+ */
+static void evlist__new_abbr_name(char *name)
+{
+	static int idx;
+	int round;
+
+	if (idx >= MAX_NR_ABBR_NAME) {
+		name[0] = 'N';
+		name[1] = 'A';
+		name[2] = '\0';
+		return;
+	}
+
+	/* round 0 has no digit suffix, rounds 1..10 map to '0'..'9' */
+	round = idx / 26;
+	name[0] = 'A' + (idx % 26);
+
+	if (round) {
+		name[1] = '0' + round - 1;
+		name[2] = '\0';
+	} else {
+		name[1] = '\0';
+	}
+
+	idx++;
+}
+
+/*
+ * Number every event in @evlist that has PERF_SAMPLE_BRANCH_COUNTERS set and
+ * give it an abbreviated name.
+ *
+ * Each such event gets the next br_cntr_idx and bumps its leader's
+ * br_cntr_nr.  An event whose name matches an earlier branch-counter event
+ * reuses that event's abbreviation; otherwise a new one is generated.  The
+ * number of events found is stored in evlist->nr_br_cntr.
+ */
+void evlist__update_br_cntr(struct evlist *evlist)
+{
+	struct evsel *evsel, *prev;
+	int nr = 0;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (!(evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS))
+			continue;
+
+		evsel->br_cntr_idx = nr++;
+		evsel__leader(evsel)->br_cntr_nr++;
+
+		prev = evlist__find_dup_event_from_prev(evlist, evsel);
+		if (!prev) {
+			evlist__new_abbr_name(evsel->abbr_name);
+			continue;
+		}
+
+		memcpy(evsel->abbr_name, prev->abbr_name, 3 * sizeof(char));
+	}
+
+	evlist->nr_br_cntr = nr;
+}
+
bool evlist__valid_read_format(struct evlist *evlist)
{
struct evsel *first = evlist__first(evlist), *pos = first;
@@ -1413,6 +1424,8 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const
int child_ready_pipe[2], go_pipe[2];
char bf;
+ evlist->workload.cork_fd = -1;
+
if (pipe(child_ready_pipe) < 0) {
perror("failed to create 'ready' pipe");
return -1;
@@ -1465,7 +1478,7 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const
* For cancelling the workload without actually running it,
* the parent will just close workload.cork_fd, without writing
* anything, i.e. read will return zero and we just exit()
- * here.
+ * here (See evlist__cancel_workload()).
*/
if (ret != 1) {
if (ret == -1)
@@ -1529,7 +1542,7 @@ out_close_ready_pipe:
int evlist__start_workload(struct evlist *evlist)
{
- if (evlist->workload.cork_fd > 0) {
+ if (evlist->workload.cork_fd >= 0) {
char bf = 0;
int ret;
/*
@@ -1540,12 +1553,24 @@ int evlist__start_workload(struct evlist *evlist)
perror("unable to write to pipe");
close(evlist->workload.cork_fd);
+ evlist->workload.cork_fd = -1;
return ret;
}
return 0;
}
+/*
+ * Abandon a prepared workload without starting it.
+ *
+ * If the cork fd is still open it is closed without anything having been
+ * written to it and marked invalid, then the workload pid is polled once with
+ * waitpid(WNOHANG), so this call does not block.  Nothing happens when the
+ * cork fd is already invalid.
+ */
+void evlist__cancel_workload(struct evlist *evlist)
+{
+	int status;
+
+	if (evlist->workload.cork_fd < 0)
+		return;
+
+	close(evlist->workload.cork_fd);
+	evlist->workload.cork_fd = -1;
+	waitpid(evlist->workload.pid, &status, WNOHANG);
+}
+
int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
{
struct evsel *evsel = evlist__event2evsel(evlist, event);
@@ -2548,7 +2573,8 @@ void evlist__uniquify_name(struct evlist *evlist)
else
attributes = empty_attributes;
- if (asprintf(&new_name, "%s/%s/%s", pos->pmu_name, pos->name, attributes + 1)) {
+ if (asprintf(&new_name, "%s/%s/%s", pos->pmu ? pos->pmu->name : "",
+ pos->name, attributes + 1)) {
free(pos->name);
pos->name = new_name;
} else {
@@ -2556,3 +2582,15 @@ void evlist__uniquify_name(struct evlist *evlist)
}
}
}
+
+/*
+ * Return true if at least one entry of @evlist satisfies
+ * evsel__is_bpf_output(), false otherwise (including for an empty list).
+ */
+bool evlist__has_bpf_output(struct evlist *evlist)
+{
+	struct evsel *pos;
+	bool found = false;
+
+	evlist__for_each_entry(evlist, pos) {
+		if (evsel__is_bpf_output(pos)) {
+			found = true;
+			break;
+		}
+	}
+
+	return found;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index cb91dc9117a2..adddb1db1ad2 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -20,6 +20,7 @@ struct pollfd;
struct thread_map;
struct perf_cpu_map;
struct record_opts;
+struct target;
/*
* State machine of bkw_mmap_state:
@@ -56,6 +57,7 @@ struct evlist {
bool enabled;
int id_pos;
int is_pos;
+ int nr_br_cntr;
u64 combined_sample_type;
enum bkw_mmap_state bkw_mmap_state;
struct {
@@ -100,18 +102,6 @@ void evlist__delete(struct evlist *evlist);
void evlist__add(struct evlist *evlist, struct evsel *entry);
void evlist__remove(struct evlist *evlist, struct evsel *evsel);
-int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs);
-
-int __evlist__add_default_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs, size_t nr_attrs);
-
-int arch_evlist__add_default_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs,
- size_t nr_attrs);
-
-#define evlist__add_default_attrs(evlist, array) \
- arch_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
-
int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs);
int evlist__add_dummy(struct evlist *evlist);
@@ -142,7 +132,6 @@ int __evlist__set_tracepoints_handlers(struct evlist *evlist,
__evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array))
int evlist__set_tp_filter(struct evlist *evlist, const char *filter);
-int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid);
int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
int evlist__append_tp_filter(struct evlist *evlist, const char *filter);
@@ -150,7 +139,6 @@ int evlist__append_tp_filter(struct evlist *evlist, const char *filter);
int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid);
int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
-struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id);
struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name);
int evlist__add_pollfd(struct evlist *evlist, int fd);
@@ -184,6 +172,7 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target,
const char *argv[], bool pipe_output,
void (*exec_error)(int signo, siginfo_t *info, void *ucontext));
int evlist__start_workload(struct evlist *evlist);
+void evlist__cancel_workload(struct evlist *evlist);
struct option;
@@ -212,11 +201,13 @@ void evlist__enable_non_dummy(struct evlist *evlist);
void evlist__set_selected(struct evlist *evlist, struct evsel *evsel);
int evlist__create_maps(struct evlist *evlist, struct target *target);
-int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
+int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel,
+ struct target *target);
u64 __evlist__combined_sample_type(struct evlist *evlist);
u64 evlist__combined_sample_type(struct evlist *evlist);
u64 evlist__combined_branch_type(struct evlist *evlist);
+void evlist__update_br_cntr(struct evlist *evlist);
bool evlist__sample_id_all(struct evlist *evlist);
u16 evlist__id_hdr_size(struct evlist *evlist);
@@ -443,5 +434,6 @@ int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf);
void evlist__check_mem_load_aux(struct evlist *evlist);
void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list);
void evlist__uniquify_name(struct evlist *evlist);
+bool evlist__has_bpf_output(struct evlist *evlist);
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index bc603193c477..d22c5df1701e 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -5,12 +5,16 @@
* Parts came from builtin-{top,stat,record}.c, see those files for further
* copyright notes.
*/
+/*
+ * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
+ * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
+ */
+#define __SANE_USERSPACE_TYPES__
#include <byteswap.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
-#include <api/io.h>
#include <api/fs/fs.h>
#include <api/fs/tracing_path.h>
#include <linux/hw_breakpoint.h>
@@ -20,6 +24,7 @@
#include <linux/zalloc.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
+#include <sys/syscall.h>
#include <sys/types.h>
#include <dirent.h>
#include <stdlib.h>
@@ -51,6 +56,8 @@
#include "off_cpu.h"
#include "pmu.h"
#include "pmus.h"
+#include "hwmon_pmu.h"
+#include "tool_pmu.h"
#include "rlimit.h"
#include "../perf-sys.h"
#include "util/parse-branch-options.h"
@@ -59,50 +66,140 @@
#include <internal/xyarray.h>
#include <internal/lib.h>
#include <internal/threadmap.h>
+#include "util/intel-tpebs.h"
#include <linux/ctype.h>
#ifdef HAVE_LIBTRACEEVENT
-#include <traceevent/event-parse.h>
+#include <event-parse.h>
#endif
struct perf_missing_features perf_missing_features;
static clockid_t clockid;
-static const char *const perf_tool_event__tool_names[PERF_TOOL_MAX] = {
- NULL,
- "duration_time",
- "user_time",
- "system_time",
-};
-
-const char *perf_tool_event__to_str(enum perf_tool_event ev)
+static int evsel__no_extra_init(struct evsel *evsel __maybe_unused)
{
- if (ev > PERF_TOOL_NONE && ev < PERF_TOOL_MAX)
- return perf_tool_event__tool_names[ev];
-
- return NULL;
+ return 0;
}
-enum perf_tool_event perf_tool_event__from_str(const char *str)
+static bool test_attr__enabled(void)
{
- int i;
+ static bool test_attr__enabled;
+ static bool test_attr__enabled_tested;
+
+ if (!test_attr__enabled_tested) {
+ char *dir = getenv("PERF_TEST_ATTR");
- perf_tool_event__for_each_event(i) {
- if (!strcmp(str, perf_tool_event__tool_names[i]))
- return i;
+ test_attr__enabled = (dir != NULL);
+ test_attr__enabled_tested = true;
}
- return PERF_TOOL_NONE;
+ return test_attr__enabled;
}
+#define __WRITE_ASS(str, fmt, data) \
+do { \
+ if (fprintf(file, #str "=%"fmt "\n", data) < 0) { \
+ perror("test attr - failed to write event file"); \
+ fclose(file); \
+ return -1; \
+ } \
+} while (0)
-static int evsel__no_extra_init(struct evsel *evsel __maybe_unused)
+#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field)
+
+static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
+ int fd, int group_fd, unsigned long flags)
{
+ FILE *file;
+ char path[PATH_MAX];
+ char *dir = getenv("PERF_TEST_ATTR");
+
+ snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir,
+ attr->type, attr->config, fd);
+
+ file = fopen(path, "w+");
+ if (!file) {
+ perror("test attr - failed to open event file");
+ return -1;
+ }
+
+ if (fprintf(file, "[event-%d-%llu-%d]\n",
+ attr->type, attr->config, fd) < 0) {
+ perror("test attr - failed to write event file");
+ fclose(file);
+ return -1;
+ }
+
+ /* syscall arguments */
+ __WRITE_ASS(fd, "d", fd);
+ __WRITE_ASS(group_fd, "d", group_fd);
+ __WRITE_ASS(cpu, "d", cpu.cpu);
+ __WRITE_ASS(pid, "d", pid);
+ __WRITE_ASS(flags, "lu", flags);
+
+ /* struct perf_event_attr */
+ WRITE_ASS(type, PRIu32);
+ WRITE_ASS(size, PRIu32);
+ WRITE_ASS(config, "llu");
+ WRITE_ASS(sample_period, "llu");
+ WRITE_ASS(sample_type, "llu");
+ WRITE_ASS(read_format, "llu");
+ WRITE_ASS(disabled, "d");
+ WRITE_ASS(inherit, "d");
+ WRITE_ASS(pinned, "d");
+ WRITE_ASS(exclusive, "d");
+ WRITE_ASS(exclude_user, "d");
+ WRITE_ASS(exclude_kernel, "d");
+ WRITE_ASS(exclude_hv, "d");
+ WRITE_ASS(exclude_idle, "d");
+ WRITE_ASS(mmap, "d");
+ WRITE_ASS(comm, "d");
+ WRITE_ASS(freq, "d");
+ WRITE_ASS(inherit_stat, "d");
+ WRITE_ASS(enable_on_exec, "d");
+ WRITE_ASS(task, "d");
+ WRITE_ASS(watermark, "d");
+ WRITE_ASS(precise_ip, "d");
+ WRITE_ASS(mmap_data, "d");
+ WRITE_ASS(sample_id_all, "d");
+ WRITE_ASS(exclude_host, "d");
+ WRITE_ASS(exclude_guest, "d");
+ WRITE_ASS(exclude_callchain_kernel, "d");
+ WRITE_ASS(exclude_callchain_user, "d");
+ WRITE_ASS(mmap2, "d");
+ WRITE_ASS(comm_exec, "d");
+ WRITE_ASS(context_switch, "d");
+ WRITE_ASS(write_backward, "d");
+ WRITE_ASS(namespaces, "d");
+ WRITE_ASS(use_clockid, "d");
+ WRITE_ASS(wakeup_events, PRIu32);
+ WRITE_ASS(bp_type, PRIu32);
+ WRITE_ASS(config1, "llu");
+ WRITE_ASS(config2, "llu");
+ WRITE_ASS(branch_sample_type, "llu");
+ WRITE_ASS(sample_regs_user, "llu");
+ WRITE_ASS(sample_stack_user, PRIu32);
+
+ fclose(file);
return 0;
}
-void __weak test_attr__ready(void) { }
+#undef __WRITE_ASS
+#undef WRITE_ASS
+
+/*
+ * Record the arguments of a successful event open for the attr tests.
+ *
+ * When @fd is valid (not -1) the attr and the syscall arguments are written
+ * out via store_event(); if that fails the process exits with status 128.
+ * errno is saved on entry and restored on return so that the caller still
+ * sees the value left by the open itself.
+ */
+static void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
+			    int fd, int group_fd, unsigned long flags)
+{
+	int errno_saved = errno;
+
+	if ((fd != -1) && store_event(attr, pid, cpu, fd, group_fd, flags)) {
+		/* terminate the line: this is the last message before exit */
+		pr_err("test attr FAILED\n");
+		exit(128);
+	}
+
+	errno = errno_saved;
+}
static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused)
{
@@ -295,9 +392,9 @@ void evsel__init(struct evsel *evsel,
evsel->metric_events = NULL;
evsel->per_pkg_mask = NULL;
evsel->collect_stat = false;
- evsel->pmu_name = NULL;
evsel->group_pmu_name = NULL;
evsel->skippable = false;
+ evsel->alternate_hw_config = PERF_COUNT_HW_MAX;
}
struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
@@ -392,11 +489,6 @@ struct evsel *evsel__clone(struct evsel *orig)
if (evsel->group_name == NULL)
goto out_err;
}
- if (orig->pmu_name) {
- evsel->pmu_name = strdup(orig->pmu_name);
- if (evsel->pmu_name == NULL)
- goto out_err;
- }
if (orig->group_pmu_name) {
evsel->group_pmu_name = strdup(orig->group_pmu_name);
if (evsel->group_pmu_name == NULL)
@@ -420,7 +512,6 @@ struct evsel *evsel__clone(struct evsel *orig)
evsel->core.leader = orig->core.leader;
evsel->max_events = orig->max_events;
- evsel->tool_event = orig->tool_event;
free((char *)evsel->unit);
evsel->unit = strdup(orig->unit);
if (evsel->unit == NULL)
@@ -444,6 +535,8 @@ struct evsel *evsel__clone(struct evsel *orig)
if (evsel__copy_config_terms(evsel, orig) < 0)
goto out_err;
+ evsel->alternate_hw_config = orig->alternate_hw_config;
+
return evsel;
out_err:
@@ -547,7 +640,6 @@ static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
{
int colon = 0, r = 0;
struct perf_event_attr *attr = &evsel->core.attr;
- bool exclude_guest_default = false;
#define MOD_PRINT(context, mod) do { \
if (!attr->exclude_##context) { \
@@ -559,17 +651,15 @@ static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
MOD_PRINT(kernel, 'k');
MOD_PRINT(user, 'u');
MOD_PRINT(hv, 'h');
- exclude_guest_default = true;
}
if (attr->precise_ip) {
if (!colon)
colon = ++r;
r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
- exclude_guest_default = true;
}
- if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) {
+ if (attr->exclude_host || attr->exclude_guest) {
MOD_PRINT(host, 'H');
MOD_PRINT(guest, 'G');
}
@@ -616,11 +706,6 @@ static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
return r + evsel__add_modifiers(evsel, bf + r, size - r);
}
-static int evsel__tool_name(enum perf_tool_event ev, char *bf, size_t size)
-{
- return scnprintf(bf, size, "%s", perf_tool_event__to_str(ev));
-}
-
static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
{
int r;
@@ -771,10 +856,7 @@ const char *evsel__name(struct evsel *evsel)
break;
case PERF_TYPE_SOFTWARE:
- if (evsel__is_tool(evsel))
- evsel__tool_name(evsel->tool_event, bf, sizeof(bf));
- else
- evsel__sw_name(evsel, bf, sizeof(bf));
+ evsel__sw_name(evsel, bf, sizeof(bf));
break;
case PERF_TYPE_TRACEPOINT:
@@ -785,6 +867,10 @@ const char *evsel__name(struct evsel *evsel)
evsel__bp_name(evsel, bf, sizeof(bf));
break;
+ case PERF_PMU_TYPE_TOOL:
+ scnprintf(bf, sizeof(bf), "%s", evsel__tool_pmu_event_name(evsel));
+ break;
+
default:
scnprintf(bf, sizeof(bf), "unknown attr type: %d",
evsel->core.attr.type);
@@ -810,7 +896,7 @@ const char *evsel__metric_id(const struct evsel *evsel)
return evsel->metric_id;
if (evsel__is_tool(evsel))
- return perf_tool_event__to_str(evsel->tool_event);
+ return evsel__tool_pmu_event_name(evsel);
return "unknown";
}
@@ -861,7 +947,6 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
{
bool function = evsel__is_function_event(evsel);
struct perf_event_attr *attr = &evsel->core.attr;
- const char *arch = perf_env__arch(evsel__env(evsel));
evsel__set_sample_bit(evsel, CALLCHAIN);
@@ -892,6 +977,8 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
if (param->record_mode == CALLCHAIN_DWARF) {
if (!function) {
+ const char *arch = perf_env__arch(evsel__env(evsel));
+
evsel__set_sample_bit(evsel, REGS_USER);
evsel__set_sample_bit(evsel, STACK_USER);
if (opts->sample_user_regs &&
@@ -1149,7 +1236,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread;
attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
- attr->inherit = !opts->no_inherit;
+ attr->inherit = target__has_cpu(&opts->target) ? 0 : !opts->no_inherit;
attr->write_backward = opts->overwrite ? 1 : 0;
attr->read_format = PERF_FORMAT_LOST;
@@ -1171,7 +1258,15 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
*/
if (leader->core.nr_members > 1) {
attr->read_format |= PERF_FORMAT_GROUP;
- attr->inherit = 0;
+ }
+
+ /*
+ * Inherit + SAMPLE_READ requires SAMPLE_TID in the sample_type; PERF_FORMAT_ID is also set in the read_format
+ */
+ if (attr->inherit) {
+ evsel__set_sample_bit(evsel, TID);
+ evsel->core.attr.read_format |=
+ PERF_FORMAT_ID;
}
}
@@ -1493,7 +1588,6 @@ void evsel__exit(struct evsel *evsel)
zfree(&evsel->group_name);
zfree(&evsel->name);
zfree(&evsel->filter);
- zfree(&evsel->pmu_name);
zfree(&evsel->group_pmu_name);
zfree(&evsel->unit);
zfree(&evsel->metric_id);
@@ -1502,8 +1596,8 @@ void evsel__exit(struct evsel *evsel)
evsel->per_pkg_mask = NULL;
zfree(&evsel->metric_events);
perf_evsel__object.fini(evsel);
- if (evsel->tool_event == PERF_TOOL_SYSTEM_TIME ||
- evsel->tool_event == PERF_TOOL_USER_TIME)
+ if (evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME ||
+ evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME)
xyarray__delete(evsel->start_times);
}
@@ -1539,6 +1633,11 @@ static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
}
+static int evsel__read_retire_lat(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+ return tpebs_set_evsel(evsel, cpu_map_idx, thread);
+}
+
static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
u64 val, u64 ena, u64 run, u64 lost)
{
@@ -1546,6 +1645,12 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
count = perf_counts(counter->counts, cpu_map_idx, thread);
+ if (counter->retire_lat) {
+ evsel__read_retire_lat(counter, cpu_map_idx, thread);
+ perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
+ return;
+ }
+
count->val = val;
count->ena = ena;
count->run = run;
@@ -1554,6 +1659,60 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
}
+static bool evsel__group_has_tpebs(struct evsel *leader)
+{
+ struct evsel *evsel;
+
+ for_each_group_evsel(evsel, leader) {
+ if (evsel__is_retire_lat(evsel))
+ return true;
+ }
+ return false;
+}
+
+static u64 evsel__group_read_nr_members(struct evsel *leader)
+{
+ u64 nr = leader->core.nr_members;
+ struct evsel *evsel;
+
+ for_each_group_evsel(evsel, leader) {
+ if (evsel__is_retire_lat(evsel))
+ nr--;
+ }
+ return nr;
+}
+
+static u64 evsel__group_read_size(struct evsel *leader)
+{
+ u64 read_format = leader->core.attr.read_format;
+ int entry = sizeof(u64); /* value */
+ int size = 0;
+ int nr = 1;
+
+ if (!evsel__group_has_tpebs(leader))
+ return perf_evsel__read_size(&leader->core);
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ size += sizeof(u64);
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ size += sizeof(u64);
+
+ if (read_format & PERF_FORMAT_ID)
+ entry += sizeof(u64);
+
+ if (read_format & PERF_FORMAT_LOST)
+ entry += sizeof(u64);
+
+ if (read_format & PERF_FORMAT_GROUP) {
+ nr = evsel__group_read_nr_members(leader);
+ size += sizeof(u64);
+ }
+
+ size += entry * nr;
+ return size;
+}
+
static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data)
{
u64 read_format = leader->core.attr.read_format;
@@ -1562,7 +1721,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int
nr = *data++;
- if (nr != (u64) leader->core.nr_members)
+ if (nr != evsel__group_read_nr_members(leader))
return -EINVAL;
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
@@ -1592,7 +1751,7 @@ static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread)
{
struct perf_stat_evsel *ps = leader->stats;
u64 read_format = leader->core.attr.read_format;
- int size = perf_evsel__read_size(&leader->core);
+ int size = evsel__group_read_size(leader);
u64 *data = ps->group_data;
if (!(read_format & PERF_FORMAT_ID))
@@ -1618,171 +1777,34 @@ static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread)
return evsel__process_group_data(leader, cpu_map_idx, thread, data);
}
-static bool read_until_char(struct io *io, char e)
+bool __evsel__match(const struct evsel *evsel, u32 type, u64 config)
{
- int c;
- do {
- c = io__get_char(io);
- if (c == -1)
- return false;
- } while (c != e);
- return true;
-}
+ u32 e_type = evsel->core.attr.type;
+ u64 e_config = evsel->core.attr.config;
-static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val)
-{
- char buf[256];
- struct io io;
- int i;
-
- io__init(&io, fd, buf, sizeof(buf));
-
- /* Skip lines to relevant CPU. */
- for (i = -1; i < cpu.cpu; i++) {
- if (!read_until_char(&io, '\n'))
- return -EINVAL;
+ if (e_type != type) {
+ return type == PERF_TYPE_HARDWARE && evsel->pmu && evsel->pmu->is_core &&
+ evsel->alternate_hw_config == config;
}
- /* Skip to "cpu". */
- if (io__get_char(&io) != 'c') return -EINVAL;
- if (io__get_char(&io) != 'p') return -EINVAL;
- if (io__get_char(&io) != 'u') return -EINVAL;
- /* Skip N of cpuN. */
- if (!read_until_char(&io, ' '))
- return -EINVAL;
+ if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) &&
+ perf_pmus__supports_extended_type())
+ e_config &= PERF_HW_EVENT_MASK;
- i = 1;
- while (true) {
- if (io__get_dec(&io, val) != ' ')
- break;
- if (field == i)
- return 0;
- i++;
- }
- return -EINVAL;
-}
-
-static int read_pid_stat_field(int fd, int field, __u64 *val)
-{
- char buf[256];
- struct io io;
- int c, i;
-
- io__init(&io, fd, buf, sizeof(buf));
- if (io__get_dec(&io, val) != ' ')
- return -EINVAL;
- if (field == 1)
- return 0;
-
- /* Skip comm. */
- if (io__get_char(&io) != '(' || !read_until_char(&io, ')'))
- return -EINVAL;
- if (field == 2)
- return -EINVAL; /* String can't be returned. */
-
- /* Skip state */
- if (io__get_char(&io) != ' ' || io__get_char(&io) == -1)
- return -EINVAL;
- if (field == 3)
- return -EINVAL; /* String can't be returned. */
-
- /* Loop over numeric fields*/
- if (io__get_char(&io) != ' ')
- return -EINVAL;
-
- i = 4;
- while (true) {
- c = io__get_dec(&io, val);
- if (c == -1)
- return -EINVAL;
- if (c == -2) {
- /* Assume a -ve was read */
- c = io__get_dec(&io, val);
- *val *= -1;
- }
- if (c != ' ')
- return -EINVAL;
- if (field == i)
- return 0;
- i++;
- }
- return -EINVAL;
-}
-
-static int evsel__read_tool(struct evsel *evsel, int cpu_map_idx, int thread)
-{
- __u64 *start_time, cur_time, delta_start;
- int fd, err = 0;
- struct perf_counts_values *count;
- bool adjust = false;
-
- count = perf_counts(evsel->counts, cpu_map_idx, thread);
-
- switch (evsel->tool_event) {
- case PERF_TOOL_DURATION_TIME:
- /*
- * Pretend duration_time is only on the first CPU and thread, or
- * else aggregation will scale duration_time by the number of
- * CPUs/threads.
- */
- start_time = &evsel->start_time;
- if (cpu_map_idx == 0 && thread == 0)
- cur_time = rdclock();
- else
- cur_time = *start_time;
- break;
- case PERF_TOOL_USER_TIME:
- case PERF_TOOL_SYSTEM_TIME: {
- bool system = evsel->tool_event == PERF_TOOL_SYSTEM_TIME;
-
- start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
- fd = FD(evsel, cpu_map_idx, thread);
- lseek(fd, SEEK_SET, 0);
- if (evsel->pid_stat) {
- /* The event exists solely on 1 CPU. */
- if (cpu_map_idx == 0)
- err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time);
- else
- cur_time = 0;
- } else {
- /* The event is for all threads. */
- if (thread == 0) {
- struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus,
- cpu_map_idx);
-
- err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time);
- } else {
- cur_time = 0;
- }
- }
- adjust = true;
- break;
- }
- case PERF_TOOL_NONE:
- case PERF_TOOL_MAX:
- default:
- err = -EINVAL;
- }
- if (err)
- return err;
-
- delta_start = cur_time - *start_time;
- if (adjust) {
- __u64 ticks_per_sec = sysconf(_SC_CLK_TCK);
-
- delta_start *= 1000000000 / ticks_per_sec;
- }
- count->val = delta_start;
- count->ena = count->run = delta_start;
- count->lost = 0;
- return 0;
+ return e_config == config;
}
int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
{
if (evsel__is_tool(evsel))
- return evsel__read_tool(evsel, cpu_map_idx, thread);
+ return evsel__tool_pmu_read(evsel, cpu_map_idx, thread);
+
+ if (evsel__is_hwmon(evsel))
+ return evsel__hwmon_pmu_read(evsel, cpu_map_idx, thread);
+
+ if (evsel__is_retire_lat(evsel))
+ return evsel__read_retire_lat(evsel, cpu_map_idx, thread);
if (evsel->core.attr.read_format & PERF_FORMAT_GROUP)
return evsel__read_group(evsel, cpu_map_idx, thread);
@@ -1973,6 +1995,7 @@ static struct perf_thread_map *empty_thread_map;
static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads)
{
+ int ret = 0;
int nthreads = perf_thread_map__nr(threads);
if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) ||
@@ -2003,23 +2026,21 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0)
return -ENOMEM;
- if ((evsel->tool_event == PERF_TOOL_SYSTEM_TIME ||
- evsel->tool_event == PERF_TOOL_USER_TIME) &&
- !evsel->start_times) {
- evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus), nthreads, sizeof(__u64));
- if (!evsel->start_times)
- return -ENOMEM;
- }
+ if (evsel__is_tool(evsel))
+ ret = evsel__tool_pmu_prepare_open(evsel, cpus, nthreads);
evsel->open_flags = PERF_FLAG_FD_CLOEXEC;
if (evsel->cgrp)
evsel->open_flags |= PERF_FLAG_PID_CGROUP;
- return 0;
+ return ret;
}
static void evsel__disable_missing_features(struct evsel *evsel)
{
+ if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit &&
+ (evsel->core.attr.sample_type & PERF_SAMPLE_READ))
+ evsel->core.attr.inherit = 0;
if (perf_missing_features.branch_counters)
evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS;
if (perf_missing_features.read_lost)
@@ -2069,120 +2090,346 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
return err;
}
-bool evsel__detect_missing_features(struct evsel *evsel)
+static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
+{
+ int fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
+ /*group_fd=*/-1, flags);
+ close(fd);
+
+ if (fd < 0) {
+ attr->exclude_kernel = 1;
+
+ fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
+ /*group_fd=*/-1, flags);
+ close(fd);
+ }
+
+ if (fd < 0) {
+ attr->exclude_hv = 1;
+
+ fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
+ /*group_fd=*/-1, flags);
+ close(fd);
+ }
+
+ if (fd < 0) {
+ attr->exclude_guest = 1;
+
+ fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
+ /*group_fd=*/-1, flags);
+ close(fd);
+ }
+
+ attr->exclude_kernel = 0;
+ attr->exclude_guest = 0;
+ attr->exclude_hv = 0;
+
+ return fd >= 0;
+}
+
+static void evsel__detect_missing_pmu_features(struct evsel *evsel)
{
+ struct perf_event_attr attr = {
+ .type = evsel->core.attr.type,
+ .config = evsel->core.attr.config,
+ .disabled = 1,
+ };
+ struct perf_pmu *pmu = evsel->pmu;
+ int old_errno;
+
+ old_errno = errno;
+
+ if (pmu == NULL)
+ pmu = evsel->pmu = evsel__find_pmu(evsel);
+
+ if (pmu == NULL || pmu->missing_features.checked)
+ goto out;
+
/*
* Must probe features in the order they were added to the
- * perf_event_attr interface.
+ * perf_event_attr interface. These are PMU-specific limitations, so
+ * we can detect them with the given hardware event and stop at the
+ * first probe that succeeds.
*/
- if (!perf_missing_features.branch_counters &&
- (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) {
- perf_missing_features.branch_counters = true;
- pr_debug2("switching off branch counters support\n");
+
+ /* Please add new feature detection here. */
+
+ attr.exclude_guest = 1;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ pmu->missing_features.exclude_guest = true;
+ pr_debug2("switching off exclude_guest for PMU %s\n", pmu->name);
+
+found:
+ pmu->missing_features.checked = true;
+out:
+ errno = old_errno;
+}
+
+static void evsel__detect_missing_brstack_features(struct evsel *evsel)
+{
+ static bool detection_done = false;
+ struct perf_event_attr attr = {
+ .type = evsel->core.attr.type,
+ .config = evsel->core.attr.config,
+ .disabled = 1,
+ .sample_type = PERF_SAMPLE_BRANCH_STACK,
+ .sample_period = 1000,
+ };
+ int old_errno;
+
+ if (detection_done)
+ return;
+
+ old_errno = errno;
+
+ /*
+ * Must probe features in the order they were added to the
+ * perf_event_attr interface. These are kernel core limitations, but
+ * specific to PMUs with a branch stack, so we can detect them with the
+ * given hardware event and stop at the first probe that succeeds.
+ */
+
+ /* Please add new feature detection here. */
+
+ attr.branch_sample_type = PERF_SAMPLE_BRANCH_COUNTERS;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.branch_counters = true;
+ pr_debug2("switching off branch counters support\n");
+
+ attr.branch_sample_type = PERF_SAMPLE_BRANCH_HW_INDEX;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.branch_hw_idx = true;
+ pr_debug2("switching off branch HW index support\n");
+
+ attr.branch_sample_type = PERF_SAMPLE_BRANCH_NO_CYCLES | PERF_SAMPLE_BRANCH_NO_FLAGS;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.lbr_flags = true;
+ pr_debug2_peo("switching off branch sample type no (cycles/flags)\n");
+
+found:
+ detection_done = true;
+ errno = old_errno;
+}
+
+static bool evsel__detect_missing_features(struct evsel *evsel)
+{
+ static bool detection_done = false;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_TASK_CLOCK,
+ .disabled = 1,
+ };
+ int old_errno;
+
+ evsel__detect_missing_pmu_features(evsel);
+
+ if (evsel__has_br_stack(evsel))
+ evsel__detect_missing_brstack_features(evsel);
+
+ if (detection_done)
+ goto check;
+
+ old_errno = errno;
+
+ /*
+ * Must probe features in the order they were added to the
+ * perf_event_attr interface. These are kernel core limitations,
+ * not PMU-specific ones, so we can detect them with a software event
+ * and stop at the first probe that succeeds.
+ */
+
+ /* Please add new feature detection here. */
+
+ attr.inherit = true;
+ attr.sample_type = PERF_SAMPLE_READ;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.inherit_sample_read = true;
+ pr_debug2("Using PERF_SAMPLE_READ / :S modifier is not compatible with inherit, falling back to no-inherit.\n");
+ attr.inherit = false;
+ attr.sample_type = 0;
+
+ attr.read_format = PERF_FORMAT_LOST;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.read_lost = true;
+ pr_debug2("switching off PERF_FORMAT_LOST support\n");
+ attr.read_format = 0;
+
+ attr.sample_type = PERF_SAMPLE_WEIGHT_STRUCT;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.weight_struct = true;
+ pr_debug2("switching off weight struct support\n");
+ attr.sample_type = 0;
+
+ attr.sample_type = PERF_SAMPLE_CODE_PAGE_SIZE;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.code_page_size = true;
+ pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support\n");
+ attr.sample_type = 0;
+
+ attr.sample_type = PERF_SAMPLE_DATA_PAGE_SIZE;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.data_page_size = true;
+ pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support\n");
+ attr.sample_type = 0;
+
+ attr.cgroup = 1;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.cgroup = true;
+ pr_debug2_peo("Kernel has no cgroup sampling support\n");
+ attr.cgroup = 0;
+
+ attr.aux_output = 1;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.aux_output = true;
+ pr_debug2_peo("Kernel has no attr.aux_output support\n");
+ attr.aux_output = 0;
+
+ attr.bpf_event = 1;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.bpf = true;
+ pr_debug2_peo("switching off bpf_event\n");
+ attr.bpf_event = 0;
+
+ attr.ksymbol = 1;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.ksymbol = true;
+ pr_debug2_peo("switching off ksymbol\n");
+ attr.ksymbol = 0;
+
+ attr.write_backward = 1;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.write_backward = true;
+ pr_debug2_peo("switching off write_backward\n");
+ attr.write_backward = 0;
+
+ attr.use_clockid = 1;
+ attr.clockid = CLOCK_MONOTONIC;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.clockid = true;
+ pr_debug2_peo("switching off clockid\n");
+ attr.use_clockid = 0;
+ attr.clockid = 0;
+
+ if (has_attr_feature(&attr, /*flags=*/PERF_FLAG_FD_CLOEXEC))
+ goto found;
+ perf_missing_features.cloexec = true;
+ pr_debug2_peo("switching off cloexec flag\n");
+
+ attr.mmap2 = 1;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.mmap2 = true;
+ pr_debug2_peo("switching off mmap2\n");
+ attr.mmap2 = 0;
+
+ /* set this unconditionally? */
+ perf_missing_features.sample_id_all = true;
+ pr_debug2_peo("switching off sample_id_all\n");
+
+ attr.inherit = 1;
+ attr.read_format = PERF_FORMAT_GROUP;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.group_read = true;
+ pr_debug2_peo("switching off group read\n");
+ attr.inherit = 0;
+ attr.read_format = 0;
+
+found:
+ detection_done = true;
+ errno = old_errno;
+
+check:
+ if (evsel->core.attr.inherit &&
+ (evsel->core.attr.sample_type & PERF_SAMPLE_READ) &&
+ perf_missing_features.inherit_sample_read)
return true;
- } else if (!perf_missing_features.read_lost &&
- (evsel->core.attr.read_format & PERF_FORMAT_LOST)) {
- perf_missing_features.read_lost = true;
- pr_debug2("switching off PERF_FORMAT_LOST support\n");
+
+ if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) &&
+ perf_missing_features.branch_counters)
return true;
- } else if (!perf_missing_features.weight_struct &&
- (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) {
- perf_missing_features.weight_struct = true;
- pr_debug2("switching off weight struct support\n");
+
+ if ((evsel->core.attr.read_format & PERF_FORMAT_LOST) &&
+ perf_missing_features.read_lost)
return true;
- } else if (!perf_missing_features.code_page_size &&
- (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) {
- perf_missing_features.code_page_size = true;
- pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n");
- return false;
- } else if (!perf_missing_features.data_page_size &&
- (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) {
- perf_missing_features.data_page_size = true;
- pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n");
- return false;
- } else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) {
- perf_missing_features.cgroup = true;
- pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n");
- return false;
- } else if (!perf_missing_features.branch_hw_idx &&
- (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) {
- perf_missing_features.branch_hw_idx = true;
- pr_debug2("switching off branch HW index support\n");
+
+ if ((evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT) &&
+ perf_missing_features.weight_struct)
return true;
- } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
- perf_missing_features.aux_output = true;
- pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n");
- return false;
- } else if (!perf_missing_features.bpf && evsel->core.attr.bpf_event) {
- perf_missing_features.bpf = true;
- pr_debug2_peo("switching off bpf_event\n");
+
+ if (evsel->core.attr.use_clockid && evsel->core.attr.clockid != CLOCK_MONOTONIC &&
+ !perf_missing_features.clockid) {
+ perf_missing_features.clockid_wrong = true;
return true;
- } else if (!perf_missing_features.ksymbol && evsel->core.attr.ksymbol) {
- perf_missing_features.ksymbol = true;
- pr_debug2_peo("switching off ksymbol\n");
+ }
+
+ if (evsel->core.attr.use_clockid && perf_missing_features.clockid)
return true;
- } else if (!perf_missing_features.write_backward && evsel->core.attr.write_backward) {
- perf_missing_features.write_backward = true;
- pr_debug2_peo("switching off write_backward\n");
- return false;
- } else if (!perf_missing_features.clockid_wrong && evsel->core.attr.use_clockid) {
- perf_missing_features.clockid_wrong = true;
- pr_debug2_peo("switching off clockid\n");
+
+ if ((evsel->open_flags & PERF_FLAG_FD_CLOEXEC) &&
+ perf_missing_features.cloexec)
+ return true;
+
+ if (evsel->core.attr.mmap2 && perf_missing_features.mmap2)
return true;
- } else if (!perf_missing_features.clockid && evsel->core.attr.use_clockid) {
- perf_missing_features.clockid = true;
- pr_debug2_peo("switching off use_clockid\n");
+
+ if ((evsel->core.attr.branch_sample_type & (PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_NO_CYCLES)) &&
+ perf_missing_features.lbr_flags)
return true;
- } else if (!perf_missing_features.cloexec && (evsel->open_flags & PERF_FLAG_FD_CLOEXEC)) {
- perf_missing_features.cloexec = true;
- pr_debug2_peo("switching off cloexec flag\n");
+
+ if (evsel->core.attr.inherit && (evsel->core.attr.read_format & PERF_FORMAT_GROUP) &&
+ perf_missing_features.group_read)
return true;
- } else if (!perf_missing_features.mmap2 && evsel->core.attr.mmap2) {
- perf_missing_features.mmap2 = true;
- pr_debug2_peo("switching off mmap2\n");
+
+ if (evsel->core.attr.ksymbol && perf_missing_features.ksymbol)
return true;
- } else if (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) {
- if (evsel->pmu == NULL)
- evsel->pmu = evsel__find_pmu(evsel);
-
- if (evsel->pmu)
- evsel->pmu->missing_features.exclude_guest = true;
- else {
- /* we cannot find PMU, disable attrs now */
- evsel->core.attr.exclude_host = false;
- evsel->core.attr.exclude_guest = false;
- }
- if (evsel->exclude_GH) {
- pr_debug2_peo("PMU has no exclude_host/guest support, bailing out\n");
- return false;
- }
- if (!perf_missing_features.exclude_guest) {
- perf_missing_features.exclude_guest = true;
- pr_debug2_peo("switching off exclude_guest, exclude_host\n");
- }
+ if (evsel->core.attr.bpf_event && perf_missing_features.bpf)
return true;
- } else if (!perf_missing_features.sample_id_all) {
- perf_missing_features.sample_id_all = true;
- pr_debug2_peo("switching off sample_id_all\n");
+
+ if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) &&
+ perf_missing_features.branch_hw_idx)
return true;
- } else if (!perf_missing_features.lbr_flags &&
- (evsel->core.attr.branch_sample_type &
- (PERF_SAMPLE_BRANCH_NO_CYCLES |
- PERF_SAMPLE_BRANCH_NO_FLAGS))) {
- perf_missing_features.lbr_flags = true;
- pr_debug2_peo("switching off branch sample type no (cycles/flags)\n");
+
+ if (evsel->core.attr.sample_id_all && perf_missing_features.sample_id_all)
return true;
- } else if (!perf_missing_features.group_read &&
- evsel->core.attr.inherit &&
- (evsel->core.attr.read_format & PERF_FORMAT_GROUP) &&
- evsel__is_group_leader(evsel)) {
- perf_missing_features.group_read = true;
- pr_debug2_peo("switching off group read\n");
+
+ return false;
+}
+
+static bool evsel__handle_error_quirks(struct evsel *evsel, int error)
+{
+ /*
+ * AMD core PMU tries to forward events with precise_ip to IBS PMU
+ * implicitly. But IBS PMU has more restrictions so it can fail with
+ * supported event attributes. Let's forward it back to the core PMU
+ * by clearing precise_ip only if it's from precise_max (:P).
+ */
+ if ((error == -EINVAL || error == -ENOENT) && x86__is_amd_cpu() &&
+ evsel->core.attr.precise_ip && evsel->precise_max) {
+ evsel->core.attr.precise_ip = 0;
+ pr_debug2_peo("removing precise_ip on AMD\n");
+ display_attr(&evsel->core.attr);
return true;
- } else {
- return false;
}
+
+ return false;
}
static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
@@ -2193,12 +2440,8 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
int pid = -1, err, old_errno;
enum rlimit_action set_rlimit = NO_CHANGE;
- if (evsel->tool_event == PERF_TOOL_DURATION_TIME) {
- if (evsel->core.attr.sample_period) /* no sampling */
- return -EINVAL;
- evsel->start_time = rdclock();
- return 0;
- }
+ if (evsel__is_retire_lat(evsel))
+ return tpebs_start(evsel->evlist);
err = __evsel__prepare_open(evsel, cpus, threads);
if (err)
@@ -2221,6 +2464,17 @@ fallback_missing_features:
pr_debug3("Opening: %s\n", evsel__name(evsel));
display_attr(&evsel->core.attr);
+ if (evsel__is_tool(evsel)) {
+ return evsel__tool_pmu_open(evsel, threads,
+ start_cpu_map_idx,
+ end_cpu_map_idx);
+ }
+ if (evsel__is_hwmon(evsel)) {
+ return evsel__hwmon_pmu_open(evsel, threads,
+ start_cpu_map_idx,
+ end_cpu_map_idx);
+ }
+
for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
for (thread = 0; thread < nthreads; thread++) {
@@ -2232,46 +2486,6 @@ retry_open:
if (!evsel->cgrp && !evsel->core.system_wide)
pid = perf_thread_map__pid(threads, thread);
- if (evsel->tool_event == PERF_TOOL_USER_TIME ||
- evsel->tool_event == PERF_TOOL_SYSTEM_TIME) {
- bool system = evsel->tool_event == PERF_TOOL_SYSTEM_TIME;
- __u64 *start_time = NULL;
-
- if (evsel->core.attr.sample_period) {
- /* no sampling */
- err = -EINVAL;
- goto out_close;
- }
- if (pid > -1) {
- char buf[64];
-
- snprintf(buf, sizeof(buf), "/proc/%d/stat", pid);
- fd = open(buf, O_RDONLY);
- evsel->pid_stat = true;
- } else {
- fd = open("/proc/stat", O_RDONLY);
- }
- FD(evsel, idx, thread) = fd;
- if (fd < 0) {
- err = -errno;
- goto out_close;
- }
- start_time = xyarray__entry(evsel->start_times, idx, thread);
- if (pid > -1) {
- err = read_pid_stat_field(fd, system ? 15 : 14,
- start_time);
- } else {
- struct perf_cpu cpu;
-
- cpu = perf_cpu_map__cpu(evsel->core.cpus, idx);
- err = read_stat_field(fd, cpu, system ? 3 : 1,
- start_time);
- }
- if (err)
- goto out_close;
- continue;
- }
-
group_fd = get_group_fd(evsel, idx, thread);
if (group_fd == -2) {
@@ -2280,8 +2494,6 @@ retry_open:
goto out_close;
}
- test_attr__ready();
-
/* Debug message used by test scripts */
pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags);
@@ -2302,7 +2514,7 @@ retry_open:
bpf_counter__install_pe(evsel, idx, fd);
- if (unlikely(test_attr__enabled)) {
+ if (unlikely(test_attr__enabled())) {
test_attr__open(&evsel->core.attr, pid,
perf_cpu_map__cpu(cpus, idx),
fd, group_fd, evsel->open_flags);
@@ -2343,9 +2555,6 @@ retry_open:
return 0;
try_fallback:
- if (evsel__precise_ip_fallback(evsel))
- goto retry_open;
-
if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus),
idx, threads, thread, err)) {
/* We just removed 1 thread, so lower the upper nthreads limit. */
@@ -2362,11 +2571,15 @@ try_fallback:
if (err == -EMFILE && rlimit__increase_nofile(&set_rlimit))
goto retry_open;
- if (err != -EINVAL || idx > 0 || thread > 0)
- goto out_close;
-
- if (evsel__detect_missing_features(evsel))
+ if (err == -EINVAL && evsel__detect_missing_features(evsel))
goto fallback_missing_features;
+
+ if (evsel__precise_ip_fallback(evsel))
+ goto retry_open;
+
+ if (evsel__handle_error_quirks(evsel, err))
+ goto retry_open;
+
out_close:
if (err)
threads->err_thread = thread;
@@ -2392,6 +2605,8 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
void evsel__close(struct evsel *evsel)
{
+ if (evsel__is_retire_lat(evsel))
+ tpebs_delete();
perf_evsel__close(&evsel->core);
perf_evsel__free_id(&evsel->core);
}
@@ -2562,17 +2777,18 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
static inline bool evsel__has_branch_counters(const struct evsel *evsel)
{
- struct evsel *cur, *leader = evsel__leader(evsel);
+ struct evsel *leader = evsel__leader(evsel);
/* The branch counters feature only supports group */
if (!leader || !evsel->evlist)
return false;
- evlist__for_each_entry(evsel->evlist, cur) {
- if ((leader == evsel__leader(cur)) &&
- (cur->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS))
- return true;
- }
+ if (evsel->evlist->nr_br_cntr < 0)
+ evlist__update_br_cntr(evsel->evlist);
+
+ if (leader->br_cntr_nr > 0)
+ return true;
+
return false;
}
@@ -2810,8 +3026,6 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
array = (void *)array + sz;
if (evsel__has_branch_counters(evsel)) {
- OVERFLOW_CHECK_u64(array);
-
data->branch_stack_cntr = (u64 *)array;
sz = data->branch_stack->nr * sizeof(u64);
@@ -2975,7 +3189,7 @@ int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
return 0;
}
-u16 evsel__id_hdr_size(struct evsel *evsel)
+u16 evsel__id_hdr_size(const struct evsel *evsel)
{
u64 sample_type = evsel->core.attr.sample_type;
u16 size = 0;
@@ -3172,6 +3386,27 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
evsel->core.attr.exclude_hv = 1;
return true;
+ } else if (err == EOPNOTSUPP && !evsel->core.attr.exclude_guest &&
+ !evsel->exclude_GH) {
+ const char *name = evsel__name(evsel);
+ char *new_name;
+ const char *sep = ":";
+
+ /* Does the name already contain a separator? */
+ if (strchr(name, '/') ||
+ (strchr(name, ':') && !evsel->is_libpfm_event))
+ sep = "";
+
+ if (asprintf(&new_name, "%s%sH", name, sep) < 0)
+ return false;
+
+ free(evsel->name);
+ evsel->name = new_name;
+ /* Apple M1 requires exclude_guest */
+ scnprintf(msg, msgsize, "trying to fall back to excluding guest samples");
+ evsel->core.attr.exclude_guest = 1;
+
+ return true;
}
return false;
@@ -3342,7 +3577,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg, size,
"The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
- "/bin/dmesg | grep -i perf may provide additional information.\n",
+ "\"dmesg | grep -i perf\" may provide additional information.\n",
err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel));
}
@@ -3357,6 +3592,9 @@ static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
{
int cpu_map_idx, thread;
+ if (evsel__is_retire_lat(evsel))
+ return 0;
+
for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) {
for (thread = 0; thread < xyarray__max_y(evsel->core.fd);
thread++) {
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 80b5f6dd868e..04934a7af174 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -11,6 +11,7 @@
#include <perf/evsel.h>
#include "symbol_conf.h"
#include "pmus.h"
+#include "pmu.h"
struct bpf_object;
struct cgroup;
@@ -22,25 +23,9 @@ struct target;
struct hashmap;
struct bperf_leader_bpf;
struct bperf_follower_bpf;
-struct perf_pmu;
typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
-enum perf_tool_event {
- PERF_TOOL_NONE = 0,
- PERF_TOOL_DURATION_TIME = 1,
- PERF_TOOL_USER_TIME = 2,
- PERF_TOOL_SYSTEM_TIME = 3,
-
- PERF_TOOL_MAX,
-};
-
-const char *perf_tool_event__to_str(enum perf_tool_event ev);
-enum perf_tool_event perf_tool_event__from_str(const char *str);
-
-#define perf_tool_event__for_each_event(ev) \
- for ((ev) = PERF_TOOL_DURATION_TIME; (ev) < PERF_TOOL_MAX; ev++)
-
/** struct evsel - event selector
*
* @evlist - evlist this evsel is in, if it is in one.
@@ -72,7 +57,6 @@ struct evsel {
struct {
char *name;
char *group_name;
- const char *pmu_name;
const char *group_pmu_name;
#ifdef HAVE_LIBTRACEEVENT
struct tep_event *tp_format;
@@ -83,7 +67,6 @@ struct evsel {
const char *unit;
struct cgroup *cgrp;
const char *metric_id;
- enum perf_tool_event tool_event;
/* parse modifier helper */
int exclude_GH;
int sample_read;
@@ -98,9 +81,11 @@ struct evsel {
bool bpf_counter;
bool use_config_name;
bool skippable;
+ bool retire_lat;
int bpf_fd;
struct bpf_object *bpf_obj;
struct list_head config_terms;
+ u64 alternate_hw_config;
};
/*
@@ -148,6 +133,20 @@ struct evsel {
__u64 synth_sample_type;
/*
+ * Store the branch counter related information.
+ * br_cntr_idx: The index of the branch counter event in the evlist
+ * br_cntr_nr: The number of branch counter events in the group
+ * (Only available for the leader event)
+ * abbr_name: The abbreviated name assigned to an event which is
+ * logged by the branch counter.
+ * Abbreviated names range from A to Z9; NA is used
+ * when out of that range.
+ */
+ int br_cntr_idx;
+ int br_cntr_nr;
+ char abbr_name[3];
+
+ /*
* bpf_counter_ops serves two use cases:
* 1. perf-stat -b counting events used byBPF programs
* 2. perf-stat --use-bpf use BPF programs to aggregate counts
@@ -168,7 +167,7 @@ struct evsel {
unsigned long open_flags;
int precise_ip_original;
- /* for missing_features */
+ /* The PMU the event is from. Used for missing_features, PMU name, etc. */
struct perf_pmu *pmu;
/* For tool events */
@@ -206,6 +205,7 @@ struct perf_missing_features {
bool weight_struct;
bool read_lost;
bool branch_counters;
+ bool inherit_sample_read;
};
extern struct perf_missing_features perf_missing_features;
@@ -305,9 +305,9 @@ const char *evsel__name(struct evsel *evsel);
bool evsel__name_is(struct evsel *evsel, const char *name);
const char *evsel__metric_id(const struct evsel *evsel);
-static inline bool evsel__is_tool(const struct evsel *evsel)
+static inline bool evsel__is_retire_lat(const struct evsel *evsel)
{
- return evsel->tool_event != PERF_TOOL_NONE;
+ return evsel->retire_lat;
}
const char *evsel__group_name(struct evsel *evsel);
@@ -343,7 +343,6 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
void evsel__close(struct evsel *evsel);
int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
-bool evsel__detect_missing_features(struct evsel *evsel);
bool evsel__precise_ip_fallback(struct evsel *evsel);
@@ -368,26 +367,10 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
struct tep_format_field *evsel__common_field(struct evsel *evsel, const char *name);
-static inline bool __evsel__match(const struct evsel *evsel, u32 type, u64 config)
-{
- if (evsel->core.attr.type != type)
- return false;
-
- if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) &&
- perf_pmus__supports_extended_type())
- return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config;
-
- return evsel->core.attr.config == config;
-}
+bool __evsel__match(const struct evsel *evsel, u32 type, u64 config);
#define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t, PERF_COUNT_##c)
-static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
-{
- return (e1->core.attr.type == e2->core.attr.type) &&
- (e1->core.attr.config == e2->core.attr.config);
-}
-
int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread);
int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale);
@@ -422,7 +405,7 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
u64 *timestamp);
-u16 evsel__id_hdr_size(struct evsel *evsel);
+u16 evsel__id_hdr_size(const struct evsel *evsel);
static inline struct evsel *evsel__next(struct evsel *evsel)
{
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 8719b3cb5646..86b7f46f9e2a 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -14,7 +14,7 @@
#include "dso.h"
#ifdef HAVE_LIBTRACEEVENT
-#include <traceevent/event-parse.h>
+#include <event-parse.h>
#endif
static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
@@ -107,7 +107,6 @@ out:
return ++printed;
}
-#ifndef PYTHON_PERF
int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
unsigned int print_opts, struct callchain_cursor *cursor,
struct strlist *bt_stop_list, FILE *fp)
@@ -248,4 +247,3 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
return printed;
}
-#endif /* PYTHON_PERF */
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index b2536a59c44e..f289044a1f7c 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -5,25 +5,22 @@
#include <stdlib.h>
#include <string.h>
#include "metricgroup.h"
-#include "cpumap.h"
-#include "cputopo.h"
#include "debug.h"
#include "evlist.h"
#include "expr.h"
+#include "smt.h"
+#include "tool_pmu.h"
#include <util/expr-bison.h>
#include <util/expr-flex.h>
#include "util/hashmap.h"
#include "util/header.h"
#include "util/pmu.h"
-#include "smt.h"
-#include "tsc.h"
-#include <api/fs/fs.h>
+#include <perf/cpumap.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <ctype.h>
#include <math.h>
-#include "pmu.h"
struct expr_id_data {
union {
@@ -393,90 +390,26 @@ double expr_id_data__source_count(const struct expr_id_data *data)
return data->val.source_count;
}
-#if !defined(__i386__) && !defined(__x86_64__)
-double arch_get_tsc_freq(void)
-{
- return 0.0;
-}
-#endif
-
-static double has_pmem(void)
-{
- static bool has_pmem, cached;
- const char *sysfs = sysfs__mountpoint();
- char path[PATH_MAX];
-
- if (!cached) {
- snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs);
- has_pmem = access(path, F_OK) == 0;
- cached = true;
- }
- return has_pmem ? 1.0 : 0.0;
-}
-
double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx)
{
- const struct cpu_topology *topology;
double result = NAN;
+ enum tool_pmu_event ev = tool_pmu__str_to_event(literal + 1);
- if (!strcmp("#num_cpus", literal)) {
- result = cpu__max_present_cpu().cpu;
- goto out;
- }
- if (!strcmp("#num_cpus_online", literal)) {
- struct perf_cpu_map *online = cpu_map__online();
-
- if (online)
- result = perf_cpu_map__nr(online);
- goto out;
- }
+ if (ev != TOOL_PMU__EVENT_NONE) {
+ u64 count;
- if (!strcasecmp("#system_tsc_freq", literal)) {
- result = arch_get_tsc_freq();
- goto out;
- }
+ if (tool_pmu__read_event(ev, &count))
+ result = count;
+ else
+ pr_err("Failure to read '%s'", literal);
- /*
- * Assume that topology strings are consistent, such as CPUs "0-1"
- * wouldn't be listed as "0,1", and so after deduplication the number of
- * these strings gives an indication of the number of packages, dies,
- * etc.
- */
- if (!strcasecmp("#smt_on", literal)) {
- result = smt_on() ? 1.0 : 0.0;
- goto out;
- }
- if (!strcmp("#core_wide", literal)) {
+ } else if (!strcmp("#core_wide", literal)) {
result = core_wide(ctx->system_wide, ctx->user_requested_cpu_list)
? 1.0 : 0.0;
- goto out;
- }
- if (!strcmp("#num_packages", literal)) {
- topology = online_topology();
- result = topology->package_cpus_lists;
- goto out;
- }
- if (!strcmp("#num_dies", literal)) {
- topology = online_topology();
- result = topology->die_cpus_lists;
- goto out;
- }
- if (!strcmp("#num_cores", literal)) {
- topology = online_topology();
- result = topology->core_cpus_lists;
- goto out;
- }
- if (!strcmp("#slots", literal)) {
- result = perf_pmu__cpu_slots_per_cycle();
- goto out;
- }
- if (!strcmp("#has_pmem", literal)) {
- result = has_pmem();
- goto out;
+ } else {
+ pr_err("Unrecognized literal '%s'", literal);
}
- pr_err("Unrecognized literal '%s'", literal);
-out:
pr_debug2("literal: %s = %f\n", literal, result);
return result;
}
@@ -523,8 +456,8 @@ double expr__strcmp_cpuid_str(const struct expr_parse_ctx *ctx __maybe_unused,
bool compute_ids __maybe_unused, const char *test_id)
{
double ret;
- struct perf_pmu *pmu = perf_pmus__find_core_pmu();
- char *cpuid = perf_pmu__getcpuid(pmu);
+ struct perf_cpu cpu = {-1};
+ char *cpuid = get_cpuid_allow_env_override(cpu);
if (!cpuid)
return NAN;
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index 558efcb98d25..bae649ef50e8 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -6,6 +6,7 @@
#include "target.h"
struct evlist;
+struct hashmap;
struct perf_ftrace {
struct evlist *evlist;
@@ -15,6 +16,7 @@ struct perf_ftrace {
struct list_head notrace;
struct list_head graph_funcs;
struct list_head nograph_funcs;
+ struct hashmap *profile_hash;
unsigned long percpu_buffer_size;
bool inherit;
bool use_nsec;
@@ -25,6 +27,7 @@ struct perf_ftrace {
int graph_noirqs;
int graph_verbose;
int graph_thresh;
+ int graph_tail;
};
struct filter_entry {
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
index c8f6bee1fa61..cdce7f173d00 100644
--- a/tools/perf/util/genelf.c
+++ b/tools/perf/util/genelf.c
@@ -16,7 +16,7 @@
#include <inttypes.h>
#include <fcntl.h>
#include <err.h>
-#ifdef HAVE_DWARF_SUPPORT
+#ifdef HAVE_LIBDW_SUPPORT
#include <dwarf.h>
#endif
@@ -499,7 +499,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym,
shdr->sh_size = sizeof(bnote);
shdr->sh_entsize = 0;
-#ifdef HAVE_DWARF_SUPPORT
+#ifdef HAVE_LIBDW_SUPPORT
if (debug && nr_debug_entries) {
retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries);
if (retval)
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index 4e2e4f40e134..9f0b875d6548 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -8,7 +8,7 @@
int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
const void *code, int csize, void *debug, int nr_debug_entries,
void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size);
-#ifdef HAVE_DWARF_SUPPORT
+#ifdef HAVE_LIBDW_SUPPORT
/* genelf_debug.c */
int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries);
#endif
diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h
index c12f8320e668..0c4f155e8eb7 100644
--- a/tools/perf/util/hashmap.h
+++ b/tools/perf/util/hashmap.h
@@ -166,8 +166,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value);
* @bkt: integer used as a bucket loop cursor
*/
#define hashmap__for_each_entry(map, cur, bkt) \
- for (bkt = 0; bkt < map->cap; bkt++) \
- for (cur = map->buckets[bkt]; cur; cur = cur->next)
+ for (bkt = 0; bkt < (map)->cap; bkt++) \
+ for (cur = (map)->buckets[bkt]; cur; cur = cur->next)
/*
* hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe
@@ -178,8 +178,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value);
* @bkt: integer used as a bucket loop cursor
*/
#define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \
- for (bkt = 0; bkt < map->cap; bkt++) \
- for (cur = map->buckets[bkt]; \
+ for (bkt = 0; bkt < (map)->cap; bkt++) \
+ for (cur = (map)->buckets[bkt]; \
cur && ({tmp = cur->next; true; }); \
cur = tmp)
@@ -190,19 +190,19 @@ bool hashmap_find(const struct hashmap *map, long key, long *value);
* @key: key to iterate entries for
*/
#define hashmap__for_each_key_entry(map, cur, _key) \
- for (cur = map->buckets \
- ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+ for (cur = (map)->buckets \
+ ? (map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \
: NULL; \
cur; \
cur = cur->next) \
- if (map->equal_fn(cur->key, (_key), map->ctx))
+ if ((map)->equal_fn(cur->key, (_key), (map)->ctx))
#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \
- for (cur = map->buckets \
- ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+ for (cur = (map)->buckets \
+ ? (map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \
: NULL; \
cur && ({ tmp = cur->next; true; }); \
cur = tmp) \
- if (map->equal_fn(cur->key, (_key), map->ctx))
+ if ((map)->equal_fn(cur->key, (_key), (map)->ctx))
#endif /* __LIBBPF_HASHMAP_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 55e9553861d0..3451e542b69a 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -58,7 +58,7 @@
#include <internal/lib.h>
#ifdef HAVE_LIBTRACEEVENT
-#include <traceevent/event-parse.h>
+#include <event-parse.h>
#endif
/*
@@ -819,11 +819,31 @@ static int write_group_desc(struct feat_fd *ff,
* Each architecture should provide a more precise id string that
* can be use to match the architecture's "mapfile".
*/
-char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
+char * __weak get_cpuid_str(struct perf_cpu cpu __maybe_unused)
{
return NULL;
}
+char *get_cpuid_allow_env_override(struct perf_cpu cpu)
+{
+ char *cpuid;
+ static bool printed;
+
+ cpuid = getenv("PERF_CPUID");
+ if (cpuid)
+ cpuid = strdup(cpuid);
+ if (!cpuid)
+ cpuid = get_cpuid_str(cpu);
+ if (!cpuid)
+ return NULL;
+
+ if (!printed) {
+ pr_debug("Using CPUID %s\n", cpuid);
+ printed = true;
+ }
+ return cpuid;
+}
+
/* Return zero when the cpuid from the mapfile.csv matches the
* cpuid string generated on this platform.
* Otherwise return non-zero.
@@ -856,18 +876,19 @@ int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
* default get_cpuid(): nothing gets recorded
* actual implementation must be in arch/$(SRCARCH)/util/header.c
*/
-int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused)
+int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused,
+ struct perf_cpu cpu __maybe_unused)
{
return ENOSYS; /* Not implemented */
}
-static int write_cpuid(struct feat_fd *ff,
- struct evlist *evlist __maybe_unused)
+static int write_cpuid(struct feat_fd *ff, struct evlist *evlist)
{
+ struct perf_cpu cpu = perf_cpu_map__min(evlist->core.all_cpus);
char buffer[64];
int ret;
- ret = get_cpuid(buffer, sizeof(buffer));
+ ret = get_cpuid(buffer, sizeof(buffer), cpu);
if (ret)
return -1;
@@ -987,57 +1008,6 @@ static int write_dir_format(struct feat_fd *ff,
return do_write(ff, &data->dir.version, sizeof(data->dir.version));
}
-/*
- * Check whether a CPU is online
- *
- * Returns:
- * 1 -> if CPU is online
- * 0 -> if CPU is offline
- * -1 -> error case
- */
-int is_cpu_online(unsigned int cpu)
-{
- char *str;
- size_t strlen;
- char buf[256];
- int status = -1;
- struct stat statbuf;
-
- snprintf(buf, sizeof(buf),
- "/sys/devices/system/cpu/cpu%d", cpu);
- if (stat(buf, &statbuf) != 0)
- return 0;
-
- /*
- * Check if /sys/devices/system/cpu/cpux/online file
- * exists. Some cases cpu0 won't have online file since
- * it is not expected to be turned off generally.
- * In kernels without CONFIG_HOTPLUG_CPU, this
- * file won't exist
- */
- snprintf(buf, sizeof(buf),
- "/sys/devices/system/cpu/cpu%d/online", cpu);
- if (stat(buf, &statbuf) != 0)
- return 1;
-
- /*
- * Read online file using sysfs__read_str.
- * If read or open fails, return -1.
- * If read succeeds, return value from file
- * which gets stored in "str"
- */
- snprintf(buf, sizeof(buf),
- "devices/system/cpu/cpu%d/online", cpu);
-
- if (sysfs__read_str(buf, &str, &strlen) < 0)
- return status;
-
- status = atoi(str);
-
- free(str);
- return status;
-}
-
#ifdef HAVE_LIBBPF_SUPPORT
static int write_bpf_prog_info(struct feat_fd *ff,
struct evlist *evlist __maybe_unused)
@@ -3676,32 +3646,50 @@ int perf_header__write_pipe(int fd)
static int perf_session__do_write_header(struct perf_session *session,
struct evlist *evlist,
int fd, bool at_exit,
- struct feat_copier *fc)
+ struct feat_copier *fc,
+ bool write_attrs_after_data)
{
struct perf_file_header f_header;
- struct perf_file_attr f_attr;
struct perf_header *header = &session->header;
struct evsel *evsel;
struct feat_fd ff = {
.fd = fd,
};
- u64 attr_offset;
+ u64 attr_offset = sizeof(f_header), attr_size = 0;
int err;
- lseek(fd, sizeof(f_header), SEEK_SET);
+ if (write_attrs_after_data && at_exit) {
+ /*
+ * Write features at the end of the file first so that
+ * attributes may come after them.
+ */
+ if (!header->data_offset && header->data_size) {
+ pr_err("File contains data but offset unknown\n");
+ err = -1;
+ goto err_out;
+ }
+ header->feat_offset = header->data_offset + header->data_size;
+ err = perf_header__adds_write(header, evlist, fd, fc);
+ if (err < 0)
+ goto err_out;
+ attr_offset = lseek(fd, 0, SEEK_CUR);
+ } else {
+ lseek(fd, attr_offset, SEEK_SET);
+ }
evlist__for_each_entry(session->evlist, evsel) {
- evsel->id_offset = lseek(fd, 0, SEEK_CUR);
- err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
- if (err < 0) {
- pr_debug("failed to write perf header\n");
- free(ff.buf);
- return err;
+ evsel->id_offset = attr_offset;
+ /* Avoid writing at the end of the file until the session is exiting. */
+ if (!write_attrs_after_data || at_exit) {
+ err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
+ if (err < 0) {
+ pr_debug("failed to write perf header\n");
+ goto err_out;
+ }
}
+ attr_offset += evsel->core.ids * sizeof(u64);
}
- attr_offset = lseek(ff.fd, 0, SEEK_CUR);
-
evlist__for_each_entry(evlist, evsel) {
if (evsel->core.attr.size < sizeof(evsel->core.attr)) {
/*
@@ -3711,40 +3699,46 @@ static int perf_session__do_write_header(struct perf_session *session,
*/
evsel->core.attr.size = sizeof(evsel->core.attr);
}
- f_attr = (struct perf_file_attr){
- .attr = evsel->core.attr,
- .ids = {
- .offset = evsel->id_offset,
- .size = evsel->core.ids * sizeof(u64),
+ /* Avoid writing at the end of the file until the session is exiting. */
+ if (!write_attrs_after_data || at_exit) {
+ struct perf_file_attr f_attr = {
+ .attr = evsel->core.attr,
+ .ids = {
+ .offset = evsel->id_offset,
+ .size = evsel->core.ids * sizeof(u64),
+ }
+ };
+ err = do_write(&ff, &f_attr, sizeof(f_attr));
+ if (err < 0) {
+ pr_debug("failed to write perf header attribute\n");
+ goto err_out;
}
- };
- err = do_write(&ff, &f_attr, sizeof(f_attr));
- if (err < 0) {
- pr_debug("failed to write perf header attribute\n");
- free(ff.buf);
- return err;
}
+ attr_size += sizeof(struct perf_file_attr);
}
- if (!header->data_offset)
- header->data_offset = lseek(fd, 0, SEEK_CUR);
+ if (!header->data_offset) {
+ if (write_attrs_after_data)
+ header->data_offset = sizeof(f_header);
+ else
+ header->data_offset = attr_offset + attr_size;
+ }
header->feat_offset = header->data_offset + header->data_size;
- if (at_exit) {
+ if (!write_attrs_after_data && at_exit) {
+ /* Write features now that feat_offset is known. */
err = perf_header__adds_write(header, evlist, fd, fc);
- if (err < 0) {
- free(ff.buf);
- return err;
- }
+ if (err < 0)
+ goto err_out;
}
f_header = (struct perf_file_header){
.magic = PERF_MAGIC,
.size = sizeof(f_header),
- .attr_size = sizeof(f_attr),
+ .attr_size = sizeof(struct perf_file_attr),
.attrs = {
.offset = attr_offset,
- .size = evlist->core.nr_entries * sizeof(f_attr),
+ .size = attr_size,
},
.data = {
.offset = header->data_offset,
@@ -3757,21 +3751,24 @@ static int perf_session__do_write_header(struct perf_session *session,
lseek(fd, 0, SEEK_SET);
err = do_write(&ff, &f_header, sizeof(f_header));
- free(ff.buf);
if (err < 0) {
pr_debug("failed to write perf header\n");
- return err;
+ goto err_out;
+ } else {
+ lseek(fd, 0, SEEK_END);
+ err = 0;
}
- lseek(fd, header->data_offset + header->data_size, SEEK_SET);
-
- return 0;
+err_out:
+ free(ff.buf);
+ return err;
}
int perf_session__write_header(struct perf_session *session,
struct evlist *evlist,
int fd, bool at_exit)
{
- return perf_session__do_write_header(session, evlist, fd, at_exit, NULL);
+ return perf_session__do_write_header(session, evlist, fd, at_exit, /*fc=*/NULL,
+ /*write_attrs_after_data=*/false);
}
size_t perf_session__data_offset(const struct evlist *evlist)
@@ -3791,9 +3788,11 @@ size_t perf_session__data_offset(const struct evlist *evlist)
int perf_session__inject_header(struct perf_session *session,
struct evlist *evlist,
int fd,
- struct feat_copier *fc)
+ struct feat_copier *fc,
+ bool write_attrs_after_data)
{
- return perf_session__do_write_header(session, evlist, fd, true, fc);
+ return perf_session__do_write_header(session, evlist, fd, true, fc,
+ write_attrs_after_data);
}
static int perf_header__getbuffer64(struct perf_header *header,
@@ -3986,6 +3985,24 @@ int perf_file_header__read(struct perf_file_header *header,
adds_features));
}
+ if (header->size > header->attrs.offset) {
+ pr_err("Perf file header corrupt: header overlaps attrs\n");
+ return -1;
+ }
+
+ if (header->size > header->data.offset) {
+ pr_err("Perf file header corrupt: header overlaps data\n");
+ return -1;
+ }
+
+ if ((header->attrs.offset <= header->data.offset &&
+ header->attrs.offset + header->attrs.size > header->data.offset) ||
+ (header->attrs.offset > header->data.offset &&
+ header->data.offset + header->data.size > header->attrs.offset)) {
+ pr_err("Perf file header corrupt: Attributes and data overlap\n");
+ return -1;
+ }
+
if (header->size != sizeof(*header)) {
/* Support the previous format */
if (header->size == offsetof(typeof(*header), adds_features))
@@ -4066,13 +4083,8 @@ static int perf_file_section__process(struct perf_file_section *section,
static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
struct perf_header *ph,
- struct perf_data* data,
- bool repipe, int repipe_fd)
+ struct perf_data *data)
{
- struct feat_fd ff = {
- .fd = repipe_fd,
- .ph = ph,
- };
ssize_t ret;
ret = perf_data__read(data, header, sizeof(*header));
@@ -4087,19 +4099,15 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
if (ph->needs_swap)
header->size = bswap_64(header->size);
- if (repipe && do_write(&ff, header, sizeof(*header)) < 0)
- return -1;
-
return 0;
}
-static int perf_header__read_pipe(struct perf_session *session, int repipe_fd)
+static int perf_header__read_pipe(struct perf_session *session)
{
struct perf_header *header = &session->header;
struct perf_pipe_file_header f_header;
- if (perf_file_header__read_pipe(&f_header, header, session->data,
- session->repipe, repipe_fd) < 0) {
+ if (perf_file_header__read_pipe(&f_header, header, session->data) < 0) {
pr_debug("incompatible file format\n");
return -EINVAL;
}
@@ -4199,7 +4207,7 @@ static int evlist__prepare_tracepoint_events(struct evlist *evlist, struct tep_h
}
#endif
-int perf_session__read_header(struct perf_session *session, int repipe_fd)
+int perf_session__read_header(struct perf_session *session)
{
struct perf_data *data = session->data;
struct perf_header *header = &session->header;
@@ -4220,7 +4228,7 @@ int perf_session__read_header(struct perf_session *session, int repipe_fd)
* We can read 'pipe' data event from regular file,
* check for the pipe header regardless of source.
*/
- err = perf_header__read_pipe(session, repipe_fd);
+ err = perf_header__read_pipe(session);
if (!err || perf_data__is_pipe(data)) {
data->is_pipe = true;
return err;
@@ -4326,7 +4334,7 @@ out_delete_evlist:
int perf_event__process_feature(struct perf_session *session,
union perf_event *event)
{
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
struct feat_fd ff = { .fd = 0 };
struct perf_record_header_feature *fe = (struct perf_record_header_feature *)event;
int type = fe->header.type;
@@ -4405,7 +4413,7 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
return ret;
}
-int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
+int perf_event__process_attr(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct evlist **pevlist)
{
@@ -4444,7 +4452,7 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
return 0;
}
-int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
+int perf_event__process_event_update(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct evlist **pevlist)
{
@@ -4514,15 +4522,14 @@ int perf_event__process_tracing_data(struct perf_session *session,
SEEK_SET);
}
- size_read = trace_report(fd, &session->tevent,
- session->repipe);
+ size_read = trace_report(fd, &session->tevent, session->trace_event_repipe);
padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read;
if (readn(fd, buf, padding) < 0) {
pr_err("%s: reading input file", __func__);
return -1;
}
- if (session->repipe) {
+ if (session->trace_event_repipe) {
int retw = write(STDOUT_FILENO, buf, padding);
if (retw <= 0 || retw != padding) {
pr_err("%s: repiping tracing data padding", __func__);
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 7c16a250e738..5201af6305f4 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -10,7 +10,13 @@
#include <linux/bitmap.h>
#include <linux/types.h>
#include "env.h"
-#include "pmu.h"
+#include <perf/cpumap.h>
+
+struct evlist;
+union perf_event;
+struct perf_header;
+struct perf_session;
+struct perf_tool;
enum {
HEADER_RESERVED = 0, /* always cleared */
@@ -61,14 +67,28 @@ struct perf_file_section {
u64 size;
};
+/**
+ * struct perf_file_header: Header representation on disk.
+ */
struct perf_file_header {
+ /** @magic: Holds "PERFILE2". */
u64 magic;
+ /** @size: Size of this header - sizeof(struct perf_file_header). */
u64 size;
+ /**
+ * @attr_size: Size of attrs entries - sizeof(struct perf_event_attr) +
+ * sizeof(struct perf_file_section).
+ */
u64 attr_size;
+ /** @attrs: Offset and size of file section holding attributes. */
struct perf_file_section attrs;
+ /** @data: Offset and size of file section holding regular event data. */
struct perf_file_section data;
- /* event_types is ignored */
+ /** @event_types: Ignored. */
struct perf_file_section event_types;
+ /**
+ * @adds_features: Bitmap of features. The features are immediately after the data section.
+ */
DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
};
@@ -77,8 +97,6 @@ struct perf_pipe_file_header {
u64 size;
};
-struct perf_header;
-
int perf_file_header__read(struct perf_file_header *header,
struct perf_header *ph, int fd);
@@ -110,14 +128,9 @@ struct perf_header_feature_ops {
bool synthesize;
};
-struct evlist;
-struct perf_session;
-struct perf_tool;
-union perf_event;
-
extern const char perf_version_string[];
-int perf_session__read_header(struct perf_session *session, int repipe_fd);
+int perf_session__read_header(struct perf_session *session);
int perf_session__write_header(struct perf_session *session,
struct evlist *evlist,
int fd, bool at_exit);
@@ -136,7 +149,8 @@ struct feat_copier {
int perf_session__inject_header(struct perf_session *session,
struct evlist *evlist,
int fd,
- struct feat_copier *fc);
+ struct feat_copier *fc,
+ bool write_attrs_after_data);
size_t perf_session__data_offset(const struct evlist *evlist);
@@ -156,9 +170,9 @@ int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full);
int perf_event__process_feature(struct perf_session *session,
union perf_event *event);
-int perf_event__process_attr(struct perf_tool *tool, union perf_event *event,
+int perf_event__process_attr(const struct perf_tool *tool, union perf_event *event,
struct evlist **pevlist);
-int perf_event__process_event_update(struct perf_tool *tool,
+int perf_event__process_event_update(const struct perf_tool *tool,
union perf_event *event,
struct evlist **pevlist);
size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
@@ -181,14 +195,16 @@ int write_padded(struct feat_fd *fd, const void *bf,
#define MAX_CACHE_LVL 4
-int is_cpu_online(unsigned int cpu);
int build_caches_for_cpu(u32 cpu, struct cpu_cache_level caches[], u32 *cntp);
/*
* arch specific callback
*/
-int get_cpuid(char *buffer, size_t sz);
+int get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu);
+
+char *get_cpuid_str(struct perf_cpu cpu);
+
+char *get_cpuid_allow_env_override(struct perf_cpu cpu);
-char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused);
int strcmp_cpuid_str(const char *s1, const char *s2);
#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/hisi-ptt.c b/tools/perf/util/hisi-ptt.c
index 37ea987017f6..e4cc4785f744 100644
--- a/tools/perf/util/hisi-ptt.c
+++ b/tools/perf/util/hisi-ptt.c
@@ -79,14 +79,14 @@ static void hisi_ptt_dump_event(struct hisi_ptt *ptt, unsigned char *buf,
static int hisi_ptt_process_event(struct perf_session *session __maybe_unused,
union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
return 0;
}
static int hisi_ptt_process_auxtrace_event(struct perf_session *session,
union perf_event *event,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt,
auxtrace);
@@ -123,7 +123,7 @@ static int hisi_ptt_process_auxtrace_event(struct perf_session *session,
}
static int hisi_ptt_flush(struct perf_session *session __maybe_unused,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index f028f113c4fd..fff134565801 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -218,6 +218,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
hists__new_col_len(hists, HISTC_ADDR, BITS_PER_LONG / 4 + 2);
+ hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_PREDICTED, 9);
+ hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_ABORT, 5);
+ hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_CYCLES, 6);
if (symbol_conf.nanosecs)
hists__new_col_len(hists, HISTC_TIME, 16);
@@ -472,10 +475,18 @@ static int hist_entry__init(struct hist_entry *he,
memcpy(he->branch_info, template->branch_info,
sizeof(*he->branch_info));
+ he->branch_info->from.ms.maps = maps__get(he->branch_info->from.ms.maps);
he->branch_info->from.ms.map = map__get(he->branch_info->from.ms.map);
+ he->branch_info->to.ms.maps = maps__get(he->branch_info->to.ms.maps);
he->branch_info->to.ms.map = map__get(he->branch_info->to.ms.map);
}
+ if (he->mem_info) {
+ he->mem_info = mem_info__clone(template->mem_info);
+ if (he->mem_info == NULL)
+ goto err_infos;
+ }
+
if (hist_entry__has_callchains(he) && symbol_conf.use_callchain)
callchain_init(he->callchain);
@@ -620,12 +631,6 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
if (symbol_conf.cumulate_callchain)
he_stat__add_period(he->stat_acc, period);
- /*
- * This mem info was allocated from sample__resolve_mem
- * and will not be used anymore.
- */
- mem_info__zput(entry->mem_info);
-
block_info__delete(entry->block_info);
kvm_info__zput(entry->kvm_info);
@@ -636,7 +641,12 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
* mis-adjust symbol addresses when computing
* the history counter to increment.
*/
- if (he->ms.map != entry->ms.map) {
+ if (hists__has(hists, sym) && he->ms.map != entry->ms.map) {
+ if (he->ms.sym) {
+ u64 addr = he->ms.sym->start;
+ he->ms.sym = map__find_symbol(entry->ms.map, addr);
+ }
+
map__put(he->ms.map);
he->ms.map = map__get(entry->ms.map);
}
@@ -739,7 +749,7 @@ __hists__add_entry(struct hists *hists,
.filtered = symbol__parent_filter(sym_parent) | al->filtered,
.hists = hists,
.branch_info = bi,
- .mem_info = mem_info__get(mi),
+ .mem_info = mi,
.kvm_info = ki,
.block_info = block_info,
.transaction = sample->transaction,
@@ -970,10 +980,21 @@ out:
return err;
}
+/*
+ * Release what a branch_info holds: both the 'from' and 'to' map_symbols are
+ * handed to map_symbol__exit() and both srcline strings to zfree_srcline().
+ * The branch_info itself is not freed here.
+ */
+static void branch_info__exit(struct branch_info *bi)
+{
+	/* Branch source. */
+	map_symbol__exit(&bi->from.ms);
+	/* Branch target. */
+	map_symbol__exit(&bi->to.ms);
+
+	zfree_srcline(&bi->srcline_from);
+	zfree_srcline(&bi->srcline_to);
+}
+
static int
iter_finish_branch_entry(struct hist_entry_iter *iter,
struct addr_location *al __maybe_unused)
{
+ for (int i = 0; i < iter->total; i++)
+ branch_info__exit(&iter->bi[i]);
+
zfree(&iter->bi);
iter->he = NULL;
@@ -1319,10 +1340,7 @@ void hist_entry__delete(struct hist_entry *he)
map_symbol__exit(&he->ms);
if (he->branch_info) {
- map_symbol__exit(&he->branch_info->from.ms);
- map_symbol__exit(&he->branch_info->to.ms);
- zfree_srcline(&he->branch_info->srcline_from);
- zfree_srcline(&he->branch_info->srcline_to);
+ branch_info__exit(he->branch_info);
zfree(&he->branch_info);
}
@@ -2370,6 +2388,11 @@ void hists__inc_nr_lost_samples(struct hists *hists, u32 lost)
hists->stats.nr_lost_samples += lost;
}
+/* Add @lost to the dropped-sample counter kept in @hists->stats. */
+void hists__inc_nr_dropped_samples(struct hists *hists, u32 lost)
+{
+	hists->stats.nr_dropped_samples += lost;
+}
+
static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
struct hist_entry *pair)
{
@@ -2667,7 +2690,7 @@ int hists__unlink(struct hists *hists)
void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
struct perf_sample *sample, bool nonany_branch_mode,
- u64 *total_cycles)
+ u64 *total_cycles, struct evsel *evsel)
{
struct branch_info *bi;
struct branch_entry *entries = perf_sample__branch_entries(sample);
@@ -2691,7 +2714,8 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
for (int i = bs->nr - 1; i >= 0; i--) {
addr_map_symbol__account_cycles(&bi[i].from,
nonany_branch_mode ? NULL : prev,
- bi[i].flags.cycles);
+ bi[i].flags.cycles, evsel,
+ bi[i].branch_stack_cntr);
prev = &bi[i].to;
if (total_cycles)
@@ -2713,18 +2737,24 @@ size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp)
evlist__for_each_entry(evlist, pos) {
struct hists *hists = evsel__hists(pos);
+ u64 total_samples = hists->stats.nr_samples;
+
+ total_samples += hists->stats.nr_lost_samples;
+ total_samples += hists->stats.nr_dropped_samples;
- if (symbol_conf.skip_empty && !hists->stats.nr_samples &&
- !hists->stats.nr_lost_samples)
+ if (symbol_conf.skip_empty && total_samples == 0)
continue;
ret += fprintf(fp, "%s stats:\n", evsel__name(pos));
if (hists->stats.nr_samples)
- ret += fprintf(fp, "%16s events: %10d\n",
+ ret += fprintf(fp, "%20s events: %10d\n",
"SAMPLE", hists->stats.nr_samples);
if (hists->stats.nr_lost_samples)
- ret += fprintf(fp, "%16s events: %10d\n",
+ ret += fprintf(fp, "%20s events: %10d\n",
"LOST_SAMPLES", hists->stats.nr_lost_samples);
+ if (hists->stats.nr_dropped_samples)
+ ret += fprintf(fp, "%20s events: %10d\n",
+ "LOST_SAMPLES (BPF)", hists->stats.nr_dropped_samples);
}
return ret;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 5273f5c37050..1131056924d9 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -86,6 +86,10 @@ enum hist_column {
HISTC_TYPE,
HISTC_TYPE_OFFSET,
HISTC_SYMBOL_OFFSET,
+ HISTC_TYPE_CACHELINE,
+ HISTC_CALLCHAIN_BRANCH_PREDICTED,
+ HISTC_CALLCHAIN_BRANCH_ABORT,
+ HISTC_CALLCHAIN_BRANCH_CYCLES,
HISTC_NR_COLS, /* Last entry */
};
@@ -371,6 +375,7 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h);
void hists__inc_nr_events(struct hists *hists);
void hists__inc_nr_samples(struct hists *hists, bool filtered);
void hists__inc_nr_lost_samples(struct hists *hists, u32 lost);
+void hists__inc_nr_dropped_samples(struct hists *hists, u32 lost);
size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
int max_cols, float min_pcnt, FILE *fp,
@@ -742,7 +747,7 @@ unsigned int hists__overhead_width(struct hists *hists);
void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
struct perf_sample *sample, bool nonany_branch_mode,
- u64 *total_cycles);
+ u64 *total_cycles, struct evsel *evsel);
struct option;
int parse_filter_percentage(const struct option *opt, const char *arg, int unset);
diff --git a/tools/perf/util/hwmon_pmu.c b/tools/perf/util/hwmon_pmu.c
new file mode 100644
index 000000000000..4acb9bb19b84
--- /dev/null
+++ b/tools/perf/util/hwmon_pmu.c
@@ -0,0 +1,839 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#include "counts.h"
+#include "debug.h"
+#include "evsel.h"
+#include "hashmap.h"
+#include "hwmon_pmu.h"
+#include "pmu.h"
+#include <internal/xyarray.h>
+#include <internal/threadmap.h>
+#include <perf/threadmap.h>
+#include <sys/types.h>
+#include <assert.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <api/fs/fs.h>
+#include <api/io.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+
+/**
+ * Strings that correspond to enum hwmon_type, indexed by the enum value.
+ *
+ * parse_hwmon_filename() runs bsearch() over the entries that follow the
+ * leading NULL, so they have to remain in strcmp() order.
+ */
+static const char * const hwmon_type_strs[HWMON_TYPE_MAX] = {
+	[HWMON_TYPE_NONE]	= NULL,
+	[HWMON_TYPE_CPU]	= "cpu",
+	[HWMON_TYPE_CURR]	= "curr",
+	[HWMON_TYPE_ENERGY]	= "energy",
+	[HWMON_TYPE_FAN]	= "fan",
+	[HWMON_TYPE_HUMIDITY]	= "humidity",
+	[HWMON_TYPE_IN]		= "in",
+	[HWMON_TYPE_INTRUSION]	= "intrusion",
+	[HWMON_TYPE_POWER]	= "power",
+	[HWMON_TYPE_PWM]	= "pwm",
+	[HWMON_TYPE_TEMP]	= "temp",
+};
+/* Longest entry above; used to check that scratch buffers are large enough. */
+#define LONGEST_HWMON_TYPE_STR "intrusion"
+
+/**
+ * Strings that correspond to enum hwmon_item, indexed by the enum value.
+ *
+ * parse_hwmon_filename() runs bsearch() over the entries that follow the
+ * leading NULL, so they have to remain in strcmp() order.
+ *
+ * The kernel names the emergency hysteresis attribute "<type><N>_emergency_hyst"
+ * so that is the string matched here; the enumerator keeps its historical
+ * HWMON_ITEM_EMERGENCY_HIST spelling as it is part of the header.
+ */
+static const char * const hwmon_item_strs[HWMON_ITEM__MAX] = {
+	[HWMON_ITEM_NONE]			= NULL,
+	[HWMON_ITEM_ACCURACY]			= "accuracy",
+	[HWMON_ITEM_ALARM]			= "alarm",
+	[HWMON_ITEM_AUTO_CHANNELS_TEMP]		= "auto_channels_temp",
+	[HWMON_ITEM_AVERAGE]			= "average",
+	[HWMON_ITEM_AVERAGE_HIGHEST]		= "average_highest",
+	[HWMON_ITEM_AVERAGE_INTERVAL]		= "average_interval",
+	[HWMON_ITEM_AVERAGE_INTERVAL_MAX]	= "average_interval_max",
+	[HWMON_ITEM_AVERAGE_INTERVAL_MIN]	= "average_interval_min",
+	[HWMON_ITEM_AVERAGE_LOWEST]		= "average_lowest",
+	[HWMON_ITEM_AVERAGE_MAX]		= "average_max",
+	[HWMON_ITEM_AVERAGE_MIN]		= "average_min",
+	[HWMON_ITEM_BEEP]			= "beep",
+	[HWMON_ITEM_CAP]			= "cap",
+	[HWMON_ITEM_CAP_HYST]			= "cap_hyst",
+	[HWMON_ITEM_CAP_MAX]			= "cap_max",
+	[HWMON_ITEM_CAP_MIN]			= "cap_min",
+	[HWMON_ITEM_CRIT]			= "crit",
+	[HWMON_ITEM_CRIT_HYST]			= "crit_hyst",
+	[HWMON_ITEM_DIV]			= "div",
+	[HWMON_ITEM_EMERGENCY]			= "emergency",
+	[HWMON_ITEM_EMERGENCY_HIST]		= "emergency_hyst",
+	[HWMON_ITEM_ENABLE]			= "enable",
+	[HWMON_ITEM_FAULT]			= "fault",
+	[HWMON_ITEM_FREQ]			= "freq",
+	[HWMON_ITEM_HIGHEST]			= "highest",
+	[HWMON_ITEM_INPUT]			= "input",
+	[HWMON_ITEM_LABEL]			= "label",
+	[HWMON_ITEM_LCRIT]			= "lcrit",
+	[HWMON_ITEM_LCRIT_HYST]			= "lcrit_hyst",
+	[HWMON_ITEM_LOWEST]			= "lowest",
+	[HWMON_ITEM_MAX]			= "max",
+	[HWMON_ITEM_MAX_HYST]			= "max_hyst",
+	[HWMON_ITEM_MIN]			= "min",
+	[HWMON_ITEM_MIN_HYST]			= "min_hyst",
+	[HWMON_ITEM_MOD]			= "mod",
+	[HWMON_ITEM_OFFSET]			= "offset",
+	[HWMON_ITEM_PULSES]			= "pulses",
+	[HWMON_ITEM_RATED_MAX]			= "rated_max",
+	[HWMON_ITEM_RATED_MIN]			= "rated_min",
+	[HWMON_ITEM_RESET_HISTORY]		= "reset_history",
+	[HWMON_ITEM_TARGET]			= "target",
+	[HWMON_ITEM_TYPE]			= "type",
+	[HWMON_ITEM_VID]			= "vid",
+};
+/* Longest entry above; used to check that scratch buffers are large enough. */
+#define LONGEST_HWMON_ITEM_STR "average_interval_max"
+
+/**
+ * Unit suffix for each enum hwmon_type, appended to values printed in event
+ * descriptions and reported as the event unit by hwmon_pmu__check_alias().
+ */
+static const char *const hwmon_units[HWMON_TYPE_MAX] = {
+	[HWMON_TYPE_NONE]	= NULL,
+	[HWMON_TYPE_CPU]	= "V",
+	[HWMON_TYPE_CURR]	= "A",
+	[HWMON_TYPE_ENERGY]	= "J",
+	[HWMON_TYPE_FAN]	= "rpm",
+	[HWMON_TYPE_HUMIDITY]	= "%",
+	[HWMON_TYPE_IN]		= "V",
+	[HWMON_TYPE_INTRUSION]	= "",
+	[HWMON_TYPE_POWER]	= "W",
+	[HWMON_TYPE_PWM]	= "Hz",
+	[HWMON_TYPE_TEMP]	= "'C",
+};
+
+struct hwmon_pmu { /* A perf PMU backed by one hwmon sysfs directory. */
+ struct perf_pmu pmu; /* Generic PMU; container_of() on it recovers the hwmon_pmu. */
+ struct hashmap events; /* union hwmon_pmu_event_key.type_and_num -> struct hwmon_pmu_event_value *. */
+ int hwmon_dir_fd; /* Directory fd passed to hwmon_pmu__new(); closed in hwmon_pmu__exit(). */
+};
+
+/**
+ * union hwmon_pmu_event_key: Key for hwmon_pmu->events; each key represents
+ * one event.
+ *
+ * Related hwmon files start with the <type><number> that this key represents.
+ * The packed type_and_num value is what is stored as the hashmap key and is
+ * also copied to and from perf_event_attr.config. Keys are always built from a
+ * zeroed type_and_num so that the bits outside num and type compare equal.
+ * A num of -1 is used while parsing names that have no number.
+ */
+union hwmon_pmu_event_key {
+ long type_and_num;
+ struct {
+ int num :16;
+ enum hwmon_type type :8;
+ };
+};
+
+/**
+ * struct hwmon_pmu_event_value: Value in hwmon_pmu->events.
+ *
+ * Hwmon files are of the form <type><number>_<item> and may have a suffix
+ * _alarm. One value collects everything found for a single <type><number>.
+ */
+struct hwmon_pmu_event_value {
+ /** @items: which <item> files without an _alarm suffix are present. */
+ DECLARE_BITMAP(items, HWMON_ITEM__MAX);
+ /** @alarm_items: which <item>_alarm files are present. */
+ DECLARE_BITMAP(alarm_items, HWMON_ITEM__MAX);
+ /** @label: heap copy of <type><number>_label without trailing newlines, if present. */
+ char *label;
+ /** @name: heap string <type>_<label> after fix_name(), NULL when there is no label. */
+ char *name;
+};
+
+/* True when @pmu is non-NULL and its type lies in the hwmon PMU type range. */
+bool perf_pmu__is_hwmon(const struct perf_pmu *pmu)
+{
+	if (!pmu)
+		return false;
+
+	if (pmu->type < PERF_PMU_TYPE_HWMON_START)
+		return false;
+
+	return pmu->type <= PERF_PMU_TYPE_HWMON_END;
+}
+
+/* True when the PMU attached to @evsel is a hwmon PMU. */
+bool evsel__is_hwmon(const struct evsel *evsel)
+{
+	const struct perf_pmu *pmu = evsel->pmu;
+
+	return perf_pmu__is_hwmon(pmu);
+}
+
+/*
+ * Hash callback for hwmon_pmu->events. The key is the type_and_num member of
+ * union hwmon_pmu_event_key, a long, and that value is used as the hash as is.
+ */
+static size_t hwmon_pmu__event_hashmap_hash(long key, void *ctx __maybe_unused)
+{
+	const long type_and_num = key;
+
+	return type_and_num;
+}
+
+/*
+ * Equality callback for hwmon_pmu->events: two keys match when their packed
+ * type_and_num values, i.e. the long keys themselves, are equal.
+ */
+static bool hwmon_pmu__event_hashmap_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+	const long lhs = key1;
+	const long rhs = key2;
+
+	return lhs == rhs;
+}
+
+/*
+ * bsearch() comparator for the hwmon string tables. The arguments are not
+ * symmetric: @a is the search key, a plain string, while @b points at a table
+ * element, i.e. at a pointer to a string.
+ */
+static int hwmon_strcmp(const void *a, const void *b)
+{
+	const char *needle = a;
+	const char *table_entry = *(const char * const *)b;
+
+	return strcmp(needle, table_entry);
+}
+
+/*
+ * Split a hwmon name into its parts.
+ *
+ * @filename: a sysfs file name of the form <type><number>_<item>[_alarm], or,
+ *            when @item is NULL, an event name starting <type><number> or
+ *            <type>_.
+ * @type: set to the enum hwmon_type whose string matches the leading part.
+ * @number: set to the number that follows the type, or to -1 when the type is
+ *          directly followed by '_'.
+ * @item: if NULL only @type and @number are parsed and the remainder of the
+ *        name isn't looked at; otherwise set to the matching enum hwmon_item.
+ * @alarm: only written when @item is non-NULL; true when the item carries an
+ *         "_alarm" suffix.
+ *
+ * Returns true on success. On failure false is returned and the output
+ * parameters may have been partially written.
+ */
+bool parse_hwmon_filename(const char *filename,
+			  enum hwmon_type *type,
+			  int *number,
+			  enum hwmon_item *item,
+			  bool *alarm)
+{
+	char fn_type[24];
+	const char **elem;
+	const char *fn_item = NULL;
+	size_t fn_item_len;
+
+	assert(strlen(LONGEST_HWMON_TYPE_STR) < sizeof(fn_type));
+	/*
+	 * Work on a bounded copy so the type can be terminated in place; index
+	 * i is the same in the copy and in filename.
+	 */
+	strlcpy(fn_type, filename, sizeof(fn_type));
+	for (size_t i = 0; fn_type[i] != '\0'; i++) {
+		if (fn_type[i] >= '0' && fn_type[i] <= '9') {
+			fn_type[i] = '\0';
+			*number = strtoul(&filename[i], (char **)&fn_item, 10);
+			if (*fn_item == '_')
+				fn_item++;
+			break;
+		}
+		if (fn_type[i] == '_') {
+			fn_type[i] = '\0';
+			*number = -1;
+			fn_item = &filename[i + 1];
+			break;
+		}
+	}
+	if (fn_item == NULL || fn_type[0] == '\0' || (item != NULL && fn_item[0] == '\0')) {
+		pr_debug3("hwmon_pmu: not a hwmon file '%s'\n", filename);
+		return false;
+	}
+	elem = bsearch(fn_type, hwmon_type_strs + 1, ARRAY_SIZE(hwmon_type_strs) - 1,
+		       sizeof(hwmon_type_strs[0]), hwmon_strcmp);
+	if (!elem) {
+		pr_debug3("hwmon_pmu: not a hwmon type '%s' in file name '%s'\n",
+			  fn_type, filename);
+		return false;
+	}
+
+	*type = elem - &hwmon_type_strs[0];
+	if (!item)
+		return true;
+
+	*alarm = false;
+	fn_item_len = strlen(fn_item);
+	if (fn_item_len > 6 && !strcmp(&fn_item[fn_item_len - 6], "_alarm")) {
+		/* Length of the item once the "_alarm" suffix is removed. */
+		size_t base_len = fn_item_len - 6;
+
+		assert(strlen(LONGEST_HWMON_ITEM_STR) < sizeof(fn_type));
+		/*
+		 * The length comes from the file name, not from the table, so
+		 * it must be checked before fn_type is reused as scratch
+		 * space. Anything that doesn't fit can't be a known item.
+		 */
+		if (base_len >= sizeof(fn_type)) {
+			pr_debug3("hwmon_pmu: not a hwmon item '%s' in file name '%s'\n",
+				  fn_item, filename);
+			return false;
+		}
+		strlcpy(fn_type, fn_item, base_len + 1);
+		fn_item = fn_type;
+		*alarm = true;
+	}
+	elem = bsearch(fn_item, hwmon_item_strs + 1, ARRAY_SIZE(hwmon_item_strs) - 1,
+		       sizeof(hwmon_item_strs[0]), hwmon_strcmp);
+	if (!elem) {
+		pr_debug3("hwmon_pmu: not a hwmon item '%s' in file name '%s'\n",
+			  fn_item, filename);
+		return false;
+	}
+	*item = elem - &hwmon_item_strs[0];
+	return true;
+}
+
+/*
+ * Turn a label into something usable as an event name, in place: the string is
+ * cut at the first newline, the characters " :,/\t" become '_' and everything
+ * else is lower-cased.
+ */
+static void fix_name(char *p)
+{
+	char *s = strchr(p, '\n');
+
+	if (s)
+		*s = '\0';
+
+	for (; *p != '\0'; p++) {
+		if (strchr(" :,/\n\t", *p))
+			*p = '_';
+		else
+			/* tolower() is undefined for negative values of plain char. */
+			*p = tolower((unsigned char)*p);
+	}
+}
+
+/*
+ * Populate pmu->events from the files in the hwmon directory, once.
+ *
+ * Every regular file that parses as <type><number>_<item>[_alarm] sets a bit in
+ * the value for its <type><number>; label files additionally provide the label
+ * and the derived event name. Entries left without an input file are removed
+ * again as there would be nothing to read for them.
+ *
+ * Returns 0 on success, or if the events were already loaded, and -ENOMEM on
+ * failure, in which case a later call starts over.
+ */
+static int hwmon_pmu__read_events(struct hwmon_pmu *pmu)
+{
+	DIR *dir;
+	struct dirent *ent;
+	int dup_fd, err = 0;
+	struct hashmap_entry *cur, *tmp;
+	size_t bkt;
+
+	if (pmu->pmu.sysfs_aliases_loaded)
+		return 0;
+
+	/*
+	 * Use a dup-ed fd as closedir will close it. Use openat so that the
+	 * directory contents are refreshed.
+	 */
+	dup_fd = openat(pmu->hwmon_dir_fd, ".", O_DIRECTORY);
+
+	if (dup_fd == -1)
+		return -ENOMEM;
+
+	dir = fdopendir(dup_fd);
+	if (!dir) {
+		close(dup_fd);
+		return -ENOMEM;
+	}
+
+	while ((ent = readdir(dir)) != NULL) {
+		enum hwmon_type type;
+		int number;
+		enum hwmon_item item;
+		bool alarm;
+		union hwmon_pmu_event_key key = { .type_and_num = 0 };
+		struct hwmon_pmu_event_value *value;
+
+		if (ent->d_type != DT_REG)
+			continue;
+
+		if (!parse_hwmon_filename(ent->d_name, &type, &number, &item, &alarm)) {
+			pr_debug3("Not a hwmon file '%s'\n", ent->d_name);
+			continue;
+		}
+		key.num = number;
+		key.type = type;
+		if (!hashmap__find(&pmu->events, key.type_and_num, &value)) {
+			value = zalloc(sizeof(*value));
+			if (!value) {
+				err = -ENOMEM;
+				goto err_out;
+			}
+			err = hashmap__add(&pmu->events, key.type_and_num, value);
+			if (err) {
+				free(value);
+				err = -ENOMEM;
+				goto err_out;
+			}
+		}
+		__set_bit(item, alarm ? value->alarm_items : value->items);
+		if (item == HWMON_ITEM_LABEL) {
+			char buf[128];
+			int fd = openat(pmu->hwmon_dir_fd, ent->d_name, O_RDONLY);
+			ssize_t read_len;
+
+			if (fd < 0)
+				continue;
+
+			/* Keep one byte back so the label can always be terminated. */
+			read_len = read(fd, buf, sizeof(buf) - 1);
+			close(fd);
+
+			while (read_len > 0 && buf[read_len - 1] == '\n')
+				read_len--;
+
+			if (read_len > 0)
+				buf[read_len] = '\0';
+
+			/*
+			 * A failed read, an empty file or a file holding only
+			 * newlines leaves nothing usable in buf, so don't look
+			 * at its contents in those cases.
+			 */
+			if (read_len <= 0 || buf[0] == '\0') {
+				pr_debug("hwmon_pmu: empty label file %s %s\n",
+					 pmu->pmu.name, ent->d_name);
+				continue;
+			}
+			/* Don't leak an earlier label should two files share a key. */
+			zfree(&value->label);
+			zfree(&value->name);
+			value->label = strdup(buf);
+			if (!value->label) {
+				pr_debug("hwmon_pmu: memory allocation failure\n");
+				continue;
+			}
+			snprintf(buf, sizeof(buf), "%s_%s", hwmon_type_strs[type], value->label);
+			fix_name(buf);
+			value->name = strdup(buf);
+			if (!value->name)
+				pr_debug("hwmon_pmu: memory allocation failure\n");
+		}
+	}
+	if (hashmap__size(&pmu->events) == 0)
+		pr_debug2("hwmon_pmu: %s has no events\n", pmu->pmu.name);
+
+	hashmap__for_each_entry_safe((&pmu->events), cur, tmp, bkt) {
+		union hwmon_pmu_event_key key = {
+			.type_and_num = cur->key,
+		};
+		struct hwmon_pmu_event_value *value = cur->pvalue;
+
+		if (!test_bit(HWMON_ITEM_INPUT, value->items)) {
+			pr_debug("hwmon_pmu: %s removing event '%s%d' that has no input file\n",
+				pmu->pmu.name, hwmon_type_strs[key.type], key.num);
+			hashmap__delete(&pmu->events, key.type_and_num, &key, &value);
+			zfree(&value->label);
+			zfree(&value->name);
+			free(value);
+		}
+	}
+	pmu->pmu.sysfs_aliases_loaded = true;
+
+err_out:
+	closedir(dir);
+	return err;
+}
+
+/*
+ * Allocate a hwmon PMU for the directory open as @hwmon_dir and append it to
+ * @pmus.
+ *
+ * The PMU type is derived from the number in @sysfs_name ("hwmon<N>"), the PMU
+ * name is "hwmon_" followed by @name passed through fix_name(), and
+ * @sysfs_name becomes the alias name.
+ *
+ * Ownership of @hwmon_dir is taken in all cases: on success it is kept for the
+ * lifetime of the PMU and closed by hwmon_pmu__exit(), on any failure it is
+ * closed here before NULL is returned.
+ */
+struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, int hwmon_dir, const char *sysfs_name, const char *name)
+{
+	char buf[32];
+	struct hwmon_pmu *hwm;
+
+	hwm = zalloc(sizeof(*hwm));
+	if (!hwm) {
+		/* Match err_out below, callers never close hwmon_dir themselves. */
+		close(hwmon_dir);
+		return NULL;
+	}
+
+	hwm->hwmon_dir_fd = hwmon_dir;
+	/* Skip over the "hwmon" prefix to reach the number. */
+	hwm->pmu.type = PERF_PMU_TYPE_HWMON_START + strtoul(sysfs_name + 5, NULL, 10);
+	if (hwm->pmu.type > PERF_PMU_TYPE_HWMON_END) {
+		pr_err("Unable to encode hwmon type from %s in valid PMU type\n", sysfs_name);
+		goto err_out;
+	}
+	snprintf(buf, sizeof(buf), "hwmon_%s", name);
+	/* Only sanitize the part that came from the name file. */
+	fix_name(buf + 6);
+	hwm->pmu.name = strdup(buf);
+	if (!hwm->pmu.name)
+		goto err_out;
+	hwm->pmu.alias_name = strdup(sysfs_name);
+	if (!hwm->pmu.alias_name)
+		goto err_out;
+	hwm->pmu.cpus = perf_cpu_map__new("0");
+	if (!hwm->pmu.cpus)
+		goto err_out;
+	INIT_LIST_HEAD(&hwm->pmu.format);
+	INIT_LIST_HEAD(&hwm->pmu.aliases);
+	INIT_LIST_HEAD(&hwm->pmu.caps);
+	hashmap__init(&hwm->events, hwmon_pmu__event_hashmap_hash,
+		      hwmon_pmu__event_hashmap_equal, /*ctx=*/NULL);
+
+	list_add_tail(&hwm->pmu.list, pmus);
+	return &hwm->pmu;
+err_out:
+	free((char *)hwm->pmu.name);
+	free(hwm->pmu.alias_name);
+	free(hwm);
+	close(hwmon_dir);
+	return NULL;
+}
+
+/*
+ * Tear down the hwmon specific parts of @pmu: every event value together with
+ * its label and name strings, the events hashmap and the directory fd. The
+ * generic struct perf_pmu members are not touched here.
+ */
+void hwmon_pmu__exit(struct perf_pmu *pmu)
+{
+	struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu);
+	struct hashmap_entry *entry, *next;
+	size_t bucket;
+
+	hashmap__for_each_entry_safe((&hwm->events), entry, next, bucket) {
+		struct hwmon_pmu_event_value *value = entry->pvalue;
+
+		/* The value goes away right after, no need to clear the pointers. */
+		free(value->label);
+		free(value->name);
+		free(value);
+	}
+	hashmap__clear(&hwm->events);
+
+	close(hwm->hwmon_dir_fd);
+}
+
+/*
+ * Append " <item>=<value><unit>, <item>=<value><unit>..." to @out_buf for each
+ * item set in @items whose file can be read, skipping the label and input
+ * items. Values are printed divided by 1000.
+ *
+ * @hwm: PMU whose directory holds the files.
+ * @out_buf: where to write, @out_buf_len bytes are available.
+ * @key: the <type><number> the items belong to.
+ * @items: bitmap of HWMON_ITEM__MAX bits.
+ * @is_alarm: read the <item>_alarm files rather than the <item> files.
+ *
+ * Returns the number of characters written, not counting the terminator, which
+ * is always less than @out_buf_len unless that is 0.
+ */
+static size_t hwmon_pmu__describe_items(struct hwmon_pmu *hwm, char *out_buf, size_t out_buf_len,
+					union hwmon_pmu_event_key key,
+					const unsigned long *items, bool is_alarm)
+{
+	size_t bit;
+	char buf[64];
+	size_t len = 0;
+
+	if (out_buf_len == 0)
+		return 0;
+
+	for_each_set_bit(bit, items, HWMON_ITEM__MAX) {
+		int fd;
+		ssize_t read_len;
+		long long val;
+
+		if (bit == HWMON_ITEM_LABEL || bit == HWMON_ITEM_INPUT)
+			continue;
+
+		snprintf(buf, sizeof(buf), "%s%d_%s%s",
+			hwmon_type_strs[key.type],
+			key.num,
+			hwmon_item_strs[bit],
+			is_alarm ? "_alarm" : "");
+		fd = openat(hwm->hwmon_dir_fd, buf, O_RDONLY);
+		/* 0 is a valid file descriptor, only negative values are errors. */
+		if (fd < 0)
+			continue;
+
+		/* Keep one byte back for the terminator written below. */
+		read_len = read(fd, buf, sizeof(buf) - 1);
+		close(fd);
+
+		while (read_len > 0 && buf[read_len - 1] == '\n')
+			read_len--;
+
+		if (read_len <= 0)
+			continue;
+
+		buf[read_len] = '\0';
+		val = strtoll(buf, /*endptr=*/NULL, 10);
+		/*
+		 * scnprintf() returns what was actually stored, so len stays
+		 * below out_buf_len and the remaining size can't wrap around
+		 * when the output is truncated.
+		 */
+		len += scnprintf(out_buf + len, out_buf_len - len, "%s%s%s=%g%s",
+				 len == 0 ? " " : ", ",
+				 hwmon_item_strs[bit],
+				 is_alarm ? "_alarm" : "",
+				 (double)val / 1000.0,
+				 hwmon_units[key.type]);
+	}
+	return len;
+}
+
+/*
+ * Call @cb with @state and a description of each event of the hwmon @pmu.
+ *
+ * An event is named after its label when there is one, with <type><number> as
+ * the alias, and is named <type><number> otherwise. The description lists the
+ * current contents of the item files that accompany the input file.
+ *
+ * Returns the first non-zero value returned by @cb, otherwise 0. Failing to
+ * read the events isn't reported as an error, no callbacks are made and 0 is
+ * returned.
+ */
+int hwmon_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb)
+{
+	struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu);
+	struct hashmap_entry *cur;
+	size_t bkt;
+
+	if (hwmon_pmu__read_events(hwm))
+		return 0;
+
+	hashmap__for_each_entry((&hwm->events), cur, bkt) {
+		static const char *const hwmon_scale_units[HWMON_TYPE_MAX] = {
+			NULL,
+			"0.001V", /* cpu */
+			"0.001A", /* curr */
+			"0.001J", /* energy */
+			"1rpm", /* fan */
+			"0.001%", /* humidity */
+			"0.001V", /* in */
+			NULL, /* intrusion */
+			"0.001W", /* power */
+			"1Hz", /* pwm */
+			"0.001'C", /* temp */
+		};
+		static const char *const hwmon_desc[HWMON_TYPE_MAX] = {
+			NULL,
+			"CPU core reference voltage", /* cpu */
+			"Current", /* curr */
+			"Cumulative energy use", /* energy */
+			"Fan", /* fan */
+			"Humidity", /* humidity */
+			"Voltage", /* in */
+			"Chassis intrusion detection", /* intrusion */
+			"Power use", /* power */
+			"Pulse width modulation fan control", /* pwm */
+			"Temperature", /* temp */
+		};
+		char alias_buf[64];
+		char desc_buf[256];
+		char encoding_buf[128];
+		union hwmon_pmu_event_key key = {
+			.type_and_num = cur->key,
+		};
+		struct hwmon_pmu_event_value *value = cur->pvalue;
+		struct pmu_event_info info = {
+			.pmu = pmu,
+			.name = value->name,
+			.alias = alias_buf,
+			.scale_unit = hwmon_scale_units[key.type],
+			.desc = desc_buf,
+			.long_desc = NULL,
+			.encoding_desc = encoding_buf,
+			.topic = "hwmon",
+			.pmu_name = pmu->name,
+			.event_type_desc = "Hwmon event",
+		};
+		int ret;
+		size_t len;
+
+		snprintf(alias_buf, sizeof(alias_buf), "%s%d",
+			 hwmon_type_strs[key.type], key.num);
+		if (!info.name) {
+			/* No label, the <type><number> form is the only name. */
+			info.name = info.alias;
+			info.alias = NULL;
+		}
+
+		/*
+		 * Track the number of characters stored rather than the length
+		 * the text would have had, so that the space left passed below
+		 * can't wrap around. pmu->name + 6 skips the "hwmon_" prefix.
+		 */
+		len = scnprintf(desc_buf, sizeof(desc_buf), "%s in unit %s named %s.",
+				hwmon_desc[key.type],
+				pmu->name + 6,
+				value->label ?: info.name);
+
+		len += hwmon_pmu__describe_items(hwm, desc_buf + len, sizeof(desc_buf) - len,
+						key, value->items, /*is_alarm=*/false);
+		if (len >= sizeof(desc_buf))
+			len = sizeof(desc_buf) - 1;
+
+		hwmon_pmu__describe_items(hwm, desc_buf + len, sizeof(desc_buf) - len,
+					  key, value->alarm_items, /*is_alarm=*/true);
+
+		snprintf(encoding_buf, sizeof(encoding_buf), "%s/config=0x%lx/",
+			 pmu->name, cur->key);
+
+		ret = cb(state, &info);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * Number of events in the hwmon @pmu. The events are loaded first if that
+ * hasn't happened yet; the result of loading is ignored and whatever is in the
+ * map afterwards is counted.
+ */
+size_t hwmon_pmu__num_events(struct perf_pmu *pmu)
+{
+	struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu);
+	struct hashmap *events = &hwm->events;
+
+	hwmon_pmu__read_events(hwm);
+
+	return hashmap__size(events);
+}
+
+/*
+ * Does the hwmon @pmu have an event called @name?
+ *
+ * @name is either <type><number>, looked up directly by key, or of the form
+ * <type>_<label>, compared ignoring case with the names derived from the label
+ * files.
+ */
+bool hwmon_pmu__have_event(struct perf_pmu *pmu, const char *name)
+{
+	struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu);
+	union hwmon_pmu_event_key lookup = { .type_and_num = 0 };
+	struct hashmap_entry *entry;
+	enum hwmon_type type;
+	size_t bucket;
+	int number;
+
+	if (!parse_hwmon_filename(name, &type, &number, /*item=*/NULL, /*is_alarm=*/NULL))
+		return false;
+
+	if (hwmon_pmu__read_events(hwm))
+		return false;
+
+	lookup.type = type;
+	lookup.num = number;
+	if (hashmap_find(&hwm->events, lookup.type_and_num, /*value=*/NULL))
+		return true;
+
+	/* A numbered name that isn't in the map can't match anything else. */
+	if (lookup.num != -1)
+		return false;
+
+	/* Item is of form <type>_ which means we should match <type>_<label>. */
+	hashmap__for_each_entry((&hwm->events), entry, bucket) {
+		union hwmon_pmu_event_key entry_key = {
+			.type_and_num = entry->key,
+		};
+		struct hwmon_pmu_event_value *value = entry->pvalue;
+
+		if (entry_key.type != type || !value->name)
+			continue;
+
+		if (!strcasecmp(name, value->name))
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Set attr->config from a single event term.
+ *
+ * Only user terms that parse as a hwmon name are accepted. <type><number> is
+ * encoded directly, <type>_<label> is resolved by a case insensitive search of
+ * the names in hwm->events, which the caller must have loaded.
+ *
+ * Returns 0 on success and -EINVAL otherwise. An unexpected term is also
+ * reported through @err when that is non-NULL; a label that isn't found is
+ * not.
+ */
+static int hwmon_pmu__config_term(const struct hwmon_pmu *hwm,
+				  struct perf_event_attr *attr,
+				  struct parse_events_term *term,
+				  struct parse_events_error *err)
+{
+	enum hwmon_type type;
+	int number;
+	char *err_str;
+
+	if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER &&
+	    parse_hwmon_filename(term->config, &type, &number,
+				 /*item=*/NULL, /*is_alarm=*/NULL)) {
+		union hwmon_pmu_event_key key = {
+			.type_and_num = 0,
+		};
+		struct hashmap_entry *cur;
+		size_t bkt;
+
+		if (number != -1) {
+			key.type = type;
+			key.num = number;
+			attr->config = key.type_and_num;
+			return 0;
+		}
+
+		/*
+		 * Item is of form <type>_ which means we should
+		 * match <type>_<label>. A config of 0 is left behind when
+		 * nothing matches.
+		 */
+		attr->config = 0;
+		hashmap__for_each_entry((&hwm->events), cur, bkt) {
+			struct hwmon_pmu_event_value *value = cur->pvalue;
+
+			key.type_and_num = cur->key;
+			if (key.type != type || !value->name)
+				continue;
+
+			if (!strcasecmp(term->config, value->name)) {
+				attr->config = key.type_and_num;
+				break;
+			}
+		}
+		return attr->config == 0 ? -EINVAL : 0;
+	}
+
+	if (!err)
+		return -EINVAL;
+
+	/* Fall back to a fixed message if the detailed one can't be allocated. */
+	if (asprintf(&err_str,
+		     "unexpected hwmon event term (%s) %s",
+		     parse_events__term_type_str(term->type_term),
+		     term->config) < 0)
+		err_str = strdup("unexpected hwmon event term");
+
+	parse_events_error__handle(err, term->err_val, err_str, NULL);
+	return -EINVAL;
+}
+
+/*
+ * Apply every term in @terms to @attr for the hwmon @pmu, loading the PMU's
+ * events first.
+ *
+ * Returns 0 on success, the error from loading the events, or -EINVAL as soon
+ * as a term is rejected.
+ */
+int hwmon_pmu__config_terms(const struct perf_pmu *pmu,
+			    struct perf_event_attr *attr,
+			    struct parse_events_terms *terms,
+			    struct parse_events_error *err)
+{
+	struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu);
+	struct parse_events_term *term;
+	int ret = hwmon_pmu__read_events(hwm);
+
+	if (ret)
+		return ret;
+
+	list_for_each_entry(term, &terms->terms, list) {
+		if (hwmon_pmu__config_term(hwm, attr, term, err) != 0)
+			return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Fill in the unit and scale of @info from the first term in @terms.
+ *
+ * Fan, pwm and intrusion values are used as they are, all other types are
+ * scaled by 0.001. A user term that doesn't parse as a hwmon name leaves @info
+ * alone and still succeeds.
+ *
+ * Returns 0 for a user term, otherwise -EINVAL after reporting the term via
+ * @err when that is non-NULL.
+ */
+int hwmon_pmu__check_alias(struct parse_events_terms *terms, struct perf_pmu_info *info,
+			   struct parse_events_error *err)
+{
+	struct parse_events_term *term =
+		list_first_entry(&terms->terms, struct parse_events_term, list);
+	enum hwmon_type type;
+	int number;
+
+	if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) {
+		char *err_str;
+
+		if (!err)
+			return -EINVAL;
+
+		/* Fall back to a fixed message if the detailed one can't be allocated. */
+		if (asprintf(&err_str,
+			     "unexpected hwmon event term (%s) %s",
+			     parse_events__term_type_str(term->type_term),
+			     term->config) < 0)
+			err_str = strdup("unexpected hwmon event term");
+
+		parse_events_error__handle(err, term->err_val, err_str, NULL);
+		return -EINVAL;
+	}
+
+	if (!parse_hwmon_filename(term->config, &type, &number,
+				  /*item=*/NULL, /*is_alarm=*/NULL))
+		return 0;
+
+	info->unit = hwmon_units[type];
+	switch (type) {
+	case HWMON_TYPE_FAN:
+	case HWMON_TYPE_PWM:
+	case HWMON_TYPE_INTRUSION:
+		info->scale = 1;
+		break;
+	default:
+		info->scale = 0.001;
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Create a hwmon PMU on @pmus for every entry of <sysfs>/class/hwmon that is a
+ * symlink to a directory with a readable, non-empty "name" file.
+ *
+ * Entries that can't be used are skipped with a debug message. Always returns
+ * 0, also when sysfs or the hwmon class directory is missing.
+ */
+int perf_pmus__read_hwmon_pmus(struct list_head *pmus)
+{
+	char *line = NULL;
+	DIR *class_hwmon_dir;
+	struct dirent *class_hwmon_ent;
+	char buf[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return 0;
+
+	scnprintf(buf, sizeof(buf), "%s/class/hwmon/", sysfs);
+	class_hwmon_dir = opendir(buf);
+	if (!class_hwmon_dir)
+		return 0;
+
+	while ((class_hwmon_ent = readdir(class_hwmon_dir)) != NULL) {
+		size_t line_len = 0;
+		int hwmon_dir, name_fd;
+		struct io io;
+
+		if (class_hwmon_ent->d_type != DT_LNK)
+			continue;
+
+		scnprintf(buf, sizeof(buf), "%s/class/hwmon/%s", sysfs, class_hwmon_ent->d_name);
+		hwmon_dir = open(buf, O_DIRECTORY);
+		if (hwmon_dir == -1) {
+			pr_debug("hwmon_pmu: not a directory: '%s/class/hwmon/%s'\n",
+				 sysfs, class_hwmon_ent->d_name);
+			continue;
+		}
+		name_fd = openat(hwmon_dir, "name", O_RDONLY);
+		if (name_fd == -1) {
+			pr_debug("hwmon_pmu: failure to open '%s/class/hwmon/%s/name'\n",
+				 sysfs, class_hwmon_ent->d_name);
+			close(hwmon_dir);
+			continue;
+		}
+		/* The path in buf is no longer needed, reuse it for buffering. */
+		io__init(&io, name_fd, buf, sizeof(buf));
+		if (io__getline(&io, &line, &line_len) < 0 || !line) {
+			/*
+			 * Without a name there is nothing to call the PMU.
+			 * hwmon_pmu__new() isn't reached so the directory fd
+			 * has to be closed here.
+			 */
+			pr_debug("hwmon_pmu: failure to read '%s/class/hwmon/%s/name'\n",
+				 sysfs, class_hwmon_ent->d_name);
+			close(name_fd);
+			close(hwmon_dir);
+			continue;
+		}
+		if (line_len > 0 && line[line_len - 1] == '\n')
+			line[line_len - 1] = '\0';
+		/* hwmon_dir is handed over to the new PMU. */
+		hwmon_pmu__new(pmus, hwmon_dir, class_hwmon_ent->d_name, line);
+		close(name_fd);
+	}
+	free(line);
+	closedir(class_hwmon_dir);
+	return 0;
+}
+
+/* The file descriptor slot of evsel @e for cpu map index @x and thread @y, as an lvalue. */
+#define FD(e, x, y) (*(int *)xyarray__entry((e)->core.fd, x, y))
+
+/*
+ * Open the <type><number>_input file of the event in @evsel once for every
+ * cpu map index in [@start_cpu_map_idx, @end_cpu_map_idx) and thread in
+ * @threads, storing the descriptors in the evsel's fd array.
+ *
+ * Returns 0 on success. On failure -errno is returned, threads->err_thread is
+ * set to the failing thread and the descriptors are closed again.
+ */
+int evsel__hwmon_pmu_open(struct evsel *evsel,
+			  struct perf_thread_map *threads,
+			  int start_cpu_map_idx, int end_cpu_map_idx)
+{
+	struct hwmon_pmu *hwm = container_of(evsel->pmu, struct hwmon_pmu, pmu);
+	union hwmon_pmu_event_key key = {
+		.type_and_num = evsel->core.attr.config,
+	};
+	int idx = 0, thread = 0, nthreads, err = 0;
+	char input_name[64];
+
+	nthreads = perf_thread_map__nr(threads);
+	/* The same file is opened for every index and thread. */
+	snprintf(input_name, sizeof(input_name), "%s%d_input",
+		 hwmon_type_strs[key.type], key.num);
+
+	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
+		for (thread = 0; thread < nthreads; thread++) {
+			int fd = openat(hwm->hwmon_dir_fd, input_name, O_RDONLY);
+
+			FD(evsel, idx, thread) = fd;
+			if (fd < 0) {
+				err = -errno;
+				goto out_close;
+			}
+		}
+	}
+	return 0;
+
+out_close:
+	if (err)
+		threads->err_thread = thread;
+
+	/*
+	 * Walk backwards from the slot that failed: first the earlier threads
+	 * of the current index, then all threads of each lower index down to
+	 * index 0.
+	 */
+	do {
+		while (--thread >= 0) {
+			if (FD(evsel, idx, thread) >= 0)
+				close(FD(evsel, idx, thread));
+			FD(evsel, idx, thread) = -1;
+		}
+		thread = nthreads;
+	} while (--idx >= 0);
+	return err;
+}
+
+/*
+ * Read the current value of the hwmon event in @evsel for @cpu_map_idx and
+ * @thread from the start of its input file into evsel->counts.
+ *
+ * When previous raw counts are kept the value read is added to the previous
+ * value and run/ena continue from the previous counts, otherwise the value is
+ * stored and run/ena are incremented.
+ *
+ * Returns 0 on success. If nothing can be read the lost count is incremented
+ * and -EINVAL is returned.
+ */
+int evsel__hwmon_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+	char buf[32];
+	int fd;
+	ssize_t len;
+	long long val;
+	struct perf_counts_values *count, *old_count = NULL;
+
+	if (evsel->prev_raw_counts)
+		old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
+
+	count = perf_counts(evsel->counts, cpu_map_idx, thread);
+	fd = FD(evsel, cpu_map_idx, thread);
+	/* Keep one byte back for the terminator written below. */
+	len = pread(fd, buf, sizeof(buf) - 1, 0);
+	if (len <= 0) {
+		count->lost++;
+		return -EINVAL;
+	}
+	buf[len] = '\0';
+	val = strtoll(buf, NULL, 10);
+	if (old_count) {
+		count->val = old_count->val + val;
+		count->run = old_count->run + 1;
+		count->ena = old_count->ena + 1;
+	} else {
+		count->val = val;
+		count->run++;
+		count->ena++;
+	}
+	return 0;
+}
diff --git a/tools/perf/util/hwmon_pmu.h b/tools/perf/util/hwmon_pmu.h
new file mode 100644
index 000000000000..882566846df4
--- /dev/null
+++ b/tools/perf/util/hwmon_pmu.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __HWMON_PMU_H
+#define __HWMON_PMU_H
+
+#include "pmu.h"
+#include <stdbool.h>
+
+struct list_head;
+struct perf_thread_map;
+
+/**
+ * enum hwmon_type:
+ *
+ * As described in Documentation/hwmon/sysfs-interface.rst hwmon events are
+ * defined over multiple files of the form <type><num>_<item>. This enum
+ * captures potential <type> values.
+ *
+ * The values index the per type string tables in hwmon_pmu.c, which are
+ * binary searched when parsing a file name, so the enumerators must stay in
+ * the same, sorted, order as the strings they stand for.
+ *
+ * This enum is exposed for testing.
+ */
+enum hwmon_type {
+ HWMON_TYPE_NONE,
+
+ HWMON_TYPE_CPU,
+ HWMON_TYPE_CURR,
+ HWMON_TYPE_ENERGY,
+ HWMON_TYPE_FAN,
+ HWMON_TYPE_HUMIDITY,
+ HWMON_TYPE_IN,
+ HWMON_TYPE_INTRUSION,
+ HWMON_TYPE_POWER,
+ HWMON_TYPE_PWM,
+ HWMON_TYPE_TEMP,
+
+ HWMON_TYPE_MAX
+};
+
+/**
+ * enum hwmon_item:
+ *
+ * Similar to enum hwmon_type but describes the item part of a sysfs filename.
+ *
+ * The values index the item string table in hwmon_pmu.c, which is binary
+ * searched when parsing a file name, so the enumerators must stay in the
+ * same, sorted, order as the strings they stand for.
+ *
+ * This enum is exposed for testing.
+ */
+enum hwmon_item {
+ HWMON_ITEM_NONE,
+
+ HWMON_ITEM_ACCURACY,
+ HWMON_ITEM_ALARM,
+ HWMON_ITEM_AUTO_CHANNELS_TEMP,
+ HWMON_ITEM_AVERAGE,
+ HWMON_ITEM_AVERAGE_HIGHEST,
+ HWMON_ITEM_AVERAGE_INTERVAL,
+ HWMON_ITEM_AVERAGE_INTERVAL_MAX,
+ HWMON_ITEM_AVERAGE_INTERVAL_MIN,
+ HWMON_ITEM_AVERAGE_LOWEST,
+ HWMON_ITEM_AVERAGE_MAX,
+ HWMON_ITEM_AVERAGE_MIN,
+ HWMON_ITEM_BEEP,
+ HWMON_ITEM_CAP,
+ HWMON_ITEM_CAP_HYST,
+ HWMON_ITEM_CAP_MAX,
+ HWMON_ITEM_CAP_MIN,
+ HWMON_ITEM_CRIT,
+ HWMON_ITEM_CRIT_HYST,
+ HWMON_ITEM_DIV,
+ HWMON_ITEM_EMERGENCY,
+ HWMON_ITEM_EMERGENCY_HIST,
+ HWMON_ITEM_ENABLE,
+ HWMON_ITEM_FAULT,
+ HWMON_ITEM_FREQ,
+ HWMON_ITEM_HIGHEST,
+ HWMON_ITEM_INPUT,
+ HWMON_ITEM_LABEL,
+ HWMON_ITEM_LCRIT,
+ HWMON_ITEM_LCRIT_HYST,
+ HWMON_ITEM_LOWEST,
+ HWMON_ITEM_MAX,
+ HWMON_ITEM_MAX_HYST,
+ HWMON_ITEM_MIN,
+ HWMON_ITEM_MIN_HYST,
+ HWMON_ITEM_MOD,
+ HWMON_ITEM_OFFSET,
+ HWMON_ITEM_PULSES,
+ HWMON_ITEM_RATED_MAX,
+ HWMON_ITEM_RATED_MIN,
+ HWMON_ITEM_RESET_HISTORY,
+ HWMON_ITEM_TARGET,
+ HWMON_ITEM_TYPE,
+ HWMON_ITEM_VID,
+
+ HWMON_ITEM__MAX,
+};
+
+bool perf_pmu__is_hwmon(const struct perf_pmu *pmu);
+bool evsel__is_hwmon(const struct evsel *evsel);
+
+/**
+ * parse_hwmon_filename() - Parse filename into constituent parts.
+ *
+ * @filename: To be parsed, of the form <type><number>_<item>. When @item is
+ *            NULL, event names of the form <type><number> and <type>_<label>
+ *            are accepted too.
+ * @type: The type defined from the parsed file name.
+ * @number: The number of the type, for example there may be more than 1 fan.
+ *          Set to -1 when the type is directly followed by '_'.
+ * @item: A hwmon <type><number> may have multiple associated items. May be
+ *        NULL, in which case only @type and @number are parsed.
+ * @alarm: Is the filename for an alarm value? Only written when @item is
+ *         non-NULL.
+ *
+ * An example of a hwmon filename is "temp1_input". The type is temp for a
+ * temperature value. The number is 1. The item within the file is an input
+ * value - the temperature itself. This file doesn't contain an alarm value.
+ *
+ * Return: true if the name could be parsed, false otherwise.
+ *
+ * Exposed for testing.
+ */
+bool parse_hwmon_filename(const char *filename,
+ enum hwmon_type *type,
+ int *number,
+ enum hwmon_item *item,
+ bool *alarm);
+
+/**
+ * hwmon_pmu__new() - Allocate and construct a hwmon PMU.
+ *
+ * @pmus: The list of PMUs to be added to.
+ * @hwmon_dir: An O_DIRECTORY file descriptor for a hwmon directory. On success
+ *             it is kept by the PMU and closed by hwmon_pmu__exit().
+ * @sysfs_name: Name of the hwmon sysfs directory like hwmon0.
+ * @name: The contents of the "name" file in the hwmon directory.
+ *
+ * Return: the generic PMU embedded in the new hwmon PMU, or NULL on failure.
+ *
+ * Exposed for testing. Regular construction should happen via
+ * perf_pmus__read_hwmon_pmus.
+ */
+struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, int hwmon_dir,
+ const char *sysfs_name, const char *name);
+void hwmon_pmu__exit(struct perf_pmu *pmu);
+
+int hwmon_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb);
+size_t hwmon_pmu__num_events(struct perf_pmu *pmu);
+bool hwmon_pmu__have_event(struct perf_pmu *pmu, const char *name);
+int hwmon_pmu__config_terms(const struct perf_pmu *pmu,
+ struct perf_event_attr *attr,
+ struct parse_events_terms *terms,
+ struct parse_events_error *err);
+int hwmon_pmu__check_alias(struct parse_events_terms *terms, struct perf_pmu_info *info,
+ struct parse_events_error *err);
+
+int perf_pmus__read_hwmon_pmus(struct list_head *pmus);
+
+
+int evsel__hwmon_pmu_open(struct evsel *evsel,
+ struct perf_thread_map *threads,
+ int start_cpu_map_idx, int end_cpu_map_idx);
+int evsel__hwmon_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread);
+
+#endif /* __HWMON_PMU_H */
diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h
index 01fb25a1150a..6f1b9f6b2466 100644
--- a/tools/perf/util/include/dwarf-regs.h
+++ b/tools/perf/util/include/dwarf-regs.h
@@ -1,42 +1,133 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PERF_DWARF_REGS_H_
#define _PERF_DWARF_REGS_H_
+#include "annotate.h"
+#include <elf.h>
+
+#ifndef EM_AARCH64
+#define EM_AARCH64 183 /* ARM 64 bit */
+#endif
+
+#ifndef EM_CSKY
+#define EM_CSKY 252 /* C-SKY */
+#endif
+#ifndef EF_CSKY_ABIV1
+#define EF_CSKY_ABIV1 0X10000000
+#endif
+#ifndef EF_CSKY_ABIV2
+#define EF_CSKY_ABIV2 0X20000000
+#endif
+
+#ifndef EM_LOONGARCH
+#define EM_LOONGARCH 258 /* LoongArch */
+#endif
+
+/* EM_HOST gives the ELF machine for host, EF_HOST gives additional flags. */
+#if defined(__x86_64__)
+ #define EM_HOST EM_X86_64
+#elif defined(__i386__)
+ #define EM_HOST EM_386
+#elif defined(__aarch64__)
+ #define EM_HOST EM_AARCH64
+#elif defined(__arm__)
+ #define EM_HOST EM_ARM
+#elif defined(__alpha__)
+ #define EM_HOST EM_ALPHA
+#elif defined(__arc__)
+ #define EM_HOST EM_ARC
+#elif defined(__AVR__)
+ #define EM_HOST EM_AVR
+#elif defined(__AVR32__)
+ #define EM_HOST EM_AVR32
+#elif defined(__bfin__)
+ #define EM_HOST EM_BLACKFIN
+#elif defined(__csky__)
+ #define EM_HOST EM_CSKY
+ #if defined(__CSKYABIV2__)
+ #define EF_HOST EF_CSKY_ABIV2
+ #else
+ #define EF_HOST EF_CSKY_ABIV1
+ #endif
+#elif defined(__cris__)
+ #define EM_HOST EM_CRIS
+#elif defined(__hppa__) // HP PA-RISC
+ #define EM_HOST EM_PARISC
+#elif defined(__loongarch__)
+ #define EM_HOST EM_LOONGARCH
+#elif defined(__mips__)
+ #define EM_HOST EM_MIPS
+#elif defined(__m32r__)
+ #define EM_HOST EM_M32R
+#elif defined(__microblaze__)
+ #define EM_HOST EM_MICROBLAZE
+#elif defined(__MSP430__)
+ #define EM_HOST EM_MSP430
+#elif defined(__powerpc64__)
+ #define EM_HOST EM_PPC64
+#elif defined(__powerpc__)
+ #define EM_HOST EM_PPC
+#elif defined(__riscv)
+ #define EM_HOST EM_RISCV
+#elif defined(__s390x__)
+ #define EM_HOST EM_S390
+#elif defined(__sh__)
+ #define EM_HOST EM_SH
+#elif defined(__sparc64__) || defined(__sparc__)
+ #define EM_HOST EM_SPARC
+#elif defined(__xtensa__)
+ #define EM_HOST EM_XTENSA
+#else
+ /* Unknown host ELF machine type. */
+ #define EM_HOST EM_NONE
+#endif
+
+#if !defined(EF_HOST)
+ #define EF_HOST 0
+#endif
#define DWARF_REG_PC 0xd3af9c /* random number */
#define DWARF_REG_FB 0xd3affb /* random number */
-#ifdef HAVE_DWARF_SUPPORT
-const char *get_arch_regstr(unsigned int n);
-/*
- * get_dwarf_regstr - Returns ftrace register string from DWARF regnum
- * n: DWARF register number
- * machine: ELF machine signature (EM_*)
+#ifdef HAVE_LIBDW_SUPPORT
+const char *get_csky_regstr(unsigned int n, unsigned int flags);
+
+/**
+ * get_dwarf_regstr() - Returns ftrace register string from DWARF regnum.
+ * @n: DWARF register number.
+ * @machine: ELF machine signature (EM_*).
+ * @flags: ELF flags for things like ABI differences.
*/
-const char *get_dwarf_regstr(unsigned int n, unsigned int machine);
+const char *get_dwarf_regstr(unsigned int n, unsigned int machine, unsigned int flags);
+
+int get_x86_regnum(const char *name);
+#if !defined(__x86_64__) && !defined(__i386__)
int get_arch_regnum(const char *name);
+#endif
+
/*
* get_dwarf_regnum - Returns DWARF regnum from register name
* name: architecture register name
* machine: ELF machine signature (EM_*)
*/
-int get_dwarf_regnum(const char *name, unsigned int machine);
+int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags);
+
+void get_powerpc_regs(u32 raw_insn, int is_source, struct annotated_op_loc *op_loc);
-#else /* HAVE_DWARF_SUPPORT */
+#else /* HAVE_LIBDW_SUPPORT */
static inline int get_dwarf_regnum(const char *name __maybe_unused,
- unsigned int machine __maybe_unused)
+ unsigned int machine __maybe_unused,
+ unsigned int flags __maybe_unused)
{
return -1;
}
-#endif
-#ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
-/*
- * Arch should support fetching the offset of a register in pt_regs
- * by its name. See kernel's regs_query_register_offset in
- * arch/xxx/kernel/ptrace.c.
- */
-int regs_query_register_offset(const char *name);
+/* Stub used when HAVE_LIBDW_SUPPORT is not defined: intentionally does nothing. */
+static inline void get_powerpc_regs(u32 raw_insn __maybe_unused, int is_source __maybe_unused,
+				    struct annotated_op_loc *op_loc __maybe_unused)
+{
+}
#endif
+
#endif
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index ec1b3bd9f530..a7c589fecb98 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -100,7 +100,7 @@ static void intel_bts_dump(struct intel_bts *bts __maybe_unused,
else
sz = len;
printf(".");
- color_fprintf(stdout, color, " %08x: ", pos);
+ color_fprintf(stdout, color, " %08zx: ", pos);
for (i = 0; i < sz; i++)
color_fprintf(stdout, color, " %02x", buf[i]);
for (; i < br_sz; i++)
@@ -591,7 +591,7 @@ static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp)
static int intel_bts_process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
- struct perf_tool *tool)
+ const struct perf_tool *tool)
{
struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
auxtrace);
@@ -634,7 +634,7 @@ static int intel_bts_process_event(struct perf_session *session,
static int intel_bts_process_auxtrace_event(struct perf_session *session,
union perf_event *event,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
auxtrace);
@@ -675,7 +675,7 @@ static int intel_bts_process_auxtrace_event(struct perf_session *session,
}
static int intel_bts_flush(struct perf_session *session,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
auxtrace);
@@ -737,35 +737,6 @@ static bool intel_bts_evsel_is_auxtrace(struct perf_session *session,
return evsel->core.attr.type == bts->pmu_type;
}
-struct intel_bts_synth {
- struct perf_tool dummy_tool;
- struct perf_session *session;
-};
-
-static int intel_bts_event_synth(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample __maybe_unused,
- struct machine *machine __maybe_unused)
-{
- struct intel_bts_synth *intel_bts_synth =
- container_of(tool, struct intel_bts_synth, dummy_tool);
-
- return perf_session__deliver_synth_event(intel_bts_synth->session,
- event, NULL);
-}
-
-static int intel_bts_synth_event(struct perf_session *session,
- struct perf_event_attr *attr, u64 id)
-{
- struct intel_bts_synth intel_bts_synth;
-
- memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth));
- intel_bts_synth.session = session;
-
- return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1,
- &id, intel_bts_event_synth);
-}
-
static int intel_bts_synth_events(struct intel_bts *bts,
struct perf_session *session)
{
@@ -814,7 +785,7 @@ static int intel_bts_synth_events(struct intel_bts *bts,
attr.sample_type |= PERF_SAMPLE_ADDR;
pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
id, (u64)attr.sample_type);
- err = intel_bts_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err) {
pr_err("%s: failed to synthesize 'branches' event type\n",
__func__);
@@ -837,7 +808,7 @@ static int intel_bts_synth_events(struct intel_bts *bts,
static const char * const intel_bts_info_fmts[] = {
[INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n",
[INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
- [INTEL_BTS_TIME_MULT] = " Time Muliplier %"PRIu64"\n",
+ [INTEL_BTS_TIME_MULT] = " Time Multiplier %"PRIu64"\n",
[INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n",
[INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
[INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 4407130d91f8..47cf35799a4d 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -209,12 +209,13 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
return 0;
}
-int arch_is_branch(const unsigned char *buf, size_t len, int x86_64)
+int arch_is_uncond_branch(const unsigned char *buf, size_t len, int x86_64)
{
struct intel_pt_insn in;
if (intel_pt_get_insn(buf, len, x86_64, &in) < 0)
return -1;
- return in.branch != INTEL_PT_BR_NO_BRANCH;
+ return in.branch == INTEL_PT_BR_UNCONDITIONAL ||
+ in.branch == INTEL_PT_BR_INDIRECT;
}
const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index bccb988a7a44..94fb16cf9e0c 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -10,7 +10,7 @@
#include <byteswap.h>
#include <linux/kernel.h>
#include <linux/compiler.h>
-#include <asm-generic/unaligned.h>
+#include <linux/unaligned.h>
#include "intel-pt-pkt-decoder.h"
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index d6d7b7512505..30be6dfe09eb 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -249,7 +249,7 @@ static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
else
pkt_len = 1;
printf(".");
- color_fprintf(stdout, color, " %08x: ", pos);
+ color_fprintf(stdout, color, " %08zx: ", pos);
for (i = 0; i < pkt_len; i++)
color_fprintf(stdout, color, " %02x", buf[i]);
for (; i < 16; i++)
@@ -3449,7 +3449,7 @@ out:
static int intel_pt_process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
- struct perf_tool *tool)
+ const struct perf_tool *tool)
{
struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
auxtrace);
@@ -3533,7 +3533,7 @@ static int intel_pt_process_event(struct perf_session *session,
return err;
}
-static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
+static int intel_pt_flush(struct perf_session *session, const struct perf_tool *tool)
{
struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
auxtrace);
@@ -3600,7 +3600,7 @@ static bool intel_pt_evsel_is_auxtrace(struct perf_session *session,
static int intel_pt_process_auxtrace_event(struct perf_session *session,
union perf_event *event,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
auxtrace);
@@ -3659,37 +3659,15 @@ static int intel_pt_queue_data(struct perf_session *session,
data_offset, timestamp);
}
-struct intel_pt_synth {
- struct perf_tool dummy_tool;
- struct perf_session *session;
-};
-
-static int intel_pt_event_synth(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample __maybe_unused,
- struct machine *machine __maybe_unused)
-{
- struct intel_pt_synth *intel_pt_synth =
- container_of(tool, struct intel_pt_synth, dummy_tool);
-
- return perf_session__deliver_synth_event(intel_pt_synth->session, event,
- NULL);
-}
-
static int intel_pt_synth_event(struct perf_session *session, const char *name,
struct perf_event_attr *attr, u64 id)
{
- struct intel_pt_synth intel_pt_synth;
int err;
pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
name, id, (u64)attr->sample_type);
- memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
- intel_pt_synth.session = session;
-
- err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
- &id, intel_pt_event_synth);
+ err = perf_session__deliver_synth_attr_event(session, attr, id);
if (err)
pr_err("%s: failed to synthesize '%s' event type\n",
__func__, name);
@@ -4132,7 +4110,7 @@ static int intel_pt_parse_vm_tm_corr_args(struct intel_pt *pt)
static const char * const intel_pt_info_fmts[] = {
[INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
[INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
- [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n",
+ [INTEL_PT_TIME_MULT] = " Time Multiplier %"PRIu64"\n",
[INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n",
[INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
[INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n",
diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c
new file mode 100644
index 000000000000..50a3c3e07160
--- /dev/null
+++ b/tools/perf/util/intel-tpebs.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel_tpebs.c: Intel TPEBS support
+ */
+
+
+#include <sys/param.h>
+#include <subcmd/run-command.h>
+#include <thread.h>
+#include "intel-tpebs.h"
+#include <linux/list.h>
+#include <linux/zalloc.h>
+#include <linux/err.h>
+#include "sample.h"
+#include "debug.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "session.h"
+#include "tool.h"
+#include "cpumap.h"
+#include "metricgroup.h"
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <poll.h>
+#include <math.h>
+
+#define PERF_DATA "-"
+
+bool tpebs_recording;
+static pid_t tpebs_pid = -1;
+static size_t tpebs_event_size;
+static LIST_HEAD(tpebs_results);
+static pthread_t tpebs_reader_thread;
+static struct child_process *tpebs_cmd;
+
+/*
+ * One entry of the tpebs_results list: the retire latency statistics gathered
+ * for a single event. Entries are created in tpebs_start(), updated in
+ * process_sample_event() and freed by tpebs_retire_lat__delete().
+ */
+struct tpebs_retire_lat {
+ /* Link in the tpebs_results list */
+ struct list_head nd;
+ /* Event name */
+ /* (strdup()ed copy of the evsel name with the 'R' replaced by 'p') */
+ const char *name;
+ /* Event name with the TPEBS modifier R */
+ /* (points at evsel->name; it is not freed with the entry) */
+ const char *tpebs_name;
+ /* Count of retire_latency values found in sample data */
+ size_t count;
+ /* Sum of all the retire_latency values in sample data */
+ int sum;
+ /* Average of retire_latency, val = sum / count */
+ double val;
+};
+
+/*
+ * Fill @record_argv with the command line of the "perf record" child:
+ * the fixed options, @buf (the --control option string), "-C <cpumap_buf>"
+ * unless the cpumap is "-1", one "-e <name>" pair per tpebs_results entry
+ * and finally "-o" PERF_DATA.
+ *
+ * Returns 0 on success or -ECANCELED when @cpumap_buf is NULL.
+ */
+static int get_perf_record_args(const char **record_argv, char buf[],
+				const char *cpumap_buf)
+{
+	struct tpebs_retire_lat *entry;
+	const char **arg = record_argv;
+
+	pr_debug("tpebs: Prepare perf record for retire_latency\n");
+
+	*arg++ = "perf";
+	*arg++ = "record";
+	*arg++ = "-W";
+	*arg++ = "--synth=no";
+	*arg++ = buf;
+
+	if (cpumap_buf == NULL) {
+		pr_err("tpebs: Require cpumap list to run sampling\n");
+		return -ECANCELED;
+	}
+
+	/* Only pass -C when the cpumap is something other than "-1". */
+	if (strcmp(cpumap_buf, "-1") != 0) {
+		*arg++ = "-C";
+		*arg++ = cpumap_buf;
+	}
+
+	list_for_each_entry(entry, &tpebs_results, nd) {
+		*arg++ = "-e";
+		*arg++ = entry->name;
+	}
+
+	*arg++ = "-o";
+	*arg = PERF_DATA;
+
+	return 0;
+}
+
+/*
+ * Allocate the zero-initialised global tpebs_cmd descriptor, attach @argv to
+ * it and set its 'out' member to -1.
+ *
+ * Returns 0 on success or -ENOMEM when the allocation fails.
+ */
+static int prepare_run_command(const char **argv)
+{
+	tpebs_cmd = zalloc(sizeof(*tpebs_cmd));
+	if (tpebs_cmd == NULL)
+		return -ENOMEM;
+
+	tpebs_cmd->out = -1;
+	tpebs_cmd->argv = argv;
+
+	return 0;
+}
+
+/*
+ * Build the "perf record" argument vector and launch the command through
+ * start_command() on the global tpebs_cmd. The --control option is formed
+ * from @control_fd[0] and @ack_fd[1].
+ *
+ * Returns 0 on success, -ENOMEM if the argument vector cannot be allocated,
+ * or the error of the first helper that fails. The argument vector is freed
+ * before returning in every case.
+ */
+static int start_perf_record(int control_fd[], int ack_fd[],
+			     const char *cpumap_buf)
+{
+	char control_opt[32];
+	const char **argv;
+	int err;
+
+	scnprintf(control_opt, sizeof(control_opt), "--control=fd:%d,%d",
+		  control_fd[0], ack_fd[1]);
+
+	argv = calloc(12 + 2 * tpebs_event_size, sizeof(char *));
+	if (argv == NULL)
+		return -ENOMEM;
+
+	err = get_perf_record_args(argv, control_opt, cpumap_buf);
+	if (err == 0)
+		err = prepare_run_command(argv);
+	if (err == 0)
+		err = start_command(tpebs_cmd);
+
+	free(argv);
+	return err;
+}
+
+/*
+ * Sample callback of the reader session: find the tpebs_results entry whose
+ * name matches the sampled evsel and fold the sample's retire latency into
+ * its count, sum and running average. Only the first matching entry is
+ * updated. Always returns 0.
+ *
+ * Results are not kept per core; the average retire latency is what gets
+ * used later.
+ */
+static int process_sample_event(const struct perf_tool *tool __maybe_unused,
+				union perf_event *event __maybe_unused,
+				struct perf_sample *sample,
+				struct evsel *evsel,
+				struct machine *machine __maybe_unused)
+{
+	const char *sampled_name = evsel__name(evsel);
+	struct tpebs_retire_lat *entry;
+
+	list_for_each_entry(entry, &tpebs_results, nd) {
+		if (strcmp(sampled_name, entry->name) != 0)
+			continue;
+
+		entry->count += 1;
+		entry->sum += sample->retire_lat;
+		entry->val = (double)entry->sum / entry->count;
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Feature callback of the reader session: hand the event to
+ * perf_event__process_feature() when its feature id is below
+ * HEADER_LAST_FEATURE, otherwise ignore it and return 0.
+ */
+static int process_feature_event(struct perf_session *session,
+				 union perf_event *event)
+{
+	if (event->feat.feat_id >= HEADER_LAST_FEATURE)
+		return 0;
+
+	return perf_event__process_feature(session, event);
+}
+
+/*
+ * Thread entry point. @arg is the child_process of the "perf record" command;
+ * its 'out' descriptor is opened as a read-mode perf session whose samples
+ * are handled by process_sample_event(). Always returns NULL.
+ */
+static void *__sample_reader(void *arg)
+{
+	struct child_process *child = arg;
+	struct perf_data data = {
+		.mode = PERF_DATA_MODE_READ,
+		.path = PERF_DATA,
+		.file.fd = child->out,
+	};
+	struct perf_tool tool;
+	struct perf_session *session;
+
+	perf_tool__init(&tool, /*ordered_events=*/false);
+	tool.sample = process_sample_event;
+	tool.feature = process_feature_event;
+	tool.attr = perf_event__process_attr;
+
+	session = perf_session__new(&data, &tool);
+	if (!IS_ERR(session)) {
+		perf_session__process_events(session);
+		perf_session__delete(session);
+	}
+
+	return NULL;
+}
+
+/*
+ * tpebs_stop - stop the sample data read thread and the perf record process.
+ *
+ * Returns 0 when nothing is running, otherwise the result of
+ * finish_command(), with -ERR_RUN_COMMAND_WAITPID_SIGNAL mapped to 0.
+ */
+static int tpebs_stop(void)
+{
+	int ret;
+
+	/* Like tpebs_start, this must only take effect once per recording. */
+	if (tpebs_pid == -1)
+		return 0;
+
+	kill(tpebs_cmd->pid, SIGTERM);
+	tpebs_pid = -1;
+	pthread_join(tpebs_reader_thread, NULL);
+	close(tpebs_cmd->out);
+
+	ret = finish_command(tpebs_cmd);
+	/* Presumably the child exiting on the SIGTERM sent above; not an error. */
+	return ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL ? 0 : ret;
+}
+
+/*
+ * tpebs_start - start tpebs execution.
+ * @evsel_list: retire_latency evsels in this list will be selected and sampled
+ * to get the average retire_latency value.
+ *
+ * For every retire_latency evsel an entry is appended to tpebs_results. If
+ * there is at least one, a "perf record" child is started, a reader thread is
+ * created for its output, and the child is enabled through the --control
+ * pipe; the function then waits (up to 3000ms) for the acknowledgement.
+ *
+ * Returns 0 on success or when there is nothing to do (recording disabled or
+ * already started), and a non-zero value on failure.
+ *
+ * This function will be called from evlist level later when evlist__open() is
+ * called consistently.
+ */
+int tpebs_start(struct evlist *evsel_list)
+{
+	int ret = 0;
+	struct evsel *evsel;
+	char cpumap_buf[50];
+
+	/*
+	 * We should only run tpebs_start when tpebs_recording is enabled.
+	 * And we should only run it once with all the required events.
+	 */
+	if (tpebs_pid != -1 || !tpebs_recording)
+		return 0;
+
+	cpu_map__snprint(evsel_list->core.user_requested_cpus, cpumap_buf, sizeof(cpumap_buf));
+	/*
+	 * Prepare perf record for sampling event retire_latency before fork and
+	 * prepare workload
+	 */
+	evlist__for_each_entry(evsel_list, evsel) {
+		int i;
+		char *name;
+		struct tpebs_retire_lat *new;
+
+		if (!evsel->retire_lat)
+			continue;
+
+		pr_debug("tpebs: Retire_latency of event %s is required\n", evsel->name);
+		/* Locate the last 'R' in the name; index 0 is never accepted. */
+		for (i = strlen(evsel->name) - 1; i > 0; i--) {
+			if (evsel->name[i] == 'R')
+				break;
+		}
+		if (i <= 0 || evsel->name[i] != 'R') {
+			ret = -1;
+			goto err;
+		}
+
+		name = strdup(evsel->name);
+		if (!name) {
+			ret = -ENOMEM;
+			goto err;
+		}
+		/* The sampled event is the same name with that 'R' turned into 'p'. */
+		name[i] = 'p';
+
+		new = zalloc(sizeof(*new));
+		if (!new) {
+			ret = -1;
+			/* zfree() expects the address of the pointer, not the pointer. */
+			zfree(&name);
+			goto err;
+		}
+		new->name = name;
+		new->tpebs_name = evsel->name;
+		list_add_tail(&new->nd, &tpebs_results);
+		tpebs_event_size += 1;
+	}
+
+	if (tpebs_event_size > 0) {
+		struct pollfd pollfd = { .events = POLLIN, };
+		/* -1 marks "not opened" so the cleanup below never closes garbage. */
+		int control_fd[2] = { -1, -1 }, ack_fd[2] = { -1, -1 };
+		int len;
+		char ack_buf[8];
+
+		/*Create control and ack fd for --control*/
+		if (pipe(control_fd) < 0) {
+			pr_err("tpebs: Failed to create control fifo");
+			ret = -1;
+			goto out;
+		}
+		if (pipe(ack_fd) < 0) {
+			pr_err("tpebs: Failed to create control fifo");
+			ret = -1;
+			goto out;
+		}
+
+		ret = start_perf_record(control_fd, ack_fd, cpumap_buf);
+		if (ret)
+			goto out;
+		if (pthread_create(&tpebs_reader_thread, NULL, __sample_reader, tpebs_cmd)) {
+			/*
+			 * No reader thread exists, so tear the child down here
+			 * and leave tpebs_pid at -1: tpebs_stop() must not
+			 * join a thread that was never created nor close
+			 * 'out' a second time.
+			 */
+			kill(tpebs_cmd->pid, SIGTERM);
+			close(tpebs_cmd->out);
+			finish_command(tpebs_cmd);
+			zfree(&tpebs_cmd);
+			pr_err("Could not create thread to process sample data.\n");
+			ret = -1;
+			goto out;
+		}
+		/* From here on tpebs_stop() owns the child and the thread. */
+		tpebs_pid = tpebs_cmd->pid;
+
+		/* Wait for perf record initialization.*/
+		len = strlen(EVLIST_CTL_CMD_ENABLE_TAG);
+		ret = write(control_fd[1], EVLIST_CTL_CMD_ENABLE_TAG, len);
+		if (ret != len) {
+			pr_err("perf record control write control message failed\n");
+			/* A short or zero-length write must not look like success. */
+			ret = -1;
+			goto out;
+		}
+
+		/* wait for an ack */
+		pollfd.fd = ack_fd[0];
+
+		/*
+		 * We need this poll to ensure the ack_fd PIPE will not hang
+		 * when perf record failed for any reason. The timeout value
+		 * 3000ms is an empirical selection.
+		 */
+		if (!poll(&pollfd, 1, 3000)) {
+			pr_err("tpebs failed: perf record ack timeout\n");
+			ret = -1;
+			goto out;
+		}
+
+		if (!(pollfd.revents & POLLIN)) {
+			pr_err("tpebs failed: did not received an ack\n");
+			ret = -1;
+			goto out;
+		}
+
+		/* Keep one byte spare so the buffer can always be terminated. */
+		ret = read(ack_fd[0], ack_buf, sizeof(ack_buf) - 1);
+		if (ret <= 0) {
+			pr_err("tpebs: perf record control ack failed\n");
+			/* EOF (0) is a failure too, not a successful start. */
+			ret = -1;
+			goto out;
+		}
+		ack_buf[ret] = '\0';
+		ret = strcmp(ack_buf, EVLIST_CTL_CMD_ACK_TAG);
+out:
+		if (control_fd[0] >= 0)
+			close(control_fd[0]);
+		if (control_fd[1] >= 0)
+			close(control_fd[1]);
+		if (ack_fd[0] >= 0)
+			close(ack_fd[0]);
+		if (ack_fd[1] >= 0)
+			close(ack_fd[1]);
+	}
+err:
+	if (ret)
+		tpebs_delete();
+	return ret;
+}
+
+
+/*
+ * tpebs_set_evsel - store the sampled retire latency in an evsel's counts.
+ *
+ * Stops the recording, then sets ena/run to 1 and lost to 0 for the
+ * (@cpu_map_idx, @thread) slot of @evsel. The slot's value is the rounded
+ * average retire latency when a matching tpebs_results entry exists and both
+ * indices are 0, and 0 otherwise.
+ *
+ * Returns -1 when @evsel is not a retire_latency evsel, 0 otherwise.
+ */
+int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+	struct tpebs_retire_lat *entry, *match = NULL;
+	struct perf_counts_values *count;
+
+	/* Non retire_latency evsel should never enter this function. */
+	if (!evsel__is_retire_lat(evsel))
+		return -1;
+
+	/*
+	 * Need to stop the forked record to ensure get sampled data from the
+	 * PIPE to process and get non-zero retire_lat value for hybrid.
+	 */
+	tpebs_stop();
+	count = perf_counts(evsel->counts, cpu_map_idx, thread);
+
+	/* Match on the evsel name pointer itself, or on the metric id string. */
+	list_for_each_entry(entry, &tpebs_results, nd) {
+		if (entry->tpebs_name == evsel->name ||
+		    (evsel->metric_id && !strcmp(entry->tpebs_name, evsel->metric_id))) {
+			match = entry;
+			break;
+		}
+	}
+
+	/* Set ena and run to non-zero */
+	count->ena = count->run = 1;
+	count->lost = 0;
+
+	/*
+	 * The value is 0 when no entry was found for this event (record_tpebs
+	 * not set or 0 sample recorded), and the retire_latency value is only
+	 * set on the first CPU and thread.
+	 */
+	if (match != NULL && cpu_map_idx == 0 && thread == 0)
+		count->val = (__u64)rint(match->val);
+	else
+		count->val = 0;
+
+	return 0;
+}
+
+/* Free a tpebs_results entry together with the event name string it holds. */
+static void tpebs_retire_lat__delete(struct tpebs_retire_lat *r)
+{
+	free((void *)r->name);
+	free(r);
+}
+
+
+/*
+ * tpebs_delete - delete tpebs related data and stop the created thread and
+ * process by calling tpebs_stop().
+ *
+ * This function is called from evlist_delete() and also from builtin-stat
+ * stat_handle_error(). If tpebs_start() is called from places other than perf
+ * stat, need to ensure tpebs_delete() is also called to safely free mem and
+ * close the data read thread and the forked perf record process.
+ *
+ * This function is also called in evsel__close() to be symmetric with
+ * tpebs_start() being called in evsel__open(). We will update this call site
+ * when move tpebs_start() to evlist level.
+ *
+ * Nothing is done when no recording is active (tpebs_pid is -1).
+ */
+void tpebs_delete(void)
+{
+	struct tpebs_retire_lat *entry, *next;
+
+	if (tpebs_pid == -1)
+		return;
+
+	tpebs_stop();
+
+	list_for_each_entry_safe(entry, next, &tpebs_results, nd) {
+		list_del_init(&entry->nd);
+		tpebs_retire_lat__delete(entry);
+	}
+
+	/* Frees the descriptor (if any) and resets the pointer to NULL. */
+	zfree(&tpebs_cmd);
+}
diff --git a/tools/perf/util/intel-tpebs.h b/tools/perf/util/intel-tpebs.h
new file mode 100644
index 000000000000..766b3fbd79f1
--- /dev/null
+++ b/tools/perf/util/intel-tpebs.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * intel_tpebs.h: Intel TPEBS support
+ */
+#ifndef INCLUDE__PERF_INTEL_TPEBS_H__
+#define INCLUDE__PERF_INTEL_TPEBS_H__
+
+#include "stat.h"
+#include "evsel.h"
+
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+
+extern bool tpebs_recording;
+int tpebs_start(struct evlist *evsel_list);
+void tpebs_delete(void);
+int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread);
+
+#else
+
+/* No-op stand-ins used when HAVE_ARCH_X86_64_SUPPORT is not defined. */
+
+/* Nothing to start; always reports success. */
+static inline int tpebs_start(struct evlist *evsel_list __maybe_unused)
+{
+	return 0;
+}
+
+/* Nothing to release. (No ';' after the body: it is not valid at file scope.) */
+static inline void tpebs_delete(void) {}
+
+/* Leaves the evsel untouched; always returns 0. */
+static inline int tpebs_set_evsel(struct evsel *evsel __maybe_unused,
+				  int cpu_map_idx __maybe_unused,
+				  int thread __maybe_unused)
+{
+	return 0;
+}
+
+#endif
+#endif
diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h
index fb810e1b2de7..f4037203e9ec 100644
--- a/tools/perf/util/jit.h
+++ b/tools/perf/util/jit.h
@@ -5,7 +5,8 @@
#include <data.h>
int jit_process(struct perf_session *session, struct perf_data *output,
- struct machine *machine, char *filename, pid_t pid, pid_t tid, u64 *nbytes);
+ struct machine *machine, const char *filename,
+ pid_t pid, pid_t tid, u64 *nbytes);
int jit_inject_record(const char *filename);
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 1f657ef8975f..346513e5e9b7 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -424,7 +424,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
{
struct perf_sample sample;
union perf_event *event;
- struct perf_tool *tool = jd->session->tool;
+ const struct perf_tool *tool = jd->session->tool;
uint64_t code, addr;
uintptr_t uaddr;
char *filename;
@@ -543,7 +543,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
{
struct perf_sample sample;
union perf_event *event;
- struct perf_tool *tool = jd->session->tool;
+ const struct perf_tool *tool = jd->session->tool;
char *filename;
size_t size;
struct stat st;
@@ -710,7 +710,7 @@ jit_process_dump(struct jit_buf_desc *jd)
}
static int
-jit_inject(struct jit_buf_desc *jd, char *path)
+jit_inject(struct jit_buf_desc *jd, const char *path)
{
int ret;
@@ -737,7 +737,7 @@ jit_inject(struct jit_buf_desc *jd, char *path)
* as captured in the RECORD_MMAP record
*/
static int
-jit_detect(char *mmap_name, pid_t pid, struct nsinfo *nsi)
+jit_detect(const char *mmap_name, pid_t pid, struct nsinfo *nsi)
{
char *p;
char *end = NULL;
@@ -821,7 +821,7 @@ int
jit_process(struct perf_session *session,
struct perf_data *output,
struct machine *machine,
- char *filename,
+ const char *filename,
pid_t pid,
pid_t tid,
u64 *nbytes)
diff --git a/tools/perf/util/llvm-c-helpers.cpp b/tools/perf/util/llvm-c-helpers.cpp
new file mode 100644
index 000000000000..663bcaba2041
--- /dev/null
+++ b/tools/perf/util/llvm-c-helpers.cpp
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Must come before the linux/compiler.h include, which defines several
+ * macros (e.g. noinline) that conflict with compiler builtins used
+ * by LLVM.
+ */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter" /* Needed for LLVM <= 15 */
+#include <llvm/DebugInfo/Symbolize/Symbolize.h>
+#include <llvm/Support/TargetSelect.h>
+#pragma GCC diagnostic pop
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <linux/compiler.h>
+extern "C" {
+#include <linux/zalloc.h>
+}
+#include "symbol_conf.h"
+#include "llvm-c-helpers.h"
+
+extern "C"
+char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
+
+using namespace llvm;
+using llvm::symbolize::LLVMSymbolizer;
+
+/*
+ * Return the process-wide LLVMSymbolizer, creating it on first use. The
+ * object is never deleted, so it lives to the end of the program. Unlike the
+ * bfd paths, LLVMSymbolizer has its own cache, so nothing needs to be stored
+ * in the dso struct.
+ */
+static LLVMSymbolizer *get_symbolizer()
+{
+	static LLVMSymbolizer *instance = nullptr;
+
+	if (instance != nullptr)
+		return instance;
+
+	LLVMSymbolizer::Options opts;
+	/*
+	 * LLVM sometimes demangles slightly different from the rest
+	 * of the code, and this mismatch can cause new_inline_sym()
+	 * to get confused and mark non-inline symbol as inlined
+	 * (since the name does not properly match up with base_sym).
+	 * Thus, disable the demangling and let the rest of the code
+	 * handle it.
+	 */
+	opts.Demangle = false;
+	instance = new LLVMSymbolizer(opts);
+	return instance;
+}
+
+/*
+ * Copy the file name and line number of @line_info into *@file and *@line;
+ * either output pointer may be null, in which case it is skipped. A file
+ * name of "<invalid>" yields a null *@file (the libbfd convention); any
+ * other name is strdup()ed so the caller can free() it.
+ *
+ * Returns 0 on error (strdup() failure), 1 on success.
+ */
+static int extract_file_and_line(const DILineInfo &line_info, char **file,
+				 unsigned int *line)
+{
+	if (file != nullptr) {
+		const bool known = line_info.FileName != "<invalid>";
+
+		*file = known ? strdup(line_info.FileName.c_str()) : nullptr;
+		if (known && *file == nullptr)
+			return 0;
+	}
+
+	if (line != nullptr)
+		*line = line_info.Line;
+
+	return 1;
+}
+
+/*
+ * Resolve @addr in @dso_name to a file and line using LLVM.
+ *
+ * With @unwind_inlines set, *@file / *@line describe frame 0 and
+ * *@inline_frames receives a calloc()ed array with one entry per inline
+ * frame; the return value is the number of frames. Without it,
+ * *@inline_frames (if given) is set to null and the return value is 1 on
+ * success.
+ *
+ * Returns 0 when the address cannot be resolved or on allocation failure.
+ * In that case nothing allocated here is left behind: *@inline_frames is
+ * null and a file name already stored in *@file is freed again.
+ */
+extern "C"
+int llvm_addr2line(const char *dso_name, u64 addr,
+		   char **file, unsigned int *line,
+		   bool unwind_inlines,
+		   llvm_a2l_frame **inline_frames)
+{
+	LLVMSymbolizer *symbolizer = get_symbolizer();
+	object::SectionedAddress sectioned_addr = {
+		addr,
+		object::SectionedAddress::UndefSection
+	};
+
+	if (unwind_inlines) {
+		Expected<DIInliningInfo> res_or_err =
+			symbolizer->symbolizeInlinedCode(dso_name,
+							 sectioned_addr);
+		if (!res_or_err) {
+			/* An Expected<> holding an error must have it consumed. */
+			consumeError(res_or_err.takeError());
+			return 0;
+		}
+		unsigned num_frames = res_or_err->getNumberOfFrames();
+		if (num_frames == 0)
+			return 0;
+
+		if (extract_file_and_line(res_or_err->getFrame(0),
+					  file, line) == 0)
+			return 0;
+
+		*inline_frames = (llvm_a2l_frame *)calloc(
+			num_frames, sizeof(**inline_frames));
+		if (*inline_frames == nullptr) {
+			/* Do not leak the name stored by extract_file_and_line(). */
+			if (file)
+				zfree(file);
+			return 0;
+		}
+
+		for (unsigned i = 0; i < num_frames; ++i) {
+			const DILineInfo &src = res_or_err->getFrame(i);
+
+			llvm_a2l_frame &dst = (*inline_frames)[i];
+			if (src.FileName == "<invalid>")
+				/* Match the convention of libbfd. */
+				dst.filename = nullptr;
+			else
+				dst.filename = strdup(src.FileName.c_str());
+			dst.funcname = strdup(src.FunctionName.c_str());
+			dst.line = src.Line;
+
+			/*
+			 * NOTE(review): a frame whose file name is "<invalid>"
+			 * also ends up here, because its filename was set to
+			 * null just above - confirm whether that is intended
+			 * or only allocation failures should abort.
+			 */
+			if (dst.filename == nullptr ||
+			    dst.funcname == nullptr) {
+				for (unsigned j = 0; j <= i; ++j) {
+					zfree(&(*inline_frames)[j].filename);
+					zfree(&(*inline_frames)[j].funcname);
+				}
+				zfree(inline_frames);
+				/* Do not leak the name stored for frame 0 either. */
+				if (file)
+					zfree(file);
+				return 0;
+			}
+		}
+
+		return num_frames;
+	} else {
+		if (inline_frames)
+			*inline_frames = nullptr;
+
+		Expected<DILineInfo> res_or_err =
+			symbolizer->symbolizeCode(dso_name, sectioned_addr);
+		if (!res_or_err) {
+			/* An Expected<> holding an error must have it consumed. */
+			consumeError(res_or_err.takeError());
+			return 0;
+		}
+		return extract_file_and_line(*res_or_err, file, line);
+	}
+}
+
+/*
+ * Build a malloc()ed, human readable name for @addr from the symbol
+ * @sym_name starting at @base_addr: the (demangled, when possible) symbol
+ * name, followed by "+0x<offset>" when @base_addr is non-zero and differs
+ * from @addr. Returns NULL when @sym_name is "<invalid>".
+ */
+static char *
+make_symbol_relative_string(struct dso *dso, const char *sym_name,
+			    u64 addr, u64 base_addr)
+{
+	if (strcmp(sym_name, "<invalid>") == 0)
+		return NULL;
+
+	char *demangled = dso__demangle_sym(dso, 0, sym_name);
+
+	/* No usable base address, or exactly at the symbol: name only. */
+	if (!base_addr || base_addr == addr)
+		return demangled ? demangled : strdup(sym_name);
+
+	char buf[256];
+
+	snprintf(buf, sizeof(buf), "%s+0x%" PRIx64,
+		 demangled ? demangled : sym_name, addr - base_addr);
+	free(demangled);
+	return strdup(buf);
+}
+
+/*
+ * Symbolize the code address @addr in @dso_name and return a malloc()ed
+ * string built by make_symbol_relative_string() from the function name and
+ * its start address (0 when LLVM reports none). Returns NULL when LLVM
+ * cannot symbolize the address or the function name is "<invalid>".
+ */
+extern "C"
+char *llvm_name_for_code(struct dso *dso, const char *dso_name, u64 addr)
+{
+	LLVMSymbolizer *symbolizer = get_symbolizer();
+	object::SectionedAddress sectioned_addr = {
+		addr,
+		object::SectionedAddress::UndefSection
+	};
+	Expected<DILineInfo> res_or_err =
+		symbolizer->symbolizeCode(dso_name, sectioned_addr);
+	if (!res_or_err) {
+		/* An Expected<> holding an error must have it consumed. */
+		consumeError(res_or_err.takeError());
+		return NULL;
+	}
+	return make_symbol_relative_string(
+		dso, res_or_err->FunctionName.c_str(),
+		addr, res_or_err->StartAddress ? *res_or_err->StartAddress : 0);
+}
+
+/*
+ * Symbolize the data address @addr in @dso_name and return a malloc()ed
+ * string built by make_symbol_relative_string() from the global's name and
+ * start address. Returns NULL when LLVM cannot symbolize the address or the
+ * name is "<invalid>".
+ */
+extern "C"
+char *llvm_name_for_data(struct dso *dso, const char *dso_name, u64 addr)
+{
+	LLVMSymbolizer *symbolizer = get_symbolizer();
+	object::SectionedAddress sectioned_addr = {
+		addr,
+		object::SectionedAddress::UndefSection
+	};
+	Expected<DIGlobal> res_or_err =
+		symbolizer->symbolizeData(dso_name, sectioned_addr);
+	if (!res_or_err) {
+		/* An Expected<> holding an error must have it consumed. */
+		consumeError(res_or_err.takeError());
+		return NULL;
+	}
+	return make_symbol_relative_string(
+		dso, res_or_err->Name.c_str(),
+		addr, res_or_err->Start);
+}
diff --git a/tools/perf/util/llvm-c-helpers.h b/tools/perf/util/llvm-c-helpers.h
new file mode 100644
index 000000000000..d2b99637a28a
--- /dev/null
+++ b/tools/perf/util/llvm-c-helpers.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_LLVM_C_HELPERS
+#define __PERF_LLVM_C_HELPERS 1
+
+/*
+ * Helpers to call into LLVM C++ code from C, for the parts that do not have
+ * C APIs.
+ */
+
+#include <linux/compiler.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct dso;
+
+/* One inline frame as filled in by llvm_addr2line(). */
+struct llvm_a2l_frame {
+ /* strdup()ed source file name */
+ char* filename;
+ /* strdup()ed function name */
+ char* funcname;
+ /* Source line number */
+ unsigned int line;
+};
+
+/*
+ * Implement addr2line() using libLLVM. LLVM is a C++ API, and
+ * many of the linux/ headers cannot be included in a C++ compile unit,
+ * so we need to make a little bridge code here. llvm_addr2line() will
+ * convert the inline frame information from LLVM's internal structures
+ * and put them into a flat array given in inline_frames. The caller
+ * is then responsible for taking that array and convert it into perf's
+ * regular inline frame structures (which depend on e.g. struct list_head).
+ *
+ * If the address could not be resolved, or an error occurred (e.g. OOM),
+ * returns 0. Otherwise, returns the number of inline frames (which means 1
+ * if the address was not part of an inlined function). If unwind_inlines
+ * is set and the return code is nonzero, inline_frames will be set to
+ * a newly allocated array with that length. The caller is then responsible
+ * for freeing both the strings and the array itself.
+ */
+int llvm_addr2line(const char* dso_name,
+ u64 addr,
+ char** file,
+ unsigned int* line,
+ bool unwind_inlines,
+ struct llvm_a2l_frame** inline_frames);
+
+/*
+ * Simple symbolizers for addresses; will convert something like
+ * 0x12345 to "func+0x123". Will return NULL if no symbol was found.
+ *
+ * The returned value must be freed by the caller, with free().
+ */
+char *llvm_name_for_code(struct dso *dso, const char *dso_name, u64 addr);
+char *llvm_name_for_data(struct dso *dso, const char *dso_name, u64 addr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __PERF_LLVM_C_HELPERS */
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 8477edefc299..27d5345d2b30 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -134,6 +134,8 @@ struct machine *machine__new_host(void)
if (machine__create_kernel_maps(machine) < 0)
goto out_delete;
+
+ machine->env = &perf_env;
}
return machine;
@@ -642,8 +644,9 @@ int machine__process_lost_event(struct machine *machine __maybe_unused,
int machine__process_lost_samples_event(struct machine *machine __maybe_unused,
union perf_event *event, struct perf_sample *sample)
{
- dump_printf(": id:%" PRIu64 ": lost samples :%" PRI_lu64 "\n",
- sample->id, event->lost_samples.lost);
+ dump_printf(": id:%" PRIu64 ": lost samples :%" PRI_lu64 "%s\n",
+ sample->id, event->lost_samples.lost,
+ event->header.misc & PERF_RECORD_MISC_LOST_SAMPLES_BPF ? " (BPF)" : "");
return 0;
}
@@ -1342,7 +1345,7 @@ static int maps__set_module_path(struct maps *maps, const char *path, struct kmo
* we need to update the symtab_type if needed.
*/
if (m->comp && is_kmod_dso(dso)) {
- dso__set_symtab_type(dso, dso__symtab_type(dso));
+ dso__set_symtab_type(dso, dso__symtab_type(dso)+1);
dso__set_comp(dso, m->comp);
}
map__put(map);
@@ -2059,7 +2062,8 @@ static int add_callchain_ip(struct thread *thread,
bool branch,
struct branch_flags *flags,
struct iterations *iter,
- u64 branch_from)
+ u64 branch_from,
+ bool symbols)
{
struct map_symbol ms = {};
struct addr_location al;
@@ -2098,7 +2102,8 @@ static int add_callchain_ip(struct thread *thread,
}
goto out;
}
- thread__find_symbol(thread, *cpumode, ip, &al);
+ if (symbols)
+ thread__find_symbol(thread, *cpumode, ip, &al);
}
if (al.sym != NULL) {
@@ -2141,6 +2146,7 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
unsigned int i;
const struct branch_stack *bs = sample->branch_stack;
struct branch_entry *entries = perf_sample__branch_entries(sample);
+ u64 *branch_stack_cntr = sample->branch_stack_cntr;
struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info));
if (!bi)
@@ -2150,6 +2156,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
ip__resolve_ams(al->thread, &bi[i].to, entries[i].to);
ip__resolve_ams(al->thread, &bi[i].from, entries[i].from);
bi[i].flags = entries[i].flags;
+ if (branch_stack_cntr)
+ bi[i].branch_stack_cntr = branch_stack_cntr[i];
}
return bi;
}
@@ -2224,7 +2232,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread,
struct symbol **parent,
struct addr_location *root_al,
u64 branch_from,
- bool callee, int end)
+ bool callee, int end,
+ bool symbols)
{
struct ip_callchain *chain = sample->callchain;
u8 cpumode = PERF_RECORD_MISC_USER;
@@ -2234,7 +2243,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread,
for (i = 0; i < end + 1; i++) {
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, chain->ips[i],
- false, NULL, NULL, branch_from);
+ false, NULL, NULL, branch_from,
+ symbols);
if (err)
return err;
}
@@ -2244,7 +2254,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread,
for (i = end; i >= 0; i--) {
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, chain->ips[i],
- false, NULL, NULL, branch_from);
+ false, NULL, NULL, branch_from,
+ symbols);
if (err)
return err;
}
@@ -2270,8 +2281,12 @@ static void save_lbr_cursor_node(struct thread *thread,
cursor->curr = cursor->first;
else
cursor->curr = cursor->curr->next;
+
+ map_symbol__exit(&lbr_stitch->prev_lbr_cursor[idx].ms);
memcpy(&lbr_stitch->prev_lbr_cursor[idx], cursor->curr,
sizeof(struct callchain_cursor_node));
+ lbr_stitch->prev_lbr_cursor[idx].ms.maps = maps__get(cursor->curr->ms.maps);
+ lbr_stitch->prev_lbr_cursor[idx].ms.map = map__get(cursor->curr->ms.map);
lbr_stitch->prev_lbr_cursor[idx].valid = true;
cursor->pos++;
@@ -2283,7 +2298,8 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
struct symbol **parent,
struct addr_location *root_al,
u64 *branch_from,
- bool callee)
+ bool callee,
+ bool symbols)
{
struct branch_stack *lbr_stack = sample->branch_stack;
struct branch_entry *entries = perf_sample__branch_entries(sample);
@@ -2316,7 +2332,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
true, flags, NULL,
- *branch_from);
+ *branch_from, symbols);
if (err)
return err;
@@ -2341,7 +2357,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
true, flags, NULL,
- *branch_from);
+ *branch_from, symbols);
if (err)
return err;
save_lbr_cursor_node(thread, cursor, i);
@@ -2356,7 +2372,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
true, flags, NULL,
- *branch_from);
+ *branch_from, symbols);
if (err)
return err;
save_lbr_cursor_node(thread, cursor, i);
@@ -2370,7 +2386,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
true, flags, NULL,
- *branch_from);
+ *branch_from, symbols);
if (err)
return err;
}
@@ -2482,6 +2498,9 @@ static bool has_stitched_lbr(struct thread *thread,
memcpy(&stitch_node->cursor, &lbr_stitch->prev_lbr_cursor[i],
sizeof(struct callchain_cursor_node));
+ stitch_node->cursor.ms.maps = maps__get(lbr_stitch->prev_lbr_cursor[i].ms.maps);
+ stitch_node->cursor.ms.map = map__get(lbr_stitch->prev_lbr_cursor[i].ms.map);
+
if (callee)
list_add(&stitch_node->node, &lbr_stitch->lists);
else
@@ -2505,6 +2524,8 @@ static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr)
if (!thread__lbr_stitch(thread)->prev_lbr_cursor)
goto free_lbr_stitch;
+ thread__lbr_stitch(thread)->prev_lbr_cursor_size = max_lbr + 1;
+
INIT_LIST_HEAD(&thread__lbr_stitch(thread)->lists);
INIT_LIST_HEAD(&thread__lbr_stitch(thread)->free_lists);
@@ -2532,7 +2553,8 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
struct symbol **parent,
struct addr_location *root_al,
int max_stack,
- unsigned int max_lbr)
+ unsigned int max_lbr,
+ bool symbols)
{
bool callee = (callchain_param.order == ORDER_CALLEE);
struct ip_callchain *chain = sample->callchain;
@@ -2560,8 +2582,12 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
max_lbr, callee);
if (!stitched_lbr && !list_empty(&lbr_stitch->lists)) {
- list_replace_init(&lbr_stitch->lists,
- &lbr_stitch->free_lists);
+ struct stitch_list *stitch_node;
+
+ list_for_each_entry(stitch_node, &lbr_stitch->lists, node)
+ map_symbol__exit(&stitch_node->cursor.ms);
+
+ list_splice_init(&lbr_stitch->lists, &lbr_stitch->free_lists);
}
memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample));
}
@@ -2570,12 +2596,12 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
/* Add kernel ip */
err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
parent, root_al, branch_from,
- true, i);
+ true, i, symbols);
if (err)
goto error;
err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
- root_al, &branch_from, true);
+ root_al, &branch_from, true, symbols);
if (err)
goto error;
@@ -2592,14 +2618,14 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
goto error;
}
err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
- root_al, &branch_from, false);
+ root_al, &branch_from, false, symbols);
if (err)
goto error;
/* Add kernel ip */
err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
parent, root_al, branch_from,
- false, i);
+ false, i, symbols);
if (err)
goto error;
}
@@ -2613,7 +2639,7 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
struct callchain_cursor *cursor,
struct symbol **parent,
struct addr_location *root_al,
- u8 *cpumode, int ent)
+ u8 *cpumode, int ent, bool symbols)
{
int err = 0;
@@ -2623,7 +2649,7 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
if (ip >= PERF_CONTEXT_MAX) {
err = add_callchain_ip(thread, cursor, parent,
root_al, cpumode, ip,
- false, NULL, NULL, 0);
+ false, NULL, NULL, 0, symbols);
break;
}
}
@@ -2645,7 +2671,8 @@ static int thread__resolve_callchain_sample(struct thread *thread,
struct perf_sample *sample,
struct symbol **parent,
struct addr_location *root_al,
- int max_stack)
+ int max_stack,
+ bool symbols)
{
struct branch_stack *branch = sample->branch_stack;
struct branch_entry *entries = perf_sample__branch_entries(sample);
@@ -2665,7 +2692,8 @@ static int thread__resolve_callchain_sample(struct thread *thread,
err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
root_al, max_stack,
- !env ? 0 : env->max_branches);
+ !env ? 0 : env->max_branches,
+ symbols);
if (err)
return (err < 0) ? err : 0;
}
@@ -2730,13 +2758,14 @@ static int thread__resolve_callchain_sample(struct thread *thread,
root_al,
NULL, be[i].to,
true, &be[i].flags,
- NULL, be[i].from);
+ NULL, be[i].from, symbols);
- if (!err)
+ if (!err) {
err = add_callchain_ip(thread, cursor, parent, root_al,
NULL, be[i].from,
true, &be[i].flags,
- &iter[i], 0);
+ &iter[i], 0, symbols);
+ }
if (err == -EINVAL)
break;
if (err)
@@ -2752,7 +2781,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
check_calls:
if (chain && callchain_param.order != ORDER_CALLEE) {
err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
- &cpumode, chain->nr - first_call);
+ &cpumode, chain->nr - first_call, symbols);
if (err)
return (err < 0) ? err : 0;
}
@@ -2774,7 +2803,7 @@ check_calls:
++nr_entries;
else if (callchain_param.order != ORDER_CALLEE) {
err = find_prev_cpumode(chain, thread, cursor, parent,
- root_al, &cpumode, j);
+ root_al, &cpumode, j, symbols);
if (err)
return (err < 0) ? err : 0;
continue;
@@ -2801,8 +2830,8 @@ check_calls:
if (leaf_frame_caller && leaf_frame_caller != ip) {
err = add_callchain_ip(thread, cursor, parent,
- root_al, &cpumode, leaf_frame_caller,
- false, NULL, NULL, 0);
+ root_al, &cpumode, leaf_frame_caller,
+ false, NULL, NULL, 0, symbols);
if (err)
return (err < 0) ? err : 0;
}
@@ -2810,7 +2839,7 @@ check_calls:
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
- false, NULL, NULL, 0);
+ false, NULL, NULL, 0, symbols);
if (err)
return (err < 0) ? err : 0;
@@ -2890,7 +2919,7 @@ static int thread__resolve_callchain_unwind(struct thread *thread,
struct callchain_cursor *cursor,
struct evsel *evsel,
struct perf_sample *sample,
- int max_stack)
+ int max_stack, bool symbols)
{
/* Can we do dwarf post unwind? */
if (!((evsel->core.attr.sample_type & PERF_SAMPLE_REGS_USER) &&
@@ -2902,17 +2931,21 @@ static int thread__resolve_callchain_unwind(struct thread *thread,
(!sample->user_stack.size))
return 0;
+ if (!symbols)
+ pr_debug("Not resolving symbols with an unwinder isn't currently supported\n");
+
return unwind__get_entries(unwind_entry, cursor,
thread, sample, max_stack, false);
}
-int thread__resolve_callchain(struct thread *thread,
- struct callchain_cursor *cursor,
- struct evsel *evsel,
- struct perf_sample *sample,
- struct symbol **parent,
- struct addr_location *root_al,
- int max_stack)
+int __thread__resolve_callchain(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct evsel *evsel,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int max_stack,
+ bool symbols)
{
int ret = 0;
@@ -2925,22 +2958,22 @@ int thread__resolve_callchain(struct thread *thread,
ret = thread__resolve_callchain_sample(thread, cursor,
evsel, sample,
parent, root_al,
- max_stack);
+ max_stack, symbols);
if (ret)
return ret;
ret = thread__resolve_callchain_unwind(thread, cursor,
evsel, sample,
- max_stack);
+ max_stack, symbols);
} else {
ret = thread__resolve_callchain_unwind(thread, cursor,
evsel, sample,
- max_stack);
+ max_stack, symbols);
if (ret)
return ret;
ret = thread__resolve_callchain_sample(thread, cursor,
evsel, sample,
parent, root_al,
- max_stack);
+ max_stack, symbols);
}
return ret;
@@ -3112,7 +3145,8 @@ out:
return addr_cpumode;
}
-struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id)
+struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename,
+ const struct dso_id *id)
{
return dsos__findnew_id(&machine->dsos, filename, id);
}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 82a47bac8023..2e5a4cb342d8 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -178,13 +178,32 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
struct callchain_cursor;
-int thread__resolve_callchain(struct thread *thread,
- struct callchain_cursor *cursor,
- struct evsel *evsel,
- struct perf_sample *sample,
- struct symbol **parent,
- struct addr_location *root_al,
- int max_stack);
+int __thread__resolve_callchain(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct evsel *evsel,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int max_stack,
+ bool symbols);
+
+static inline int thread__resolve_callchain(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct evsel *evsel,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int max_stack)
+{
+ return __thread__resolve_callchain(thread,
+ cursor,
+ evsel,
+ sample,
+ parent,
+ root_al,
+ max_stack,
+ /*symbols=*/true);
+}
/*
* Default guest kernel is defined by parameter --guestkallsyms
@@ -207,7 +226,8 @@ int machine__nr_cpus_avail(struct machine *machine);
struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
-struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id);
+struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename,
+ const struct dso_id *id);
struct dso *machine__findnew_dso(struct machine *machine, const char *filename);
size_t machine__fprintf(struct machine *machine, FILE *fp);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index e1d14936a60d..d729438b7d65 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -102,16 +102,21 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
return false;
}
-void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso)
+static void map__init(struct map *map, u64 start, u64 end, u64 pgoff,
+ struct dso *dso, u32 prot, u32 flags)
{
map__set_start(map, start);
map__set_end(map, end);
map__set_pgoff(map, pgoff);
- map__set_reloc(map, 0);
+ assert(map__reloc(map) == 0);
map__set_dso(map, dso__get(dso));
- map__set_mapping_type(map, MAPPING_TYPE__DSO);
- map__set_erange_warned(map, false);
refcount_set(map__refcnt(map), 1);
+ RC_CHK_ACCESS(map)->prot = prot;
+ RC_CHK_ACCESS(map)->flags = flags;
+ map__set_mapping_type(map, MAPPING_TYPE__DSO);
+ assert(map__erange_warned(map) == false);
+ assert(map__priv(map) == false);
+ assert(map__hit(map) == false);
}
struct map *map__new(struct machine *machine, u64 start, u64 len,
@@ -124,7 +129,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
struct nsinfo *nsi = NULL;
struct nsinfo *nnsi;
- map = malloc(sizeof(*map));
+ map = zalloc(sizeof(*map));
if (ADD_RC_CHK(result, map)) {
char newfilename[PATH_MAX];
struct dso *dso, *header_bid_dso;
@@ -134,8 +139,6 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
anon = is_anon_memory(filename) || flags & MAP_HUGETLB;
vdso = is_vdso_map(filename);
no_dso = is_no_dso_memory(filename);
- map->prot = prot;
- map->flags = flags;
nsi = nsinfo__get(thread__nsinfo(thread));
if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) {
@@ -169,7 +172,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
goto out_delete;
assert(!dso__kernel(dso));
- map__init(result, start, start + len, pgoff, dso);
+ map__init(result, start, start + len, pgoff, dso, prot, flags);
if (anon || no_dso) {
map->mapping_type = MAPPING_TYPE__IDENTITY;
@@ -223,10 +226,8 @@ struct map *map__new2(u64 start, struct dso *dso)
map = calloc(1, sizeof(*map) + (dso__kernel(dso) ? sizeof(struct kmap) : 0));
if (ADD_RC_CHK(result, map)) {
- /*
- * ->end will be filled after we load all the symbols
- */
- map__init(result, start, 0, 0, dso);
+ /* ->end will be filled after we load all the symbols. */
+ map__init(result, start, /*end=*/0, /*pgoff=*/0, dso, /*prot=*/0, /*flags=*/0);
}
return result;
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 65e2609fa1b1..4262f5a143be 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -35,6 +35,7 @@ DECLARE_RC_STRUCT(map) {
enum mapping_type mapping_type:8;
bool erange_warned;
bool priv;
+ bool hit;
};
struct kmap;
@@ -83,6 +84,11 @@ static inline bool map__priv(const struct map *map)
return RC_CHK_ACCESS(map)->priv;
}
+static inline bool map__hit(const struct map *map)
+{
+ return RC_CHK_ACCESS(map)->hit;
+}
+
static inline refcount_t *map__refcnt(struct map *map)
{
return &RC_CHK_ACCESS(map)->refcnt;
@@ -166,9 +172,6 @@ struct thread;
#define map__for_each_symbol_by_name(map, sym_name, pos, idx) \
__map__for_each_symbol_by_name(map, sym_name, (pos), idx)
-void map__init(struct map *map,
- u64 start, u64 end, u64 pgoff, struct dso *dso);
-
struct dso_id;
struct build_id;
@@ -285,14 +288,19 @@ static inline void map__set_reloc(struct map *map, u64 reloc)
RC_CHK_ACCESS(map)->reloc = reloc;
}
-static inline void map__set_priv(struct map *map, int priv)
+static inline void map__set_priv(struct map *map)
+{
+ RC_CHK_ACCESS(map)->priv = true;
+}
+
+static inline void map__set_hit(struct map *map)
{
- RC_CHK_ACCESS(map)->priv = priv;
+ RC_CHK_ACCESS(map)->hit = true;
}
-static inline void map__set_erange_warned(struct map *map, bool erange_warned)
+static inline void map__set_erange_warned(struct map *map)
{
- RC_CHK_ACCESS(map)->erange_warned = erange_warned;
+ RC_CHK_ACCESS(map)->erange_warned = true;
}
static inline void map__set_dso(struct map *map, struct dso *dso)
diff --git a/tools/perf/util/map_symbol.c b/tools/perf/util/map_symbol.c
index bef5079f2403..6ad2960bc289 100644
--- a/tools/perf/util/map_symbol.c
+++ b/tools/perf/util/map_symbol.c
@@ -13,3 +13,21 @@ void addr_map_symbol__exit(struct addr_map_symbol *ams)
{
map_symbol__exit(&ams->ms);
}
+
+void map_symbol__copy(struct map_symbol *dst, struct map_symbol *src)
+{
+ dst->maps = maps__get(src->maps);
+ dst->map = map__get(src->map);
+ dst->sym = src->sym;
+}
+
+void addr_map_symbol__copy(struct addr_map_symbol *dst, struct addr_map_symbol *src)
+{
+ map_symbol__copy(&dst->ms, &src->ms);
+
+ dst->addr = src->addr;
+ dst->al_addr = src->al_addr;
+ dst->al_level = src->al_level;
+ dst->phys_addr = src->phys_addr;
+ dst->data_page_size = src->data_page_size;
+}
diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h
index 72d5ed938ed6..e370bb32ed47 100644
--- a/tools/perf/util/map_symbol.h
+++ b/tools/perf/util/map_symbol.h
@@ -26,4 +26,7 @@ struct addr_map_symbol {
void map_symbol__exit(struct map_symbol *ms);
void addr_map_symbol__exit(struct addr_map_symbol *ams);
+void map_symbol__copy(struct map_symbol *dst, struct map_symbol *src);
+void addr_map_symbol__copy(struct addr_map_symbol *dst, struct addr_map_symbol *src);
+
#endif // __PERF_MAP_SYMBOL
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index be048bd02f36..bf5090f5220b 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -29,6 +29,8 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
};
#undef E
+bool perf_mem_record[PERF_MEM_EVENTS__MAX] = { 0 };
+
static char mem_loads_name[100];
static char mem_stores_name[100];
@@ -163,7 +165,7 @@ int perf_pmu__mem_events_parse(struct perf_pmu *pmu, const char *str)
continue;
if (strstr(e->tag, tok))
- e->record = found = true;
+ perf_mem_record[j] = found = true;
}
tok = strtok_r(NULL, ",", &saveptr);
@@ -192,7 +194,7 @@ static bool perf_pmu__mem_events_supported(const char *mnt, struct perf_pmu *pmu
return !stat(path, &st);
}
-int perf_pmu__mem_events_init(struct perf_pmu *pmu)
+static int __perf_pmu__mem_events_init(struct perf_pmu *pmu)
{
const char *mnt = sysfs__mount();
bool found = false;
@@ -219,6 +221,18 @@ int perf_pmu__mem_events_init(struct perf_pmu *pmu)
return found ? 0 : -ENOENT;
}
+int perf_pmu__mem_events_init(void)
+{
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) {
+ if (__perf_pmu__mem_events_init(pmu))
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
void perf_pmu__mem_events_list(struct perf_pmu *pmu)
{
int j;
@@ -249,7 +263,7 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr)
for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
e = perf_pmu__mem_events_ptr(pmu, j);
- if (!e->record)
+ if (!perf_mem_record[j])
continue;
if (!e->supported) {
@@ -352,6 +366,12 @@ static const char * const mem_lvl[] = {
};
static const char * const mem_lvlnum[] = {
+ [PERF_MEM_LVLNUM_L1] = "L1",
+ [PERF_MEM_LVLNUM_L2] = "L2",
+ [PERF_MEM_LVLNUM_L3] = "L3",
+ [PERF_MEM_LVLNUM_L4] = "L4",
+ [PERF_MEM_LVLNUM_L2_MHB] = "L2 MHB",
+ [PERF_MEM_LVLNUM_MSC] = "Memory-side Cache",
[PERF_MEM_LVLNUM_UNC] = "Uncached",
[PERF_MEM_LVLNUM_CXL] = "CXL",
[PERF_MEM_LVLNUM_IO] = "I/O",
@@ -434,7 +454,7 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_inf
if (mem_lvlnum[lvl])
l += scnprintf(out + l, sz - l, mem_lvlnum[lvl]);
else
- l += scnprintf(out + l, sz - l, "L%d", lvl);
+ l += scnprintf(out + l, sz - l, "Unknown level %d", lvl);
l += scnprintf(out + l, sz - l, " %s", hit_miss);
return l;
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index ca31014d7934..8dc27db9fd52 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -6,7 +6,6 @@
#include <linux/types.h>
struct perf_mem_event {
- bool record;
bool supported;
bool ldlat;
u32 aux_event;
@@ -28,9 +27,10 @@ struct perf_pmu;
extern unsigned int perf_mem_events__loads_ldlat;
extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX];
+extern bool perf_mem_record[PERF_MEM_EVENTS__MAX];
int perf_pmu__mem_events_parse(struct perf_pmu *pmu, const char *str);
-int perf_pmu__mem_events_init(struct perf_pmu *pmu);
+int perf_pmu__mem_events_init(void);
struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i);
struct perf_pmu *perf_mem_events_find_pmu(void);
diff --git a/tools/perf/util/mem-info.c b/tools/perf/util/mem-info.c
index 27d67721a695..d3efa9c139f2 100644
--- a/tools/perf/util/mem-info.c
+++ b/tools/perf/util/mem-info.c
@@ -33,3 +33,16 @@ struct mem_info *mem_info__new(void)
return result;
}
+
+struct mem_info *mem_info__clone(struct mem_info *mi)
+{
+ struct mem_info *result = mem_info__new();
+
+ if (result) {
+ addr_map_symbol__copy(mem_info__iaddr(result), mem_info__iaddr(mi));
+ addr_map_symbol__copy(mem_info__daddr(result), mem_info__daddr(mi));
+ mem_info__data_src(result)->val = mem_info__data_src(mi)->val;
+ }
+
+ return result;
+}
diff --git a/tools/perf/util/mem-info.h b/tools/perf/util/mem-info.h
index 0f68e29f311b..df75e94ed3d0 100644
--- a/tools/perf/util/mem-info.h
+++ b/tools/perf/util/mem-info.h
@@ -15,6 +15,7 @@ DECLARE_RC_STRUCT(mem_info) {
};
struct mem_info *mem_info__new(void);
+struct mem_info *mem_info__clone(struct mem_info *mi);
struct mem_info *mem_info__get(struct mem_info *mi);
void mem_info__put(struct mem_info *mi);
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 69f6a46402c3..46920ebadfd1 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -14,6 +14,7 @@
#include "pmus.h"
#include "print-events.h"
#include "smt.h"
+#include "tool_pmu.h"
#include "expr.h"
#include "rblist.h"
#include <string.h>
@@ -297,8 +298,8 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids,
struct expr_id_data *val_ptr;
/* Don't match events for the wrong hybrid PMU. */
- if (!all_pmus && ev->pmu_name && evsel__is_hybrid(ev) &&
- strcmp(ev->pmu_name, pmu))
+ if (!all_pmus && ev->pmu && evsel__is_hybrid(ev) &&
+ strcmp(ev->pmu->name, pmu))
continue;
/*
* Check for duplicate events with the same name. For
@@ -673,20 +674,20 @@ static int metricgroup__build_event_string(struct strbuf *events,
struct hashmap_entry *cur;
size_t bkt;
bool no_group = true, has_tool_events = false;
- bool tool_events[PERF_TOOL_MAX] = {false};
+ bool tool_events[TOOL_PMU__EVENT_MAX] = {false};
int ret = 0;
#define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0)
hashmap__for_each_entry(ctx->ids, cur, bkt) {
const char *sep, *rsep, *id = cur->pkey;
- enum perf_tool_event ev;
+ enum tool_pmu_event ev;
pr_debug("found event %s\n", id);
/* Always move tool events outside of the group. */
- ev = perf_tool_event__from_str(id);
- if (ev != PERF_TOOL_NONE) {
+ ev = tool_pmu__str_to_event(id);
+ if (ev != TOOL_PMU__EVENT_NONE) {
has_tool_events = true;
tool_events[ev] = true;
continue;
@@ -754,14 +755,14 @@ static int metricgroup__build_event_string(struct strbuf *events,
if (has_tool_events) {
int i;
- perf_tool_event__for_each_event(i) {
+ tool_pmu__for_each_event(i) {
if (tool_events[i]) {
if (!no_group) {
ret = strbuf_addch(events, ',');
RETURN_IF_NON_ZERO(ret);
}
no_group = false;
- ret = strbuf_addstr(events, perf_tool_event__to_str(i));
+ ret = strbuf_addstr(events, tool_pmu__event_to_str(i));
RETURN_IF_NON_ZERO(ret);
}
}
@@ -1147,14 +1148,14 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l,
int i, left_count, right_count;
left_count = hashmap__size(left->pctx->ids);
- perf_tool_event__for_each_event(i) {
- if (!expr__get_id(left->pctx, perf_tool_event__to_str(i), &data))
+ tool_pmu__for_each_event(i) {
+ if (!expr__get_id(left->pctx, tool_pmu__event_to_str(i), &data))
left_count--;
}
right_count = hashmap__size(right->pctx->ids);
- perf_tool_event__for_each_event(i) {
- if (!expr__get_id(right->pctx, perf_tool_event__to_str(i), &data))
+ tool_pmu__for_each_event(i) {
+ if (!expr__get_id(right->pctx, tool_pmu__event_to_str(i), &data))
right_count--;
}
@@ -1374,18 +1375,18 @@ static void metricgroup__free_metrics(struct list_head *metric_list)
* to true if tool event is found.
*/
static void find_tool_events(const struct list_head *metric_list,
- bool tool_events[PERF_TOOL_MAX])
+ bool tool_events[TOOL_PMU__EVENT_MAX])
{
struct metric *m;
list_for_each_entry(m, metric_list, nd) {
int i;
- perf_tool_event__for_each_event(i) {
+ tool_pmu__for_each_event(i) {
struct expr_id_data *data;
if (!tool_events[i] &&
- !expr__get_id(m->pctx, perf_tool_event__to_str(i), &data))
+ !expr__get_id(m->pctx, tool_pmu__event_to_str(i), &data))
tool_events[i] = true;
}
}
@@ -1436,7 +1437,7 @@ err_out:
* parse_ids - Build the event string for the ids and parse them creating an
* evlist. The encoded metric_ids are decoded.
* @metric_no_merge: is metric sharing explicitly disabled.
- * @fake_pmu: used when testing metrics not supported by the current CPU.
+ * @fake_pmu: use a fake PMU when testing metrics not supported by the current CPU.
* @ids: the event identifiers parsed from a metric.
* @modifier: any modifiers added to the events.
* @group_events: should events be placed in a weak group.
@@ -1444,9 +1445,9 @@ err_out:
* the overall list of metrics.
* @out_evlist: the created list of events.
*/
-static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
+static int parse_ids(bool metric_no_merge, bool fake_pmu,
struct expr_parse_ctx *ids, const char *modifier,
- bool group_events, const bool tool_events[PERF_TOOL_MAX],
+ bool group_events, const bool tool_events[TOOL_PMU__EVENT_MAX],
struct evlist **out_evlist)
{
struct parse_events_error parse_error;
@@ -1471,9 +1472,9 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
* event1 if #smt_on else 0
* Add a tool event to avoid a parse error on an empty string.
*/
- perf_tool_event__for_each_event(i) {
+ tool_pmu__for_each_event(i) {
if (tool_events[i]) {
- char *tmp = strdup(perf_tool_event__to_str(i));
+ char *tmp = strdup(tool_pmu__event_to_str(i));
if (!tmp)
return -ENOMEM;
@@ -1528,14 +1529,14 @@ static int parse_groups(struct evlist *perf_evlist,
bool metric_no_threshold,
const char *user_requested_cpu_list,
bool system_wide,
- struct perf_pmu *fake_pmu,
+ bool fake_pmu,
struct rblist *metric_events_list,
const struct pmu_metrics_table *table)
{
struct evlist *combined_evlist = NULL;
LIST_HEAD(metric_list);
struct metric *m;
- bool tool_events[PERF_TOOL_MAX] = {false};
+ bool tool_events[TOOL_PMU__EVENT_MAX] = {false};
bool is_default = !strcmp(str, "Default");
int ret;
@@ -1703,7 +1704,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge,
metric_no_threshold, user_requested_cpu_list, system_wide,
- /*fake_pmu=*/NULL, metric_events, table);
+ /*fake_pmu=*/false, metric_events, table);
}
int metricgroup__parse_groups_test(struct evlist *evlist,
@@ -1717,7 +1718,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist,
/*metric_no_threshold=*/false,
/*user_requested_cpu_list=*/NULL,
/*system_wide=*/false,
- &perf_pmu__fake, metric_events, table);
+ /*fake_pmu=*/true, metric_events, table);
}
struct metricgroup__has_metric_data {
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 122ee198a86e..43b02293f1d2 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -230,9 +230,7 @@ void mmap__munmap(struct mmap *map)
{
bitmap_free(map->affinity_mask.bits);
-#ifndef PYTHON_PERF
zstd_fini(&map->zstd_data);
-#endif
perf_mmap__aio_munmap(map);
if (map->data != NULL) {
@@ -295,12 +293,10 @@ int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu
map->core.flush = mp->flush;
-#ifndef PYTHON_PERF
if (zstd_init(&map->zstd_data, mp->comp_level)) {
pr_debug2("failed to init mmap compressor, error %d\n", errno);
return -1;
}
-#endif
if (mp->comp_level && !perf_mmap__aio_enabled(map)) {
map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 321586fb5556..afeb8d815bbf 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -8,6 +8,7 @@
#include <sys/ioctl.h>
#include <sys/param.h>
#include "term.h"
+#include "env.h"
#include "evlist.h"
#include "evsel.h"
#include <subcmd/parse-options.h>
@@ -227,12 +228,12 @@ __add_event(struct list_head *list, int *idx,
bool init_attr,
const char *name, const char *metric_id, struct perf_pmu *pmu,
struct list_head *config_terms, bool auto_merge_stats,
- const char *cpu_list)
+ struct perf_cpu_map *cpu_list, u64 alternate_hw_config)
{
struct evsel *evsel;
- struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) :
- cpu_list ? perf_cpu_map__new(cpu_list) : NULL;
+ struct perf_cpu_map *cpus = perf_cpu_map__is_empty(cpu_list) && pmu ? pmu->cpus : cpu_list;
+ cpus = perf_cpu_map__get(cpus);
if (pmu)
perf_pmu__warn_invalid_formats(pmu);
@@ -262,7 +263,7 @@ __add_event(struct list_head *list, int *idx,
evsel->core.is_pmu_core = pmu ? pmu->is_core : false;
evsel->auto_merge_stats = auto_merge_stats;
evsel->pmu = pmu;
- evsel->pmu_name = pmu ? strdup(pmu->name) : NULL;
+ evsel->alternate_hw_config = alternate_hw_config;
if (name)
evsel->name = strdup(name);
@@ -285,46 +286,19 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
{
return __add_event(/*list=*/NULL, &idx, attr, /*init_attr=*/false, name,
metric_id, pmu, /*config_terms=*/NULL,
- /*auto_merge_stats=*/false, /*cpu_list=*/NULL);
+ /*auto_merge_stats=*/false, /*cpu_list=*/NULL,
+ /*alternate_hw_config=*/PERF_COUNT_HW_MAX);
}
static int add_event(struct list_head *list, int *idx,
struct perf_event_attr *attr, const char *name,
- const char *metric_id, struct list_head *config_terms)
+ const char *metric_id, struct list_head *config_terms,
+ u64 alternate_hw_config)
{
return __add_event(list, idx, attr, /*init_attr*/true, name, metric_id,
/*pmu=*/NULL, config_terms,
- /*auto_merge_stats=*/false, /*cpu_list=*/NULL) ? 0 : -ENOMEM;
-}
-
-static int add_event_tool(struct list_head *list, int *idx,
- enum perf_tool_event tool_event)
-{
- struct evsel *evsel;
- struct perf_event_attr attr = {
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_DUMMY,
- };
- const char *cpu_list = NULL;
-
- if (tool_event == PERF_TOOL_DURATION_TIME) {
- /* Duration time is gathered globally, pretend it is only on CPU0. */
- cpu_list = "0";
- }
- evsel = __add_event(list, idx, &attr, /*init_attr=*/true, /*name=*/NULL,
- /*metric_id=*/NULL, /*pmu=*/NULL,
- /*config_terms=*/NULL, /*auto_merge_stats=*/false,
- cpu_list);
- if (!evsel)
- return -ENOMEM;
- evsel->tool_event = tool_event;
- if (tool_event == PERF_TOOL_DURATION_TIME
- || tool_event == PERF_TOOL_USER_TIME
- || tool_event == PERF_TOOL_SYSTEM_TIME) {
- free((char *)evsel->unit);
- evsel->unit = strdup("ns");
- }
- return 0;
+ /*auto_merge_stats=*/false, /*cpu_list=*/NULL,
+ alternate_hw_config) ? 0 : -ENOMEM;
}
/**
@@ -448,7 +422,7 @@ bool parse_events__filter_pmu(const struct parse_events_state *parse_state,
static int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, struct perf_pmu *pmu,
const struct parse_events_terms *const_parsed_terms,
- bool auto_merge_stats);
+ bool auto_merge_stats, u64 alternate_hw_config);
int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
struct parse_events_state *parse_state,
@@ -474,7 +448,8 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
*/
ret = parse_events_add_pmu(parse_state, list, pmu,
parsed_terms,
- perf_pmu__auto_merge_stats(pmu));
+ perf_pmu__auto_merge_stats(pmu),
+ /*alternate_hw_config=*/PERF_COUNT_HW_MAX);
if (ret)
return ret;
continue;
@@ -505,7 +480,8 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name,
metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
- /*cpu_list=*/NULL) == NULL)
+ /*cpu_list=*/NULL,
+ /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL)
return -ENOMEM;
free_config_terms(&config_terms);
@@ -670,6 +646,26 @@ static int add_tracepoint_multi_sys(struct parse_events_state *parse_state,
}
#endif /* HAVE_LIBTRACEEVENT */
+size_t default_breakpoint_len(void)
+{
+#if defined(__i386__)
+ static int len;
+
+ if (len == 0) {
+ struct perf_env env = {};
+
+ perf_env__init(&env);
+ len = perf_env__kernel_is_64_bit(&env) ? sizeof(u64) : sizeof(long);
+ perf_env__exit(&env);
+ }
+ return len;
+#elif defined(__aarch64__)
+ return 4;
+#else
+ return sizeof(long);
+#endif
+}
+
static int
parse_breakpoint_type(const char *type, struct perf_event_attr *attr)
{
@@ -728,7 +724,7 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state,
/* Provide some defaults if len is not specified */
if (!len) {
if (attr.bp_type == HW_BREAKPOINT_X)
- len = sizeof(long);
+ len = default_breakpoint_len();
else
len = HW_BREAKPOINT_LEN_4;
}
@@ -750,7 +746,7 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state,
name = get_config_name(head_config);
return add_event(list, &parse_state->idx, &attr, name, /*mertic_id=*/NULL,
- &config_terms);
+ &config_terms, /*alternate_hw_config=*/PERF_COUNT_HW_MAX);
}
static int check_type_val(struct parse_events_term *term,
@@ -772,7 +768,7 @@ static int check_type_val(struct parse_events_term *term,
static bool config_term_shrinked;
-static const char *config_term_name(enum parse_events__term_type term_type)
+const char *parse_events__term_type_str(enum parse_events__term_type term_type)
{
/*
* Update according to parse-events.l
@@ -858,7 +854,7 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er
/* term_type is validated so indexing is safe */
if (asprintf(&err_str, "'%s' is not usable in 'perf stat'",
- config_term_name(term_type)) >= 0)
+ parse_events__term_type_str(term_type)) >= 0)
parse_events_error__handle(err, -1, err_str, NULL);
return false;
}
@@ -982,7 +978,7 @@ do { \
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
default:
parse_events_error__handle(err, term->err_term,
- strdup(config_term_name(term->type_term)),
+ strdup(parse_events__term_type_str(term->type_term)),
parse_events_formats_error_string(NULL));
return -EINVAL;
}
@@ -1050,6 +1046,7 @@ static int config_term_pmu(struct perf_event_attr *attr,
if (perf_pmu__have_event(pmu, term->config)) {
term->type_term = PARSE_EVENTS__TERM_TYPE_USER;
term->no_value = true;
+ term->alternate_hw_config = true;
} else {
attr->type = PERF_TYPE_HARDWARE;
attr->config = term->val.num;
@@ -1105,8 +1102,9 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
default:
if (err) {
parse_events_error__handle(err, term->err_term,
- strdup(config_term_name(term->type_term)),
- strdup("valid terms: call-graph,stack-size\n"));
+ strdup(parse_events__term_type_str(term->type_term)),
+ strdup("valid terms: call-graph,stack-size\n")
+ );
}
return -EINVAL;
}
@@ -1362,8 +1360,9 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state,
name = get_config_name(head_config);
metric_id = get_config_metric_id(head_config);
ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name,
- metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
- /*cpu_list=*/NULL) ? 0 : -ENOMEM;
+ metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
+ /*cpu_list=*/NULL, /*alternate_hw_config=*/PERF_COUNT_HW_MAX
+ ) == NULL ? -ENOMEM : 0;
free_config_terms(&config_terms);
return ret;
}
@@ -1399,13 +1398,6 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
type, /*extended_type=*/0, config, head_config);
}
-int parse_events_add_tool(struct parse_events_state *parse_state,
- struct list_head *list,
- int tool_event)
-{
- return add_event_tool(list, &parse_state->idx, tool_event);
-}
-
static bool config_term_percore(struct list_head *config_terms)
{
struct evsel_config_term *term;
@@ -1421,7 +1413,7 @@ static bool config_term_percore(struct list_head *config_terms)
static int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, struct perf_pmu *pmu,
const struct parse_events_terms *const_parsed_terms,
- bool auto_merge_stats)
+ bool auto_merge_stats, u64 alternate_hw_config)
{
struct perf_event_attr attr;
struct perf_pmu_info info;
@@ -1458,7 +1450,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
/*init_attr=*/true, /*name=*/NULL,
/*metric_id=*/NULL, pmu,
/*config_terms=*/NULL, auto_merge_stats,
- /*cpu_list=*/NULL);
+ /*cpu_list=*/NULL, alternate_hw_config);
return evsel ? 0 : -ENOMEM;
}
@@ -1478,8 +1470,9 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
}
/* Look for event names in the terms and rewrite into format based terms. */
- if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, &parsed_terms,
- &info, &alias_rewrote_terms, err)) {
+ if (perf_pmu__check_alias(pmu, &parsed_terms,
+ &info, &alias_rewrote_terms,
+ &alternate_hw_config, err)) {
parse_events_terms__exit(&parsed_terms);
return -EINVAL;
}
@@ -1515,8 +1508,9 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
return -ENOMEM;
}
- if (!parse_state->fake_pmu &&
- perf_pmu__config(pmu, &attr, &parsed_terms, parse_state->error)) {
+ /* Skip configuring hard coded terms that were applied by config_attr. */
+ if (perf_pmu__config(pmu, &attr, &parsed_terms, /*apply_hardcoded=*/false,
+ parse_state->error)) {
free_config_terms(&config_terms);
parse_events_terms__exit(&parsed_terms);
return -EINVAL;
@@ -1525,7 +1519,8 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true,
get_config_name(&parsed_terms),
get_config_metric_id(&parsed_terms), pmu,
- &config_terms, auto_merge_stats, /*cpu_list=*/NULL);
+ &config_terms, auto_merge_stats, /*cpu_list=*/NULL,
+ alternate_hw_config);
if (!evsel) {
parse_events_terms__exit(&parsed_terms);
return -ENOMEM;
@@ -1536,11 +1531,6 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
evsel->percore = config_term_percore(&evsel->config_terms);
- if (parse_state->fake_pmu) {
- parse_events_terms__exit(&parsed_terms);
- return 0;
- }
-
parse_events_terms__exit(&parsed_terms);
free((char *)evsel->unit);
evsel->unit = strdup(info.unit);
@@ -1551,7 +1541,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
}
int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
- const char *event_name,
+ const char *event_name, u64 hw_config,
const struct parse_events_terms *const_parsed_terms,
struct list_head **listp, void *loc_)
{
@@ -1604,7 +1594,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
if (!parse_events_add_pmu(parse_state, list, pmu,
- &parsed_terms, auto_merge_stats)) {
+ &parsed_terms, auto_merge_stats, hw_config)) {
struct strbuf sb;
strbuf_init(&sb, /*hint=*/ 0);
@@ -1616,13 +1606,13 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
}
if (parse_state->fake_pmu) {
- if (!parse_events_add_pmu(parse_state, list, parse_state->fake_pmu, &parsed_terms,
- /*auto_merge_stats=*/true)) {
+ if (!parse_events_add_pmu(parse_state, list, perf_pmus__fake_pmu(), &parsed_terms,
+ /*auto_merge_stats=*/true, hw_config)) {
struct strbuf sb;
strbuf_init(&sb, /*hint=*/ 0);
parse_events_terms__to_strbuf(&parsed_terms, &sb);
- pr_debug("%s -> %s/%s/\n", event_name, "fake_pmu", sb.buf);
+ pr_debug("%s -> fake/%s/\n", event_name, sb.buf);
strbuf_release(&sb);
ok++;
}
@@ -1656,11 +1646,20 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state
INIT_LIST_HEAD(*listp);
/* Attempt to add to list assuming event_or_pmu is a PMU name. */
- pmu = parse_state->fake_pmu ?: perf_pmus__find(event_or_pmu);
+ pmu = perf_pmus__find(event_or_pmu);
if (pmu && !parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms,
- /*auto_merge_stats=*/false))
+ /*auto_merge_stats=*/false,
+ /*alternate_hw_config=*/PERF_COUNT_HW_MAX))
return 0;
+ if (parse_state->fake_pmu) {
+ if (!parse_events_add_pmu(parse_state, *listp, perf_pmus__fake_pmu(),
+ const_parsed_terms,
+ /*auto_merge_stats=*/false,
+ /*alternate_hw_config=*/PERF_COUNT_HW_MAX))
+ return 0;
+ }
+
pmu = NULL;
/* Failed to add, try wildcard expansion of event_or_pmu as a PMU name. */
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
@@ -1670,7 +1669,8 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state
if (!parse_events_add_pmu(parse_state, *listp, pmu,
const_parsed_terms,
- auto_merge_stats)) {
+ auto_merge_stats,
+ /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) {
ok++;
parse_state->wild_card_pmus = true;
}
@@ -1681,7 +1681,8 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state
/* Failure to add, assume event_or_pmu is an event name. */
zfree(listp);
- if (!parse_events_multi_pmu_add(parse_state, event_or_pmu, const_parsed_terms, listp, loc))
+ if (!parse_events_multi_pmu_add(parse_state, event_or_pmu, PERF_COUNT_HW_MAX,
+ const_parsed_terms, listp, loc))
return 0;
if (asprintf(&help, "Unable to find PMU or event on a PMU of '%s'", event_or_pmu) < 0)
@@ -1732,14 +1733,10 @@ static int parse_events__modifier_list(struct parse_events_state *parse_state,
int exclude = eu | ek | eh;
int exclude_GH = group ? evsel->exclude_GH : 0;
- if (mod.precise) {
- /* use of precise requires exclude_guest */
- eG = 1;
- }
if (mod.user) {
if (!exclude)
exclude = eu = ek = eh = 1;
- if (!exclude_GH && !perf_guest)
+ if (!exclude_GH && !perf_guest && exclude_GH_default)
eG = 1;
eu = 0;
}
@@ -1811,6 +1808,8 @@ static int parse_events__modifier_list(struct parse_events_state *parse_state,
evsel->weak_group = true;
if (mod.bpf)
evsel->bpf_counter = true;
+ if (mod.retire_lat)
+ evsel->retire_lat = true;
}
return 0;
}
@@ -1959,8 +1958,8 @@ static int evsel__compute_group_pmu_name(struct evsel *evsel,
}
}
}
- /* Assign the actual name taking care that the fake PMU lacks a name. */
- evsel->group_pmu_name = strdup(group_pmu_name ?: "fake");
+ /* Record computed name. */
+ evsel->group_pmu_name = strdup(group_pmu_name);
return evsel->group_pmu_name ? 0 : -ENOMEM;
}
@@ -2122,7 +2121,7 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
}
int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filter,
- struct parse_events_error *err, struct perf_pmu *fake_pmu,
+ struct parse_events_error *err, bool fake_pmu,
bool warn_if_reordered, bool fake_tp)
{
struct parse_events_state parse_state = {
@@ -2341,7 +2340,7 @@ int parse_events_option(const struct option *opt, const char *str,
parse_events_error__init(&err);
ret = __parse_events(*args->evlistp, str, args->pmu_filter, &err,
- /*fake_pmu=*/NULL, /*warn_if_reordered=*/true,
+ /*fake_pmu=*/false, /*warn_if_reordered=*/true,
/*fake_tp=*/false);
if (ret) {
@@ -2541,7 +2540,7 @@ int parse_events_term__num(struct parse_events_term **term,
struct parse_events_term temp = {
.type_val = PARSE_EVENTS__TERM_TYPE_NUM,
.type_term = type_term,
- .config = config ? : strdup(config_term_name(type_term)),
+ .config = config ? : strdup(parse_events__term_type_str(type_term)),
.no_value = no_value,
.err_term = loc_term ? loc_term->first_column : 0,
.err_val = loc_val ? loc_val->first_column : 0,
@@ -2575,7 +2574,7 @@ int parse_events_term__term(struct parse_events_term **term,
void *loc_term, void *loc_val)
{
return parse_events_term__str(term, term_lhs, NULL,
- strdup(config_term_name(term_rhs)),
+ strdup(parse_events__term_type_str(term_rhs)),
loc_term, loc_val);
}
@@ -2682,7 +2681,8 @@ int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct
if (ret < 0)
return ret;
} else if ((unsigned int)term->type_term < __PARSE_EVENTS__TERM_TYPE_NR) {
- ret = strbuf_addf(sb, "%s=", config_term_name(term->type_term));
+ ret = strbuf_addf(sb, "%s=",
+ parse_events__term_type_str(term->type_term));
if (ret < 0)
return ret;
}
@@ -2702,7 +2702,7 @@ static void config_terms_list(char *buf, size_t buf_sz)
buf[0] = '\0';
for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) {
- const char *name = config_term_name(i);
+ const char *name = parse_events__term_type_str(i);
if (!config_term_avail(i, NULL))
continue;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index e13de2c8b706..3f4334ec6231 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -31,14 +31,14 @@ int parse_events_option(const struct option *opt, const char *str, int unset);
int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset);
__attribute__((nonnull(1, 2, 4)))
int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filter,
- struct parse_events_error *error, struct perf_pmu *fake_pmu,
+ struct parse_events_error *error, bool fake_pmu,
bool warn_if_reordered, bool fake_tp);
__attribute__((nonnull(1, 2, 3)))
static inline int parse_events(struct evlist *evlist, const char *str,
struct parse_events_error *err)
{
- return __parse_events(evlist, str, /*pmu_filter=*/NULL, err, /*fake_pmu=*/NULL,
+ return __parse_events(evlist, str, /*pmu_filter=*/NULL, err, /*fake_pmu=*/false,
/*warn_if_reordered=*/true, /*fake_tp=*/false);
}
@@ -127,6 +127,12 @@ struct parse_events_term {
* value is assumed to be 1. An event name also has no value.
*/
bool no_value;
+ /**
+ * @alternate_hw_config: config is the event name but num is an
+ * alternate PERF_TYPE_HARDWARE config value which is often nice for the
+ * sake of quick matching.
+ */
+ bool alternate_hw_config;
};
struct parse_events_error {
@@ -150,8 +156,8 @@ struct parse_events_state {
struct parse_events_terms *terms;
/* Start token. */
int stoken;
- /* Special fake PMU marker for testing. */
- struct perf_pmu *fake_pmu;
+ /* Use the fake PMU marker for testing. */
+ bool fake_pmu;
/* Skip actual tracepoint processing for testing. */
bool fake_tp;
/* If non-null, when wildcard matching only match the given PMU. */
@@ -162,6 +168,8 @@ struct parse_events_state {
bool wild_card_pmus;
};
+const char *parse_events__term_type_str(enum parse_events__term_type term_type);
+
bool parse_events__filter_pmu(const struct parse_events_state *parse_state,
const struct perf_pmu *pmu);
void parse_events__shrink_config_terms(void);
@@ -203,6 +211,7 @@ struct parse_events_modifier {
bool hypervisor : 1; /* 'h' */
bool guest : 1; /* 'G' */
bool host : 1; /* 'H' */
+ bool retire_lat : 1; /* 'R' */
};
int parse_events__modifier_event(struct parse_events_state *parse_state, void *loc,
@@ -220,9 +229,6 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
u32 type, u64 config,
const struct parse_events_terms *head_config,
bool wildcard);
-int parse_events_add_tool(struct parse_events_state *parse_state,
- struct list_head *list,
- int tool_event);
int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
struct parse_events_state *parse_state,
struct parse_events_terms *parsed_terms);
@@ -237,7 +243,7 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
struct perf_pmu *pmu);
int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
- const char *event_name,
+ const char *event_name, u64 hw_config,
const struct parse_events_terms *const_parsed_terms,
struct list_head **listp, void *loc);
@@ -285,4 +291,6 @@ static inline bool is_sdt_event(char *str __maybe_unused)
}
#endif /* HAVE_LIBELF_SUPPORT */
+size_t default_breakpoint_len(void);
+
#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 16045c383ada..14e5bd856a18 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -121,14 +121,6 @@ static int sym(yyscan_t scanner, int type, int config)
return type == PERF_TYPE_HARDWARE ? PE_VALUE_SYM_HW : PE_VALUE_SYM_SW;
}
-static int tool(yyscan_t scanner, enum perf_tool_event event)
-{
- YYSTYPE *yylval = parse_events_get_lval(scanner);
-
- yylval->num = event;
- return PE_VALUE_SYM_TOOL;
-}
-
static int term(yyscan_t scanner, enum parse_events__term_type type)
{
YYSTYPE *yylval = parse_events_get_lval(scanner);
@@ -209,6 +201,7 @@ static int modifiers(struct parse_events_state *parse_state, yyscan_t scanner)
CASE('W', weak);
CASE('e', exclusive);
CASE('b', bpf);
+ CASE('R', retire_lat);
default:
return PE_ERROR;
}
@@ -250,7 +243,7 @@ drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
* If you add a modifier you need to update check_modifier().
* Also, the letters in modifier_event must not be in modifier_bp.
*/
-modifier_event [ukhpPGHSDIWeb]{1,15}
+modifier_event [ukhpPGHSDIWebR]{1,16}
modifier_bp [rwx]{1,3}
lc_type (L1-dcache|l1-d|l1d|L1-data|L1-icache|l1-i|l1i|L1-instruction|LLC|L2|dTLB|d-tlb|Data-TLB|iTLB|i-tlb|Instruction-TLB|branch|branches|bpu|btb|bpc|node)
lc_op_result (load|loads|read|store|stores|write|prefetch|prefetches|speculative-read|speculative-load|refs|Reference|ops|access|misses|miss)
@@ -403,9 +396,6 @@ cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU
alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
-duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
-user_time { return tool(yyscanner, PERF_TOOL_USER_TIME); }
-system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); }
bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index b3c51f06cbdc..f888cbb076d6 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -56,7 +56,6 @@ static void free_list_evsel(struct list_head* list_evsel)
%token PE_START_EVENTS PE_START_TERMS
%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_TERM
-%token PE_VALUE_SYM_TOOL
%token PE_EVENT_NAME
%token PE_RAW PE_NAME
%token PE_MODIFIER_EVENT PE_MODIFIER_BP PE_BP_COLON PE_BP_SLASH
@@ -68,7 +67,6 @@ static void free_list_evsel(struct list_head* list_evsel)
%type <num> PE_VALUE
%type <num> PE_VALUE_SYM_HW
%type <num> PE_VALUE_SYM_SW
-%type <num> PE_VALUE_SYM_TOOL
%type <mod> PE_MODIFIER_EVENT
%type <term_type> PE_TERM
%type <num> value_sym
@@ -292,7 +290,7 @@ PE_NAME sep_dc
struct list_head *list;
int err;
- err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list, &@1);
+ err = parse_events_multi_pmu_add(_parse_state, $1, PERF_COUNT_HW_MAX, NULL, &list, &@1);
if (err < 0) {
struct parse_events_state *parse_state = _parse_state;
struct parse_events_error *error = parse_state->error;
@@ -350,20 +348,6 @@ value_sym sep_slash_slash_dc
PE_ABORT(err);
$$ = list;
}
-|
-PE_VALUE_SYM_TOOL sep_slash_slash_dc
-{
- struct list_head *list;
- int err;
-
- list = alloc_list();
- if (!list)
- YYNOMEM;
- err = parse_events_add_tool(_parse_state, list, $1);
- if (err)
- YYNOMEM;
- $$ = list;
-}
event_legacy_cache:
PE_LEGACY_CACHE opt_event_config
diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c
index 5ccfe4b64cdf..0dacc133ed39 100644
--- a/tools/perf/util/pfm.c
+++ b/tools/perf/util/pfm.c
@@ -233,7 +233,7 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state,
}
if (is_libpfm_event_supported(name, cpus, threads)) {
- print_cb->print_event(print_state, pinfo->name, topic,
+ print_cb->print_event(print_state, topic, pinfo->name,
name, info->equiv,
/*scale_unit=*/NULL,
/*deprecated=*/NULL, "PFM event",
@@ -267,8 +267,8 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state,
continue;
print_cb->print_event(print_state,
- pinfo->name,
topic,
+ pinfo->name,
name, /*alias=*/NULL,
/*scale_unit=*/NULL,
/*deprecated=*/NULL, "PFM event",
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 986166bc7c78..08a9d0bd9301 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -18,7 +18,9 @@
#include "debug.h"
#include "evsel.h"
#include "pmu.h"
+#include "hwmon_pmu.h"
#include "pmus.h"
+#include "tool_pmu.h"
#include <util/pmu-bison.h>
#include <util/pmu-flex.h>
#include "parse-events.h"
@@ -30,10 +32,6 @@
#include "util/evsel_config.h"
#include <regex.h>
-struct perf_pmu perf_pmu__fake = {
- .name = "fake",
-};
-
#define UNIT_MAX_LEN 31 /* max length for event unit name */
enum event_source {
@@ -367,8 +365,8 @@ error:
return -1;
}
-static int
-perf_pmu__parse_per_pkg(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
+static bool perf_pmu__parse_event_source_bool(const char *pmu_name, const char *event_name,
+ const char *suffix)
{
char path[PATH_MAX];
size_t len;
@@ -376,37 +374,36 @@ perf_pmu__parse_per_pkg(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path));
if (!len)
- return 0;
- scnprintf(path + len, sizeof(path) - len, "%s/events/%s.per-pkg", pmu->name, alias->name);
+ return false;
+
+ scnprintf(path + len, sizeof(path) - len, "%s/events/%s.%s", pmu_name, event_name, suffix);
fd = open(path, O_RDONLY);
if (fd == -1)
- return -1;
+ return false;
- close(fd);
+#ifndef NDEBUG
+ {
+ char buf[8];
- alias->per_pkg = true;
- return 0;
+ len = read(fd, buf, sizeof(buf));
+ assert(len == 1 || len == 2);
+ assert(buf[0] == '1');
+ }
+#endif
+
+ close(fd);
+ return true;
}
-static int perf_pmu__parse_snapshot(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
+static void perf_pmu__parse_per_pkg(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
{
- char path[PATH_MAX];
- size_t len;
- int fd;
-
- len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path));
- if (!len)
- return 0;
- scnprintf(path + len, sizeof(path) - len, "%s/events/%s.snapshot", pmu->name, alias->name);
-
- fd = open(path, O_RDONLY);
- if (fd == -1)
- return -1;
+ alias->per_pkg = perf_pmu__parse_event_source_bool(pmu->name, alias->name, "per-pkg");
+}
- alias->snapshot = true;
- close(fd);
- return 0;
+static void perf_pmu__parse_snapshot(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
+{
+ alias->snapshot = perf_pmu__parse_event_source_bool(pmu->name, alias->name, "snapshot");
}
/* Delete an alias entry. */
@@ -822,31 +819,6 @@ static int is_sysfs_pmu_core(const char *name)
return file_available(path);
}
-char *perf_pmu__getcpuid(struct perf_pmu *pmu)
-{
- char *cpuid;
- static bool printed;
-
- cpuid = getenv("PERF_CPUID");
- if (cpuid)
- cpuid = strdup(cpuid);
- if (!cpuid)
- cpuid = get_cpuid_str(pmu);
- if (!cpuid)
- return NULL;
-
- if (!printed) {
- pr_debug("Using CPUID %s\n", cpuid);
- printed = true;
- }
- return cpuid;
-}
-
-__weak const struct pmu_metrics_table *pmu_metrics_table__find(void)
-{
- return perf_pmu__find_metrics_table(NULL);
-}
-
/**
* Return the length of the PMU name not including the suffix for uncore PMUs.
*
@@ -1173,6 +1145,11 @@ struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pm
return pmu;
}
+bool perf_pmu__is_fake(const struct perf_pmu *pmu)
+{
+ return pmu->type == PERF_PMU_TYPE_FAKE;
+}
+
void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu)
{
struct perf_pmu_format *format;
@@ -1183,7 +1160,7 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu)
pmu->formats_checked = true;
/* fake pmu doesn't have format list */
- if (pmu == &perf_pmu__fake)
+ if (perf_pmu__is_fake(pmu))
return;
list_for_each_entry(format, &pmu->format, list) {
@@ -1199,8 +1176,12 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu)
bool evsel__is_aux_event(const struct evsel *evsel)
{
- struct perf_pmu *pmu = evsel__find_pmu(evsel);
+ struct perf_pmu *pmu;
+
+ if (evsel->needs_auxtrace_mmap)
+ return true;
+ pmu = evsel__find_pmu(evsel);
return pmu && pmu->auxtrace;
}
@@ -1362,7 +1343,8 @@ static int pmu_config_term(const struct perf_pmu *pmu,
struct perf_event_attr *attr,
struct parse_events_term *term,
struct parse_events_terms *head_terms,
- bool zero, struct parse_events_error *err)
+ bool zero, bool apply_hardcoded,
+ struct parse_events_error *err)
{
struct perf_pmu_format *format;
__u64 *vp;
@@ -1376,11 +1358,46 @@ static int pmu_config_term(const struct perf_pmu *pmu,
return 0;
/*
- * Hardcoded terms should be already in, so nothing
- * to be done for them.
+ * Hardcoded terms are generally handled in event parsing, which
+ * traditionally have had to handle not having a PMU. An alias may
+ * have hard coded config values, optionally apply them below.
*/
- if (parse_events__is_hardcoded_term(term))
+ if (parse_events__is_hardcoded_term(term)) {
+ /* Config terms set all bits in the config. */
+ DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
+
+ if (!apply_hardcoded)
+ return 0;
+
+ bitmap_fill(bits, PERF_PMU_FORMAT_BITS);
+
+ switch (term->type_term) {
+ case PARSE_EVENTS__TERM_TYPE_CONFIG:
+ assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ pmu_format_value(bits, term->val.num, &attr->config, zero);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+ assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ pmu_format_value(bits, term->val.num, &attr->config1, zero);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+ assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ pmu_format_value(bits, term->val.num, &attr->config2, zero);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_CONFIG3:
+ assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ pmu_format_value(bits, term->val.num, &attr->config3, zero);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */
+ return -EINVAL;
+ case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_HARDWARE:
+ /* Skip non-config terms. */
+ break;
+ default:
+ break;
+ }
return 0;
+ }
format = pmu_find_format(&pmu->format, term->config);
if (!format) {
@@ -1462,13 +1479,12 @@ static int pmu_config_term(const struct perf_pmu *pmu,
if (err) {
char *err_str;
- parse_events_error__handle(err, term->err_val,
- asprintf(&err_str,
- "value too big for format (%s), maximum is %llu",
- format->name, (unsigned long long)max_val) < 0
- ? strdup("value too big for format")
- : err_str,
- NULL);
+ if (asprintf(&err_str,
+ "value too big for format (%s), maximum is %llu",
+ format->name, (unsigned long long)max_val) < 0) {
+ err_str = strdup("value too big for format");
+ }
+ parse_events_error__handle(err, term->err_val, err_str, /*help=*/NULL);
return -EINVAL;
}
/*
@@ -1484,12 +1500,16 @@ static int pmu_config_term(const struct perf_pmu *pmu,
int perf_pmu__config_terms(const struct perf_pmu *pmu,
struct perf_event_attr *attr,
struct parse_events_terms *terms,
- bool zero, struct parse_events_error *err)
+ bool zero, bool apply_hardcoded,
+ struct parse_events_error *err)
{
struct parse_events_term *term;
+ if (perf_pmu__is_hwmon(pmu))
+ return hwmon_pmu__config_terms(pmu, attr, terms, err);
+
list_for_each_entry(term, &terms->terms, list) {
- if (pmu_config_term(pmu, attr, term, terms, zero, err))
+ if (pmu_config_term(pmu, attr, term, terms, zero, apply_hardcoded, err))
return -EINVAL;
}
@@ -1503,11 +1523,16 @@ int perf_pmu__config_terms(const struct perf_pmu *pmu,
*/
int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
struct parse_events_terms *head_terms,
+ bool apply_hardcoded,
struct parse_events_error *err)
{
bool zero = !!pmu->perf_event_attr_init_default;
- return perf_pmu__config_terms(pmu, attr, head_terms, zero, err);
+ /* Fake PMU doesn't have proper terms so nothing to configure in attr. */
+ if (perf_pmu__is_fake(pmu))
+ return 0;
+
+ return perf_pmu__config_terms(pmu, attr, head_terms, zero, apply_hardcoded, err);
}
static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
@@ -1598,7 +1623,7 @@ static int check_info_data(struct perf_pmu *pmu,
*/
int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms,
struct perf_pmu_info *info, bool *rewrote_terms,
- struct parse_events_error *err)
+ u64 *alternate_hw_config, struct parse_events_error *err)
{
struct parse_events_term *term, *h;
struct perf_pmu_alias *alias;
@@ -1615,6 +1640,15 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_
info->scale = 0.0;
info->snapshot = false;
+ if (perf_pmu__is_hwmon(pmu)) {
+ ret = hwmon_pmu__check_alias(head_terms, info, err);
+ goto out;
+ }
+
+ /* Fake PMU doesn't rewrite terms. */
+ if (perf_pmu__is_fake(pmu))
+ goto out;
+
list_for_each_entry_safe(term, h, &head_terms->terms, list) {
alias = pmu_find_alias(pmu, term);
if (!alias)
@@ -1626,6 +1660,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_
NULL);
return ret;
}
+
*rewrote_terms = true;
ret = check_info_data(pmu, alias, info, err, term->err_term);
if (ret)
@@ -1634,10 +1669,13 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_
if (alias->per_pkg)
info->per_pkg = true;
+ if (term->alternate_hw_config)
+ *alternate_hw_config = term->val.num;
+
list_del_init(&term->list);
parse_events_term__delete(term);
}
-
+out:
/*
* if no unit or scale found in aliases, then
* set defaults as for evsel
@@ -1778,6 +1816,10 @@ bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name)
{
if (!name)
return false;
+ if (perf_pmu__is_tool(pmu) && tool_pmu__skip_event(name))
+ return false;
+ if (perf_pmu__is_hwmon(pmu))
+ return hwmon_pmu__have_event(pmu, name);
if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL)
return true;
if (pmu->cpu_aliases_added || !pmu->events_table)
@@ -1789,6 +1831,9 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu)
{
size_t nr;
+ if (perf_pmu__is_hwmon(pmu))
+ return hwmon_pmu__num_events(pmu);
+
pmu_aliases_parse(pmu);
nr = pmu->sysfs_aliases + pmu->sys_json_aliases;
@@ -1799,6 +1844,9 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu)
else
assert(pmu->cpu_json_aliases == 0);
+ if (perf_pmu__is_tool(pmu))
+ nr -= tool_pmu__num_skip_events();
+
return pmu->selectable ? nr + 1 : nr;
}
@@ -1844,16 +1892,23 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus,
struct perf_pmu_alias *event;
struct pmu_event_info info = {
.pmu = pmu,
+ .event_type_desc = "Kernel PMU event",
};
int ret = 0;
struct strbuf sb;
+ if (perf_pmu__is_hwmon(pmu))
+ return hwmon_pmu__for_each_event(pmu, state, cb);
+
strbuf_init(&sb, /*hint=*/ 0);
pmu_aliases_parse(pmu);
pmu_add_cpu_aliases(pmu);
list_for_each_entry(event, &pmu->aliases, list) {
size_t buf_used, pmu_name_len;
+ if (perf_pmu__is_tool(pmu) && tool_pmu__skip_event(event->name))
+ continue;
+
info.pmu_name = event->pmu_name ?: pmu->name;
pmu_name_len = pmu_deduped_name_len(pmu, info.pmu_name,
skip_duplicate_pmus);
@@ -1936,6 +1991,7 @@ bool perf_pmu__is_software(const struct perf_pmu *pmu)
case PERF_TYPE_HW_CACHE: return false;
case PERF_TYPE_RAW: return false;
case PERF_TYPE_BREAKPOINT: return true;
+ case PERF_PMU_TYPE_TOOL: return true;
default: break;
}
for (size_t i = 0; i < ARRAY_SIZE(known_sw_pmus); i++) {
@@ -2187,11 +2243,6 @@ bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok)
(need_fnmatch && !fnmatch(tok, name, 0));
}
-double __weak perf_pmu__cpu_slots_per_cycle(void)
-{
- return NAN;
-}
-
int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size)
{
const char *sysfs = sysfs__mountpoint();
@@ -2244,6 +2295,9 @@ int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename,
void perf_pmu__delete(struct perf_pmu *pmu)
{
+ if (perf_pmu__is_hwmon(pmu))
+ hwmon_pmu__exit(pmu);
+
perf_pmu__del_formats(&pmu->format);
perf_pmu__del_aliases(pmu);
perf_pmu__del_caps(pmu);
@@ -2267,7 +2321,9 @@ const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config)
pmu_add_cpu_aliases(pmu);
list_for_each_entry(event, &pmu->aliases, list) {
struct perf_event_attr attr = {.config = 0,};
- int ret = perf_pmu__config(pmu, &attr, &event->terms, NULL);
+
+ int ret = perf_pmu__config(pmu, &attr, &event->terms, /*apply_hardcoded=*/true,
+ /*err=*/NULL);
if (ret == 0 && config == attr.config)
return event->name;
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index b2d3fd291f02..dbed6c243a5e 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -36,6 +36,13 @@ struct perf_pmu_caps {
struct list_head list;
};
+enum {
+ PERF_PMU_TYPE_HWMON_START = 0xFFFF0000,
+ PERF_PMU_TYPE_HWMON_END = 0xFFFFFFFD,
+ PERF_PMU_TYPE_TOOL = 0xFFFFFFFE,
+ PERF_PMU_TYPE_FAKE = 0xFFFFFFFF,
+};
+
/**
* struct perf_pmu
*/
@@ -165,6 +172,10 @@ struct perf_pmu {
* exclude_host.
*/
bool exclude_guest;
+ /**
+ * @checked: Are the missing features checked?
+ */
+ bool checked;
} missing_features;
/**
@@ -173,9 +184,6 @@ struct perf_pmu {
struct perf_mem_event *mem_events;
};
-/** @perf_pmu__fake: A special global PMU used for testing. */
-extern struct perf_pmu perf_pmu__fake;
-
struct perf_pmu_info {
const char *unit;
double scale;
@@ -193,6 +201,7 @@ struct pmu_event_info {
const char *encoding_desc;
const char *topic;
const char *pmu_name;
+ const char *event_type_desc;
const char *str;
bool deprecated;
};
@@ -204,16 +213,18 @@ typedef int (*pmu_format_callback)(void *state, const char *name, int config,
void pmu_add_sys_aliases(struct perf_pmu *pmu);
int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
struct parse_events_terms *head_terms,
+ bool apply_hardcoded,
struct parse_events_error *error);
int perf_pmu__config_terms(const struct perf_pmu *pmu,
struct perf_event_attr *attr,
struct parse_events_terms *terms,
- bool zero, struct parse_events_error *error);
+ bool zero, bool apply_hardcoded,
+ struct parse_events_error *error);
__u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name);
int perf_pmu__format_type(struct perf_pmu *pmu, const char *name);
int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms,
struct perf_pmu_info *info, bool *rewrote_terms,
- struct parse_events_error *err);
+ u64 *alternate_hw_config, struct parse_events_error *err);
int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb);
void perf_pmu_format__set_value(void *format, int config, unsigned long *bits);
@@ -251,8 +262,6 @@ void perf_pmu__arch_init(struct perf_pmu *pmu);
void pmu_add_cpu_aliases_table(struct perf_pmu *pmu,
const struct pmu_events_table *table);
-char *perf_pmu__getcpuid(struct perf_pmu *pmu);
-const struct pmu_metrics_table *pmu_metrics_table__find(void);
bool pmu_uncore_identifier_match(const char *compat, const char *id);
int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
@@ -266,7 +275,6 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok);
-double perf_pmu__cpu_slots_per_cycle(void);
int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size);
int perf_pmu__pathname_scnprintf(char *buf, size_t size,
const char *pmu_name, const char *filename);
@@ -278,6 +286,8 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char
struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus);
void perf_pmu__delete(struct perf_pmu *pmu);
struct perf_pmu *perf_pmus__find_core_pmu(void);
+
const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config);
+bool perf_pmu__is_fake(const struct perf_pmu *pmu);
#endif /* __PMU_H */
diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
index 3fcabfd8fca1..b493da0d22ef 100644
--- a/tools/perf/util/pmus.c
+++ b/tools/perf/util/pmus.c
@@ -15,6 +15,8 @@
#include "evsel.h"
#include "pmus.h"
#include "pmu.h"
+#include "hwmon_pmu.h"
+#include "tool_pmu.h"
#include "print-events.h"
#include "strbuf.h"
@@ -69,7 +71,7 @@ size_t pmu_name_len_no_suffix(const char *str)
int pmu_name_cmp(const char *lhs_pmu_name, const char *rhs_pmu_name)
{
- unsigned long lhs_num = 0, rhs_num = 0;
+ unsigned long long lhs_num = 0, rhs_num = 0;
size_t lhs_pmu_name_len = pmu_name_len_no_suffix(lhs_pmu_name);
size_t rhs_pmu_name_len = pmu_name_len_no_suffix(rhs_pmu_name);
int ret = strncmp(lhs_pmu_name, rhs_pmu_name,
@@ -79,9 +81,9 @@ int pmu_name_cmp(const char *lhs_pmu_name, const char *rhs_pmu_name)
return ret;
if (lhs_pmu_name_len + 1 < strlen(lhs_pmu_name))
- lhs_num = strtoul(&lhs_pmu_name[lhs_pmu_name_len + 1], NULL, 16);
+ lhs_num = strtoull(&lhs_pmu_name[lhs_pmu_name_len + 1], NULL, 16);
if (rhs_pmu_name_len + 1 < strlen(rhs_pmu_name))
- rhs_num = strtoul(&rhs_pmu_name[rhs_pmu_name_len + 1], NULL, 16);
+ rhs_num = strtoull(&rhs_pmu_name[rhs_pmu_name_len + 1], NULL, 16);
return lhs_num < rhs_num ? -1 : (lhs_num > rhs_num ? 1 : 0);
}
@@ -200,6 +202,7 @@ static void pmu_read_sysfs(bool core_only)
int fd;
DIR *dir;
struct dirent *dent;
+ struct perf_pmu *tool_pmu;
if (read_sysfs_all_pmus || (core_only && read_sysfs_core_pmus))
return;
@@ -229,6 +232,11 @@ static void pmu_read_sysfs(bool core_only)
pr_err("Failure to set up any core PMUs\n");
}
list_sort(NULL, &core_pmus, pmus_cmp);
+ if (!core_only) {
+ tool_pmu = perf_pmus__tool_pmu();
+ list_add_tail(&tool_pmu->list, &other_pmus);
+ perf_pmus__read_hwmon_pmus(&other_pmus);
+ }
list_sort(NULL, &other_pmus, pmus_cmp);
if (!list_empty(&core_pmus)) {
read_sysfs_core_pmus = true;
@@ -371,6 +379,7 @@ struct sevent {
const char *encoding_desc;
const char *topic;
const char *pmu_name;
+ const char *event_type_desc;
bool deprecated;
};
@@ -433,6 +442,7 @@ static int perf_pmus__print_pmu_events__callback(void *vstate,
pr_err("Unexpected event %s/%s/\n", info->pmu->name, info->name);
return 1;
}
+ assert(info->pmu != NULL || info->name != NULL);
s = &state->aliases[state->index];
s->pmu = info->pmu;
#define COPY_STR(str) s->str = info->str ? strdup(info->str) : NULL
@@ -444,6 +454,7 @@ static int perf_pmus__print_pmu_events__callback(void *vstate,
COPY_STR(encoding_desc);
COPY_STR(topic);
COPY_STR(pmu_name);
+ COPY_STR(event_type_desc);
#undef COPY_STR
s->deprecated = info->deprecated;
state->index++;
@@ -492,13 +503,13 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
goto free;
print_cb->print_event(print_state,
- aliases[j].pmu_name,
aliases[j].topic,
+ aliases[j].pmu_name,
aliases[j].name,
aliases[j].alias,
aliases[j].scale_unit,
aliases[j].deprecated,
- "Kernel PMU event",
+ aliases[j].event_type_desc,
aliases[j].desc,
aliases[j].long_desc,
aliases[j].encoding_desc);
@@ -511,6 +522,7 @@ free:
zfree(&aliases[j].encoding_desc);
zfree(&aliases[j].topic);
zfree(&aliases[j].pmu_name);
+ zfree(&aliases[j].event_type_desc);
}
if (printed && pager_in_use())
printf("\n");
@@ -720,3 +732,21 @@ struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name)
*/
return perf_pmu__lookup(&other_pmus, test_sysfs_dirfd, name, /*eager_load=*/true);
}
+
+struct perf_pmu *perf_pmus__add_test_hwmon_pmu(int hwmon_dir,
+ const char *sysfs_name,
+ const char *name)
+{
+ return hwmon_pmu__new(&other_pmus, hwmon_dir, sysfs_name, name);
+}
+
+struct perf_pmu *perf_pmus__fake_pmu(void)
+{
+ static struct perf_pmu fake = {
+ .name = "fake",
+ .type = PERF_PMU_TYPE_FAKE,
+ .format = LIST_HEAD_INIT(fake.format),
+ };
+
+ return &fake;
+}
diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h
index bdbff02324bb..a0cb0eb2ff97 100644
--- a/tools/perf/util/pmus.h
+++ b/tools/perf/util/pmus.h
@@ -30,5 +30,9 @@ bool perf_pmus__supports_extended_type(void);
char *perf_pmus__default_pmu_name(void);
struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name);
+struct perf_pmu *perf_pmus__add_test_hwmon_pmu(int hwmon_dir,
+ const char *sysfs_name,
+ const char *name);
+struct perf_pmu *perf_pmus__fake_pmu(void);
#endif /* __PMUS_H */
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index 3f38c27f0157..a786cbfb0ff5 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -29,6 +29,7 @@
#include "tracepoint.h"
#include "pfm.h"
#include "thread_map.h"
+#include "tool_pmu.h"
#include "util.h"
#define MAX_NAME_LEN 100
@@ -43,21 +44,6 @@ static const char * const event_type_descriptors[] = {
"Hardware breakpoint",
};
-static const struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = {
- [PERF_TOOL_DURATION_TIME] = {
- .symbol = "duration_time",
- .alias = "",
- },
- [PERF_TOOL_USER_TIME] = {
- .symbol = "user_time",
- .alias = "",
- },
- [PERF_TOOL_SYSTEM_TIME] = {
- .symbol = "system_time",
- .alias = "",
- },
-};
-
/*
* Print the events from <debugfs_mount_point>/tracing/events
*/
@@ -68,11 +54,12 @@ void print_tracepoint_events(const struct print_callbacks *print_cb __maybe_unus
struct dirent **sys_namelist = NULL;
int sys_items;
- put_tracing_file(events_path);
if (events_fd < 0) {
pr_err("Error: failed to open tracing events directory\n");
+ pr_err("%s: %s\n", events_path, strerror(errno));
return;
}
+ put_tracing_file(events_path);
sys_items = tracing_events__scandir_alphasort(&sys_namelist);
@@ -340,24 +327,6 @@ int print_hwcache_events(const struct print_callbacks *print_cb, void *print_sta
return 0;
}
-void print_tool_events(const struct print_callbacks *print_cb, void *print_state)
-{
- // Start at 1 because the first enum entry means no tool event.
- for (int i = 1; i < PERF_TOOL_MAX; ++i) {
- print_cb->print_event(print_state,
- "tool",
- /*pmu_name=*/NULL,
- event_symbols_tool[i].symbol,
- event_symbols_tool[i].alias,
- /*scale_unit=*/NULL,
- /*deprecated=*/false,
- "Tool event",
- /*desc=*/NULL,
- /*long_desc=*/NULL,
- /*encoding_desc=*/NULL);
- }
-}
-
void print_symbol_events(const struct print_callbacks *print_cb, void *print_state,
unsigned int type, const struct event_symbol *syms,
unsigned int max)
@@ -421,8 +390,6 @@ void print_events(const struct print_callbacks *print_cb, void *print_state)
print_symbol_events(print_cb, print_state, PERF_TYPE_SOFTWARE,
event_symbols_sw, PERF_COUNT_SW_MAX);
- print_tool_events(print_cb, print_state);
-
print_hwcache_events(print_cb, print_state);
perf_pmus__print_pmu_events(print_cb, print_state);
diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h
index bf4290bef0cd..445efa1636c1 100644
--- a/tools/perf/util/print-events.h
+++ b/tools/perf/util/print-events.h
@@ -36,7 +36,6 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state)
void print_symbol_events(const struct print_callbacks *print_cb, void *print_state,
unsigned int type, const struct event_symbol *syms,
unsigned int max);
-void print_tool_events(const struct print_callbacks *print_cb, void *print_state);
void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state);
bool is_event_supported(u8 type, u64 config);
diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c
index a950e9157d2d..a33a7726422d 100644
--- a/tools/perf/util/print_insn.c
+++ b/tools/perf/util/print_insn.c
@@ -32,7 +32,9 @@ size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp)
#ifdef HAVE_LIBCAPSTONE_SUPPORT
#include <capstone/capstone.h>
-static int capstone_init(struct machine *machine, csh *cs_handle, bool is64)
+int capstone_init(struct machine *machine, csh *cs_handle, bool is64, bool disassembler_style);
+
+int capstone_init(struct machine *machine, csh *cs_handle, bool is64, bool disassembler_style)
{
cs_arch arch;
cs_mode mode;
@@ -62,7 +64,13 @@ static int capstone_init(struct machine *machine, csh *cs_handle, bool is64)
}
if (machine__normalized_is(machine, "x86")) {
- cs_option(*cs_handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
+ /*
+ * When capstone_init() is called from symbol__disassemble(),
+ * CS_OPT_SYNTAX_ATT is set only if the disassembler_style
+ * option is set via the annotation args.
+ */
+ if (disassembler_style)
+ cs_option(*cs_handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
/*
* Resolving address operands to symbols is implemented
* on x86 by investigating instruction details.
@@ -122,7 +130,7 @@ ssize_t fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpum
int ret;
/* TODO: Try to initiate capstone only once but need a proper place. */
- ret = capstone_init(machine, &cs_handle, is64bit);
+ ret = capstone_init(machine, &cs_handle, is64bit, true);
if (ret < 0)
return ret;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index a17c9b8a7a79..eaa0318e9b87 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -40,6 +40,7 @@
#include "session.h"
#include "string2.h"
#include "strbuf.h"
+#include "parse-events.h"
#include <subcmd/pager.h>
#include <linux/ctype.h>
@@ -51,6 +52,9 @@
#define PERFPROBE_GROUP "probe"
+/* Defined in kernel/trace/trace.h */
+#define MAX_EVENT_NAME_LEN 64
+
bool probe_event_dry_run; /* Dry run flag */
struct probe_conf probe_conf = { .magic_num = DEFAULT_PROBE_MAGIC_NUM };
@@ -342,7 +346,7 @@ elf_err:
return mod_name;
}
-#ifdef HAVE_DWARF_SUPPORT
+#ifdef HAVE_LIBDW_SUPPORT
static int kernel_get_module_dso(const char *module, struct dso **pdso)
{
@@ -1036,6 +1040,17 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num)
return rv;
}
+static int sprint_line_description(char *sbuf, size_t size, struct line_range *lr)
+{
+ if (!lr->function)
+ return snprintf(sbuf, size, "file: %s, line: %d", lr->file, lr->start);
+
+ if (lr->file)
+ return snprintf(sbuf, size, "function: %s, file:%s, line: %d", lr->function, lr->file, lr->start);
+
+ return snprintf(sbuf, size, "function: %s, line:%d", lr->function, lr->start);
+}
+
#define show_one_line_with_num(f,l) _show_one_line(f,l,false,true)
#define show_one_line(f,l) _show_one_line(f,l,false,false)
#define skip_one_line(f,l) _show_one_line(f,l,true,false)
@@ -1065,9 +1080,12 @@ static int __show_line_range(struct line_range *lr, const char *module,
ret = debuginfo__find_line_range(dinfo, lr);
if (!ret) { /* Not found, retry with an alternative */
+ pr_debug2("Failed to find line range in debuginfo. Fallback to alternative\n");
ret = get_alternative_line_range(dinfo, lr, module, user);
if (!ret)
ret = debuginfo__find_line_range(dinfo, lr);
+ else /* Ignore error, we just failed to find it. */
+ ret = -ENOENT;
}
if (dinfo->build_id) {
build_id__init(&bid, dinfo->build_id, BUILD_ID_SIZE);
@@ -1075,7 +1093,8 @@ static int __show_line_range(struct line_range *lr, const char *module,
}
debuginfo__delete(dinfo);
if (ret == 0 || ret == -ENOENT) {
- pr_warning("Specified source line is not found.\n");
+ sprint_line_description(sbuf, sizeof(sbuf), lr);
+ pr_warning("Specified source line(%s) is not found.\n", sbuf);
return -ENOENT;
} else if (ret < 0) {
pr_warning("Debuginfo analysis failed.\n");
@@ -1250,7 +1269,7 @@ out:
return ret;
}
-#else /* !HAVE_DWARF_SUPPORT */
+#else /* !HAVE_LIBDW_SUPPORT */
static void debuginfo_cache__exit(void)
{
@@ -1343,30 +1362,39 @@ static bool is_c_func_name(const char *name)
*
* SRC[:SLN[+NUM|-ELN]]
* FNC[@SRC][:SLN[+NUM|-ELN]]
+ *
+ * FNC@SRC accepts `FNC@*`, which forces FNC to be treated as a function name.
+ * SRC and FNC can be quoted with double or single quotes.
*/
int parse_line_range_desc(const char *arg, struct line_range *lr)
{
- char *range, *file, *name = strdup(arg);
- int err;
+ char *buf = strdup(arg);
+ char *p;
+ int err = 0;
- if (!name)
+ if (!buf)
return -ENOMEM;
lr->start = 0;
lr->end = INT_MAX;
- range = strchr(name, ':');
- if (range) {
- *range++ = '\0';
+ p = strpbrk_esq(buf, ":");
+ if (p) {
+ if (p == buf) {
+ semantic_error("No file/function name in '%s'.\n", p);
+ err = -EINVAL;
+ goto err;
+ }
+ *(p++) = '\0';
- err = parse_line_num(&range, &lr->start, "start line");
+ err = parse_line_num(&p, &lr->start, "start line");
if (err)
goto err;
- if (*range == '+' || *range == '-') {
- const char c = *range++;
+ if (*p == '+' || *p == '-') {
+ const char c = *(p++);
- err = parse_line_num(&range, &lr->end, "end line");
+ err = parse_line_num(&p, &lr->end, "end line");
if (err)
goto err;
@@ -1390,34 +1418,41 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
" than end line.\n");
goto err;
}
- if (*range != '\0') {
- semantic_error("Tailing with invalid str '%s'.\n", range);
+ if (*p != '\0') {
+ semantic_error("Tailing with invalid str '%s'.\n", p);
goto err;
}
}
- file = strchr(name, '@');
- if (file) {
- *file = '\0';
- lr->file = strdup(++file);
- if (lr->file == NULL) {
- err = -ENOMEM;
+ p = strpbrk_esq(buf, "@");
+ if (p) {
+ *p++ = '\0';
+ if (strcmp(p, "*")) {
+ lr->file = strdup_esq(p);
+ if (lr->file == NULL) {
+ err = -ENOMEM;
+ goto err;
+ }
+ }
+ if (*buf != '\0')
+ lr->function = strdup_esq(buf);
+ if (!lr->function && !lr->file) {
+ semantic_error("Only '@*' is not allowed.\n");
+ err = -EINVAL;
goto err;
}
- lr->function = name;
- } else if (strchr(name, '/') || strchr(name, '.'))
- lr->file = name;
- else if (is_c_func_name(name))/* We reuse it for checking funcname */
- lr->function = name;
+ } else if (strpbrk_esq(buf, "/."))
+ lr->file = strdup_esq(buf);
+ else if (is_c_func_name(buf))/* We reuse it for checking funcname */
+ lr->function = strdup_esq(buf);
else { /* Invalid name */
- semantic_error("'%s' is not a valid function name.\n", name);
+ semantic_error("'%s' is not a valid function name.\n", buf);
err = -EINVAL;
goto err;
}
- return 0;
err:
- free(name);
+ free(buf);
return err;
}
@@ -1425,19 +1460,19 @@ static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev)
{
char *ptr;
- ptr = strpbrk_esc(*arg, ":");
+ ptr = strpbrk_esq(*arg, ":");
if (ptr) {
*ptr = '\0';
if (!pev->sdt && !is_c_func_name(*arg))
goto ng_name;
- pev->group = strdup_esc(*arg);
+ pev->group = strdup_esq(*arg);
if (!pev->group)
return -ENOMEM;
*arg = ptr + 1;
} else
pev->group = NULL;
- pev->event = strdup_esc(*arg);
+ pev->event = strdup_esq(*arg);
if (pev->event == NULL)
return -ENOMEM;
@@ -1476,7 +1511,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
arg++;
}
- ptr = strpbrk_esc(arg, ";=@+%");
+ ptr = strpbrk_esq(arg, ";=@+%");
if (pev->sdt) {
if (ptr) {
if (*ptr != '@') {
@@ -1490,7 +1525,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
pev->target = build_id_cache__origname(tmp);
free(tmp);
} else
- pev->target = strdup_esc(ptr + 1);
+ pev->target = strdup_esq(ptr + 1);
if (!pev->target)
return -ENOMEM;
*ptr = '\0';
@@ -1531,7 +1566,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
file_spec = true;
}
- ptr = strpbrk_esc(arg, ";:+@%");
+ ptr = strpbrk_esq(arg, ";:+@%");
if (ptr) {
nc = *ptr;
*ptr++ = '\0';
@@ -1540,7 +1575,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
if (arg[0] == '\0')
tmp = NULL;
else {
- tmp = strdup_esc(arg);
+ tmp = strdup_esq(arg);
if (tmp == NULL)
return -ENOMEM;
}
@@ -1578,7 +1613,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
return -ENOMEM;
break;
}
- ptr = strpbrk_esc(arg, ";:+@%");
+ ptr = strpbrk_esq(arg, ";:+@%");
if (ptr) {
nc = *ptr;
*ptr++ = '\0';
@@ -1605,7 +1640,9 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
semantic_error("SRC@SRC is not allowed.\n");
return -EINVAL;
}
- pp->file = strdup_esc(arg);
+ if (!strcmp(arg, "*"))
+ break;
+ pp->file = strdup_esq(arg);
if (pp->file == NULL)
return -ENOMEM;
break;
@@ -2757,7 +2794,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
/* Try no suffix number */
ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? "__return" : "");
if (ret < 0) {
- pr_warning("snprintf() failed: %d; the event name nbase='%s' is too long\n", ret, nbase);
+ pr_warning("snprintf() failed: %d; the event name '%s' is too long\n"
+ " Hint: Set a shorter event with syntax \"EVENT=PROBEDEF\"\n"
+ " EVENT: Event name (max length: %d bytes).\n",
+ ret, nbase, MAX_EVENT_NAME_LEN);
goto out;
}
if (!strlist__has_entry(namelist, buf))
@@ -2777,7 +2817,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
for (i = 1; i < MAX_EVENT_INDEX; i++) {
ret = e_snprintf(buf, len, "%s_%d", nbase, i);
if (ret < 0) {
- pr_debug("snprintf() failed: %d\n", ret);
+ pr_warning("Add suffix failed: %d; the event name '%s' is too long\n"
+ " Hint: Set a shorter event with syntax \"EVENT=PROBEDEF\"\n"
+ " EVENT: Event name (max length: %d bytes).\n",
+ ret, nbase, MAX_EVENT_NAME_LEN);
goto out;
}
if (!strlist__has_entry(namelist, buf))
@@ -2841,7 +2884,7 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev,
bool allow_suffix)
{
const char *event, *group;
- char buf[64];
+ char buf[MAX_EVENT_NAME_LEN];
int ret;
/* If probe_event or trace_event already have the name, reuse it */
@@ -2865,6 +2908,12 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev,
else
group = PERFPROBE_GROUP;
+ if (strlen(group) >= MAX_EVENT_NAME_LEN) {
+ pr_err("Probe group string='%s' is too long (>= %d bytes)\n",
+ group, MAX_EVENT_NAME_LEN);
+ return -ENOMEM;
+ }
+
/* Get an unused new event name */
ret = get_new_event_name(buf, sizeof(buf), event, namelist,
tev->point.retprobe, allow_suffix);
@@ -3705,59 +3754,6 @@ void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs)
}
}
-int add_perf_probe_events(struct perf_probe_event *pevs, int npevs)
-{
- int ret;
-
- ret = init_probe_symbol_maps(pevs->uprobes);
- if (ret < 0)
- return ret;
-
- ret = convert_perf_probe_events(pevs, npevs);
- if (ret == 0)
- ret = apply_perf_probe_events(pevs, npevs);
-
- cleanup_perf_probe_events(pevs, npevs);
-
- exit_probe_symbol_maps();
- return ret;
-}
-
-int del_perf_probe_events(struct strfilter *filter)
-{
- int ret, ret2, ufd = -1, kfd = -1;
- char *str = strfilter__string(filter);
-
- if (!str)
- return -EINVAL;
-
- /* Get current event names */
- ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW);
- if (ret < 0)
- goto out;
-
- ret = probe_file__del_events(kfd, filter);
- if (ret < 0 && ret != -ENOENT)
- goto error;
-
- ret2 = probe_file__del_events(ufd, filter);
- if (ret2 < 0 && ret2 != -ENOENT) {
- ret = ret2;
- goto error;
- }
- ret = 0;
-
-error:
- if (kfd >= 0)
- close(kfd);
- if (ufd >= 0)
- close(ufd);
-out:
- free(str);
-
- return ret;
-}
-
int show_available_funcs(const char *target, struct nsinfo *nsi,
struct strfilter *_filter, bool user)
{
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 7e3b6c3d1f74..61a5f4ff4e9c 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -159,7 +159,6 @@ void line_range__clear(struct line_range *lr);
/* Initialize line range */
int line_range__init(struct line_range *lr);
-int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs);
int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs);
int show_probe_trace_events(struct perf_probe_event *pevs, int npevs);
@@ -168,8 +167,6 @@ void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs);
struct strfilter;
-int del_perf_probe_events(struct strfilter *filter);
-
int show_perf_probe_event(const char *group, const char *event,
struct perf_probe_event *pev,
const char *module, bool use_stdout);
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 3d50de3217d5..ec8ac242fedb 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -366,25 +366,6 @@ int probe_file__del_strlist(int fd, struct strlist *namelist)
return ret;
}
-int probe_file__del_events(int fd, struct strfilter *filter)
-{
- struct strlist *namelist;
- int ret;
-
- namelist = strlist__new(NULL, NULL);
- if (!namelist)
- return -ENOMEM;
-
- ret = probe_file__get_events(fd, filter, namelist);
- if (ret < 0)
- goto out;
-
- ret = probe_file__del_strlist(fd, namelist);
-out:
- strlist__delete(namelist);
- return ret;
-}
-
/* Caller must ensure to remove this entry from list */
static void probe_cache_entry__delete(struct probe_cache_entry *entry)
{
diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h
index 0dba88c0f5f0..c2bb6a5b9dcc 100644
--- a/tools/perf/util/probe-file.h
+++ b/tools/perf/util/probe-file.h
@@ -44,7 +44,6 @@ struct strlist *probe_file__get_namelist(int fd);
struct strlist *probe_file__get_rawlist(int fd);
int probe_file__add_event(int fd, struct probe_trace_event *tev);
-int probe_file__del_events(int fd, struct strfilter *filter);
int probe_file__get_events(int fd, struct strfilter *filter,
struct strlist *plist);
int probe_file__del_strlist(int fd, struct strlist *namelist);
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 630e16c54ed5..7f2ee0cb43ca 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -56,7 +56,7 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs)
*/
static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
Dwarf_Op *fb_ops, Dwarf_Die *sp_die,
- unsigned int machine,
+ const struct probe_finder *pf,
struct probe_trace_arg *tvar)
{
Dwarf_Attribute attr;
@@ -166,7 +166,7 @@ static_var:
if (!tvar)
return ret2;
- regs = get_dwarf_regstr(regn, machine);
+ regs = get_dwarf_regstr(regn, pf->e_machine, pf->e_flags);
if (!regs) {
/* This should be a bug in DWARF or this tool */
pr_warning("Mapping for the register number %u "
@@ -451,7 +451,7 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
dwarf_diename(vr_die));
ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops,
- &pf->sp_die, pf->machine, pf->tvar);
+ &pf->sp_die, pf, pf->tvar);
if (ret == -ENOENT && pf->skip_empty_arg)
/* This can be found in other place. skip it */
return 0;
@@ -602,7 +602,6 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1);
if (ret <= 0 || nops == 0) {
pf->fb_ops = NULL;
-#ifdef HAVE_DWARF_CFI_SUPPORT
} else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
(pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) {
if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 &&
@@ -613,7 +612,6 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
free(frame);
return -ENOENT;
}
-#endif /* HAVE_DWARF_CFI_SUPPORT */
}
/* Call finder's callback handler */
@@ -1136,9 +1134,9 @@ static int debuginfo__find_probes(struct debuginfo *dbg,
if (gelf_getehdr(elf, &ehdr) == NULL)
return -EINVAL;
- pf->machine = ehdr.e_machine;
+ pf->e_machine = ehdr.e_machine;
+ pf->e_flags = ehdr.e_flags;
-#ifdef HAVE_DWARF_CFI_SUPPORT
do {
GElf_Shdr shdr;
@@ -1148,7 +1146,6 @@ static int debuginfo__find_probes(struct debuginfo *dbg,
pf->cfi_dbg = dwarf_getcfi(dbg->dbg);
} while (0);
-#endif /* HAVE_DWARF_CFI_SUPPORT */
ret = debuginfo__find_probe_location(dbg, pf);
return ret;
@@ -1175,7 +1172,7 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data)
(tag == DW_TAG_variable && vf->vars)) {
if (convert_variable_location(die_mem, vf->pf->addr,
vf->pf->fb_ops, &pf->sp_die,
- pf->machine, NULL) == 0) {
+ pf, /*tvar=*/NULL) == 0) {
vf->args[vf->nargs].var = (char *)dwarf_diename(die_mem);
if (vf->args[vf->nargs].var == NULL) {
vf->ret = -ENOMEM;
@@ -1379,6 +1376,8 @@ int debuginfo__find_trace_events(struct debuginfo *dbg,
if (ret >= 0 && tf.pf.skip_empty_arg)
ret = fill_empty_trace_arg(pev, tf.tevs, tf.ntevs);
+ dwarf_cfi_end(tf.pf.cfi_eh);
+
if (ret < 0 || tf.ntevs == 0) {
for (i = 0; i < tf.ntevs; i++)
clear_probe_trace_event(&tf.tevs[i]);
@@ -1404,7 +1403,7 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
tag == DW_TAG_variable) {
ret = convert_variable_location(die_mem, af->pf.addr,
af->pf.fb_ops, &af->pf.sp_die,
- af->pf.machine, NULL);
+ &af->pf, /*tvar=*/NULL);
if (ret == 0 || ret == -ERANGE) {
int ret2;
bool externs = !af->child;
@@ -1583,8 +1582,21 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr,
/* Find a corresponding function (name, baseline and baseaddr) */
if (die_find_realfunc(&cudie, (Dwarf_Addr)addr, &spdie)) {
- /* Get function entry information */
- func = basefunc = dwarf_diename(&spdie);
+ /*
+ * Get function entry information.
+ *
+ * As described in the document DWARF Debugging Information
+ * Format Version 5, section 2.22 Linkage Names, "mangled names,
+ * are used in various ways, ... to distinguish multiple
+ * entities that have the same name".
+ *
+ * First try to get the distinct linkage name; if that fails,
+ * fall back to the associated name in the DIE.
+ */
+ func = basefunc = die_get_linkage_name(&spdie);
+ if (!func)
+ func = basefunc = dwarf_diename(&spdie);
+
if (!func ||
die_entrypc(&spdie, &baseaddr) != 0 ||
dwarf_decl_line(&spdie, &baseline) != 0) {
@@ -1863,7 +1875,11 @@ int find_source_path(const char *raw_path, const char *sbuild_id,
const char *prefix = symbol_conf.source_prefix;
if (sbuild_id && !prefix) {
- if (!get_source_from_debuginfod(raw_path, sbuild_id, new_path))
+ char prefixed_raw_path[PATH_MAX];
+
+ path__join(prefixed_raw_path, sizeof(prefixed_raw_path), comp_dir, raw_path);
+
+ if (!get_source_from_debuginfod(prefixed_raw_path, sbuild_id, new_path))
return 0;
}
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 3add5ff516e1..be7b46ea2460 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -21,7 +21,7 @@ static inline int is_c_varname(const char *name)
return isalpha(name[0]) || name[0] == '_';
}
-#ifdef HAVE_DWARF_SUPPORT
+#ifdef HAVE_LIBDW_SUPPORT
#include "dwarf-aux.h"
#include "debuginfo.h"
@@ -63,14 +63,13 @@ struct probe_finder {
struct intlist *lcache; /* Line cache for lazy match */
/* For variable searching */
-#if _ELFUTILS_PREREQ(0, 142)
- /* Call Frame Information from .eh_frame */
+ /* Call Frame Information from .eh_frame. Owned by this struct. */
Dwarf_CFI *cfi_eh;
- /* Call Frame Information from .debug_frame */
+ /* Call Frame Information from .debug_frame. Not owned. */
Dwarf_CFI *cfi_dbg;
-#endif
Dwarf_Op *fb_ops; /* Frame base attribute */
- unsigned int machine; /* Target machine arch */
+ unsigned int e_machine; /* ELF target machine arch */
+ unsigned int e_flags; /* ELF target machine flags */
struct perf_probe_arg *pvar; /* Current target variable */
struct probe_trace_arg *tvar; /* Current result variable */
bool skip_empty_arg; /* Skip non-exist args */
@@ -104,6 +103,6 @@ struct line_finder {
int found;
};
-#endif /* HAVE_DWARF_SUPPORT */
+#endif /* HAVE_LIBDW_SUPPORT */
#endif /*_PROBE_FINDER_H */
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 3be882b2e845..2096cdbaa53b 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -6,7 +6,7 @@
#include <linux/err.h>
#include <perf/cpumap.h>
#ifdef HAVE_LIBTRACEEVENT
-#include <traceevent/event-parse.h>
+#include <event-parse.h>
#endif
#include <perf/mmap.h>
#include "evlist.h"
@@ -19,7 +19,9 @@
#include "util/bpf-filter.h"
#include "util/env.h"
#include "util/kvm-stat.h"
+#include "util/stat.h"
#include "util/kwork.h"
+#include "util/sample.h"
#include "util/lock-contention.h"
#include <internal/lib.h>
#include "../builtin.h"
@@ -1354,6 +1356,7 @@ error:
unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
+#ifdef HAVE_KVM_STAT_SUPPORT
bool kvm_entry_event(struct evsel *evsel __maybe_unused)
{
return false;
@@ -1383,6 +1386,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
char *decode __maybe_unused)
{
}
+#endif // HAVE_KVM_STAT_SUPPORT
int find_scripts(char **scripts_array __maybe_unused, char **scripts_path_array __maybe_unused,
int num __maybe_unused, int pathlen __maybe_unused)
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index 6fe478b0b61b..30638653ad2d 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -345,7 +345,7 @@ static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
}
color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d"
" dsdes:%d Overflow:%lld Time:%#llx\n"
- "\t\tC:%d TOD:%#lx\n",
+ "\t\tC:%d TOD:%#llx\n",
pos,
te->f ? 'F' : ' ',
te->a ? 'A' : ' ',
@@ -912,7 +912,7 @@ static int
s390_cpumsf_process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
- struct perf_tool *tool)
+ const struct perf_tool *tool)
{
struct s390_cpumsf *sf = container_of(session->auxtrace,
struct s390_cpumsf,
@@ -952,15 +952,10 @@ s390_cpumsf_process_event(struct perf_session *session,
return err;
}
-struct s390_cpumsf_synth {
- struct perf_tool cpumsf_tool;
- struct perf_session *session;
-};
-
static int
s390_cpumsf_process_auxtrace_event(struct perf_session *session,
union perf_event *event __maybe_unused,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
struct s390_cpumsf *sf = container_of(session->auxtrace,
struct s390_cpumsf,
@@ -1003,7 +998,7 @@ static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused)
}
static int s390_cpumsf_flush(struct perf_session *session __maybe_unused,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c
index 53383e97ec9d..335217bb532b 100644
--- a/tools/perf/util/s390-sample-raw.c
+++ b/tools/perf/util/s390-sample-raw.c
@@ -98,12 +98,12 @@ static void s390_cpumcfdg_dumptrail(const char *color, size_t offset,
te.res2 = be32_to_cpu(tep->res2);
color_fprintf(stdout, color, " [%#08zx] Trailer:%c%c%c%c%c"
- " Cfvn:%d Csvn:%d Speed:%d TOD:%#llx\n",
+ " Cfvn:%d Csvn:%d Speed:%d TOD:%#lx\n",
offset, te.clock_base ? 'T' : ' ',
te.speed ? 'S' : ' ', te.mtda ? 'M' : ' ',
te.caca ? 'C' : ' ', te.lcda ? 'L' : ' ',
te.cfvn, te.csvn, te.cpu_speed, te.timestamp);
- color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#llx"
+ color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#lx"
" Type:%x\n\n",
te.progusage1, te.progusage2, te.progusage3,
te.tod_base, te.mach_type);
@@ -205,7 +205,7 @@ static void s390_cpumcfdg_dump(struct perf_pmu *pmu, struct perf_sample *sample)
char *ev_name = get_counter_name(ce.set, i, pmu);
color_fprintf(stdout, color,
- "\tCounter:%03d %s Value:%#018lx\n", i,
+ "\tCounter:%03zd %s Value:%#018"PRIx64"\n", i,
ev_name ?: "<unknown>", be64_to_cpu(*p));
free(ev_name);
}
@@ -260,7 +260,7 @@ static void s390_pai_all_dump(struct evsel *evsel, struct perf_sample *sample)
ev_name = get_counter_name(evsel->core.attr.config,
pai_data.event_nr, evsel->pmu);
- color_fprintf(stdout, color, "\tCounter:%03d %s Value:%#018lx\n",
+ color_fprintf(stdout, color, "\tCounter:%03d %s Value:%#018"PRIx64"\n",
pai_data.event_nr, ev_name ?: "<unknown>",
pai_data.event_val);
free(ev_name);
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index e16257d5ab2c..85b7f188f729 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -27,7 +27,7 @@
#include <errno.h>
#include <linux/bitmap.h>
#include <linux/time64.h>
-#include <traceevent/event-parse.h>
+#include <event-parse.h>
#include <stdbool.h>
/* perl needs the following define, right after including stdbool.h */
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index fb00f3ad6815..8bdae066e839 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -31,7 +31,7 @@
#include <linux/compiler.h>
#include <linux/time64.h>
#ifdef HAVE_LIBTRACEEVENT
-#include <traceevent/event-parse.h>
+#include <event-parse.h>
#endif
#include "../build-id.h"
@@ -762,6 +762,8 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, ch
}
}
+#define MAX_REG_SIZE 128
+
static int set_regs_in_dict(PyObject *dict,
struct perf_sample *sample,
struct evsel *evsel)
@@ -769,14 +771,7 @@ static int set_regs_in_dict(PyObject *dict,
struct perf_event_attr *attr = &evsel->core.attr;
const char *arch = perf_env__arch(evsel__env(evsel));
- /*
- * Here value 28 is a constant size which can be used to print
- * one register value and its corresponds to:
- * 16 chars is to specify 64 bit register in hexadecimal.
- * 2 chars is for appending "0x" to the hexadecimal value and
- * 10 chars is for register name.
- */
- int size = __sw_hweight64(attr->sample_regs_intr) * 28;
+ int size = (__sw_hweight64(attr->sample_regs_intr) * MAX_REG_SIZE) + 1;
char *bf = malloc(size);
if (!bf)
return -1;
@@ -798,7 +793,8 @@ static int set_regs_in_dict(PyObject *dict,
static void set_sym_in_dict(PyObject *dict, struct addr_location *al,
const char *dso_field, const char *dso_bid_field,
const char *dso_map_start, const char *dso_map_end,
- const char *sym_field, const char *symoff_field)
+ const char *sym_field, const char *symoff_field,
+ const char *map_pgoff)
{
char sbuild_id[SBUILD_ID_SIZE];
@@ -814,6 +810,8 @@ static void set_sym_in_dict(PyObject *dict, struct addr_location *al,
PyLong_FromUnsignedLong(map__start(al->map)));
pydict_set_item_string_decref(dict, dso_map_end,
PyLong_FromUnsignedLong(map__end(al->map)));
+ pydict_set_item_string_decref(dict, map_pgoff,
+ PyLong_FromUnsignedLongLong(map__pgoff(al->map)));
}
if (al->sym) {
pydict_set_item_string_decref(dict, sym_field,
@@ -888,6 +886,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
set_sample_read_in_dict(dict_sample, sample, evsel);
pydict_set_item_string_decref(dict_sample, "weight",
PyLong_FromUnsignedLongLong(sample->weight));
+ pydict_set_item_string_decref(dict_sample, "ins_lat",
+ PyLong_FromUnsignedLong(sample->ins_lat));
pydict_set_item_string_decref(dict_sample, "transaction",
PyLong_FromUnsignedLongLong(sample->transaction));
set_sample_datasrc_in_dict(dict_sample, sample);
@@ -898,7 +898,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
pydict_set_item_string_decref(dict, "comm",
_PyUnicode_FromString(thread__comm_str(al->thread)));
set_sym_in_dict(dict, al, "dso", "dso_bid", "dso_map_start", "dso_map_end",
- "symbol", "symoff");
+ "symbol", "symoff", "map_pgoff");
pydict_set_item_string_decref(dict, "callchain", callchain);
@@ -923,7 +923,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
PyBool_FromLong(1));
set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_dso_bid",
"addr_dso_map_start", "addr_dso_map_end",
- "addr_symbol", "addr_symoff");
+ "addr_symbol", "addr_symoff", "addr_map_pgoff");
}
if (sample->flags)
@@ -1317,7 +1317,7 @@ static void python_export_sample_table(struct db_export *dbe,
struct tables *tables = container_of(dbe, struct tables, dbe);
PyObject *t;
- t = tuple_new(27);
+ t = tuple_new(28);
tuple_set_d64(t, 0, es->db_id);
tuple_set_d64(t, 1, es->evsel->db_id);
@@ -1346,6 +1346,7 @@ static void python_export_sample_table(struct db_export *dbe,
tuple_set_s32(t, 24, es->sample->flags);
tuple_set_d64(t, 25, es->sample->id);
tuple_set_d64(t, 26, es->sample->stream_id);
+ tuple_set_u32(t, 27, es->sample->ins_lat);
call_object(tables->sample_handler, t, "sample_table");
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 5596bed1b8c8..507e6cba9545 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -36,81 +36,20 @@
#include "util.h"
#include "arch/common.h"
#include "units.h"
+#include "annotate.h"
#include <internal/lib.h>
-#ifdef HAVE_ZSTD_SUPPORT
-static int perf_session__process_compressed_event(struct perf_session *session,
- union perf_event *event, u64 file_offset,
- const char *file_path)
-{
- void *src;
- size_t decomp_size, src_size;
- u64 decomp_last_rem = 0;
- size_t mmap_len, decomp_len = session->header.env.comp_mmap_len;
- struct decomp *decomp, *decomp_last = session->active_decomp->decomp_last;
-
- if (decomp_last) {
- decomp_last_rem = decomp_last->size - decomp_last->head;
- decomp_len += decomp_last_rem;
- }
-
- mmap_len = sizeof(struct decomp) + decomp_len;
- decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE,
- MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- if (decomp == MAP_FAILED) {
- pr_err("Couldn't allocate memory for decompression\n");
- return -1;
- }
-
- decomp->file_pos = file_offset;
- decomp->file_path = file_path;
- decomp->mmap_len = mmap_len;
- decomp->head = 0;
-
- if (decomp_last_rem) {
- memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
- decomp->size = decomp_last_rem;
- }
-
- src = (void *)event + sizeof(struct perf_record_compressed);
- src_size = event->pack.header.size - sizeof(struct perf_record_compressed);
-
- decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size,
- &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
- if (!decomp_size) {
- munmap(decomp, mmap_len);
- pr_err("Couldn't decompress data\n");
- return -1;
- }
-
- decomp->size += decomp_size;
-
- if (session->active_decomp->decomp == NULL)
- session->active_decomp->decomp = decomp;
- else
- session->active_decomp->decomp_last->next = decomp;
-
- session->active_decomp->decomp_last = decomp;
-
- pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size);
-
- return 0;
-}
-#else /* !HAVE_ZSTD_SUPPORT */
-#define perf_session__process_compressed_event perf_session__process_compressed_event_stub
-#endif
-
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
- struct perf_tool *tool,
+ const struct perf_tool *tool,
u64 file_offset,
const char *file_path);
-static int perf_session__open(struct perf_session *session, int repipe_fd)
+static int perf_session__open(struct perf_session *session)
{
struct perf_data *data = session->data;
- if (perf_session__read_header(session, repipe_fd) < 0) {
+ if (perf_session__read_header(session) < 0) {
pr_err("incompatible file format (rerun with -v to learn more)\n");
return -1;
}
@@ -196,8 +135,8 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
}
struct perf_session *__perf_session__new(struct perf_data *data,
- bool repipe, int repipe_fd,
- struct perf_tool *tool)
+ struct perf_tool *tool,
+ bool trace_event_repipe)
{
int ret = -ENOMEM;
struct perf_session *session = zalloc(sizeof(*session));
@@ -205,7 +144,7 @@ struct perf_session *__perf_session__new(struct perf_data *data,
if (!session)
goto out;
- session->repipe = repipe;
+ session->trace_event_repipe = trace_event_repipe;
session->tool = tool;
session->decomp_data.zstd_decomp = &session->zstd_data;
session->active_decomp = &session->decomp_data;
@@ -223,7 +162,7 @@ struct perf_session *__perf_session__new(struct perf_data *data,
session->data = data;
if (perf_data__is_read(data)) {
- ret = perf_session__open(session, repipe_fd);
+ ret = perf_session__open(session);
if (ret < 0)
goto out_delete;
@@ -304,6 +243,7 @@ void perf_session__delete(struct perf_session *session)
return;
auxtrace__free(session);
auxtrace_index__free(&session->auxtrace_index);
+ debuginfo_cache__delete();
perf_session__destroy_kernel_maps(session);
perf_decomp__release_events(session->decomp_data.decomp);
perf_env__exit(&session->header.env);
@@ -319,251 +259,6 @@ void perf_session__delete(struct perf_session *session)
free(session);
}
-static int process_event_synth_tracing_data_stub(struct perf_session *session
- __maybe_unused,
- union perf_event *event
- __maybe_unused)
-{
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int process_event_synth_attr_stub(struct perf_tool *tool __maybe_unused,
- union perf_event *event __maybe_unused,
- struct evlist **pevlist
- __maybe_unused)
-{
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int process_event_synth_event_update_stub(struct perf_tool *tool __maybe_unused,
- union perf_event *event __maybe_unused,
- struct evlist **pevlist
- __maybe_unused)
-{
- if (dump_trace)
- perf_event__fprintf_event_update(event, stdout);
-
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int process_event_sample_stub(struct perf_tool *tool __maybe_unused,
- union perf_event *event __maybe_unused,
- struct perf_sample *sample __maybe_unused,
- struct evsel *evsel __maybe_unused,
- struct machine *machine __maybe_unused)
-{
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int process_event_stub(struct perf_tool *tool __maybe_unused,
- union perf_event *event __maybe_unused,
- struct perf_sample *sample __maybe_unused,
- struct machine *machine __maybe_unused)
-{
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int process_finished_round_stub(struct perf_tool *tool __maybe_unused,
- union perf_event *event __maybe_unused,
- struct ordered_events *oe __maybe_unused)
-{
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int skipn(int fd, off_t n)
-{
- char buf[4096];
- ssize_t ret;
-
- while (n > 0) {
- ret = read(fd, buf, min(n, (off_t)sizeof(buf)));
- if (ret <= 0)
- return ret;
- n -= ret;
- }
-
- return 0;
-}
-
-static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unused,
- union perf_event *event)
-{
- dump_printf(": unhandled!\n");
- if (perf_data__is_pipe(session->data))
- skipn(perf_data__fd(session->data), event->auxtrace.size);
- return event->auxtrace.size;
-}
-
-static int process_event_op2_stub(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused)
-{
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-
-static
-int process_event_thread_map_stub(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused)
-{
- if (dump_trace)
- perf_event__fprintf_thread_map(event, stdout);
-
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static
-int process_event_cpu_map_stub(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused)
-{
- if (dump_trace)
- perf_event__fprintf_cpu_map(event, stdout);
-
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static
-int process_event_stat_config_stub(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused)
-{
- if (dump_trace)
- perf_event__fprintf_stat_config(event, stdout);
-
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int process_stat_stub(struct perf_session *perf_session __maybe_unused,
- union perf_event *event)
-{
- if (dump_trace)
- perf_event__fprintf_stat(event, stdout);
-
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int process_stat_round_stub(struct perf_session *perf_session __maybe_unused,
- union perf_event *event)
-{
- if (dump_trace)
- perf_event__fprintf_stat_round(event, stdout);
-
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int process_event_time_conv_stub(struct perf_session *perf_session __maybe_unused,
- union perf_event *event)
-{
- if (dump_trace)
- perf_event__fprintf_time_conv(event, stdout);
-
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused,
- u64 file_offset __maybe_unused,
- const char *file_path __maybe_unused)
-{
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-void perf_tool__fill_defaults(struct perf_tool *tool)
-{
- if (tool->sample == NULL)
- tool->sample = process_event_sample_stub;
- if (tool->mmap == NULL)
- tool->mmap = process_event_stub;
- if (tool->mmap2 == NULL)
- tool->mmap2 = process_event_stub;
- if (tool->comm == NULL)
- tool->comm = process_event_stub;
- if (tool->namespaces == NULL)
- tool->namespaces = process_event_stub;
- if (tool->cgroup == NULL)
- tool->cgroup = process_event_stub;
- if (tool->fork == NULL)
- tool->fork = process_event_stub;
- if (tool->exit == NULL)
- tool->exit = process_event_stub;
- if (tool->lost == NULL)
- tool->lost = perf_event__process_lost;
- if (tool->lost_samples == NULL)
- tool->lost_samples = perf_event__process_lost_samples;
- if (tool->aux == NULL)
- tool->aux = perf_event__process_aux;
- if (tool->itrace_start == NULL)
- tool->itrace_start = perf_event__process_itrace_start;
- if (tool->context_switch == NULL)
- tool->context_switch = perf_event__process_switch;
- if (tool->ksymbol == NULL)
- tool->ksymbol = perf_event__process_ksymbol;
- if (tool->bpf == NULL)
- tool->bpf = perf_event__process_bpf;
- if (tool->text_poke == NULL)
- tool->text_poke = perf_event__process_text_poke;
- if (tool->aux_output_hw_id == NULL)
- tool->aux_output_hw_id = perf_event__process_aux_output_hw_id;
- if (tool->read == NULL)
- tool->read = process_event_sample_stub;
- if (tool->throttle == NULL)
- tool->throttle = process_event_stub;
- if (tool->unthrottle == NULL)
- tool->unthrottle = process_event_stub;
- if (tool->attr == NULL)
- tool->attr = process_event_synth_attr_stub;
- if (tool->event_update == NULL)
- tool->event_update = process_event_synth_event_update_stub;
- if (tool->tracing_data == NULL)
- tool->tracing_data = process_event_synth_tracing_data_stub;
- if (tool->build_id == NULL)
- tool->build_id = process_event_op2_stub;
- if (tool->finished_round == NULL) {
- if (tool->ordered_events)
- tool->finished_round = perf_event__process_finished_round;
- else
- tool->finished_round = process_finished_round_stub;
- }
- if (tool->id_index == NULL)
- tool->id_index = process_event_op2_stub;
- if (tool->auxtrace_info == NULL)
- tool->auxtrace_info = process_event_op2_stub;
- if (tool->auxtrace == NULL)
- tool->auxtrace = process_event_auxtrace_stub;
- if (tool->auxtrace_error == NULL)
- tool->auxtrace_error = process_event_op2_stub;
- if (tool->thread_map == NULL)
- tool->thread_map = process_event_thread_map_stub;
- if (tool->cpu_map == NULL)
- tool->cpu_map = process_event_cpu_map_stub;
- if (tool->stat_config == NULL)
- tool->stat_config = process_event_stat_config_stub;
- if (tool->stat == NULL)
- tool->stat = process_stat_stub;
- if (tool->stat_round == NULL)
- tool->stat_round = process_stat_round_stub;
- if (tool->time_conv == NULL)
- tool->time_conv = process_event_time_conv_stub;
- if (tool->feature == NULL)
- tool->feature = process_event_op2_stub;
- if (tool->compressed == NULL)
- tool->compressed = perf_session__process_compressed_event;
- if (tool->finished_init == NULL)
- tool->finished_init = process_event_op2_stub;
-}
-
static void swap_sample_id_all(union perf_event *event, void *data)
{
void *end = (void *) event + event->header.size;
@@ -1076,7 +771,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
* Flush every events below timestamp 7
* etc...
*/
-int perf_event__process_finished_round(struct perf_tool *tool __maybe_unused,
+int perf_event__process_finished_round(const struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
struct ordered_events *oe)
{
@@ -1161,7 +856,6 @@ static void branch_stack__printf(struct perf_sample *sample,
struct branch_entry *entries = perf_sample__branch_entries(sample);
bool callstack = evsel__has_branch_callstack(evsel);
u64 *branch_stack_cntr = sample->branch_stack_cntr;
- struct perf_env *env = evsel__env(evsel);
uint64_t i;
if (!callstack) {
@@ -1205,8 +899,11 @@ static void branch_stack__printf(struct perf_sample *sample,
}
if (branch_stack_cntr) {
+ unsigned int br_cntr_width, br_cntr_nr;
+
+ perf_env__find_br_cntr_info(evsel__env(evsel), &br_cntr_nr, &br_cntr_width);
printf("... branch stack counters: nr:%" PRIu64 " (counter width: %u max counter nr:%u)\n",
- sample->branch_stack->nr, env->br_cntr_width, env->br_cntr_nr);
+ sample->branch_stack->nr, br_cntr_width, br_cntr_nr);
for (i = 0; i < sample->branch_stack->nr; i++)
printf("..... %2"PRIu64": %016" PRIx64 "\n", i, branch_stack_cntr[i]);
}
@@ -1470,22 +1167,28 @@ static struct machine *machines__find_for_cpumode(struct machines *machines,
}
static int deliver_sample_value(struct evlist *evlist,
- struct perf_tool *tool,
+ const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct sample_read_value *v,
- struct machine *machine)
+ struct machine *machine,
+ bool per_thread)
{
struct perf_sample_id *sid = evlist__id2sid(evlist, v->id);
struct evsel *evsel;
+ u64 *storage = NULL;
if (sid) {
+ storage = perf_sample_id__get_period_storage(sid, sample->tid, per_thread);
+ }
+
+ if (storage) {
sample->id = v->id;
- sample->period = v->value - sid->period;
- sid->period = v->value;
+ sample->period = v->value - *storage;
+ *storage = v->value;
}
- if (!sid || sid->evsel == NULL) {
+ if (!storage || sid->evsel == NULL) {
++evlist->stats.nr_unknown_id;
return 0;
}
@@ -1502,18 +1205,23 @@ static int deliver_sample_value(struct evlist *evlist,
}
static int deliver_sample_group(struct evlist *evlist,
- struct perf_tool *tool,
+ const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine,
- u64 read_format)
+ u64 read_format,
+ bool per_thread)
{
int ret = -EINVAL;
struct sample_read_value *v = sample->read.group.values;
+ if (tool->dont_split_sample_group)
+ return deliver_sample_value(evlist, tool, event, sample, v, machine,
+ per_thread);
+
sample_read_group__for_each(v, sample->read.group.nr, read_format) {
ret = deliver_sample_value(evlist, tool, event, sample, v,
- machine);
+ machine, per_thread);
if (ret)
break;
}
@@ -1521,13 +1229,14 @@ static int deliver_sample_group(struct evlist *evlist,
return ret;
}
-static int evlist__deliver_sample(struct evlist *evlist, struct perf_tool *tool,
+static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool *tool,
union perf_event *event, struct perf_sample *sample,
struct evsel *evsel, struct machine *machine)
{
/* We know evsel != NULL. */
u64 sample_type = evsel->core.attr.sample_type;
u64 read_format = evsel->core.attr.read_format;
+ bool per_thread = perf_evsel__attr_has_per_thread_sample_period(&evsel->core);
/* Standard sample delivery. */
if (!(sample_type & PERF_SAMPLE_READ))
@@ -1536,17 +1245,18 @@ static int evlist__deliver_sample(struct evlist *evlist, struct perf_tool *tool,
/* For PERF_SAMPLE_READ we have either single or group mode. */
if (read_format & PERF_FORMAT_GROUP)
return deliver_sample_group(evlist, tool, event, sample,
- machine, read_format);
+ machine, read_format, per_thread);
else
return deliver_sample_value(evlist, tool, event, sample,
- &sample->read.one, machine);
+ &sample->read.one, machine,
+ per_thread);
}
static int machines__deliver_event(struct machines *machines,
struct evlist *evlist,
union perf_event *event,
struct perf_sample *sample,
- struct perf_tool *tool, u64 file_offset,
+ const struct perf_tool *tool, u64 file_offset,
const char *file_path)
{
struct evsel *evsel;
@@ -1592,8 +1302,9 @@ static int machines__deliver_event(struct machines *machines,
evlist->stats.total_lost += event->lost.lost;
return tool->lost(tool, event, sample, machine);
case PERF_RECORD_LOST_SAMPLES:
- if (tool->lost_samples == perf_event__process_lost_samples &&
- !(event->header.misc & PERF_RECORD_MISC_LOST_SAMPLES_BPF))
+ if (event->header.misc & PERF_RECORD_MISC_LOST_SAMPLES_BPF)
+ evlist->stats.total_dropped_samples += event->lost_samples.lost;
+ else if (tool->lost_samples == perf_event__process_lost_samples)
evlist->stats.total_lost_samples += event->lost_samples.lost;
return tool->lost_samples(tool, event, sample, machine);
case PERF_RECORD_READ:
@@ -1634,7 +1345,7 @@ static int machines__deliver_event(struct machines *machines,
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
- struct perf_tool *tool,
+ const struct perf_tool *tool,
u64 file_offset,
const char *file_path)
{
@@ -1667,13 +1378,12 @@ static s64 perf_session__process_user_event(struct perf_session *session,
const char *file_path)
{
struct ordered_events *oe = &session->ordered_events;
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
struct perf_sample sample = { .time = 0, };
int fd = perf_data__fd(session->data);
int err;
- if (event->header.type != PERF_RECORD_COMPRESSED ||
- tool->compressed == perf_session__process_compressed_event_stub)
+ if (event->header.type != PERF_RECORD_COMPRESSED || perf_tool__compressed_is_stub(tool))
dump_event(session->evlist, event, file_offset, &sample, file_path);
/* These events are processed right away */
@@ -1754,7 +1464,7 @@ int perf_session__deliver_synth_event(struct perf_session *session,
struct perf_sample *sample)
{
struct evlist *evlist = session->evlist;
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
events_stats__inc(&evlist->stats, event->header.type);
@@ -1764,6 +1474,30 @@ int perf_session__deliver_synth_event(struct perf_session *session,
return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0, NULL);
}
+int perf_session__deliver_synth_attr_event(struct perf_session *session,
+ const struct perf_event_attr *attr,
+ u64 id)
+{
+ union {
+ struct {
+ struct perf_record_header_attr attr;
+ u64 ids[1];
+ } attr_id;
+ union perf_event ev;
+ } ev = {
+ .attr_id.attr.header.type = PERF_RECORD_HEADER_ATTR,
+ .attr_id.attr.header.size = sizeof(ev.attr_id),
+ .attr_id.ids[0] = id,
+ };
+
+ if (attr->size != sizeof(ev.attr_id.attr.attr)) {
+ pr_debug("Unexpected perf_event_attr size\n");
+ return -EINVAL;
+ }
+ ev.attr_id.attr.attr = *attr;
+ return perf_session__deliver_synth_event(session, &ev.ev, NULL);
+}
+
static void event_swap(union perf_event *event, bool sample_id_all)
{
perf_event__swap_op swap;
@@ -1862,7 +1596,7 @@ static s64 perf_session__process_event(struct perf_session *session,
const char *file_path)
{
struct evlist *evlist = session->evlist;
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
int ret;
if (session->header.needs_swap)
@@ -2049,7 +1783,7 @@ static int __perf_session__process_decomp_events(struct perf_session *session);
static int __perf_session__process_pipe_events(struct perf_session *session)
{
struct ordered_events *oe = &session->ordered_events;
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
struct ui_progress prog;
union perf_event *event;
uint32_t size, cur_size = 0;
@@ -2060,8 +1794,6 @@ static int __perf_session__process_pipe_events(struct perf_session *session)
void *p;
bool update_prog = false;
- perf_tool__fill_defaults(tool);
-
/*
* If it's from a file saving pipe data (by redirection), it would have
* a file name other than "-". Then we can get the total size and show
@@ -2481,12 +2213,10 @@ static int __perf_session__process_events(struct perf_session *session)
.in_place_update = session->data->in_place_update,
};
struct ordered_events *oe = &session->ordered_events;
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
struct ui_progress prog;
int err;
- perf_tool__fill_defaults(tool);
-
if (rd.data_size == 0)
return -1;
@@ -2533,14 +2263,12 @@ out_err:
static int __perf_session__process_dir_events(struct perf_session *session)
{
struct perf_data *data = session->data;
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
int i, ret, readers, nr_readers;
struct ui_progress prog;
u64 total_size = perf_data__size(session->data);
struct reader *rd;
- perf_tool__fill_defaults(tool);
-
ui_progress__init_size(&prog, total_size, "Processing events...");
nr_readers = 1;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 4c29dc86956f..bcf1bcf06959 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -26,26 +26,68 @@ struct decomp_data {
struct zstd_data *zstd_decomp;
};
+/**
+ * struct perf_session - A perf session holds the main state when the program is
+ * working with live perf events or reading data from an input file.
+ *
+ * The rough organization of a perf_session is:
+ * ```
+ * +--------------+ +-----------+ +------------+
+ * | Session |1..* ----->| Machine |1..* ----->| Thread |
+ * +--------------+ +-----------+ +------------+
+ * ```
+ */
struct perf_session {
+ /**
+ * @header: The read version of a perf_file_header, or captures global
+ * information from a live session.
+ */
struct perf_header header;
+ /** @machines: Machines within the session: a host and 0 or more guests. */
struct machines machines;
+ /** @evlist: List of evsels/events of the session. */
struct evlist *evlist;
- struct auxtrace *auxtrace;
+ /** @auxtrace: callbacks to allow AUX area data decoding. */
+ const struct auxtrace *auxtrace;
+ /** @itrace_synth_opts: AUX area tracing synthesis options. */
struct itrace_synth_opts *itrace_synth_opts;
+ /** @auxtrace_index: index of AUX area tracing events within a perf.data file. */
struct list_head auxtrace_index;
#ifdef HAVE_LIBTRACEEVENT
+ /** @tevent: handles for libtraceevent and plugins. */
struct trace_event tevent;
#endif
+ /** @time_conv: Holds contents of last PERF_RECORD_TIME_CONV event. */
struct perf_record_time_conv time_conv;
- bool repipe;
+ /** @trace_event_repipe: When set causes read trace events to be written to stdout. */
+ bool trace_event_repipe;
+ /**
+ * @one_mmap: The reader will use a single mmap by default. There may be
+ * multiple data files in particular for aux events. If this is true
+ * then the single big mmap for the data file can be assumed.
+ */
bool one_mmap;
+ /** @one_mmap_addr: Address of initial perf data file reader mmap. */
void *one_mmap_addr;
+ /** @one_mmap_offset: File offset in perf.data file when mapped. */
u64 one_mmap_offset;
+ /** @ordered_events: Used to turn unordered events into ordered ones. */
struct ordered_events ordered_events;
+ /** @data: Optional perf data file being read from. */
struct perf_data *data;
- struct perf_tool *tool;
+ /** @tool: callbacks for event handling. */
+ const struct perf_tool *tool;
+ /**
+ * @bytes_transferred: Used by perf record to count written bytes before
+ * compression.
+ */
u64 bytes_transferred;
+ /**
+ * @bytes_compressed: Used by perf record to count written bytes after
+ * compression.
+ */
u64 bytes_compressed;
+ /** @zstd_data: Owner of global compression state, buffers, etc. */
struct zstd_data zstd_data;
struct decomp_data decomp_data;
struct decomp_data *active_decomp;
@@ -64,13 +106,13 @@ struct decomp {
struct perf_tool;
struct perf_session *__perf_session__new(struct perf_data *data,
- bool repipe, int repipe_fd,
- struct perf_tool *tool);
+ struct perf_tool *tool,
+ bool trace_event_repipe);
static inline struct perf_session *perf_session__new(struct perf_data *data,
struct perf_tool *tool)
{
- return __perf_session__new(data, false, -1, tool);
+ return __perf_session__new(data, tool, /*trace_event_repipe=*/false);
}
void perf_session__delete(struct perf_session *session);
@@ -92,8 +134,6 @@ int perf_session__process_events(struct perf_session *session);
int perf_session__queue_event(struct perf_session *s, union perf_event *event,
u64 timestamp, u64 file_offset, const char *file_path);
-void perf_tool__fill_defaults(struct perf_tool *tool);
-
int perf_session__resolve_callchain(struct perf_session *session,
struct evsel *evsel,
struct thread *thread,
@@ -154,13 +194,16 @@ extern volatile int session_done;
int perf_session__deliver_synth_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample);
+int perf_session__deliver_synth_attr_event(struct perf_session *session,
+ const struct perf_event_attr *attr,
+ u64 id);
int perf_session__dsos_hit_all(struct perf_session *session);
int perf_event__process_id_index(struct perf_session *session,
union perf_event *event);
-int perf_event__process_finished_round(struct perf_tool *tool,
+int perf_event__process_finished_round(const struct perf_tool *tool,
union perf_event *event,
struct ordered_events *oe);
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 142e9d447ce7..649550e9b7aa 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -17,7 +17,7 @@ src_feature_tests = getenv('srctree') + '/tools/build/feature'
def clang_has_option(option):
cc_output = Popen([cc, cc_options + option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines()
- return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ]
+ return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o) or (b"unknown warning option" in o))] == [ ]
if cc_is_clang:
from sysconfig import get_config_vars
@@ -63,6 +63,8 @@ cflags = getenv('CFLAGS', '').split()
cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ]
if cc_is_clang:
cflags += ["-Wno-unused-command-line-argument" ]
+ if clang_has_option("-Wno-cast-function-type-mismatch"):
+ cflags += ["-Wno-cast-function-type-mismatch" ]
else:
cflags += ['-Wno-cast-function-type' ]
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index ab7c7ff35f9b..9dd60c7869a2 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -35,7 +35,7 @@
#include <linux/string.h>
#ifdef HAVE_LIBTRACEEVENT
-#include <traceevent/event-parse.h>
+#include <event-parse.h>
#endif
regex_t parent_regex;
@@ -677,6 +677,102 @@ struct sort_entry sort_sym_ipc_null = {
.se_width_idx = HISTC_SYMBOL_IPC,
};
+/* --sort callchain_branch_predicted */
+
+/*
+ * Comparison callback for the "Predicted" column. Neither entry is
+ * inspected: every pair of entries compares as equal (0), so this key never
+ * contributes to the ordering by itself.
+ */
+static int64_t
+sort__callchain_branch_predicted_cmp(struct hist_entry *left __maybe_unused,
+ struct hist_entry *right __maybe_unused)
+{
+ return 0;
+}
+
+/*
+ * Print the "Predicted" column for @he into @bf (at most @size bytes).
+ *
+ * The branch and predicted-branch totals are obtained from
+ * callchain_branch_counts() for he->callchain and rendered as a percentage
+ * with one decimal place; "0.0%" is printed when the branch total is zero.
+ * The text is left-justified and padded/truncated to @width characters.
+ *
+ * Returns whatever repsep_snprintf() returns.
+ */
+static int hist_entry__callchain_branch_predicted_snprintf(
+	struct hist_entry *he, char *bf, size_t size, unsigned int width)
+{
+	char text[32];
+	u64 nr_branches, nr_predicted;
+	double pct;
+
+	callchain_branch_counts(he->callchain, &nr_branches,
+				&nr_predicted, NULL, NULL);
+
+	/* Avoid dividing by zero when no branches were recorded. */
+	pct = nr_branches ? nr_predicted * 100.0 / nr_branches : 0.0;
+
+	snprintf(text, sizeof(text), "%.1f%%", pct);
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, text);
+}
+
+/*
+ * Sort entry for the "Predicted" column: wires the constant comparator and
+ * the percentage formatter above to the HISTC_CALLCHAIN_BRANCH_PREDICTED
+ * column-width slot.
+ */
+struct sort_entry sort_callchain_branch_predicted = {
+ .se_header = "Predicted",
+ .se_cmp = sort__callchain_branch_predicted_cmp,
+ .se_snprintf = hist_entry__callchain_branch_predicted_snprintf,
+ .se_width_idx = HISTC_CALLCHAIN_BRANCH_PREDICTED,
+};
+
+/* --sort callchain_branch_abort */
+
+/*
+ * Comparison callback for the "Abort" column. Neither entry is inspected:
+ * every pair of entries compares as equal (0).
+ */
+static int64_t
+sort__callchain_branch_abort_cmp(struct hist_entry *left __maybe_unused,
+ struct hist_entry *right __maybe_unused)
+{
+ return 0;
+}
+
+/*
+ * Print the "Abort" column for @he into @bf (at most @size bytes).
+ *
+ * The abort total is obtained from callchain_branch_counts() for
+ * he->callchain and printed as an unsigned decimal, left-justified and
+ * padded/truncated to @width characters.
+ *
+ * Returns whatever repsep_snprintf() returns.
+ */
+static int hist_entry__callchain_branch_abort_snprintf(struct hist_entry *he,
+							char *bf, size_t size,
+							unsigned int width)
+{
+	/*
+	 * Zero-initialised so a defined value is printed even if the helper
+	 * leaves the outputs untouched (its contract is not visible here).
+	 */
+	u64 branch_count = 0, abort_count = 0;
+	char str[32];
+
+	callchain_branch_counts(he->callchain, &branch_count,
+				NULL, &abort_count, NULL);
+
+	/* abort_count is a u64: use the unsigned conversion, not PRId64. */
+	snprintf(str, sizeof(str), "%" PRIu64, abort_count);
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, str);
+}
+
+/*
+ * Sort entry for the "Abort" column: wires the constant comparator and the
+ * abort-count formatter above to the HISTC_CALLCHAIN_BRANCH_ABORT
+ * column-width slot.
+ */
+struct sort_entry sort_callchain_branch_abort = {
+ .se_header = "Abort",
+ .se_cmp = sort__callchain_branch_abort_cmp,
+ .se_snprintf = hist_entry__callchain_branch_abort_snprintf,
+ .se_width_idx = HISTC_CALLCHAIN_BRANCH_ABORT,
+};
+
+/* --sort callchain_branch_cycles */
+
+/*
+ * Comparison callback for the "Cycles" column. Neither entry is inspected:
+ * every pair of entries compares as equal (0).
+ */
+static int64_t
+sort__callchain_branch_cycles_cmp(struct hist_entry *left __maybe_unused,
+ struct hist_entry *right __maybe_unused)
+{
+ return 0;
+}
+
+/*
+ * Print the "Cycles" column for @he into @bf (at most @size bytes).
+ *
+ * The branch and cycle totals are obtained from callchain_branch_counts()
+ * for he->callchain. The value printed is cycles divided by branches using
+ * integer division, or 0 when the branch total is zero. The text is
+ * left-justified and padded/truncated to @width characters.
+ *
+ * Returns whatever repsep_snprintf() returns.
+ */
+static int hist_entry__callchain_branch_cycles_snprintf(struct hist_entry *he,
+							 char *bf, size_t size,
+							 unsigned int width)
+{
+	/*
+	 * Zero-initialised so a defined value is printed even if the helper
+	 * leaves the outputs untouched (its contract is not visible here).
+	 */
+	u64 branch_count = 0, cycles_count = 0, cycles = 0;
+	char str[32];
+
+	callchain_branch_counts(he->callchain, &branch_count,
+				NULL, NULL, &cycles_count);
+
+	/* Guard the division: no branches means no average to report. */
+	if (branch_count)
+		cycles = cycles_count / branch_count;
+
+	/* cycles is a u64: use the unsigned conversion, not PRId64. */
+	snprintf(str, sizeof(str), "%" PRIu64, cycles);
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, str);
+}
+
+/*
+ * Sort entry for the "Cycles" column: wires the constant comparator and the
+ * cycles-per-branch formatter above to the HISTC_CALLCHAIN_BRANCH_CYCLES
+ * column-width slot.
+ */
+struct sort_entry sort_callchain_branch_cycles = {
+ .se_header = "Cycles",
+ .se_cmp = sort__callchain_branch_cycles_cmp,
+ .se_snprintf = hist_entry__callchain_branch_cycles_snprintf,
+ .se_width_idx = HISTC_CALLCHAIN_BRANCH_CYCLES,
+};
+
/* --sort srcfile */
static char no_srcfile[1];
@@ -2312,7 +2408,7 @@ static int hist_entry__typeoff_snprintf(struct hist_entry *he, char *bf,
he->mem_type_off, true);
buf[4095] = '\0';
- return repsep_snprintf(bf, size, "%s %+d (%s)", he_type->self.type_name,
+ return repsep_snprintf(bf, size, "%s +%#x (%s)", he_type->self.type_name,
he->mem_type_off, buf);
}
@@ -2326,6 +2422,57 @@ struct sort_entry sort_type_offset = {
.se_width_idx = HISTC_TYPE_OFFSET,
};
+/* --sort typecln */
+
+/* TODO: use actual value in the system */
+#define TYPE_CACHELINE_SIZE 64
+
+/*
+ * Order two hist entries for the "typecln" key: first by data type name
+ * (strcmp), then by the cache-line index of the accessed offset within the
+ * type (mem_type_off / TYPE_CACHELINE_SIZE).
+ *
+ * Returns 0 when both the type name and the cache-line index match, otherwise
+ * the strcmp() result or the difference of the cache-line indices.
+ */
+static int64_t
+sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
+{
+ struct annotated_data_type *left_type = left->mem_type;
+ struct annotated_data_type *right_type = right->mem_type;
+ int64_t left_cln, right_cln;
+ int64_t ret;
+
+ /* Resolve the type lazily if this entry has none yet, then re-read it. */
+ if (!left_type) {
+ sort__type_init(left);
+ left_type = left->mem_type;
+ }
+
+ if (!right_type) {
+ sort__type_init(right);
+ right_type = right->mem_type;
+ }
+
+ /* NOTE(review): assumes sort__type_init() never leaves mem_type NULL - confirm. */
+ ret = strcmp(left_type->self.type_name, right_type->self.type_name);
+ if (ret)
+ return ret;
+
+ /* Same type: entries are distinguished only by cache-line index. */
+ left_cln = left->mem_type_off / TYPE_CACHELINE_SIZE;
+ right_cln = right->mem_type_off / TYPE_CACHELINE_SIZE;
+ return left_cln - right_cln;
+}
+
+/*
+ * Print the "Data Type Cacheline" column for @he into @bf (at most @size
+ * bytes) as "<type name>: cache-line <index>", where the index is
+ * mem_type_off divided by TYPE_CACHELINE_SIZE. @width is not used.
+ *
+ * Returns whatever repsep_snprintf() returns.
+ */
+static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf,
+					size_t size, unsigned int width __maybe_unused)
+{
+	struct annotated_data_type *type = he->mem_type;
+
+	return repsep_snprintf(bf, size, "%s: cache-line %d",
+			       type->self.type_name,
+			       he->mem_type_off / TYPE_CACHELINE_SIZE);
+}
+
+/*
+ * Sort entry for the "typecln" key. Note the split between callbacks:
+ * .se_cmp reuses sort__type_cmp, while .se_collapse and .se_sort both use
+ * sort__typecln_sort, which additionally compares the cache-line index.
+ */
+struct sort_entry sort_type_cacheline = {
+ .se_header = "Data Type Cacheline",
+ .se_cmp = sort__type_cmp,
+ .se_collapse = sort__typecln_sort,
+ .se_sort = sort__typecln_sort,
+ .se_init = sort__type_init,
+ .se_snprintf = hist_entry__typecln_snprintf,
+ .se_width_idx = HISTC_TYPE_CACHELINE,
+};
+
struct sort_dimension {
const char *name;
@@ -2384,6 +2531,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_ANNOTATE_DATA_TYPE, "type", sort_type),
DIM(SORT_ANNOTATE_DATA_TYPE_OFFSET, "typeoff", sort_type_offset),
DIM(SORT_SYM_OFFSET, "symoff", sort_sym_offset),
+ DIM(SORT_ANNOTATE_DATA_TYPE_CACHELINE, "typecln", sort_type_cacheline),
};
#undef DIM
@@ -2404,6 +2552,15 @@ static struct sort_dimension bstack_sort_dimensions[] = {
DIM(SORT_SYM_IPC, "ipc_lbr", sort_sym_ipc),
DIM(SORT_ADDR_FROM, "addr_from", sort_addr_from),
DIM(SORT_ADDR_TO, "addr_to", sort_addr_to),
+ DIM(SORT_CALLCHAIN_BRANCH_PREDICTED,
+ "callchain_branch_predicted",
+ sort_callchain_branch_predicted),
+ DIM(SORT_CALLCHAIN_BRANCH_ABORT,
+ "callchain_branch_abort",
+ sort_callchain_branch_abort),
+ DIM(SORT_CALLCHAIN_BRANCH_CYCLES,
+ "callchain_branch_cycles",
+ sort_callchain_branch_cycles)
};
#undef DIM
@@ -3432,7 +3589,13 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
continue;
- if (sort__mode != SORT_MODE__BRANCH)
+ if ((sort__mode != SORT_MODE__BRANCH) &&
+ strncasecmp(tok, "callchain_branch_predicted",
+ strlen(tok)) &&
+ strncasecmp(tok, "callchain_branch_abort",
+ strlen(tok)) &&
+ strncasecmp(tok, "callchain_branch_cycles",
+ strlen(tok)))
return -EINVAL;
if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
@@ -3960,7 +4123,7 @@ static void add_hpp_sort_string(struct strbuf *sb, struct hpp_dimension *s, int
add_key(sb, s[i].name, llen);
}
-char *sort_help(const char *prefix)
+char *sort_help(const char *prefix, enum sort_mode mode)
{
struct strbuf sb;
char *s;
@@ -3972,10 +4135,12 @@ char *sort_help(const char *prefix)
ARRAY_SIZE(hpp_sort_dimensions), &len);
add_sort_string(&sb, common_sort_dimensions,
ARRAY_SIZE(common_sort_dimensions), &len);
- add_sort_string(&sb, bstack_sort_dimensions,
- ARRAY_SIZE(bstack_sort_dimensions), &len);
- add_sort_string(&sb, memory_sort_dimensions,
- ARRAY_SIZE(memory_sort_dimensions), &len);
+ if (mode == SORT_MODE__NORMAL || mode == SORT_MODE__BRANCH)
+ add_sort_string(&sb, bstack_sort_dimensions,
+ ARRAY_SIZE(bstack_sort_dimensions), &len);
+ if (mode == SORT_MODE__NORMAL || mode == SORT_MODE__MEMORY)
+ add_sort_string(&sb, memory_sort_dimensions,
+ ARRAY_SIZE(memory_sort_dimensions), &len);
s = strbuf_detach(&sb, NULL);
strbuf_release(&sb);
return s;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 0bd0ee3ae76b..a8572574e168 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -71,6 +71,7 @@ enum sort_type {
SORT_ANNOTATE_DATA_TYPE,
SORT_ANNOTATE_DATA_TYPE_OFFSET,
SORT_SYM_OFFSET,
+ SORT_ANNOTATE_DATA_TYPE_CACHELINE,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
@@ -87,6 +88,9 @@ enum sort_type {
SORT_SYM_IPC,
SORT_ADDR_FROM,
SORT_ADDR_TO,
+ SORT_CALLCHAIN_BRANCH_PREDICTED,
+ SORT_CALLCHAIN_BRANCH_ABORT,
+ SORT_CALLCHAIN_BRANCH_CYCLES,
/* memory mode specific sort keys */
__SORT_MEMORY_MODE,
@@ -130,7 +134,7 @@ void reset_output_field(void);
void sort__setup_elide(FILE *fp);
void perf_hpp__set_elide(int idx, bool elide);
-char *sort_help(const char *prefix);
+char *sort_help(const char *prefix, enum sort_mode mode);
int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset);
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 760742fd4a7d..f32d0d4f4bc9 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -6,6 +6,7 @@
#include <string.h>
#include <sys/types.h>
+#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/zalloc.h>
@@ -16,6 +17,9 @@
#include "util/debug.h"
#include "util/callchain.h"
#include "util/symbol_conf.h"
+#ifdef HAVE_LIBLLVM_SUPPORT
+#include "util/llvm-c-helpers.h"
+#endif
#include "srcline.h"
#include "string2.h"
#include "symbol.h"
@@ -130,7 +134,60 @@ static struct symbol *new_inline_sym(struct dso *dso,
#define MAX_INLINE_NEST 1024
-#ifdef HAVE_LIBBFD_SUPPORT
+#ifdef HAVE_LIBLLVM_SUPPORT
+
+/*
+ * Release an array of @num_frames inline frames: the filename and funcname
+ * of each element are freed, then the array itself. A NULL @inline_frames is
+ * accepted and ignored.
+ */
+static void free_llvm_inline_frames(struct llvm_a2l_frame *inline_frames,
+				    int num_frames)
+{
+	int idx;
+
+	if (inline_frames == NULL)
+		return;
+
+	for (idx = 0; idx < num_frames; idx++) {
+		zfree(&inline_frames[idx].filename);
+		zfree(&inline_frames[idx].funcname);
+	}
+	zfree(&inline_frames);
+}
+
+/*
+ * addr2line implementation used when HAVE_LIBLLVM_SUPPORT is defined.
+ *
+ * Delegates the lookup of @addr in @dso_name to llvm_addr2line(), which
+ * receives @file and @line as output arguments. Inline frames are requested
+ * only when both @node and @unwind_inlines are set; each returned frame is
+ * turned into an inline symbol plus optional srcline and appended to @node.
+ *
+ * Returns the value from llvm_addr2line(), or 0 if appending a frame to
+ * @node fails.
+ */
+static int addr2line(const char *dso_name, u64 addr,
+ char **file, unsigned int *line, struct dso *dso,
+ bool unwind_inlines, struct inline_node *node,
+ struct symbol *sym)
+{
+ struct llvm_a2l_frame *inline_frames = NULL;
+ int num_frames = llvm_addr2line(dso_name, addr, file, line,
+ node && unwind_inlines, &inline_frames);
+
+ if (num_frames == 0 || !inline_frames) {
+ /* Error, or we didn't want inlines. */
+ return num_frames;
+ }
+
+ for (int i = 0; i < num_frames; ++i) {
+ struct symbol *inline_sym =
+ new_inline_sym(dso, sym, inline_frames[i].funcname);
+ char *srcline = NULL;
+
+ /* A frame without a filename is appended with a NULL srcline. */
+ if (inline_frames[i].filename) {
+ srcline =
+ srcline_from_fileline(inline_frames[i].filename,
+ inline_frames[i].line);
+ }
+ /*
+ * NOTE(review): on failure inline_sym and srcline are not released
+ * here; whether that leaks depends on inline_list__append() - confirm.
+ */
+ if (inline_list__append(inline_sym, srcline, node) != 0) {
+ free_llvm_inline_frames(inline_frames, num_frames);
+ return 0;
+ }
+ }
+ /* The frame array is only needed while building @node. */
+ free_llvm_inline_frames(inline_frames, num_frames);
+
+ return num_frames;
+}
+
+/*
+ * dso__free_a2l() for the HAVE_LIBLLVM_SUPPORT build: a deliberate no-op,
+ * @dso is not touched.
+ */
+void dso__free_a2l(struct dso *dso __maybe_unused)
+{
+ /* Nothing to free. */
+}
+
+#elif defined(HAVE_LIBBFD_SUPPORT)
/*
* Implement addr2line using libbfd.
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index c38bcb6f4c78..53dcdf07f5a2 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -7,6 +7,7 @@
#include <perf/cpumap.h>
#include "color.h"
#include "counts.h"
+#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "stat.h"
@@ -21,6 +22,7 @@
#include "iostat.h"
#include "pmu.h"
#include "pmus.h"
+#include "tool_pmu.h"
#define CNTR_NOT_SUPPORTED "<not supported>"
#define CNTR_NOT_COUNTED "<not counted>"
@@ -71,6 +73,32 @@ static const char *aggr_header_std[] = {
[AGGR_GLOBAL] = ""
};
+/*
+ * Map a metric threshold classification to the color string used when
+ * printing the metric. The table is indexed directly by @thresh; the entry
+ * labelled "unknown" is the empty string.
+ *
+ * The static_assert only pins METRIC_THRESHOLD_GOOD to the last table slot;
+ * @thresh itself is not range-checked, so callers must pass a valid
+ * enumerator.
+ */
+const char *metric_threshold_classify__color(enum metric_threshold_classify thresh)
+{
+	/* static: build the lookup table once rather than on every call. */
+	static const char * const colors[] = {
+		"", /* unknown */
+		PERF_COLOR_RED, /* bad */
+		PERF_COLOR_MAGENTA, /* nearly bad */
+		PERF_COLOR_YELLOW, /* less good */
+		PERF_COLOR_GREEN, /* good */
+	};
+	static_assert(ARRAY_SIZE(colors) - 1 == METRIC_THRESHOLD_GOOD, "missing enum value");
+	return colors[thresh];
+}
+
+/*
+ * Map a metric threshold classification to its human-readable name
+ * ("unknown", "bad", "nearly bad", "less good", "good"). The table is
+ * indexed directly by @thresh.
+ *
+ * The static_assert only pins METRIC_THRESHOLD_GOOD to the last table slot;
+ * @thresh itself is not range-checked, so callers must pass a valid
+ * enumerator.
+ */
+static const char *metric_threshold_classify__str(enum metric_threshold_classify thresh)
+{
+	/* static: build the lookup table once rather than on every call. */
+	static const char * const strs[] = {
+		"unknown",
+		"bad",
+		"nearly bad",
+		"less good",
+		"good",
+	};
+	static_assert(ARRAY_SIZE(strs) - 1 == METRIC_THRESHOLD_GOOD, "missing enum value");
+	return strs[thresh];
+}
+
static void print_running_std(struct perf_stat_config *config, u64 run, u64 ena)
{
if (run != ena)
@@ -403,13 +431,14 @@ static void do_new_line_std(struct perf_stat_config *config,
}
static void print_metric_std(struct perf_stat_config *config,
- void *ctx, const char *color, const char *fmt,
- const char *unit, double val)
+ void *ctx, enum metric_threshold_classify thresh,
+ const char *fmt, const char *unit, double val)
{
struct outstate *os = ctx;
FILE *out = os->fh;
int n;
bool newline = os->newline;
+ const char *color = metric_threshold_classify__color(thresh);
os->newline = false;
@@ -441,7 +470,7 @@ static void new_line_csv(struct perf_stat_config *config, void *ctx)
static void print_metric_csv(struct perf_stat_config *config __maybe_unused,
void *ctx,
- const char *color __maybe_unused,
+ enum metric_threshold_classify thresh __maybe_unused,
const char *fmt, const char *unit, double val)
{
struct outstate *os = ctx;
@@ -462,15 +491,20 @@ static void print_metric_csv(struct perf_stat_config *config __maybe_unused,
static void print_metric_json(struct perf_stat_config *config __maybe_unused,
void *ctx,
- const char *color __maybe_unused,
+ enum metric_threshold_classify thresh,
const char *fmt __maybe_unused,
const char *unit, double val)
{
struct outstate *os = ctx;
FILE *out = os->fh;
- fprintf(out, "\"metric-value\" : \"%f\", ", val);
- fprintf(out, "\"metric-unit\" : \"%s\"", unit);
+ if (unit) {
+ fprintf(out, "\"metric-value\" : \"%f\", \"metric-unit\" : \"%s\"", val, unit);
+ if (thresh != METRIC_THRESHOLD_UNKNOWN) {
+ fprintf(out, ", \"metric-threshold\" : \"%s\"",
+ metric_threshold_classify__str(thresh));
+ }
+ }
if (!config->metric_only)
fprintf(out, "}");
}
@@ -557,13 +591,14 @@ static const char *fixunit(char *buf, struct evsel *evsel,
}
static void print_metric_only(struct perf_stat_config *config,
- void *ctx, const char *color, const char *fmt,
- const char *unit, double val)
+ void *ctx, enum metric_threshold_classify thresh,
+ const char *fmt, const char *unit, double val)
{
struct outstate *os = ctx;
FILE *out = os->fh;
char buf[1024], str[1024];
unsigned mlen = config->metric_only_len;
+ const char *color = metric_threshold_classify__color(thresh);
if (!valid_only_metric(unit))
return;
@@ -580,7 +615,8 @@ static void print_metric_only(struct perf_stat_config *config,
}
static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused,
- void *ctx, const char *color __maybe_unused,
+ void *ctx,
+ enum metric_threshold_classify thresh __maybe_unused,
const char *fmt,
const char *unit, double val)
{
@@ -602,25 +638,29 @@ static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused
}
static void print_metric_only_json(struct perf_stat_config *config __maybe_unused,
- void *ctx, const char *color __maybe_unused,
+ void *ctx,
+ enum metric_threshold_classify thresh __maybe_unused,
const char *fmt,
const char *unit, double val)
{
struct outstate *os = ctx;
FILE *out = os->fh;
- char buf[64], *vals, *ends;
+ char buf[64], *ends;
char tbuf[1024];
+ const char *vals;
if (!valid_only_metric(unit))
return;
unit = fixunit(tbuf, os->evsel, unit);
+ if (!unit[0])
+ return;
snprintf(buf, sizeof(buf), fmt ?: "", val);
- ends = vals = skip_spaces(buf);
+ vals = ends = skip_spaces(buf);
while (isdigit(*ends) || *ends == '.')
ends++;
*ends = 0;
- if (!unit[0] || !vals[0])
- return;
+ if (!vals[0])
+ vals = "none";
fprintf(out, "%s\"%s\" : \"%s\"", os->first ? "" : ", ", unit, vals);
os->first = false;
}
@@ -631,7 +671,8 @@ static void new_line_metric(struct perf_stat_config *config __maybe_unused,
}
static void print_metric_header(struct perf_stat_config *config,
- void *ctx, const char *color __maybe_unused,
+ void *ctx,
+ enum metric_threshold_classify thresh __maybe_unused,
const char *fmt __maybe_unused,
const char *unit, double val __maybe_unused)
{
@@ -805,7 +846,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
if (config->metric_only) {
- pm(config, os, NULL, "", "", 0);
+ pm(config, os, METRIC_THRESHOLD_UNKNOWN, "", "", 0);
return;
}
@@ -860,7 +901,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
perf_stat__print_shadow_stats(config, counter, uval, aggr_idx,
&out, &config->metric_events);
} else {
- pm(config, os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0);
+ pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL, /*unit=*/"", /*val=*/0);
}
if (!config->metric_only) {
@@ -871,38 +912,66 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
static void uniquify_event_name(struct evsel *counter)
{
- char *new_name;
- char *config;
- int ret = 0;
+ const char *name, *pmu_name;
+ char *new_name, *config;
+ int ret;
+
+ /* The evsel was already uniquified. */
+ if (counter->uniquified_name)
+ return;
+
+ /* Avoid checking to uniquify twice. */
+ counter->uniquified_name = true;
+
+ /* The evsel has a "name=" config term or is from libpfm. */
+ if (counter->use_config_name || counter->is_libpfm_event)
+ return;
+
+ /* Legacy no PMU event, don't uniquify. */
+ if (!counter->pmu ||
+ (counter->pmu->type < PERF_TYPE_MAX && counter->pmu->type != PERF_TYPE_RAW))
+ return;
- if (counter->uniquified_name || counter->use_config_name ||
- !counter->pmu_name || !strncmp(evsel__name(counter), counter->pmu_name,
- strlen(counter->pmu_name)))
+ /* A sysfs or json event replacing a legacy event, don't uniquify. */
+ if (counter->pmu->is_core && counter->alternate_hw_config != PERF_COUNT_HW_MAX)
return;
- config = strchr(counter->name, '/');
+ name = evsel__name(counter);
+ pmu_name = counter->pmu->name;
+ /* Already prefixed by the PMU name. */
+ if (!strncmp(name, pmu_name, strlen(pmu_name)))
+ return;
+
+ config = strchr(name, '/');
if (config) {
- if (asprintf(&new_name,
- "%s%s", counter->pmu_name, config) > 0) {
- free(counter->name);
- counter->name = new_name;
- }
- } else {
- if (evsel__is_hybrid(counter)) {
- ret = asprintf(&new_name, "%s/%s/",
- counter->pmu_name, counter->name);
+ int len = config - name;
+
+ if (config[1] == '/') {
+ /* case: event// */
+ ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 2);
} else {
- ret = asprintf(&new_name, "%s [%s]",
- counter->name, counter->pmu_name);
+ /* case: event/.../ */
+ ret = asprintf(&new_name, "%s/%.*s,%s", pmu_name, len, name, config + 1);
}
+ } else {
+ config = strchr(name, ':');
+ if (config) {
+ /* case: event:.. */
+ int len = config - name;
- if (ret) {
- free(counter->name);
- counter->name = new_name;
+ ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 1);
+ } else {
+ /* case: event */
+ ret = asprintf(&new_name, "%s/%s/", pmu_name, name);
}
}
-
- counter->uniquified_name = true;
+ if (ret > 0) {
+ free(counter->name);
+ counter->name = new_name;
+ } else {
+ /* ENOMEM from asprintf. */
+ counter->uniquified_name = false;
+ }
}
static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config)
@@ -940,15 +1009,29 @@ static bool should_skip_zero_counter(struct perf_stat_config *config,
int idx;
/*
+ * Skip unsupported default events when not verbose. (default events
+ * are all marked 'skippable').
+ */
+ if (verbose == 0 && counter->skippable && !counter->supported)
+ return true;
+
+ /*
* Skip value 0 when enabling --per-thread globally,
* otherwise it will have too many 0 output.
*/
if (config->aggr_mode == AGGR_THREAD && config->system_wide)
return true;
- /* Tool events have the software PMU but are only gathered on 1. */
- if (evsel__is_tool(counter))
- return true;
+ /*
+ * Many tool events are only gathered on the first index, skip other
+ * zero values.
+ */
+ if (evsel__is_tool(counter)) {
+ struct aggr_cpu_id own_id =
+ config->aggr_get_id(config, (struct perf_cpu){ .cpu = 0 });
+
+ return !aggr_cpu_id__equal(id, &own_id);
+ }
/*
* Skip value 0 when it's an uncore event and the given aggr id
@@ -1237,7 +1320,8 @@ static void print_metric_headers(struct perf_stat_config *config,
/* Print metrics headers only */
evlist__for_each_entry(evlist, counter) {
- if (config->aggr_mode != AGGR_NONE && counter->metric_leader != counter)
+ if (!config->iostat_run &&
+ config->aggr_mode != AGGR_NONE && counter->metric_leader != counter)
continue;
os.evsel = counter;
@@ -1558,6 +1642,31 @@ static void print_cgroup_counter(struct perf_stat_config *config, struct evlist
print_metric_end(config, os);
}
+/*
+ * Turn off event-name uniquification for @evlist when it would add nothing.
+ *
+ * The list is scanned once. The function returns without changing anything
+ * as soon as it sees: evsels with differing PMUs, an evsel on a non-core
+ * PMU, or an evsel with a PMU while more than one core PMU exists. If the
+ * scan completes, every evsel gets uniquified_name set, which makes
+ * uniquify_event_name() treat it as already handled.
+ */
+static void disable_uniquify(struct evlist *evlist)
+{
+	struct evsel *counter;
+	struct perf_pmu *last_pmu = NULL;
+	bool first = true;
+
+	evlist__for_each_entry(evlist, counter) {
+		/* If PMUs vary then uniquify can be useful. */
+		if (!first && counter->pmu != last_pmu)
+			return;
+		first = false;
+		/*
+		 * Record the PMU for the comparison above. Previously last_pmu
+		 * was never assigned, so it stayed NULL and the second evsel
+		 * with a PMU always looked like a "different PMU".
+		 */
+		last_pmu = counter->pmu;
+		if (counter->pmu) {
+			/* Allow uniquify for uncore PMUs. */
+			if (!counter->pmu->is_core)
+				return;
+			/* Keep hybrid event names uniquified for clarity. */
+			if (perf_pmus__num_core_pmus() > 1)
+				return;
+		}
+	}
+	/* One PMU throughout: mark every evsel so its name is left as is. */
+	evlist__for_each_entry(evlist, counter) {
+		counter->uniquified_name = true;
+	}
+}
+
void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
struct target *_target, struct timespec *ts,
int argc, const char **argv)
@@ -1571,6 +1680,8 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
.first = true,
};
+ disable_uniquify(evlist);
+
if (config->iostat_run)
evlist->selected = evlist__first(evlist);
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 6bb975e46de3..47718610d5d8 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -15,6 +15,7 @@
#include <linux/zalloc.h>
#include "iostat.h"
#include "util/hashmap.h"
+#include "tool_pmu.h"
struct stats walltime_nsecs_stats;
struct rusage_stats ru_stats;
@@ -76,7 +77,7 @@ void perf_stat__reset_shadow_stats(void)
memset(&ru_stats, 0, sizeof(ru_stats));
}
-static enum stat_type evsel__stat_type(const struct evsel *evsel)
+static enum stat_type evsel__stat_type(struct evsel *evsel)
{
/* Fake perf_hw_cache_op_id values for use with evsel__match. */
u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D |
@@ -136,23 +137,19 @@ static enum stat_type evsel__stat_type(const struct evsel *evsel)
return STAT_NONE;
}
-static const char *get_ratio_color(const double ratios[3], double val)
+static enum metric_threshold_classify get_ratio_thresh(const double ratios[3], double val)
{
- const char *color = PERF_COLOR_NORMAL;
+ assert(ratios[0] > ratios[1]);
+ assert(ratios[1] > ratios[2]);
- if (val > ratios[0])
- color = PERF_COLOR_RED;
- else if (val > ratios[1])
- color = PERF_COLOR_MAGENTA;
- else if (val > ratios[2])
- color = PERF_COLOR_YELLOW;
-
- return color;
+ return val > ratios[1]
+ ? (val > ratios[0] ? METRIC_THRESHOLD_BAD : METRIC_THRESHOLD_NEARLY_BAD)
+ : (val > ratios[2] ? METRIC_THRESHOLD_LESS_GOOD : METRIC_THRESHOLD_GOOD);
}
static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type type)
{
- const struct evsel *cur;
+ struct evsel *cur;
int evsel_ctx = evsel_context(evsel);
evlist__for_each_entry(evsel->evlist, cur) {
@@ -195,17 +192,21 @@ static void print_ratio(struct perf_stat_config *config,
const struct evsel *evsel, int aggr_idx,
double numerator, struct perf_stat_output_ctx *out,
enum stat_type denominator_type,
- const double color_ratios[3], const char *unit)
+ const double thresh_ratios[3], const char *_unit)
{
double denominator = find_stat(evsel, aggr_idx, denominator_type);
+ double ratio = 0;
+ enum metric_threshold_classify thresh = METRIC_THRESHOLD_UNKNOWN;
+ const char *fmt = NULL;
+ const char *unit = NULL;
if (numerator && denominator) {
- double ratio = numerator / denominator * 100.0;
- const char *color = get_ratio_color(color_ratios, ratio);
-
- out->print_metric(config, out->ctx, color, "%7.2f%%", unit, ratio);
- } else
- out->print_metric(config, out->ctx, NULL, NULL, unit, 0);
+ ratio = numerator / denominator * 100.0;
+ thresh = get_ratio_thresh(thresh_ratios, ratio);
+ fmt = "%7.2f%%";
+ unit = _unit;
+ }
+ out->print_metric(config, out->ctx, thresh, fmt, unit, ratio);
}
static void print_stalled_cycles_front(struct perf_stat_config *config,
@@ -213,9 +214,9 @@ static void print_stalled_cycles_front(struct perf_stat_config *config,
int aggr_idx, double stalled,
struct perf_stat_output_ctx *out)
{
- static const double color_ratios[3] = {50.0, 30.0, 10.0};
+ const double thresh_ratios[3] = {50.0, 30.0, 10.0};
- print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios,
+ print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, thresh_ratios,
"frontend cycles idle");
}
@@ -224,9 +225,9 @@ static void print_stalled_cycles_back(struct perf_stat_config *config,
int aggr_idx, double stalled,
struct perf_stat_output_ctx *out)
{
- static const double color_ratios[3] = {75.0, 50.0, 20.0};
+ const double thresh_ratios[3] = {75.0, 50.0, 20.0};
- print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios,
+ print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, thresh_ratios,
"backend cycles idle");
}
@@ -235,9 +236,9 @@ static void print_branch_miss(struct perf_stat_config *config,
int aggr_idx, double misses,
struct perf_stat_output_ctx *out)
{
- static const double color_ratios[3] = {20.0, 10.0, 5.0};
+ const double thresh_ratios[3] = {20.0, 10.0, 5.0};
- print_ratio(config, evsel, aggr_idx, misses, out, STAT_BRANCHES, color_ratios,
+ print_ratio(config, evsel, aggr_idx, misses, out, STAT_BRANCHES, thresh_ratios,
"of all branches");
}
@@ -246,9 +247,9 @@ static void print_l1d_miss(struct perf_stat_config *config,
int aggr_idx, double misses,
struct perf_stat_output_ctx *out)
{
- static const double color_ratios[3] = {20.0, 10.0, 5.0};
+ const double thresh_ratios[3] = {20.0, 10.0, 5.0};
- print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_DCACHE, color_ratios,
+ print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_DCACHE, thresh_ratios,
"of all L1-dcache accesses");
}
@@ -257,9 +258,9 @@ static void print_l1i_miss(struct perf_stat_config *config,
int aggr_idx, double misses,
struct perf_stat_output_ctx *out)
{
- static const double color_ratios[3] = {20.0, 10.0, 5.0};
+ const double thresh_ratios[3] = {20.0, 10.0, 5.0};
- print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_ICACHE, color_ratios,
+ print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_ICACHE, thresh_ratios,
"of all L1-icache accesses");
}
@@ -268,9 +269,9 @@ static void print_ll_miss(struct perf_stat_config *config,
int aggr_idx, double misses,
struct perf_stat_output_ctx *out)
{
- static const double color_ratios[3] = {20.0, 10.0, 5.0};
+ const double thresh_ratios[3] = {20.0, 10.0, 5.0};
- print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, color_ratios,
+ print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, thresh_ratios,
"of all LL-cache accesses");
}
@@ -279,9 +280,9 @@ static void print_dtlb_miss(struct perf_stat_config *config,
int aggr_idx, double misses,
struct perf_stat_output_ctx *out)
{
- static const double color_ratios[3] = {20.0, 10.0, 5.0};
+ const double thresh_ratios[3] = {20.0, 10.0, 5.0};
- print_ratio(config, evsel, aggr_idx, misses, out, STAT_DTLB_CACHE, color_ratios,
+ print_ratio(config, evsel, aggr_idx, misses, out, STAT_DTLB_CACHE, thresh_ratios,
"of all dTLB cache accesses");
}
@@ -290,9 +291,9 @@ static void print_itlb_miss(struct perf_stat_config *config,
int aggr_idx, double misses,
struct perf_stat_output_ctx *out)
{
- static const double color_ratios[3] = {20.0, 10.0, 5.0};
+ const double thresh_ratios[3] = {20.0, 10.0, 5.0};
- print_ratio(config, evsel, aggr_idx, misses, out, STAT_ITLB_CACHE, color_ratios,
+ print_ratio(config, evsel, aggr_idx, misses, out, STAT_ITLB_CACHE, thresh_ratios,
"of all iTLB cache accesses");
}
@@ -301,9 +302,9 @@ static void print_cache_miss(struct perf_stat_config *config,
int aggr_idx, double misses,
struct perf_stat_output_ctx *out)
{
- static const double color_ratios[3] = {20.0, 10.0, 5.0};
+ const double thresh_ratios[3] = {20.0, 10.0, 5.0};
- print_ratio(config, evsel, aggr_idx, misses, out, STAT_CACHE_REFS, color_ratios,
+ print_ratio(config, evsel, aggr_idx, misses, out, STAT_CACHE_REFS, thresh_ratios,
"of all cache refs");
}
@@ -319,15 +320,16 @@ static void print_instructions(struct perf_stat_config *config,
find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_BACK));
if (cycles) {
- print_metric(config, ctxp, NULL, "%7.2f ", "insn per cycle",
- instructions / cycles);
- } else
- print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
-
+ print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%7.2f ",
+ "insn per cycle", instructions / cycles);
+ } else {
+ print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL,
+ "insn per cycle", 0);
+ }
if (max_stalled && instructions) {
out->new_line(config, ctxp);
- print_metric(config, ctxp, NULL, "%7.2f ", "stalled cycles per insn",
- max_stalled / instructions);
+ print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%7.2f ",
+ "stalled cycles per insn", max_stalled / instructions);
}
}
@@ -341,9 +343,12 @@ static void print_cycles(struct perf_stat_config *config,
if (cycles && nsecs) {
double ratio = cycles / nsecs;
- out->print_metric(config, out->ctx, NULL, "%8.3f", "GHz", ratio);
- } else
- out->print_metric(config, out->ctx, NULL, NULL, "GHz", 0);
+ out->print_metric(config, out->ctx, METRIC_THRESHOLD_UNKNOWN, "%8.3f",
+ "GHz", ratio);
+ } else {
+ out->print_metric(config, out->ctx, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL,
+ "GHz", 0);
+ }
}
static void print_nsecs(struct perf_stat_config *config,
@@ -356,10 +361,12 @@ static void print_nsecs(struct perf_stat_config *config,
double wall_time = avg_stats(&walltime_nsecs_stats);
if (wall_time) {
- print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
+ print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%8.3f", "CPUs utilized",
nsecs / (wall_time * evsel->scale));
- } else
- print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
+ } else {
+ print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL,
+ "CPUs utilized", 0);
+ }
}
static int prepare_metric(const struct metric_expr *mexp,
@@ -380,27 +387,36 @@ static int prepare_metric(const struct metric_expr *mexp,
struct stats *stats;
double scale;
- switch (metric_events[i]->tool_event) {
- case PERF_TOOL_DURATION_TIME:
+ switch (evsel__tool_event(metric_events[i])) {
+ case TOOL_PMU__EVENT_DURATION_TIME:
stats = &walltime_nsecs_stats;
scale = 1e-9;
break;
- case PERF_TOOL_USER_TIME:
+ case TOOL_PMU__EVENT_USER_TIME:
stats = &ru_stats.ru_utime_usec_stat;
scale = 1e-6;
break;
- case PERF_TOOL_SYSTEM_TIME:
+ case TOOL_PMU__EVENT_SYSTEM_TIME:
stats = &ru_stats.ru_stime_usec_stat;
scale = 1e-6;
break;
- case PERF_TOOL_NONE:
+ case TOOL_PMU__EVENT_NONE:
pr_err("Invalid tool event 'none'");
abort();
- case PERF_TOOL_MAX:
+ case TOOL_PMU__EVENT_MAX:
pr_err("Invalid tool event 'max'");
abort();
+ case TOOL_PMU__EVENT_HAS_PMEM:
+ case TOOL_PMU__EVENT_NUM_CORES:
+ case TOOL_PMU__EVENT_NUM_CPUS:
+ case TOOL_PMU__EVENT_NUM_CPUS_ONLINE:
+ case TOOL_PMU__EVENT_NUM_DIES:
+ case TOOL_PMU__EVENT_NUM_PACKAGES:
+ case TOOL_PMU__EVENT_SLOTS:
+ case TOOL_PMU__EVENT_SMT_ON:
+ case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
default:
- pr_err("Unknown tool event '%s'", evsel__name(metric_events[i]));
+ pr_err("Unexpected tool event '%s'", evsel__name(metric_events[i]));
abort();
}
val = avg_stats(stats) * scale;
@@ -483,7 +499,7 @@ static void generic_metric(struct perf_stat_config *config,
double ratio, scale, threshold;
int i;
void *ctxp = out->ctx;
- const char *color = NULL;
+ enum metric_threshold_classify thresh = METRIC_THRESHOLD_UNKNOWN;
pctx = expr__ctx_new();
if (!pctx)
@@ -501,13 +517,13 @@ static void generic_metric(struct perf_stat_config *config,
if (!metric_events[i]) {
if (expr__parse(&ratio, pctx, metric_expr) == 0) {
char *unit;
- char metric_bf[64];
+ char metric_bf[128];
if (metric_threshold &&
expr__parse(&threshold, pctx, metric_threshold) == 0 &&
!isnan(threshold)) {
- color = fpclassify(threshold) == FP_ZERO
- ? PERF_COLOR_GREEN : PERF_COLOR_RED;
+ thresh = fpclassify(threshold) == FP_ZERO
+ ? METRIC_THRESHOLD_GOOD : METRIC_THRESHOLD_BAD;
}
if (metric_unit && metric_name) {
@@ -522,22 +538,22 @@ static void generic_metric(struct perf_stat_config *config,
scnprintf(metric_bf, sizeof(metric_bf),
"%s %s", unit, metric_name);
- print_metric(config, ctxp, color, "%8.1f",
+ print_metric(config, ctxp, thresh, "%8.1f",
metric_bf, ratio);
} else {
- print_metric(config, ctxp, color, "%8.2f",
+ print_metric(config, ctxp, thresh, "%8.2f",
metric_name ?
metric_name :
out->force_header ? evsel->name : "",
ratio);
}
} else {
- print_metric(config, ctxp, color, /*unit=*/NULL,
+ print_metric(config, ctxp, thresh, /*fmt=*/NULL,
out->force_header ?
(metric_name ?: evsel->name) : "", 0);
}
} else {
- print_metric(config, ctxp, color, /*unit=*/NULL,
+ print_metric(config, ctxp, thresh, /*fmt=*/NULL,
out->force_header ?
(metric_name ?: evsel->name) : "", 0);
}
@@ -573,7 +589,7 @@ static void perf_stat__print_metricgroup_header(struct perf_stat_config *config,
{
bool need_full_name = perf_pmus__num_core_pmus() > 1;
static const char *last_name;
- static const char *last_pmu;
+ static const struct perf_pmu *last_pmu;
char full_name[64];
/*
@@ -584,21 +600,21 @@ static void perf_stat__print_metricgroup_header(struct perf_stat_config *config,
* different metric events.
*/
if (last_name && !strcmp(last_name, name)) {
- if (!need_full_name || !strcmp(last_pmu, evsel->pmu_name)) {
+ if (!need_full_name || last_pmu != evsel->pmu) {
out->print_metricgroup_header(config, ctxp, NULL);
return;
}
}
- if (need_full_name)
- scnprintf(full_name, sizeof(full_name), "%s (%s)", name, evsel->pmu_name);
+ if (need_full_name && evsel->pmu)
+ scnprintf(full_name, sizeof(full_name), "%s (%s)", name, evsel->pmu->name);
else
scnprintf(full_name, sizeof(full_name), "%s", name);
out->print_metricgroup_header(config, ctxp, full_name);
last_name = name;
- last_pmu = evsel->pmu_name;
+ last_pmu = evsel->pmu;
}
/**
@@ -708,17 +724,21 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (unit != ' ')
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
- print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
- } else
+ print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%8.3f",
+ unit_buf, ratio);
+ } else {
num = 0;
+ }
}
}
perf_stat__print_shadow_stats_metricgroup(config, evsel, aggr_idx,
&num, NULL, out, metric_events);
- if (num == 0)
- print_metric(config, ctxp, NULL, NULL, NULL, 0);
+ if (num == 0) {
+ print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN,
+ /*fmt=*/NULL, /*unit=*/NULL, 0);
+ }
}
/**
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 0bd5467389e4..7c2ccdcc3fdb 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -553,7 +553,7 @@ static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b)
if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b))
return false;
- return !!strcmp(evsel_a->pmu_name, evsel_b->pmu_name);
+ return evsel_a->pmu != evsel_b->pmu;
}
static void evsel__merge_aliases(struct evsel *evsel)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index fd7a187551bd..6f8cff3cd39a 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -154,9 +154,21 @@ struct evlist;
extern struct stats walltime_nsecs_stats;
extern struct rusage_stats ru_stats;
+enum metric_threshold_classify { /* verdict handed to print_metric_t callbacks */
+ METRIC_THRESHOLD_UNKNOWN, /* no threshold verdict; the value callers pass by default */
+ METRIC_THRESHOLD_BAD, /* generic_metric(): threshold expression evaluated non-zero */
+ METRIC_THRESHOLD_NEARLY_BAD, /* presumably a level between BAD and GOOD - TODO confirm against users */
+ METRIC_THRESHOLD_LESS_GOOD, /* presumably a level between BAD and GOOD - TODO confirm against users */
+ METRIC_THRESHOLD_GOOD, /* generic_metric(): threshold expression evaluated to zero */
+};
+const char *metric_threshold_classify__color(enum metric_threshold_classify thresh);
+
typedef void (*print_metric_t)(struct perf_stat_config *config,
- void *ctx, const char *color, const char *unit,
- const char *fmt, double val);
+ void *ctx,
+ enum metric_threshold_classify thresh,
+ const char *fmt,
+ const char *unit,
+ double val);
typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx);
/* Used to print the display name of the Default metricgroup for now. */
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index 116a642ad99d..308fc7ec88cc 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -263,6 +263,34 @@ char *strpbrk_esc(char *str, const char *stopset)
return ptr;
}
+/*
+ * Like strpbrk_esc(), but a stopset character that sits inside a single- or
+ * double-quoted substring does not end the search.
+ *
+ * Returns a pointer to the first match outside quotes, or NULL when
+ * strpbrk_esc() finds nothing (including a quote with no closing partner) or
+ * when the temporary stopset cannot be allocated.
+ */
+char *strpbrk_esq(char *str, const char *stopset)
+{
+	char *_stopset = NULL;
+	char *ptr;
+	const char *squote = "'";
+	const char *dquote = "\"";
+
+	/* Search for the caller's stopset plus both quote characters. */
+	if (asprintf(&_stopset, "%s%c%c", stopset, *squote, *dquote) < 0)
+		return NULL;
+
+	for (;;) {
+		ptr = strpbrk_esc(str, _stopset);
+		if (!ptr)
+			break;
+		if (*ptr == *squote)
+			ptr = strpbrk_esc(ptr + 1, squote);
+		else if (*ptr == *dquote)
+			ptr = strpbrk_esc(ptr + 1, dquote);
+		else
+			break;
+		/* No closing quote: stop here instead of computing NULL + 1. */
+		if (!ptr)
+			break;
+		/* Resume the search right after the closing quote. */
+		str = ptr + 1;
+	}
+
+	free(_stopset);
+	return ptr;
+}
+
/* Like strdup, but do not copy a single backslash */
char *strdup_esc(const char *str)
{
@@ -293,6 +321,78 @@ char *strdup_esc(const char *str)
return ret;
}
+/*
+ * Remove every backslash that directly precedes @quote and return the address
+ * of the first unescaped @quote within the @len bytes starting at @str, or
+ * NULL if there is none.
+ */
+static char *remove_consumed_esc(char *str, int len, int quote)
+{
+	char *ptr = str, *end = str + len;
+
+	/* Check the bound first so that *ptr is never read at or past end. */
+	while (ptr < end && *ptr != quote) {
+		if (*ptr == '\\' && ptr + 1 < end && *(ptr + 1) == quote) {
+			memmove(ptr, ptr + 1, end - (ptr + 1));
+			/* now *ptr is `quote`. */
+			end--;
+		}
+		ptr++;
+	}
+
+	return (ptr < end && *ptr == quote) ? ptr : NULL;
+}
+
+/*
+ * Like strdup_esc(), but keep the contents of a quoted string as they are:
+ * only the enclosing quotes, and a backslash right before the same quote
+ * character, are removed. Outside quotes a backslash is dropped and the
+ * character it escapes is kept literally.
+ *
+ * Returns a newly allocated string, or NULL if a quote is never closed or
+ * the copy cannot be allocated.
+ */
+char *strdup_esq(const char *str)
+{
+	char *d, *ret;
+
+	/* If there is no quote, return normal strdup_esc() */
+	d = strpbrk_esc((char *)str, "\"'");
+	if (!d)
+		return strdup_esc(str);
+
+	ret = strdup(str);
+	if (!ret)
+		return NULL;
+
+	d = ret;
+	do {
+		d = strpbrk(d, "\\\"\'");
+		if (!d)
+			break;
+
+		if (*d == '"' || *d == '\'') {
+			/* This is non-escaped quote */
+			int quote = *d;
+			int len = strlen(d + 1) + 1;
+
+			/*
+			 * Remove the start quote and remove consumed escape (backslash
+			 * before quote) and remove the end quote. If there is no end
+			 * quote, it is an input error.
+			 */
+			memmove(d, d + 1, len);
+			d = remove_consumed_esc(d, len, quote);
+			if (!d)
+				goto error;
+			memmove(d, d + 1, strlen(d + 1) + 1);
+		}
+		if (*d == '\\') {
+			memmove(d, d + 1, strlen(d + 1) + 1);
+			/*
+			 * Step over the escaped character (a second backslash,
+			 * a quote, ...) so that the next scan does not treat
+			 * an escaped quote as the start of a quoted string.
+			 */
+			if (*d != '\0')
+				d++;
+		}
+	} while (*d != '\0');
+
+	return ret;
+
+error:
+	free(ret);
+	return NULL;
+}
+
unsigned int hex(char c)
{
if (c >= '0' && c <= '9')
diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h
index 52cb8ba057c7..4c8bff47cfd3 100644
--- a/tools/perf/util/string2.h
+++ b/tools/perf/util/string2.h
@@ -37,6 +37,8 @@ char *asprintf__tp_filter_pids(size_t npids, pid_t *pids);
char *strpbrk_esc(char *str, const char *stopset);
char *strdup_esc(const char *str);
+char *strpbrk_esq(char *str, const char *stopset);
+char *strdup_esq(const char *str);
unsigned int hex(char c);
char *strreplace_chars(char needle, const char *haystack, const char *replace);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 19eb623e0826..0037f1163919 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -257,7 +257,7 @@ void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms)
* like in:
* ffffffffc1937000 T hdmi_driver_init [snd_hda_codec_hdmi]
*/
- if (prev->end == prev->start) {
+ if (prev->end == prev->start && prev->type != STT_NOTYPE) {
const char *prev_mod;
const char *curr_mod;
@@ -1931,6 +1931,9 @@ int dso__load(struct dso *dso, struct map *map)
if (next_slot) {
ss_pos++;
+ if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND)
+ dso__set_binary_type(dso, symtab_type);
+
if (syms_ss && runtime_ss)
break;
} else {
@@ -2425,14 +2428,14 @@ static bool symbol__read_kptr_restrict(void)
{
bool value = false;
FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r");
+ bool used_root;
+ bool cap_syslog = perf_cap__capable(CAP_SYSLOG, &used_root);
if (fp != NULL) {
char line[8];
if (fgets(line, sizeof(line), fp) != NULL)
- value = perf_cap__capable(CAP_SYSLOG) ?
- (atoi(line) >= 2) :
- (atoi(line) != 0);
+ value = cap_syslog ? (atoi(line) >= 2) : (atoi(line) != 0);
fclose(fp);
}
@@ -2440,7 +2443,7 @@ static bool symbol__read_kptr_restrict(void)
/* Per kernel/kallsyms.c:
* we also restrict when perf_event_paranoid > 1 w/o CAP_SYSLOG
*/
- if (perf_event_paranoid() > 1 && !perf_cap__capable(CAP_SYSLOG))
+ if (perf_event_paranoid() > 1 && !cap_syslog)
value = true;
return value;
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index 657cfa5af43c..a9c51acc722f 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -64,7 +64,7 @@ struct symbol_conf {
*sym_list_str,
*col_width_list_str,
*bt_stop_list_str;
- char *addr2line_path;
+ const char *addr2line_path;
unsigned long time_quantum;
struct strlist *dso_list,
*comm_list,
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 5498048f56ea..a58444c4aed1 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-License-Identifier: GPL-2.0-only
#include "util/cgroup.h"
#include "util/data.h"
@@ -47,7 +47,7 @@
unsigned int proc_map_timeout = DEFAULT_PROC_MAP_PARSE_TIMEOUT;
-int perf_tool__process_synth_event(struct perf_tool *tool,
+int perf_tool__process_synth_event(const struct perf_tool *tool,
union perf_event *event,
struct machine *machine,
perf_event__handler_t process)
@@ -187,7 +187,7 @@ static int perf_event__prepare_comm(union perf_event *event, pid_t pid, pid_t ti
return 0;
}
-pid_t perf_event__synthesize_comm(struct perf_tool *tool,
+pid_t perf_event__synthesize_comm(const struct perf_tool *tool,
union perf_event *event, pid_t pid,
perf_event__handler_t process,
struct machine *machine)
@@ -218,7 +218,7 @@ static void perf_event__get_ns_link_info(pid_t pid, const char *ns,
}
}
-int perf_event__synthesize_namespaces(struct perf_tool *tool,
+int perf_event__synthesize_namespaces(const struct perf_tool *tool,
union perf_event *event,
pid_t pid, pid_t tgid,
perf_event__handler_t process,
@@ -257,7 +257,7 @@ int perf_event__synthesize_namespaces(struct perf_tool *tool,
return 0;
}
-static int perf_event__synthesize_fork(struct perf_tool *tool,
+static int perf_event__synthesize_fork(const struct perf_tool *tool,
union perf_event *event,
pid_t pid, pid_t tgid, pid_t ppid,
perf_event__handler_t process,
@@ -418,7 +418,7 @@ out:
dso__put(dso);
}
-int perf_event__synthesize_mmap_events(struct perf_tool *tool,
+int perf_event__synthesize_mmap_events(const struct perf_tool *tool,
union perf_event *event,
pid_t pid, pid_t tgid,
perf_event__handler_t process,
@@ -542,7 +542,7 @@ out:
}
#ifdef HAVE_FILE_HANDLE
-static int perf_event__synthesize_cgroup(struct perf_tool *tool,
+static int perf_event__synthesize_cgroup(const struct perf_tool *tool,
union perf_event *event,
char *path, size_t mount_len,
perf_event__handler_t process,
@@ -582,7 +582,7 @@ static int perf_event__synthesize_cgroup(struct perf_tool *tool,
return 0;
}
-static int perf_event__walk_cgroup_tree(struct perf_tool *tool,
+static int perf_event__walk_cgroup_tree(const struct perf_tool *tool,
union perf_event *event,
char *path, size_t mount_len,
perf_event__handler_t process,
@@ -630,7 +630,7 @@ static int perf_event__walk_cgroup_tree(struct perf_tool *tool,
return ret;
}
-int perf_event__synthesize_cgroups(struct perf_tool *tool,
+int perf_event__synthesize_cgroups(const struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine)
{
@@ -657,7 +657,7 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool,
return 0;
}
#else
-int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused,
+int perf_event__synthesize_cgroups(const struct perf_tool *tool __maybe_unused,
perf_event__handler_t process __maybe_unused,
struct machine *machine __maybe_unused)
{
@@ -666,7 +666,7 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused,
#endif
struct perf_event__synthesize_modules_maps_cb_args {
- struct perf_tool *tool;
+ const struct perf_tool *tool;
perf_event__handler_t process;
struct machine *machine;
union perf_event *event;
@@ -717,7 +717,7 @@ static int perf_event__synthesize_modules_maps_cb(struct map *map, void *data)
return 0;
}
-int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process,
+int perf_event__synthesize_modules(const struct perf_tool *tool, perf_event__handler_t process,
struct machine *machine)
{
int rc;
@@ -763,7 +763,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
union perf_event *fork_event,
union perf_event *namespaces_event,
pid_t pid, int full, perf_event__handler_t process,
- struct perf_tool *tool, struct machine *machine,
+ const struct perf_tool *tool, struct machine *machine,
bool needs_mmap, bool mmap_data)
{
char filename[PATH_MAX];
@@ -852,7 +852,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
return rc;
}
-int perf_event__synthesize_thread_map(struct perf_tool *tool,
+int perf_event__synthesize_thread_map(const struct perf_tool *tool,
struct perf_thread_map *threads,
perf_event__handler_t process,
struct machine *machine,
@@ -929,7 +929,7 @@ out:
return err;
}
-static int __perf_event__synthesize_threads(struct perf_tool *tool,
+static int __perf_event__synthesize_threads(const struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine,
bool needs_mmap,
@@ -993,7 +993,7 @@ out:
}
struct synthesize_threads_arg {
- struct perf_tool *tool;
+ const struct perf_tool *tool;
perf_event__handler_t process;
struct machine *machine;
bool needs_mmap;
@@ -1015,7 +1015,7 @@ static void *synthesize_threads_worker(void *arg)
return NULL;
}
-int perf_event__synthesize_threads(struct perf_tool *tool,
+int perf_event__synthesize_threads(const struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine,
bool needs_mmap, bool mmap_data,
@@ -1104,14 +1104,14 @@ free_dirent:
return err;
}
-int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused,
+int __weak perf_event__synthesize_extra_kmaps(const struct perf_tool *tool __maybe_unused,
perf_event__handler_t process __maybe_unused,
struct machine *machine __maybe_unused)
{
return 0;
}
-static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+static int __perf_event__synthesize_kernel_mmap(const struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine)
{
@@ -1183,7 +1183,7 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
return err;
}
-int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+int perf_event__synthesize_kernel_mmap(const struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine)
{
@@ -1196,7 +1196,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
return perf_event__synthesize_extra_kmaps(tool, process, machine);
}
-int perf_event__synthesize_thread_map2(struct perf_tool *tool,
+int perf_event__synthesize_thread_map2(const struct perf_tool *tool,
struct perf_thread_map *threads,
perf_event__handler_t process,
struct machine *machine)
@@ -1346,7 +1346,7 @@ static struct perf_record_cpu_map *cpu_map_event__new(const struct perf_cpu_map
}
-int perf_event__synthesize_cpu_map(struct perf_tool *tool,
+int perf_event__synthesize_cpu_map(const struct perf_tool *tool,
const struct perf_cpu_map *map,
perf_event__handler_t process,
struct machine *machine)
@@ -1364,7 +1364,7 @@ int perf_event__synthesize_cpu_map(struct perf_tool *tool,
return err;
}
-int perf_event__synthesize_stat_config(struct perf_tool *tool,
+int perf_event__synthesize_stat_config(const struct perf_tool *tool,
struct perf_stat_config *config,
perf_event__handler_t process,
struct machine *machine)
@@ -1403,7 +1403,7 @@ int perf_event__synthesize_stat_config(struct perf_tool *tool,
return err;
}
-int perf_event__synthesize_stat(struct perf_tool *tool,
+int perf_event__synthesize_stat(const struct perf_tool *tool,
struct perf_cpu cpu, u32 thread, u64 id,
struct perf_counts_values *count,
perf_event__handler_t process,
@@ -1425,7 +1425,7 @@ int perf_event__synthesize_stat(struct perf_tool *tool,
return process(tool, (union perf_event *) &event, NULL, machine);
}
-int perf_event__synthesize_stat_round(struct perf_tool *tool,
+int perf_event__synthesize_stat_round(const struct perf_tool *tool,
u64 evtime, u64 type,
perf_event__handler_t process,
struct machine *machine)
@@ -1826,7 +1826,7 @@ int perf_event__synthesize_id_sample(__u64 *array, u64 type, const struct perf_s
return (void *)array - (void *)start;
}
-int __perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process,
+int __perf_event__synthesize_id_index(const struct perf_tool *tool, perf_event__handler_t process,
struct evlist *evlist, struct machine *machine, size_t from)
{
union perf_event *ev;
@@ -1918,13 +1918,13 @@ out_err:
return err;
}
-int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process,
+int perf_event__synthesize_id_index(const struct perf_tool *tool, perf_event__handler_t process,
struct evlist *evlist, struct machine *machine)
{
return __perf_event__synthesize_id_index(tool, process, evlist, machine, 0);
}
-int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
+int __machine__synthesize_threads(struct machine *machine, const struct perf_tool *tool,
struct target *target, struct perf_thread_map *threads,
perf_event__handler_t process, bool needs_mmap,
bool data_mmap, unsigned int nr_threads_synthesize)
@@ -1985,7 +1985,7 @@ static struct perf_record_event_update *event_update_event__new(size_t size, u64
return ev;
}
-int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel,
+int perf_event__synthesize_event_update_unit(const struct perf_tool *tool, struct evsel *evsel,
perf_event__handler_t process)
{
size_t size = strlen(evsel->unit);
@@ -2002,7 +2002,7 @@ int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evse
return err;
}
-int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel,
+int perf_event__synthesize_event_update_scale(const struct perf_tool *tool, struct evsel *evsel,
perf_event__handler_t process)
{
struct perf_record_event_update *ev;
@@ -2019,7 +2019,7 @@ int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evs
return err;
}
-int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel,
+int perf_event__synthesize_event_update_name(const struct perf_tool *tool, struct evsel *evsel,
perf_event__handler_t process)
{
struct perf_record_event_update *ev;
@@ -2036,7 +2036,7 @@ int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evse
return err;
}
-int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel,
+int perf_event__synthesize_event_update_cpus(const struct perf_tool *tool, struct evsel *evsel,
perf_event__handler_t process)
{
struct synthesize_cpu_map_data syn_data = { .map = evsel->core.own_cpus };
@@ -2059,7 +2059,7 @@ int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evse
return err;
}
-int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist,
+int perf_event__synthesize_attrs(const struct perf_tool *tool, struct evlist *evlist,
perf_event__handler_t process)
{
struct evsel *evsel;
@@ -2087,7 +2087,7 @@ static bool has_scale(struct evsel *evsel)
return evsel->scale != 1;
}
-int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list,
+int perf_event__synthesize_extra_attr(const struct perf_tool *tool, struct evlist *evsel_list,
perf_event__handler_t process, bool is_pipe)
{
struct evsel *evsel;
@@ -2143,7 +2143,7 @@ int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evs
return 0;
}
-int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr,
+int perf_event__synthesize_attr(const struct perf_tool *tool, struct perf_event_attr *attr,
u32 ids, u64 *id, perf_event__handler_t process)
{
union perf_event *ev;
@@ -2177,7 +2177,7 @@ int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *
}
#ifdef HAVE_LIBTRACEEVENT
-int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist,
+int perf_event__synthesize_tracing_data(const struct perf_tool *tool, int fd, struct evlist *evlist,
perf_event__handler_t process)
{
union perf_event ev;
@@ -2200,7 +2200,7 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct e
if (!tdata)
return -1;
- memset(&ev, 0, sizeof(ev));
+ memset(&ev, 0, sizeof(ev.tracing_data));
ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
size = tdata->size;
@@ -2225,31 +2225,108 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct e
}
#endif
-int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc,
- perf_event__handler_t process, struct machine *machine)
+int perf_event__synthesize_build_id(const struct perf_tool *tool,
+ struct perf_sample *sample,
+ struct machine *machine,
+ perf_event__handler_t process,
+ const struct evsel *evsel,
+ __u16 misc,
+ const struct build_id *bid,
+ const char *filename)
{
union perf_event ev;
size_t len;
- if (!dso__hit(pos))
- return 0;
+ len = sizeof(ev.build_id) + strlen(filename) + 1;
+ len = PERF_ALIGN(len, sizeof(u64));
- memset(&ev, 0, sizeof(ev));
+ memset(&ev, 0, len);
- len = dso__long_name_len(pos) + 1;
- len = PERF_ALIGN(len, NAME_ALIGN);
- ev.build_id.size = min(dso__bid(pos)->size, sizeof(dso__bid(pos)->data));
- memcpy(&ev.build_id.build_id, dso__bid(pos)->data, ev.build_id.size);
+ ev.build_id.size = min(bid->size, sizeof(ev.build_id.build_id));
+ memcpy(ev.build_id.build_id, bid->data, ev.build_id.size);
ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
ev.build_id.header.misc = misc | PERF_RECORD_MISC_BUILD_ID_SIZE;
ev.build_id.pid = machine->pid;
- ev.build_id.header.size = sizeof(ev.build_id) + len;
- memcpy(&ev.build_id.filename, dso__long_name(pos), dso__long_name_len(pos));
+ ev.build_id.header.size = len;
+ strcpy(ev.build_id.filename, filename);
+
+ if (evsel) {
+ void *array = &ev;
+ int ret;
+
+ array += ev.header.size;
+ ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample);
+ if (ret < 0)
+ return ret;
+
+ if (ret & 7) {
+ pr_err("Bad id sample size %d\n", ret);
+ return -EINVAL;
+ }
+
+ ev.header.size += ret;
+ }
+
+ return process(tool, &ev, sample, machine);
+}
+
+/*
+ * Synthesize a PERF_RECORD_MMAP2 event that carries @bid as its build ID,
+ * append the id sample selected by @evsel's sample_type, and pass the result
+ * to @process.
+ *
+ * @evsel is dereferenced unconditionally and must not be NULL.
+ *
+ * Returns the negative result of perf_event__synthesize_id_sample() if it
+ * fails, -EINVAL if the id sample size is not a multiple of 8, otherwise the
+ * return value of @process.
+ */
+int perf_event__synthesize_mmap2_build_id(const struct perf_tool *tool,
+					  struct perf_sample *sample,
+					  struct machine *machine,
+					  perf_event__handler_t process,
+					  const struct evsel *evsel,
+					  __u16 misc,
+					  __u32 pid, __u32 tid,
+					  __u64 start, __u64 len, __u64 pgoff,
+					  const struct build_id *bid,
+					  __u32 prot, __u32 flags,
+					  const char *filename)
+{
+	union perf_event ev;
+	size_t ev_len;
+	void *array;
+	int ret;
+
+	/* Fixed part of the record plus the NUL-terminated name, u64 aligned. */
+	ev_len = sizeof(ev.mmap2) - sizeof(ev.mmap2.filename) + strlen(filename) + 1;
+	ev_len = PERF_ALIGN(ev_len, sizeof(u64));
+
+	memset(&ev, 0, ev_len);
+
+	ev.mmap2.header.type = PERF_RECORD_MMAP2;
+	ev.mmap2.header.misc = misc | PERF_RECORD_MISC_MMAP_BUILD_ID;
+	ev.mmap2.header.size = ev_len;
+
+	ev.mmap2.pid = pid;
+	ev.mmap2.tid = tid;
+	ev.mmap2.start = start;
+	ev.mmap2.len = len;
+	ev.mmap2.pgoff = pgoff;
+
+	ev.mmap2.build_id_size = min(bid->size, sizeof(ev.mmap2.build_id));
+	memcpy(ev.mmap2.build_id, bid->data, ev.mmap2.build_id_size);
+
+	ev.mmap2.prot = prot;
+	ev.mmap2.flags = flags;
+
+	/*
+	 * Bound the copy by the member that is actually written. The record
+	 * was zeroed above, so a name shorter than the buffer stays
+	 * NUL-terminated.
+	 */
+	memcpy(ev.mmap2.filename, filename, min(strlen(filename), sizeof(ev.mmap2.filename)));
+
+	/* The id sample goes right after the record body. */
+	array = &ev;
+	array += ev.header.size;
+	ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample);
+	if (ret < 0)
+		return ret;
+
+	if (ret & 7) {
+		pr_err("Bad id sample size %d\n", ret);
+		return -EINVAL;
+	}
+
+	ev.header.size += ret;
- return process(tool, &ev, NULL, machine);
+	return process(tool, &ev, sample, machine);
}
-int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool,
+int perf_event__synthesize_stat_events(struct perf_stat_config *config, const struct perf_tool *tool,
struct evlist *evlist, perf_event__handler_t process, bool attrs)
{
int err;
@@ -2286,7 +2363,7 @@ int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct p
extern const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE];
-int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session,
+int perf_event__synthesize_features(const struct perf_tool *tool, struct perf_session *session,
struct evlist *evlist, perf_event__handler_t process)
{
struct perf_header *header = &session->header;
@@ -2349,7 +2426,7 @@ int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session
return ret;
}
-int perf_event__synthesize_for_pipe(struct perf_tool *tool,
+int perf_event__synthesize_for_pipe(const struct perf_tool *tool,
struct perf_session *session,
struct perf_data *data,
perf_event__handler_t process)
diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
index 53737d1619a4..b9c936b5cfeb 100644
--- a/tools/perf/util/synthetic-events.h
+++ b/tools/perf/util/synthetic-events.h
@@ -9,6 +9,7 @@
#include <perf/cpumap.h>
struct auxtrace_record;
+struct build_id;
struct dso;
struct evlist;
struct evsel;
@@ -40,45 +41,63 @@ enum perf_record_synth {
int parse_synth_opt(char *str);
-typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *event,
+typedef int (*perf_event__handler_t)(const struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample, struct machine *machine);
-int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process);
-int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process);
-int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_cpu_map(struct perf_tool *tool, const struct perf_cpu_map *cpus, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
-int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
-int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
-int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
-int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list, perf_event__handler_t process, bool is_pipe);
-int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session, struct evlist *evlist, perf_event__handler_t process);
-int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine);
-int __perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine, size_t from);
+int perf_event__synthesize_attrs(const struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synthesize_attr(const struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process);
+int perf_event__synthesize_build_id(const struct perf_tool *tool,
+ struct perf_sample *sample,
+ struct machine *machine,
+ perf_event__handler_t process,
+ const struct evsel *evsel,
+ __u16 misc,
+ const struct build_id *bid,
+ const char *filename);
+int perf_event__synthesize_mmap2_build_id(const struct perf_tool *tool,
+ struct perf_sample *sample,
+ struct machine *machine,
+ perf_event__handler_t process,
+ const struct evsel *evsel,
+ __u16 misc,
+ __u32 pid, __u32 tid,
+ __u64 start, __u64 len, __u64 pgoff,
+ const struct build_id *bid,
+ __u32 prot, __u32 flags,
+ const char *filename);
+int perf_event__synthesize_cpu_map(const struct perf_tool *tool, const struct perf_cpu_map *cpus, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_event_update_cpus(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_name(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_scale(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_unit(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_extra_attr(const struct perf_tool *tool, struct evlist *evsel_list, perf_event__handler_t process, bool is_pipe);
+int perf_event__synthesize_extra_kmaps(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_features(const struct perf_tool *tool, struct perf_session *session, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synthesize_id_index(const struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine);
+int __perf_event__synthesize_id_index(const struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine, size_t from);
int perf_event__synthesize_id_sample(__u64 *array, u64 type, const struct perf_sample *sample);
-int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data);
-int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_namespaces(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_cgroups(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_kernel_mmap(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_mmap_events(const struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data);
+int perf_event__synthesize_modules(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_namespaces(const struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_cgroups(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample);
-int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs);
-int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_stat(struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data);
-int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize);
-int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process);
-int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
-pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine);
-
-int perf_tool__process_synth_event(struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process);
+int perf_event__synthesize_stat_config(const struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_stat_events(struct perf_stat_config *config, const struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs);
+int perf_event__synthesize_stat_round(const struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_stat(const struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_thread_map2(const struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_thread_map(const struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data);
+int perf_event__synthesize_threads(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize);
+int perf_event__synthesize_tracing_data(const struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+pid_t perf_event__synthesize_comm(const struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine);
+
+int perf_tool__process_synth_event(const struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process);
size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format);
-int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
+int __machine__synthesize_threads(struct machine *machine, const struct perf_tool *tool,
struct target *target, struct perf_thread_map *threads,
perf_event__handler_t process, bool needs_mmap, bool data_mmap,
unsigned int nr_threads_synthesize);
@@ -87,7 +106,7 @@ int machine__synthesize_threads(struct machine *machine, struct target *target,
unsigned int nr_threads_synthesize);
#ifdef HAVE_AUXTRACE_SUPPORT
-int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, struct perf_tool *tool,
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, const struct perf_tool *tool,
struct perf_session *session, perf_event__handler_t process);
#else // HAVE_AUXTRACE_SUPPORT
@@ -96,7 +115,7 @@ int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, struct per
static inline int
perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused,
- struct perf_tool *tool __maybe_unused,
+ const struct perf_tool *tool __maybe_unused,
struct perf_session *session __maybe_unused,
perf_event__handler_t process __maybe_unused)
{
@@ -117,7 +136,7 @@ static inline int perf_event__synthesize_bpf_events(struct perf_session *session
}
#endif // HAVE_LIBBPF_SUPPORT
-int perf_event__synthesize_for_pipe(struct perf_tool *tool,
+int perf_event__synthesize_for_pipe(const struct perf_tool *tool,
struct perf_session *session,
struct perf_data *data,
perf_event__handler_t process);
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index 0dd26b991b3f..69d8dcf5cf28 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -18,6 +18,10 @@
#include <asm/syscalls_64.c>
const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID;
static const char *const *syscalltbl_native = syscalltbl_x86_64;
+#elif defined(__i386__)
+#include <asm/syscalls_32.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_x86_MAX_ID;
+static const char *const *syscalltbl_native = syscalltbl_x86;
#elif defined(__s390x__)
#include <asm/syscalls_64.c>
const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID;
@@ -42,6 +46,15 @@ static const char *const *syscalltbl_native = syscalltbl_mips_n64;
#include <asm/syscalls.c>
const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID;
static const char *const *syscalltbl_native = syscalltbl_loongarch;
+#elif defined(__riscv)
+#include <asm/syscalls.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_RISCV_MAX_ID;
+static const char *const *syscalltbl_native = syscalltbl_riscv;
+#else
+const int syscalltbl_native_max_id = 0;
+static const char *const syscalltbl_native[] = {
+ [0] = "unknown",
+};
#endif
struct syscall {
@@ -178,6 +191,11 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name)
return audit_name_to_syscall(name, tbl->audit_machine);
}
+/* Return the syscall id for table position idx; here the position is returned unchanged and tbl is not consulted. */
+int syscalltbl__id_at_idx(struct syscalltbl *tbl __maybe_unused, int idx)
+{
+ return idx;
+}
+
int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused,
const char *syscall_glob __maybe_unused, int *idx __maybe_unused)
{
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index d582cae8e105..2ee2cc30340f 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -17,6 +17,7 @@ struct target {
bool default_per_cpu;
bool per_thread;
bool use_bpf;
+ bool inherit;
int initial_delay;
const char *attr_map;
};
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 87c59aa9fe38..0ffdd52d86d7 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -476,6 +476,7 @@ void thread__free_stitch_list(struct thread *thread)
return;
list_for_each_entry_safe(pos, tmp, &lbr_stitch->lists, node) {
+ map_symbol__exit(&pos->cursor.ms);
list_del_init(&pos->node);
free(pos);
}
@@ -485,6 +486,9 @@ void thread__free_stitch_list(struct thread *thread)
free(pos);
}
+ for (unsigned int i = 0 ; i < lbr_stitch->prev_lbr_cursor_size; i++)
+ map_symbol__exit(&lbr_stitch->prev_lbr_cursor[i].ms);
+
zfree(&lbr_stitch->prev_lbr_cursor);
free(thread__lbr_stitch(thread));
thread__set_lbr_stitch(thread, NULL);
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 8b4a3c69bad1..6cbf6eb2812e 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -26,6 +26,7 @@ struct lbr_stitch {
struct list_head free_lists;
struct perf_sample prev_sample;
struct callchain_cursor_node *prev_lbr_cursor;
+ unsigned int prev_lbr_cursor_size;
};
DECLARE_RC_STRUCT(thread) {
diff --git a/tools/perf/util/threads.c b/tools/perf/util/threads.c
index ff2b169e0085..6ca0b178fb6c 100644
--- a/tools/perf/util/threads.c
+++ b/tools/perf/util/threads.c
@@ -141,7 +141,7 @@ void threads__remove_all_threads(struct threads *threads)
down_write(&table->lock);
__threads_table_entry__set_last_match(table, NULL);
- hashmap__for_each_entry_safe((&table->shard), cur, tmp, bkt) {
+ hashmap__for_each_entry_safe(&table->shard, cur, tmp, bkt) {
struct thread *old_value;
hashmap__delete(&table->shard, cur->key, /*old_key=*/NULL, &old_value);
@@ -175,7 +175,7 @@ int threads__for_each_thread(struct threads *threads,
size_t bkt;
down_read(&table->lock);
- hashmap__for_each_entry((&table->shard), cur, bkt) {
+ hashmap__for_each_entry(&table->shard, cur, bkt) {
int rc = fn((struct thread *)cur->pvalue, data);
if (rc != 0) {
diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c
index 302443921681..1b91ccd4d523 100644
--- a/tools/perf/util/time-utils.c
+++ b/tools/perf/util/time-utils.c
@@ -20,7 +20,7 @@ int parse_nsec_time(const char *str, u64 *ptime)
u64 time_sec, time_nsec;
char *end;
- time_sec = strtoul(str, &end, 10);
+ time_sec = strtoull(str, &end, 10);
if (*end != '.' && *end != '\0')
return -1;
@@ -38,7 +38,7 @@ int parse_nsec_time(const char *str, u64 *ptime)
for (i = strlen(nsec_buf); i < 9; i++)
nsec_buf[i] = '0';
- time_nsec = strtoul(nsec_buf, &end, 10);
+ time_nsec = strtoull(nsec_buf, &end, 10);
if (*end != '\0')
return -1;
} else
diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c
new file mode 100644
index 000000000000..3b7f390f26eb
--- /dev/null
+++ b/tools/perf/util/tool.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "data.h"
+#include "debug.h"
+#include "header.h"
+#include "session.h"
+#include "stat.h"
+#include "tool.h"
+#include "tsc.h"
+#include <sys/mman.h>
+#include <unistd.h>
+
+#ifdef HAVE_ZSTD_SUPPORT
+/*
+ * Decompress the payload that follows the struct perf_record_compressed
+ * header of @event into a newly mmap()ed struct decomp and append it to the
+ * list hanging off session->active_decomp.
+ *
+ * Any bytes left unconsumed in the previous decomp buffer (size - head) are
+ * copied to the start of the new buffer so that decompressed data stays
+ * contiguous across buffers.
+ *
+ * Returns 0 on success, -1 if the buffer cannot be mapped or the zstd stream
+ * produces no output.
+ */
+static int perf_session__process_compressed_event(struct perf_session *session,
+						  union perf_event *event, u64 file_offset,
+						  const char *file_path)
+{
+	void *src;
+	size_t decomp_size, src_size;
+	u64 decomp_last_rem = 0;
+	size_t mmap_len, decomp_len = session->header.env.comp_mmap_len;
+	struct decomp *decomp, *decomp_last = session->active_decomp->decomp_last;
+
+	if (decomp_last) {
+		decomp_last_rem = decomp_last->size - decomp_last->head;
+		decomp_len += decomp_last_rem;
+	}
+
+	mmap_len = sizeof(struct decomp) + decomp_len;
+	decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE,
+		      MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+	if (decomp == MAP_FAILED) {
+		pr_err("Couldn't allocate memory for decompression\n");
+		return -1;
+	}
+
+	decomp->file_pos = file_offset;
+	decomp->file_path = file_path;
+	decomp->mmap_len = mmap_len;
+	decomp->head = 0;
+	/* Set explicitly rather than relying on the mapping being zero-filled. */
+	decomp->size = decomp_last_rem;
+
+	/* Carry over the unconsumed tail of the previous buffer. */
+	if (decomp_last_rem)
+		memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
+
+	src = (void *)event + sizeof(struct perf_record_compressed);
+	src_size = event->pack.header.size - sizeof(struct perf_record_compressed);
+
+	decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size,
+				&(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
+	if (!decomp_size) {
+		munmap(decomp, mmap_len);
+		pr_err("Couldn't decompress data\n");
+		return -1;
+	}
+
+	decomp->size += decomp_size;
+
+	/* Append to the list: first element or after the current tail. */
+	if (session->active_decomp->decomp == NULL)
+		session->active_decomp->decomp = decomp;
+	else
+		session->active_decomp->decomp_last->next = decomp;
+
+	session->active_decomp->decomp_last = decomp;
+
+	/* Both values are size_t, so print them with the unsigned %zu. */
+	pr_debug("decomp (B): %zu to %zu\n", src_size, decomp_size);
+
+	return 0;
+}
+#endif
+
+/* Placeholder tracing_data callback: logs the event as unhandled, reports success. */
+static int
+process_event_synth_tracing_data_stub(struct perf_session *session __maybe_unused,
+				      union perf_event *event __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+
+	return 0;
+}
+
+/* Placeholder attr callback: logs the event as unhandled, reports success. */
+static int
+process_event_synth_attr_stub(const struct perf_tool *tool __maybe_unused,
+			      union perf_event *event __maybe_unused,
+			      struct evlist **pevlist __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+
+	return 0;
+}
+
+/*
+ * Placeholder event_update callback: when dump_trace is set the event is
+ * printed to stdout, then it is logged as unhandled. Always returns 0.
+ */
+static int
+process_event_synth_event_update_stub(const struct perf_tool *tool __maybe_unused,
+				      union perf_event *event __maybe_unused,
+				      struct evlist **pevlist __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_event_update(event, stdout);
+	dump_printf(": unhandled!\n");
+
+	return 0;
+}
+
+/*
+ * Exported placeholder for sample-style callbacks: ignores every argument,
+ * logs the event as unhandled and reports success.
+ */
+int
+process_event_sample_stub(const struct perf_tool *tool __maybe_unused,
+			  union perf_event *event __maybe_unused,
+			  struct perf_sample *sample __maybe_unused,
+			  struct evsel *evsel __maybe_unused,
+			  struct machine *machine __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+
+	return 0;
+}
+
+/* Generic placeholder event callback: logs the event as unhandled, reports success. */
+static int
+process_event_stub(const struct perf_tool *tool __maybe_unused,
+		   union perf_event *event __maybe_unused,
+		   struct perf_sample *sample __maybe_unused,
+		   struct machine *machine __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+
+	return 0;
+}
+
+/* Placeholder finished_round callback used when events are not ordered. */
+static int
+process_finished_round_stub(const struct perf_tool *tool __maybe_unused,
+			    union perf_event *event __maybe_unused,
+			    struct ordered_events *oe __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+
+	return 0;
+}
+
+/*
+ * Read and throw away n bytes from fd, at most 4096 bytes per read().
+ * Returns 0 once n bytes have been consumed; if a read() returns 0 or a
+ * negative value first, that value is returned instead.
+ */
+static int skipn(int fd, off_t n)
+{
+	char scratch[4096];
+
+	for (; n > 0; ) {
+		ssize_t nread = read(fd, scratch, min(n, (off_t)sizeof(scratch)));
+
+		if (nread <= 0)
+			return nread;
+
+		n -= nread;
+	}
+
+	return 0;
+}
+
+/*
+ * Placeholder auxtrace callback: logs the event as unhandled and returns
+ * event->auxtrace.size.
+ *
+ * NOTE(review): session is annotated __maybe_unused although it is
+ * dereferenced below, and the result of skipn() is ignored.
+ */
+static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unused,
+ union perf_event *event)
+{
+ dump_printf(": unhandled!\n");
+ /* For pipe input, read and discard auxtrace.size bytes from the data fd. */
+ if (perf_data__is_pipe(session->data))
+ skipn(perf_data__fd(session->data), event->auxtrace.size);
+ return event->auxtrace.size;
+}
+
+/* Placeholder for session-level callbacks: logs the event as unhandled, reports success. */
+static int
+process_event_op2_stub(struct perf_session *session __maybe_unused,
+		       union perf_event *event __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+
+	return 0;
+}
+
+
+/*
+ * Placeholder thread_map callback: prints the event to stdout when
+ * dump_trace is set, then logs it as unhandled. Always returns 0.
+ */
+static int
+process_event_thread_map_stub(struct perf_session *session __maybe_unused,
+			      union perf_event *event __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_thread_map(event, stdout);
+	dump_printf(": unhandled!\n");
+
+	return 0;
+}
+
+/*
+ * Placeholder cpu_map callback: prints the event to stdout when dump_trace
+ * is set, then logs it as unhandled. Always returns 0.
+ */
+static int
+process_event_cpu_map_stub(struct perf_session *session __maybe_unused,
+			   union perf_event *event __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_cpu_map(event, stdout);
+	dump_printf(": unhandled!\n");
+
+	return 0;
+}
+
+/*
+ * Placeholder stat_config callback: prints the event to stdout when
+ * dump_trace is set, then logs it as unhandled. Always returns 0.
+ */
+static int
+process_event_stat_config_stub(struct perf_session *session __maybe_unused,
+			       union perf_event *event __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_stat_config(event, stdout);
+	dump_printf(": unhandled!\n");
+
+	return 0;
+}
+
+/*
+ * Placeholder stat callback: prints the event to stdout when dump_trace is
+ * set, then logs it as unhandled. Always returns 0.
+ */
+static int
+process_stat_stub(struct perf_session *perf_session __maybe_unused,
+		  union perf_event *event)
+{
+	if (dump_trace)
+		perf_event__fprintf_stat(event, stdout);
+	dump_printf(": unhandled!\n");
+
+	return 0;
+}
+
+/*
+ * Placeholder stat_round callback: prints the event to stdout when
+ * dump_trace is set, then logs it as unhandled. Always returns 0.
+ */
+static int
+process_stat_round_stub(struct perf_session *perf_session __maybe_unused,
+			union perf_event *event)
+{
+	if (dump_trace)
+		perf_event__fprintf_stat_round(event, stdout);
+	dump_printf(": unhandled!\n");
+
+	return 0;
+}
+
+/*
+ * Placeholder time_conv callback: prints the event to stdout when
+ * dump_trace is set, then logs it as unhandled. Always returns 0.
+ */
+static int
+process_event_time_conv_stub(struct perf_session *perf_session __maybe_unused,
+			     union perf_event *event)
+{
+	if (dump_trace)
+		perf_event__fprintf_time_conv(event, stdout);
+	dump_printf(": unhandled!\n");
+
+	return 0;
+}
+
+/*
+ * Placeholder compressed-event callback; it also serves as the sentinel
+ * that perf_tool__compressed_is_stub() compares against.
+ */
+static int
+perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused,
+					    union perf_event *event __maybe_unused,
+					    u64 file_offset __maybe_unused,
+					    const char *file_path __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+
+	return 0;
+}
+
+/*
+ * Fill in every flag and callback of @tool with its default.
+ *
+ * @ordered_events is stored in the tool and also selects the finished_round
+ * callback: the real perf_event__process_finished_round when ordering is
+ * requested, a stub otherwise. The compressed callback is the zstd-backed
+ * implementation only when HAVE_ZSTD_SUPPORT is defined.
+ */
+void perf_tool__init(struct perf_tool *tool, bool ordered_events)
+{
+	/* Flags. */
+	tool->ordered_events = ordered_events;
+	tool->ordering_requires_timestamps = false;
+	tool->namespace_events = false;
+	tool->cgroup_events = false;
+	tool->no_warn = false;
+	tool->show_feat_hdr = SHOW_FEAT_NO_HEADER;
+
+	/* Callbacks that default to a "log as unhandled" stub. */
+	tool->sample = process_event_sample_stub;
+	tool->read = process_event_sample_stub;
+	tool->mmap = process_event_stub;
+	tool->mmap2 = process_event_stub;
+	tool->comm = process_event_stub;
+	tool->namespaces = process_event_stub;
+	tool->cgroup = process_event_stub;
+	tool->fork = process_event_stub;
+	tool->exit = process_event_stub;
+	tool->throttle = process_event_stub;
+	tool->unthrottle = process_event_stub;
+	tool->attr = process_event_synth_attr_stub;
+	tool->event_update = process_event_synth_event_update_stub;
+	tool->tracing_data = process_event_synth_tracing_data_stub;
+	tool->build_id = process_event_op2_stub;
+	tool->id_index = process_event_op2_stub;
+	tool->auxtrace_info = process_event_op2_stub;
+	tool->auxtrace = process_event_auxtrace_stub;
+	tool->auxtrace_error = process_event_op2_stub;
+	tool->thread_map = process_event_thread_map_stub;
+	tool->cpu_map = process_event_cpu_map_stub;
+	tool->stat_config = process_event_stat_config_stub;
+	tool->stat = process_stat_stub;
+	tool->stat_round = process_stat_round_stub;
+	tool->time_conv = process_event_time_conv_stub;
+	tool->feature = process_event_op2_stub;
+	tool->finished_init = process_event_op2_stub;
+
+	/* Callbacks that default to the perf_event__process_* handlers. */
+	tool->lost = perf_event__process_lost;
+	tool->lost_samples = perf_event__process_lost_samples;
+	tool->aux = perf_event__process_aux;
+	tool->itrace_start = perf_event__process_itrace_start;
+	tool->context_switch = perf_event__process_switch;
+	tool->ksymbol = perf_event__process_ksymbol;
+	tool->bpf = perf_event__process_bpf;
+	tool->text_poke = perf_event__process_text_poke;
+	tool->aux_output_hw_id = perf_event__process_aux_output_hw_id;
+
+	/* Callbacks whose default depends on configuration. */
+	tool->finished_round = ordered_events ? perf_event__process_finished_round
+					      : process_finished_round_stub;
+#ifdef HAVE_ZSTD_SUPPORT
+	tool->compressed = perf_session__process_compressed_event;
+#else
+	tool->compressed = perf_session__process_compressed_event_stub;
+#endif
+}
+
+/* Tell whether @tool's compressed callback is still the do-nothing stub. */
+bool perf_tool__compressed_is_stub(const struct perf_tool *tool)
+{
+	if (tool->compressed != perf_session__process_compressed_event_stub)
+		return false;
+
+	return true;
+}
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index c957fb849ac6..db1c7642b0d1 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -15,14 +15,14 @@ struct perf_tool;
struct machine;
struct ordered_events;
-typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event,
+typedef int (*event_sample)(const struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel, struct machine *machine);
-typedef int (*event_op)(struct perf_tool *tool, union perf_event *event,
+typedef int (*event_op)(const struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample, struct machine *machine);
-typedef int (*event_attr_op)(struct perf_tool *tool,
+typedef int (*event_attr_op)(const struct perf_tool *tool,
union perf_event *event,
struct evlist **pevlist);
@@ -31,7 +31,7 @@ typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event);
typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data,
const char *str);
-typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
+typedef int (*event_oe)(const struct perf_tool *tool, union perf_event *event,
struct ordered_events *oe);
enum show_feature_header {
@@ -85,7 +85,18 @@ struct perf_tool {
bool namespace_events;
bool cgroup_events;
bool no_warn;
+ bool dont_split_sample_group;
enum show_feature_header show_feat_hdr;
};
+void perf_tool__init(struct perf_tool *tool, bool ordered_events);
+
+bool perf_tool__compressed_is_stub(const struct perf_tool *tool);
+
+int process_event_sample_stub(const struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct machine *machine);
+
#endif /* __PERF_TOOL_H */
diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c
new file mode 100644
index 000000000000..4fb097578479
--- /dev/null
+++ b/tools/perf/util/tool_pmu.c
@@ -0,0 +1,505 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "cgroup.h"
+#include "counts.h"
+#include "cputopo.h"
+#include "evsel.h"
+#include "pmu.h"
+#include "print-events.h"
+#include "smt.h"
+#include "time-utils.h"
+#include "tool_pmu.h"
+#include "tsc.h"
+#include <api/fs/fs.h>
+#include <api/io.h>
+#include <internal/threadmap.h>
+#include <perf/threadmap.h>
+#include <fcntl.h>
+#include <strings.h>
+
+/* Event name for each enum tool_pmu_event value; the NONE slot has no name. */
+static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = {
+	[TOOL_PMU__EVENT_NONE]			= NULL,
+	[TOOL_PMU__EVENT_DURATION_TIME]		= "duration_time",
+	[TOOL_PMU__EVENT_USER_TIME]		= "user_time",
+	[TOOL_PMU__EVENT_SYSTEM_TIME]		= "system_time",
+	[TOOL_PMU__EVENT_HAS_PMEM]		= "has_pmem",
+	[TOOL_PMU__EVENT_NUM_CORES]		= "num_cores",
+	[TOOL_PMU__EVENT_NUM_CPUS]		= "num_cpus",
+	[TOOL_PMU__EVENT_NUM_CPUS_ONLINE]	= "num_cpus_online",
+	[TOOL_PMU__EVENT_NUM_DIES]		= "num_dies",
+	[TOOL_PMU__EVENT_NUM_PACKAGES]		= "num_packages",
+	[TOOL_PMU__EVENT_SLOTS]			= "slots",
+	[TOOL_PMU__EVENT_SMT_ON]		= "smt_on",
+	[TOOL_PMU__EVENT_SYSTEM_TSC_FREQ]	= "system_tsc_freq",
+};
+
+/*
+ * Report whether the named tool event is unavailable on the architecture
+ * being compiled for. The comparison is case-insensitive.
+ */
+bool tool_pmu__skip_event(const char *name __maybe_unused)
+{
+	bool skip = false;
+
+#if !defined(__aarch64__)
+	/* The slots event should only appear on arm64. */
+	skip = skip || !strcasecmp(name, "slots");
+#endif
+#if !defined(__i386__) && !defined(__x86_64__)
+	/* The system_tsc_freq event should only appear on x86. */
+	skip = skip || !strcasecmp(name, "system_tsc_freq");
+#endif
+	return skip;
+}
+
+/* Count the tool events that tool_pmu__skip_event() rejects on this architecture. */
+int tool_pmu__num_skip_events(void)
+{
+#if !defined(__aarch64__)
+	const int skip_slots = 1;
+#else
+	const int skip_slots = 0;
+#endif
+#if !defined(__i386__) && !defined(__x86_64__)
+	const int skip_tsc_freq = 1;
+#else
+	const int skip_tsc_freq = 0;
+#endif
+
+	return skip_slots + skip_tsc_freq;
+}
+
+/* Name of @ev, or NULL when @ev is NONE or outside the enum range. */
+const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
+{
+	if (ev <= TOOL_PMU__EVENT_NONE || ev >= TOOL_PMU__EVENT_MAX)
+		return NULL;
+
+	return tool_pmu__event_names[ev];
+}
+
+/*
+ * Look up a tool event by name, ignoring case. Returns
+ * TOOL_PMU__EVENT_NONE for unknown names and for events skipped on this
+ * architecture.
+ */
+enum tool_pmu_event tool_pmu__str_to_event(const char *str)
+{
+	int ev;
+
+	if (tool_pmu__skip_event(str))
+		return TOOL_PMU__EVENT_NONE;
+
+	tool_pmu__for_each_event(ev) {
+		const char *candidate = tool_pmu__event_names[ev];
+
+		if (strcasecmp(str, candidate) == 0)
+			return ev;
+	}
+
+	return TOOL_PMU__EVENT_NONE;
+}
+
+/* True when @pmu is non-NULL and has the tool PMU type. */
+bool perf_pmu__is_tool(const struct perf_pmu *pmu)
+{
+	if (!pmu)
+		return false;
+
+	return pmu->type == PERF_PMU_TYPE_TOOL;
+}
+
+/* True when @evsel's PMU is the tool PMU. */
+bool evsel__is_tool(const struct evsel *evsel)
+{
+	const struct perf_pmu *pmu = evsel->pmu;
+
+	return perf_pmu__is_tool(pmu);
+}
+
+/*
+ * Tool event encoded in @evsel's attr.config, or TOOL_PMU__EVENT_NONE when
+ * @evsel does not belong to the tool PMU.
+ */
+enum tool_pmu_event evsel__tool_event(const struct evsel *evsel)
+{
+	if (evsel__is_tool(evsel))
+		return (enum tool_pmu_event)evsel->core.attr.config;
+
+	return TOOL_PMU__EVENT_NONE;
+}
+
+/*
+ * Name of the tool event whose number is stored in @evsel's attr.config.
+ * The config value is used as-is, without checking that @evsel is a tool
+ * event.
+ */
+const char *evsel__tool_pmu_event_name(const struct evsel *evsel)
+{
+	const char *name = tool_pmu__event_to_str(evsel->core.attr.config);
+
+	return name;
+}
+
+/*
+ * Consume characters from @io up to and including the first occurrence of
+ * @e. Returns false if io__get_char() reports -1 before @e is seen.
+ */
+static bool read_until_char(struct io *io, char e)
+{
+	for (;;) {
+		int ch = io__get_char(io);
+
+		if (ch == -1)
+			return false;
+		if (ch == e)
+			return true;
+	}
+}
+
+/*
+ * Read one numeric field from a per-CPU line of the file behind @fd (the
+ * caller in this file opens "/proc/stat").
+ *
+ * cpu.cpu + 1 lines are skipped first, so the line parsed is the one after
+ * those; it must begin with "cpu". @field is 1-based and counts the
+ * space-terminated decimal values that follow the "cpuN " prefix.
+ *
+ * Returns 0 with *val set on success, -EINVAL on any parse failure or if
+ * the line ends before @field is reached.
+ */
+static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val)
+{
+ char buf[256];
+ struct io io;
+ int i;
+
+ io__init(&io, fd, buf, sizeof(buf));
+
+ /* Skip lines to relevant CPU. */
+ for (i = -1; i < cpu.cpu; i++) {
+ if (!read_until_char(&io, '\n'))
+ return -EINVAL;
+ }
+ /* Skip to "cpu". */
+ if (io__get_char(&io) != 'c') return -EINVAL;
+ if (io__get_char(&io) != 'p') return -EINVAL;
+ if (io__get_char(&io) != 'u') return -EINVAL;
+
+ /* Skip N of cpuN. */
+ if (!read_until_char(&io, ' '))
+ return -EINVAL;
+
+ i = 1;
+ /* A value counts only if a space follows it; anything else ends the scan. */
+ while (true) {
+ if (io__get_dec(&io, val) != ' ')
+ break;
+ if (field == i)
+ return 0;
+ i++;
+ }
+ return -EINVAL;
+}
+
+/*
+ * Read one numeric field from the single-line file behind @fd (the caller
+ * in this file opens "/proc/<pid>/stat").
+ *
+ * @field is 1-based. Field 1 is a decimal number, field 2 is a
+ * parenthesised string and field 3 a single character; the latter two
+ * cannot be returned and yield -EINVAL. Fields from 4 onwards are decimal
+ * numbers separated by spaces.
+ *
+ * Returns 0 with *val set on success, -EINVAL on parse failure.
+ *
+ * NOTE(review): the comm field is skipped up to the first ')', so a comm
+ * that itself contains ')' would presumably be mis-parsed - confirm.
+ */
+static int read_pid_stat_field(int fd, int field, __u64 *val)
+{
+ char buf[256];
+ struct io io;
+ int c, i;
+
+ io__init(&io, fd, buf, sizeof(buf));
+ if (io__get_dec(&io, val) != ' ')
+ return -EINVAL;
+ if (field == 1)
+ return 0;
+
+ /* Skip comm. */
+ if (io__get_char(&io) != '(' || !read_until_char(&io, ')'))
+ return -EINVAL;
+ if (field == 2)
+ return -EINVAL; /* String can't be returned. */
+
+ /* Skip state */
+ if (io__get_char(&io) != ' ' || io__get_char(&io) == -1)
+ return -EINVAL;
+ if (field == 3)
+ return -EINVAL; /* String can't be returned. */
+
+ /* Loop over numeric fields*/
+ if (io__get_char(&io) != ' ')
+ return -EINVAL;
+
+ i = 4;
+ while (true) {
+ c = io__get_dec(&io, val);
+ if (c == -1)
+ return -EINVAL;
+ if (c == -2) {
+ /* Assume a -ve was read */
+ c = io__get_dec(&io, val);
+ /* val is unsigned, so this stores the two's-complement negation. */
+ *val *= -1;
+ }
+ if (c != ' ')
+ return -EINVAL;
+ if (field == i)
+ return 0;
+ i++;
+ }
+ /* Not reached: the loop above only exits via return. */
+ return -EINVAL;
+}
+
+/*
+ * For user_time/system_time events, allocate evsel->start_times (one __u64
+ * per CPU-map entry and thread) if it does not exist yet. Other events need
+ * no preparation.
+ *
+ * Returns 0 on success, -ENOMEM if the array cannot be allocated.
+ */
+int evsel__tool_pmu_prepare_open(struct evsel *evsel,
+				 struct perf_cpu_map *cpus,
+				 int nthreads)
+{
+	enum tool_pmu_event ev = evsel__tool_event(evsel);
+	bool is_time_event = ev == TOOL_PMU__EVENT_SYSTEM_TIME ||
+			     ev == TOOL_PMU__EVENT_USER_TIME;
+
+	if (!is_time_event || evsel->start_times)
+		return 0;
+
+	evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus), nthreads,
+					  sizeof(__u64));
+
+	return evsel->start_times ? 0 : -ENOMEM;
+}
+
+/* File descriptor slot of evsel @e for CPU-map index @x and thread @y, as an lvalue. */
+#define FD(e, x, y) (*(int *)xyarray__entry((e)->core.fd, x, y))
+
+/*
+ * "Open" a tool event for CPU-map indices [start_cpu_map_idx,
+ * end_cpu_map_idx) and every thread in @threads.
+ *
+ * - num_cpus needs no setup.
+ * - duration_time records the current rdclock() value as its start time.
+ * - user_time/system_time open "/proc/<pid>/stat" (when a pid is known) or
+ *   "/proc/stat", store the fd in the evsel's fd array and record the
+ *   initial counter value in evsel->start_times.
+ * - All other events fall through the loops without doing anything.
+ *
+ * Sampling is not supported: a non-zero sample_period gives -EINVAL.
+ *
+ * Returns 0 on success or a negative errno value. On failure the fds
+ * stored so far are closed, threads->err_thread is set to the failing
+ * thread index and errno is preserved across the cleanup.
+ */
+int evsel__tool_pmu_open(struct evsel *evsel,
+			 struct perf_thread_map *threads,
+			 int start_cpu_map_idx, int end_cpu_map_idx)
+{
+	enum tool_pmu_event ev = evsel__tool_event(evsel);
+	int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno;
+
+	if (ev == TOOL_PMU__EVENT_NUM_CPUS)
+		return 0;
+
+	if (ev == TOOL_PMU__EVENT_DURATION_TIME) {
+		if (evsel->core.attr.sample_period) /* no sampling */
+			return -EINVAL;
+		evsel->start_time = rdclock();
+		return 0;
+	}
+
+	if (evsel->cgrp)
+		pid = evsel->cgrp->fd;
+
+	nthreads = perf_thread_map__nr(threads);
+	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
+		for (thread = 0; thread < nthreads; thread++) {
+			if (!evsel->cgrp && !evsel->core.system_wide)
+				pid = perf_thread_map__pid(threads, thread);
+
+			if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) {
+				bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME;
+				__u64 *start_time = NULL;
+				int fd;
+
+				if (evsel->core.attr.sample_period) {
+					/* no sampling */
+					err = -EINVAL;
+					goto out_close;
+				}
+				if (pid > -1) {
+					char buf[64];
+
+					snprintf(buf, sizeof(buf), "/proc/%d/stat", pid);
+					fd = open(buf, O_RDONLY);
+					evsel->pid_stat = true;
+				} else {
+					fd = open("/proc/stat", O_RDONLY);
+				}
+				FD(evsel, idx, thread) = fd;
+				if (fd < 0) {
+					err = -errno;
+					goto out_close;
+				}
+				start_time = xyarray__entry(evsel->start_times, idx, thread);
+				if (pid > -1) {
+					err = read_pid_stat_field(fd, system ? 15 : 14,
+								  start_time);
+				} else {
+					struct perf_cpu cpu;
+
+					cpu = perf_cpu_map__cpu(evsel->core.cpus, idx);
+					err = read_stat_field(fd, cpu, system ? 3 : 1,
+							      start_time);
+				}
+				if (err) {
+					/*
+					 * The cleanup below starts at thread - 1,
+					 * so release this slot's fd here.
+					 */
+					close(fd);
+					FD(evsel, idx, thread) = -1;
+					goto out_close;
+				}
+			}
+
+		}
+	}
+	return 0;
+out_close:
+	if (err)
+		threads->err_thread = thread;
+
+	old_errno = errno;
+	/* Walk back over the earlier (idx, thread) slots and close them. */
+	do {
+		while (--thread >= 0) {
+			if (FD(evsel, idx, thread) >= 0)
+				close(FD(evsel, idx, thread));
+			FD(evsel, idx, thread) = -1;
+		}
+		thread = nthreads;
+	} while (--idx >= 0);
+	errno = old_errno;
+	return err;
+}
+
+#if !defined(__i386__) && !defined(__x86_64__)
+/* Fallback compiled on non-x86 builds (see the enclosing #if): reports 0. */
+u64 arch_get_tsc_freq(void)
+{
+ return 0;
+}
+#endif
+
+#if !defined(__aarch64__)
+/* Fallback compiled on non-arm64 builds (see the enclosing #if): reports 0. */
+u64 tool_pmu__cpu_slots_per_cycle(void)
+{
+ return 0;
+}
+#endif
+
+/*
+ * Report whether "<sysfs>/firmware/acpi/tables/NFIT" exists. The answer is
+ * computed on the first call and cached in function-local statics; if no
+ * sysfs mountpoint is available the answer is false.
+ */
+static bool has_pmem(void)
+{
+	static bool has_pmem, cached;
+
+	if (!cached) {
+		/* Only look up the mountpoint when the probe actually runs. */
+		const char *sysfs = sysfs__mountpoint();
+		char path[PATH_MAX];
+
+		if (sysfs) {
+			snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs);
+			has_pmem = access(path, F_OK) == 0;
+		}
+		cached = true;
+	}
+	return has_pmem;
+}
+
+/*
+ * Compute the value of a tool event that does not need per-evsel state and
+ * store it in *result.
+ *
+ * Returns true when *result holds a usable value. Returns false for the
+ * time-based events and the NONE/MAX markers, when the online CPU map is
+ * unavailable, and when the slots value is 0.
+ */
+bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result)
+{
+	switch (ev) {
+	case TOOL_PMU__EVENT_HAS_PMEM:
+		*result = has_pmem();
+		return true;
+
+	case TOOL_PMU__EVENT_NUM_CORES:
+		*result = online_topology()->core_cpus_lists;
+		return true;
+
+	case TOOL_PMU__EVENT_NUM_CPUS:
+		*result = cpu__max_present_cpu().cpu;
+		return true;
+
+	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: {
+		struct perf_cpu_map *online_cpus = cpu_map__online();
+
+		if (!online_cpus)
+			return false;
+
+		*result = perf_cpu_map__nr(online_cpus);
+		return true;
+	}
+	case TOOL_PMU__EVENT_NUM_DIES:
+		*result = online_topology()->die_cpus_lists;
+		return true;
+
+	case TOOL_PMU__EVENT_NUM_PACKAGES:
+		*result = online_topology()->package_cpus_lists;
+		return true;
+
+	case TOOL_PMU__EVENT_SLOTS:
+		*result = tool_pmu__cpu_slots_per_cycle();
+		return *result != 0;
+
+	case TOOL_PMU__EVENT_SMT_ON:
+		*result = smt_on();
+		return true;
+
+	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
+		*result = arch_get_tsc_freq();
+		return true;
+
+	case TOOL_PMU__EVENT_NONE:
+	case TOOL_PMU__EVENT_DURATION_TIME:
+	case TOOL_PMU__EVENT_USER_TIME:
+	case TOOL_PMU__EVENT_SYSTEM_TIME:
+	case TOOL_PMU__EVENT_MAX:
+	default:
+		return false;
+	}
+}
+
+/*
+ * Read the current value of a tool event into evsel->counts for
+ * (@cpu_map_idx, @thread).
+ *
+ * - Constant-style events (has_pmem, num_*, slots, smt_on,
+ *   system_tsc_freq): the value is read once, for index 0/0 only; other
+ *   slots contribute 0. When prev_raw_counts exists the new count is
+ *   accumulated on top of the previous one.
+ * - duration_time: the rdclock() delta since open, attributed to index 0/0
+ *   only so that aggregation does not multiply it.
+ * - user_time/system_time: the stat file opened earlier is re-read from the
+ *   start and the delta from the recorded start value is converted from
+ *   clock ticks to nanoseconds.
+ *
+ * Returns 0 on success, -EINVAL for NONE/MAX/unknown events, or the error
+ * from parsing the stat file.
+ */
+int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+	__u64 *start_time, cur_time, delta_start;
+	u64 val;
+	int fd, err = 0;
+	struct perf_counts_values *count, *old_count = NULL;
+	bool adjust = false;
+	enum tool_pmu_event ev = evsel__tool_event(evsel);
+
+	count = perf_counts(evsel->counts, cpu_map_idx, thread);
+
+	switch (ev) {
+	case TOOL_PMU__EVENT_HAS_PMEM:
+	case TOOL_PMU__EVENT_NUM_CORES:
+	case TOOL_PMU__EVENT_NUM_CPUS:
+	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE:
+	case TOOL_PMU__EVENT_NUM_DIES:
+	case TOOL_PMU__EVENT_NUM_PACKAGES:
+	case TOOL_PMU__EVENT_SLOTS:
+	case TOOL_PMU__EVENT_SMT_ON:
+	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
+		if (evsel->prev_raw_counts)
+			old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
+		val = 0;
+		if (cpu_map_idx == 0 && thread == 0) {
+			if (!tool_pmu__read_event(ev, &val)) {
+				count->lost++;
+				val = 0;
+			}
+		}
+		if (old_count) {
+			count->val = old_count->val + val;
+			count->run = old_count->run + 1;
+			count->ena = old_count->ena + 1;
+		} else {
+			count->val = val;
+			count->run++;
+			count->ena++;
+		}
+		return 0;
+	case TOOL_PMU__EVENT_DURATION_TIME:
+		/*
+		 * Pretend duration_time is only on the first CPU and thread, or
+		 * else aggregation will scale duration_time by the number of
+		 * CPUs/threads.
+		 */
+		start_time = &evsel->start_time;
+		if (cpu_map_idx == 0 && thread == 0)
+			cur_time = rdclock();
+		else
+			cur_time = *start_time;
+		break;
+	case TOOL_PMU__EVENT_USER_TIME:
+	case TOOL_PMU__EVENT_SYSTEM_TIME: {
+		bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME;
+
+		start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
+		fd = FD(evsel, cpu_map_idx, thread);
+		/* Rewind: lseek() takes the offset first, then the whence. */
+		lseek(fd, 0, SEEK_SET);
+		if (evsel->pid_stat) {
+			/* The event exists solely on 1 CPU. */
+			if (cpu_map_idx == 0)
+				err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time);
+			else
+				cur_time = 0;
+		} else {
+			/* The event is for all threads. */
+			if (thread == 0) {
+				struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus,
+									cpu_map_idx);
+
+				err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time);
+			} else {
+				cur_time = 0;
+			}
+		}
+		adjust = true;
+		break;
+	}
+	case TOOL_PMU__EVENT_NONE:
+	case TOOL_PMU__EVENT_MAX:
+	default:
+		err = -EINVAL;
+		break;
+	}
+	if (err)
+		return err;
+
+	delta_start = cur_time - *start_time;
+	if (adjust) {
+		/* Stat-file times are in clock ticks; scale to nanoseconds. */
+		__u64 ticks_per_sec = sysconf(_SC_CLK_TCK);
+
+		delta_start *= 1000000000 / ticks_per_sec;
+	}
+	count->val = delta_start;
+	count->ena = count->run = delta_start;
+	count->lost = 0;
+	return 0;
+}
+
+/*
+ * Return the single, statically allocated "tool" PMU. Its events_table is
+ * looked up on each call until find_core_events_table() yields a non-NULL
+ * table.
+ */
+struct perf_pmu *perf_pmus__tool_pmu(void)
+{
+	static struct perf_pmu tool_pmu = {
+		.name = "tool",
+		.type = PERF_PMU_TYPE_TOOL,
+		.aliases = LIST_HEAD_INIT(tool_pmu.aliases),
+		.caps = LIST_HEAD_INIT(tool_pmu.caps),
+		.format = LIST_HEAD_INIT(tool_pmu.format),
+	};
+
+	if (tool_pmu.events_table)
+		return &tool_pmu;
+
+	tool_pmu.events_table = find_core_events_table("common", "common");
+	return &tool_pmu;
+}
diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h
new file mode 100644
index 000000000000..a60184859080
--- /dev/null
+++ b/tools/perf/util/tool_pmu.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TOOL_PMU_H
+#define __TOOL_PMU_H
+
+#include "pmu.h"
+
+struct evsel;
+struct perf_thread_map;
+struct print_callbacks;
+
+enum tool_pmu_event { /* Identifiers for the events of the "tool" PMU; order and implicit numbering matter to tool_pmu__for_each_event(). */
+ TOOL_PMU__EVENT_NONE = 0, /* Not a readable event: the counter-read switch answers -EINVAL for it. */
+ TOOL_PMU__EVENT_DURATION_TIME, /* First real event; read as an rdclock() delta, counted only for cpu_map_idx 0 / thread 0. */
+ TOOL_PMU__EVENT_USER_TIME, /* Read from a stat field, then multiplied by 1000000000 / sysconf(_SC_CLK_TCK). */
+ TOOL_PMU__EVENT_SYSTEM_TIME, /* Same read path as USER_TIME, using a different stat field. */
+ TOOL_PMU__EVENT_HAS_PMEM,
+ TOOL_PMU__EVENT_NUM_CORES,
+ TOOL_PMU__EVENT_NUM_CPUS,
+ TOOL_PMU__EVENT_NUM_CPUS_ONLINE,
+ TOOL_PMU__EVENT_NUM_DIES,
+ TOOL_PMU__EVENT_NUM_PACKAGES,
+ TOOL_PMU__EVENT_SLOTS,
+ TOOL_PMU__EVENT_SMT_ON,
+ TOOL_PMU__EVENT_SYSTEM_TSC_FREQ,
+
+ TOOL_PMU__EVENT_MAX, /* Sentinel: exclusive upper bound of tool_pmu__for_each_event(); also answered with -EINVAL on read. */
+};
+
+/*
+ * Iterate @ev over every real tool event: from TOOL_PMU__EVENT_DURATION_TIME
+ * up to, but not including, TOOL_PMU__EVENT_MAX (TOOL_PMU__EVENT_NONE is
+ * skipped). @ev must be a modifiable lvalue. It is parenthesized on every use,
+ * including the increment, so an expression such as *cursor steps the value
+ * rather than being parsed as *(cursor++).
+ */
+#define tool_pmu__for_each_event(ev) \
+	for ((ev) = TOOL_PMU__EVENT_DURATION_TIME; (ev) < TOOL_PMU__EVENT_MAX; (ev)++)
+
+const char *tool_pmu__event_to_str(enum tool_pmu_event ev); /* Presumably the inverse of tool_pmu__str_to_event() below; confirm in the implementation. */
+enum tool_pmu_event tool_pmu__str_to_event(const char *str);
+bool tool_pmu__skip_event(const char *name);
+int tool_pmu__num_skip_events(void);
+
+bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result); /* NOTE(review): looks like *result is the output and the bool reports success - confirm. */
+
+u64 tool_pmu__cpu_slots_per_cycle(void);
+
+bool perf_pmu__is_tool(const struct perf_pmu *pmu);
+
+bool evsel__is_tool(const struct evsel *evsel);
+enum tool_pmu_event evsel__tool_event(const struct evsel *evsel); /* Tool event id of @evsel; the read path compares it against TOOL_PMU__EVENT_* values. */
+const char *evsel__tool_pmu_event_name(const struct evsel *evsel);
+int evsel__tool_pmu_prepare_open(struct evsel *evsel,
+ struct perf_cpu_map *cpus,
+ int nthreads);
+int evsel__tool_pmu_open(struct evsel *evsel,
+ struct perf_thread_map *threads,
+ int start_cpu_map_idx, int end_cpu_map_idx);
+int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread); /* Presumably 0 on success or a negative errno such as -EINVAL; confirm against the definition. */
+
+struct perf_pmu *perf_pmus__tool_pmu(void); /* Returns the statically allocated PMU named "tool". */
+
+#endif /* __TOOL_PMU_H */
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index f0332bd3a501..41d53e1b43e7 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -12,7 +12,7 @@
#include <linux/ctype.h>
#include <linux/kernel.h>
-#include <traceevent/event-parse.h>
+#include <event-parse.h>
static int get_common_field(struct scripting_context *context,
int *offset, int *size, const char *type)
@@ -116,12 +116,6 @@ void event_format__fprintf(struct tep_event *event,
trace_seq_destroy(&s);
}
-void event_format__print(struct tep_event *event,
- int cpu, void *data, int size)
-{
- return event_format__fprintf(event, cpu, data, size, stdout);
-}
-
/*
* prev_state is of size long, which is 32 bits on 32 bit architectures.
* As it needs to have the same bits for both 32 bit and 64 bit architectures
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 1162c49b8082..ecbbb93f0185 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -11,7 +11,7 @@
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/mman.h>
-#include <traceevent/event-parse.h>
+#include <event-parse.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index bd0000300c77..5596fcda2c10 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -10,7 +10,7 @@
#include <string.h>
#include <errno.h>
#ifdef HAVE_LIBTRACEEVENT
-#include <traceevent/event-parse.h>
+#include <event-parse.h>
#endif
#include "debug.h"
diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c
index 8ad75b31e09b..6a8c66c64b70 100644
--- a/tools/perf/util/trace-event.c
+++ b/tools/perf/util/trace-event.c
@@ -8,7 +8,7 @@
#include <fcntl.h>
#include <linux/kernel.h>
#include <linux/err.h>
-#include <traceevent/event-parse.h>
+#include <event-parse.h>
#include <api/fs/tracing_path.h>
#include <api/fs/fs.h>
#include "trace-event.h"
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index bbf8b26bc8da..79b939f947dd 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -42,9 +42,6 @@ struct tep_event *trace_event__tp_format_id(int id);
void event_format__fprintf(struct tep_event *event,
int cpu, void *data, int size, FILE *fp);
-void event_format__print(struct tep_event *event,
- int cpu, void *data, int size);
-
int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size);
int parse_event_file(struct tep_handle *pevent,
char *buf, unsigned long size, char *sys);
@@ -150,7 +147,7 @@ int common_lock_depth(struct scripting_context *context);
int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz);
#if defined(LIBTRACEEVENT_VERSION) && LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0)
-#include <traceevent/event-parse.h>
+#include <event-parse.h>
static inline bool tep_field_is_relative(unsigned long flags)
{
diff --git a/tools/perf/util/trace_augment.h b/tools/perf/util/trace_augment.h
new file mode 100644
index 000000000000..57a3e5045937
--- /dev/null
+++ b/tools/perf/util/trace_augment.h
@@ -0,0 +1,6 @@
+#ifndef TRACE_AUGMENT_H
+#define TRACE_AUGMENT_H
+
+#define TRACE_AUG_MAX_BUF 32 /* for buffer augmentation in perf trace */
+
+#endif
diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c
index f19791d46e99..511a517ce613 100644
--- a/tools/perf/util/tsc.c
+++ b/tools/perf/util/tsc.c
@@ -72,7 +72,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
}
int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
- struct perf_tool *tool,
+ const struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine)
{
@@ -119,7 +119,7 @@ size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp)
size_t ret;
ret = fprintf(fp, "\n... Time Shift %" PRI_lu64 "\n", tc->time_shift);
- ret += fprintf(fp, "... Time Muliplier %" PRI_lu64 "\n", tc->time_mult);
+ ret += fprintf(fp, "... Time Multiplier %" PRI_lu64 "\n", tc->time_mult);
ret += fprintf(fp, "... Time Zero %" PRI_lu64 "\n", tc->time_zero);
/*
diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h
index 88fd1c4c1cb8..57ce8449647f 100644
--- a/tools/perf/util/tsc.h
+++ b/tools/perf/util/tsc.h
@@ -25,7 +25,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
u64 rdtsc(void);
-double arch_get_tsc_freq(void);
+u64 arch_get_tsc_freq(void);
size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp);
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 4f561e5e4162..0f031eb80b4c 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -78,17 +78,23 @@ bool sysctl__nmi_watchdog_enabled(void)
bool test_attr__enabled;
+bool exclude_GH_default;
+
bool perf_host = true;
bool perf_guest = false;
void event_attr_init(struct perf_event_attr *attr)
{
+ /* to capture ABI version */
+ attr->size = sizeof(*attr);
+
+ if (!exclude_GH_default)
+ return;
+
if (!perf_host)
attr->exclude_host = 1;
if (!perf_guest)
attr->exclude_guest = 1;
- /* to capture ABI version */
- attr->size = sizeof(*attr);
}
int mkdir_p(char *path, mode_t mode)
@@ -325,94 +331,15 @@ int perf_event_paranoid(void)
bool perf_event_paranoid_check(int max_level)
{
- return perf_cap__capable(CAP_SYS_ADMIN) ||
- perf_cap__capable(CAP_PERFMON) ||
- perf_event_paranoid() <= max_level;
-}
-
-static int
-fetch_ubuntu_kernel_version(unsigned int *puint)
-{
- ssize_t len;
- size_t line_len = 0;
- char *ptr, *line = NULL;
- int version, patchlevel, sublevel, err;
- FILE *vsig;
-
- if (!puint)
- return 0;
-
- vsig = fopen("/proc/version_signature", "r");
- if (!vsig) {
- pr_debug("Open /proc/version_signature failed: %s\n",
- strerror(errno));
- return -1;
- }
-
- len = getline(&line, &line_len, vsig);
- fclose(vsig);
- err = -1;
- if (len <= 0) {
- pr_debug("Reading from /proc/version_signature failed: %s\n",
- strerror(errno));
- goto errout;
- }
-
- ptr = strrchr(line, ' ');
- if (!ptr) {
- pr_debug("Parsing /proc/version_signature failed: %s\n", line);
- goto errout;
- }
-
- err = sscanf(ptr + 1, "%d.%d.%d",
- &version, &patchlevel, &sublevel);
- if (err != 3) {
- pr_debug("Unable to get kernel version from /proc/version_signature '%s'\n",
- line);
- goto errout;
- }
-
- *puint = (version << 16) + (patchlevel << 8) + sublevel;
- err = 0;
-errout:
- free(line);
- return err;
-}
-
-int
-fetch_kernel_version(unsigned int *puint, char *str,
- size_t str_size)
-{
- struct utsname utsname;
- int version, patchlevel, sublevel, err;
- bool int_ver_ready = false;
-
- if (access("/proc/version_signature", R_OK) == 0)
- if (!fetch_ubuntu_kernel_version(puint))
- int_ver_ready = true;
-
- if (uname(&utsname))
- return -1;
+ bool used_root;
- if (str && str_size) {
- strncpy(str, utsname.release, str_size);
- str[str_size - 1] = '\0';
- }
-
- if (!puint || int_ver_ready)
- return 0;
-
- err = sscanf(utsname.release, "%d.%d.%d",
- &version, &patchlevel, &sublevel);
+ if (perf_cap__capable(CAP_SYS_ADMIN, &used_root))
+ return true;
- if (err != 3) {
- pr_debug("Unable to get kernel version from uname '%s'\n",
- utsname.release);
- return -1;
- }
+ if (!used_root && perf_cap__capable(CAP_PERFMON, &used_root))
+ return true;
- *puint = (version << 16) + (patchlevel << 8) + sublevel;
- return 0;
+ return perf_event_paranoid() <= max_level;
}
int perf_tip(char **strp, const char *dirpath)
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 9966c21aaf04..3423778e39a5 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -21,6 +21,9 @@ extern const char perf_more_info_string[];
extern const char *input_name;
+/* This will control if perf_{host,guest} will set attr.exclude_{host,guest}. */
+extern bool exclude_GH_default;
+
extern bool perf_host;
extern bool perf_guest;
@@ -43,14 +46,6 @@ int sysctl__max_stack(void);
bool sysctl__nmi_watchdog_enabled(void);
-int fetch_kernel_version(unsigned int *puint,
- char *str, size_t str_sz);
-#define KVER_VERSION(x) (((x) >> 16) & 0xff)
-#define KVER_PATCHLEVEL(x) (((x) >> 8) & 0xff)
-#define KVER_SUBLEVEL(x) ((x) & 0xff)
-#define KVER_FMT "%d.%d.%d"
-#define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x)
-
int perf_tip(char **strp, const char *dirpath);
#ifndef HAVE_SCHED_GETCPU_SUPPORT
@@ -81,13 +76,6 @@ char *perf_exe(char *buf, int len);
#endif
#endif
-extern bool test_attr__enabled;
-void test_attr__ready(void);
-void test_attr__init(void);
-struct perf_event_attr;
-void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
- int fd, int group_fd, unsigned long flags);
-
struct perf_debuginfod {
const char *urls;
bool set;
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index 1b6f8f6db7aa..c12f5d8c4bf6 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -308,8 +308,10 @@ static struct dso *machine__find_vdso(struct machine *machine,
if (!dso) {
dso = dsos__find(&machine->dsos, DSO__NAME_VDSO,
true);
- if (dso && dso_type != dso__type(dso, machine))
+ if (dso && dso_type != dso__type(dso, machine)) {
+ dso__put(dso);
dso = NULL;
+ }
}
break;
case DSO__TYPE_X32BIT: