diff options
author | Ingo Molnar <mingo@kernel.org> | 2018-03-19 22:37:48 +0300 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2018-03-19 22:37:48 +0300 |
commit | ecd380b8dead1bad67e3af87e2ddfe826c3da79d (patch) | |
tree | b35db1791ac7acaba54eb3aa9f751790b735428d | |
parent | 134933e55789ece9bca973d3502c7b8f7a9dae86 (diff) | |
parent | 1cd618838b9703eabe4a75badf433382b12f6bef (diff) | |
download | linux-ecd380b8dead1bad67e3af87e2ddfe826c3da79d.tar.xz |
Merge tag 'perf-core-for-mingo-4.17-20180319' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
- Fixes for problems with new GCC 8 warnings that, when treated as
errors, broke the build; they are related to snprintf and casting issues.
(Arnaldo Carvalho de Melo, Jiri Olsa, Josh Poimboeuf)
- Fix build of new breakpoint 'perf test' entry with clang < 6, noticed
on fedora 25, 26 and 27 (Arnaldo Carvalho de Melo)
- Workaround problem with symbol resolution in 'perf annotate', using
the symbol name already present in the objdump output (Arnaldo Carvalho de Melo)
- Document 'perf top --ignore-vmlinux' (Arnaldo Carvalho de Melo)
- Fix out of bounds access on array fd when cnt is 100 in one of the
'perf test' entries, detected using 'cpptest' (Colin Ian King)
- Add pipe mode support for the forced leader feature, i.e. 'perf report
--group' for events that were not actually grouped when scheduled (the
list of events was not enclosed in {} on the command line),
e.g.:
$ perf record -e cycles,instructions -o - kill | perf report --group -i -
- Use right type to access array elements in 'perf probe' (Masami Hiramatsu)
- Update POWER9 vendor events (those described in JSON format) (Sukadev Bhattiprolu)
- Discard head in overwrite_rb_find_range() (Yisheng Xie)
- Avoid setting 'quiet' to 'true' unnecessarily (Yisheng Xie)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
68 files changed, 1834 insertions, 520 deletions
diff --git a/tools/lib/str_error_r.c b/tools/lib/str_error_r.c index d6d65537b0d9..6aad8308a0ac 100644 --- a/tools/lib/str_error_r.c +++ b/tools/lib/str_error_r.c @@ -22,6 +22,6 @@ char *str_error_r(int errnum, char *buf, size_t buflen) { int err = strerror_r(errnum, buf, buflen); if (err) - snprintf(buf, buflen, "INTERNAL ERROR: strerror_r(%d, %p, %zd)=%d", errnum, buf, buflen, err); + snprintf(buf, buflen, "INTERNAL ERROR: strerror_r(%d, [buf], %zd)=%d", errnum, buflen, err); return buf; } diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index 822414235170..095aebdc5bb7 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -116,7 +116,7 @@ and calls standard perf record command. Following perf record options are configured by default: (check perf record man page for details) - -W,-d,--sample-cpu + -W,-d,--phys-data,--sample-cpu Unless specified otherwise with '-e' option, following events are monitored by default: diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index a039407d63b8..114fda12aa49 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -67,6 +67,9 @@ Default is to monitor all CPUS. --vmlinux=<path>:: Path to vmlinux. Required for annotation functionality. +--ignore-vmlinux:: + Ignore vmlinux files. + -m <pages>:: --mmap-pages=<pages>:: Number of mmap data pages (must be a power of two) or size diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 89cb2a36b8ff..98ff73648b51 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -75,7 +75,7 @@ endif # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures # to the check. 
-ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc s390)) +ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc s390)) NO_LIBDW_DWARF_UNWIND := 1 endif diff --git a/tools/perf/arch/arm64/include/arch-tests.h b/tools/perf/arch/arm64/include/arch-tests.h new file mode 100644 index 000000000000..90ec4c8cb880 --- /dev/null +++ b/tools/perf/arch/arm64/include/arch-tests.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_TESTS_H +#define ARCH_TESTS_H + +#ifdef HAVE_DWARF_UNWIND_SUPPORT +struct thread; +struct perf_sample; +#endif + +extern struct test arch_tests[]; + +#endif diff --git a/tools/perf/arch/arm64/tests/Build b/tools/perf/arch/arm64/tests/Build index b30eff9bcc83..883c57ff0c08 100644 --- a/tools/perf/arch/arm64/tests/Build +++ b/tools/perf/arch/arm64/tests/Build @@ -1,2 +1,4 @@ libperf-y += regs_load.o libperf-y += dwarf-unwind.o + +libperf-y += arch-tests.o diff --git a/tools/perf/arch/arm64/tests/arch-tests.c b/tools/perf/arch/arm64/tests/arch-tests.c new file mode 100644 index 000000000000..5b1543c98022 --- /dev/null +++ b/tools/perf/arch/arm64/tests/arch-tests.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <string.h> +#include "tests/tests.h" +#include "arch-tests.h" + +struct test arch_tests[] = { +#ifdef HAVE_DWARF_UNWIND_SUPPORT + { + .desc = "DWARF unwind", + .func = test__dwarf_unwind, + }, +#endif + { + .func = NULL, + }, +}; diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index c0b8dfef98ba..68f8a8eb3ad0 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -2,6 +2,7 @@ libperf-y += header.o libperf-y += sym-handling.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ ../../arm/util/auxtrace.o \ diff --git a/tools/perf/arch/arm64/util/unwind-libdw.c 
b/tools/perf/arch/arm64/util/unwind-libdw.c new file mode 100644 index 000000000000..7623d85e77f3 --- /dev/null +++ b/tools/perf/arch/arm64/util/unwind-libdw.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <elfutils/libdwfl.h> +#include "../../util/unwind-libdw.h" +#include "../../util/perf_regs.h" +#include "../../util/event.h" + +bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) +{ + struct unwind_info *ui = arg; + struct regs_dump *user_regs = &ui->sample->user_regs; + Dwarf_Word dwarf_regs[PERF_REG_ARM64_MAX], dwarf_pc; + +#define REG(r) ({ \ + Dwarf_Word val = 0; \ + perf_reg_value(&val, user_regs, PERF_REG_ARM64_##r); \ + val; \ +}) + + dwarf_regs[0] = REG(X0); + dwarf_regs[1] = REG(X1); + dwarf_regs[2] = REG(X2); + dwarf_regs[3] = REG(X3); + dwarf_regs[4] = REG(X4); + dwarf_regs[5] = REG(X5); + dwarf_regs[6] = REG(X6); + dwarf_regs[7] = REG(X7); + dwarf_regs[8] = REG(X8); + dwarf_regs[9] = REG(X9); + dwarf_regs[10] = REG(X10); + dwarf_regs[11] = REG(X11); + dwarf_regs[12] = REG(X12); + dwarf_regs[13] = REG(X13); + dwarf_regs[14] = REG(X14); + dwarf_regs[15] = REG(X15); + dwarf_regs[16] = REG(X16); + dwarf_regs[17] = REG(X17); + dwarf_regs[18] = REG(X18); + dwarf_regs[19] = REG(X19); + dwarf_regs[20] = REG(X20); + dwarf_regs[21] = REG(X21); + dwarf_regs[22] = REG(X22); + dwarf_regs[23] = REG(X23); + dwarf_regs[24] = REG(X24); + dwarf_regs[25] = REG(X25); + dwarf_regs[26] = REG(X26); + dwarf_regs[27] = REG(X27); + dwarf_regs[28] = REG(X28); + dwarf_regs[29] = REG(X29); + dwarf_regs[30] = REG(LR); + dwarf_regs[31] = REG(SP); + + if (!dwfl_thread_state_registers(thread, 0, PERF_REG_ARM64_MAX, + dwarf_regs)) + return false; + + dwarf_pc = REG(PC); + dwfl_thread_state_register_pc(thread, dwarf_pc); + + return true; +} diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 98d243fa0c06..2126bfbcb385 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -32,6 +32,7 @@ #include "evsel.h" 
#include "ui/browsers/hists.h" #include "thread.h" +#include "mem2node.h" struct c2c_hists { struct hists hists; @@ -49,6 +50,7 @@ struct c2c_hist_entry { struct c2c_hists *hists; struct c2c_stats stats; unsigned long *cpuset; + unsigned long *nodeset; struct c2c_stats *node_stats; unsigned int cacheline_idx; @@ -59,6 +61,11 @@ struct c2c_hist_entry { * because of its callchain dynamic entry */ struct hist_entry he; + + unsigned long paddr; + unsigned long paddr_cnt; + bool paddr_zero; + char *nodestr; }; static char const *coalesce_default = "pid,iaddr"; @@ -66,6 +73,7 @@ static char const *coalesce_default = "pid,iaddr"; struct perf_c2c { struct perf_tool tool; struct c2c_hists hists; + struct mem2node mem2node; unsigned long **nodes; int nodes_cnt; @@ -123,6 +131,10 @@ static void *c2c_he_zalloc(size_t size) if (!c2c_he->cpuset) return NULL; + c2c_he->nodeset = bitmap_alloc(c2c.nodes_cnt); + if (!c2c_he->nodeset) + return NULL; + c2c_he->node_stats = zalloc(c2c.nodes_cnt * sizeof(*c2c_he->node_stats)); if (!c2c_he->node_stats) return NULL; @@ -145,6 +157,8 @@ static void c2c_he_free(void *he) } free(c2c_he->cpuset); + free(c2c_he->nodeset); + free(c2c_he->nodestr); free(c2c_he->node_stats); free(c2c_he); } @@ -194,6 +208,28 @@ static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he, set_bit(sample->cpu, c2c_he->cpuset); } +static void c2c_he__set_node(struct c2c_hist_entry *c2c_he, + struct perf_sample *sample) +{ + int node; + + if (!sample->phys_addr) { + c2c_he->paddr_zero = true; + return; + } + + node = mem2node__node(&c2c.mem2node, sample->phys_addr); + if (WARN_ONCE(node < 0, "WARNING: failed to find node\n")) + return; + + set_bit(node, c2c_he->nodeset); + + if (c2c_he->paddr != sample->phys_addr) { + c2c_he->paddr_cnt++; + c2c_he->paddr = sample->phys_addr; + } +} + static void compute_stats(struct c2c_hist_entry *c2c_he, struct c2c_stats *stats, u64 weight) @@ -257,6 +293,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, 
c2c_add_stats(&c2c_hists->stats, &stats); c2c_he__set_cpu(c2c_he, sample); + c2c_he__set_node(c2c_he, sample); hists__inc_nr_samples(&c2c_hists->hists, he->filtered); ret = hist_entry__append_callchain(he, sample); @@ -293,6 +330,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, compute_stats(c2c_he, &stats, sample->weight); c2c_he__set_cpu(c2c_he, sample); + c2c_he__set_node(c2c_he, sample); hists__inc_nr_samples(&c2c_hists->hists, he->filtered); ret = hist_entry__append_callchain(he, sample); @@ -455,6 +493,31 @@ static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr)); } +static int +dcacheline_node_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + + c2c_he = container_of(he, struct c2c_hist_entry, he); + if (WARN_ON_ONCE(!c2c_he->nodestr)) + return 0; + + return scnprintf(hpp->buf, hpp->size, "%*s", width, c2c_he->nodestr); +} + +static int +dcacheline_node_count(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + + c2c_he = container_of(he, struct c2c_hist_entry, he); + return scnprintf(hpp->buf, hpp->size, "%*lu", width, c2c_he->paddr_cnt); +} + static int offset_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) { @@ -1200,23 +1263,47 @@ cl_idx_empty_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, } static struct c2c_dimension dim_dcacheline = { - .header = HEADER_LOW("Cacheline"), + .header = HEADER_SPAN("--- Cacheline ----", "Address", 2), .name = "dcacheline", .cmp = dcacheline_cmp, .entry = dcacheline_entry, .width = 18, }; -static struct c2c_header header_offset_tui = HEADER_LOW("Off"); +static struct c2c_dimension dim_dcacheline_node = { + .header = HEADER_LOW("Node"), + .name = 
"dcacheline_node", + .cmp = empty_cmp, + .entry = dcacheline_node_entry, + .width = 4, +}; + +static struct c2c_dimension dim_dcacheline_count = { + .header = HEADER_LOW("PA cnt"), + .name = "dcacheline_count", + .cmp = empty_cmp, + .entry = dcacheline_node_count, + .width = 6, +}; + +static struct c2c_header header_offset_tui = HEADER_SPAN("-----", "Off", 2); static struct c2c_dimension dim_offset = { - .header = HEADER_BOTH("Data address", "Offset"), + .header = HEADER_SPAN("--- Data address -", "Offset", 2), .name = "offset", .cmp = offset_cmp, .entry = offset_entry, .width = 18, }; +static struct c2c_dimension dim_offset_node = { + .header = HEADER_LOW("Node"), + .name = "offset_node", + .cmp = empty_cmp, + .entry = dcacheline_node_entry, + .width = 4, +}; + static struct c2c_dimension dim_iaddr = { .header = HEADER_LOW("Code address"), .name = "iaddr", @@ -1536,7 +1623,10 @@ static struct c2c_dimension dim_dcacheline_num_empty = { static struct c2c_dimension *dimensions[] = { &dim_dcacheline, + &dim_dcacheline_node, + &dim_dcacheline_count, &dim_offset, + &dim_offset_node, &dim_iaddr, &dim_tot_hitm, &dim_lcl_hitm, @@ -1839,20 +1929,56 @@ static inline int valid_hitm_or_store(struct hist_entry *he) return has_hitm || c2c_he->stats.store; } -static void calc_width(struct hist_entry *he) +static void set_node_width(struct c2c_hist_entry *c2c_he, int len) +{ + struct c2c_dimension *dim; + + dim = &c2c.hists == c2c_he->hists ? + &dim_dcacheline_node : &dim_offset_node; + + if (len > dim->width) + dim->width = len; +} + +static int set_nodestr(struct c2c_hist_entry *c2c_he) +{ + char buf[30]; + int len; + + if (c2c_he->nodestr) + return 0; + + if (bitmap_weight(c2c_he->nodeset, c2c.nodes_cnt)) { + len = bitmap_scnprintf(c2c_he->nodeset, c2c.nodes_cnt, + buf, sizeof(buf)); + } else { + len = scnprintf(buf, sizeof(buf), "N/A"); + } + + set_node_width(c2c_he, len); + c2c_he->nodestr = strdup(buf); + return c2c_he->nodestr ? 
0 : -ENOMEM; +} + +static void calc_width(struct c2c_hist_entry *c2c_he) { struct c2c_hists *c2c_hists; - c2c_hists = container_of(he->hists, struct c2c_hists, hists); - hists__calc_col_len(&c2c_hists->hists, he); + c2c_hists = container_of(c2c_he->he.hists, struct c2c_hists, hists); + hists__calc_col_len(&c2c_hists->hists, &c2c_he->he); + set_nodestr(c2c_he); } static int filter_cb(struct hist_entry *he) { + struct c2c_hist_entry *c2c_he; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + if (c2c.show_src && !he->srcline) he->srcline = hist_entry__get_srcline(he); - calc_width(he); + calc_width(c2c_he); if (!valid_hitm_or_store(he)) he->filtered = HIST_FILTER__C2C; @@ -1869,12 +1995,11 @@ static int resort_cl_cb(struct hist_entry *he) c2c_he = container_of(he, struct c2c_hist_entry, he); c2c_hists = c2c_he->hists; - calc_width(he); - if (display && c2c_hists) { static unsigned int idx; c2c_he->cacheline_idx = idx++; + calc_width(c2c_he); c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort); @@ -2348,14 +2473,66 @@ static void perf_c2c_display(struct perf_session *session) } #endif /* HAVE_SLANG_SUPPORT */ -static void ui_quirks(void) +static char *fill_line(const char *orig, int len) +{ + int i, j, olen = strlen(orig); + char *buf; + + buf = zalloc(len + 1); + if (!buf) + return NULL; + + j = len / 2 - olen / 2; + + for (i = 0; i < j - 1; i++) + buf[i] = '-'; + + buf[i++] = ' '; + + strcpy(buf + i, orig); + + i += olen; + + buf[i++] = ' '; + + for (; i < len; i++) + buf[i] = '-'; + + return buf; +} + +static int ui_quirks(void) { + const char *nodestr = "Data address"; + char *buf; + if (!c2c.use_stdio) { dim_offset.width = 5; dim_offset.header = header_offset_tui; + nodestr = "CL"; } dim_percent_hitm.header = percent_hitm_header[c2c.display]; + + /* Fix the zero line for dcacheline column. 
*/ + buf = fill_line("Cacheline", dim_dcacheline.width + + dim_dcacheline_node.width + + dim_dcacheline_count.width + 4); + if (!buf) + return -ENOMEM; + + dim_dcacheline.header.line[0].text = buf; + + /* Fix the zero line for offset column. */ + buf = fill_line(nodestr, dim_offset.width + + dim_offset_node.width + + dim_dcacheline_count.width + 4); + if (!buf) + return -ENOMEM; + + dim_offset.header.line[0].text = buf; + + return 0; } #define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent" @@ -2471,7 +2648,7 @@ static int build_cl_output(char *cl_sort, bool no_source) "percent_lcl_hitm," "percent_stores_l1hit," "percent_stores_l1miss," - "offset,", + "offset,offset_node,dcacheline_count,", add_pid ? "pid," : "", add_tid ? "tid," : "", add_iaddr ? "iaddr," : "", @@ -2600,17 +2777,21 @@ static int perf_c2c__report(int argc, const char **argv) goto out; } - err = setup_callchain(session->evlist); + err = mem2node__init(&c2c.mem2node, &session->header.env); if (err) goto out_session; + err = setup_callchain(session->evlist); + if (err) + goto out_mem2node; + if (symbol__init(&session->header.env) < 0) - goto out_session; + goto out_mem2node; /* No pipe support at the moment. 
*/ if (perf_data__is_pipe(session->data)) { pr_debug("No pipe support at the moment.\n"); - goto out_session; + goto out_mem2node; } if (c2c.use_stdio) @@ -2623,12 +2804,14 @@ static int perf_c2c__report(int argc, const char **argv) err = perf_session__process_events(session); if (err) { pr_err("failed to process sample\n"); - goto out_session; + goto out_mem2node; } c2c_hists__reinit(&c2c.hists, "cl_idx," "dcacheline," + "dcacheline_node," + "dcacheline_count," "tot_recs," "percent_hitm," "tot_hitm,lcl_hitm,rmt_hitm," @@ -2650,10 +2833,15 @@ static int perf_c2c__report(int argc, const char **argv) ui_progress__finish(); - ui_quirks(); + if (ui_quirks()) { + pr_err("failed to setup UI\n"); + goto out_mem2node; + } perf_c2c_display(session); +out_mem2node: + mem2node__exit(&c2c.mem2node); out_session: perf_session__delete(session); out: @@ -2704,7 +2892,7 @@ static int perf_c2c__record(int argc, const char **argv) argc = parse_options(argc, argv, options, record_mem_usage, PARSE_OPT_KEEP_UNKNOWN); - rec_argc = argc + 10; /* max number of arguments */ + rec_argc = argc + 11; /* max number of arguments */ rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (!rec_argv) return -1; @@ -2720,6 +2908,7 @@ static int perf_c2c__record(int argc, const char **argv) rec_argv[i++] = "-W"; rec_argv[i++] = "-d"; + rec_argv[i++] = "--phys-data"; rec_argv[i++] = "--sample-cpu"; for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index b81494587120..22ebeb92ac51 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -754,13 +754,10 @@ static int record__synthesize(struct record *rec, bool tail) return 0; if (data->is_pipe) { - err = perf_event__synthesize_features( - tool, session, rec->evlist, process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize features.\n"); - return err; - } - + /* + * We need to synthesize events first, because some + * features works on top of them (on 
report side). + */ err = perf_event__synthesize_attrs(tool, session, process_synthesized_event); if (err < 0) { @@ -768,6 +765,13 @@ static int record__synthesize(struct record *rec, bool tail) goto out; } + err = perf_event__synthesize_features(tool, session, rec->evlist, + process_synthesized_event); + if (err < 0) { + pr_err("Couldn't synthesize features.\n"); + return err; + } + if (have_tracepoints(&rec->evlist->entries)) { /* * FIXME err <= 0 here actually means that @@ -1279,10 +1283,12 @@ static int perf_record_config(const char *var, const char *value, void *cb) return -1; return 0; } - if (!strcmp(var, "record.call-graph")) - var = "call-graph.record-mode"; /* fall-through */ + if (!strcmp(var, "record.call-graph")) { + var = "call-graph.record-mode"; + return perf_default_config(var, value, cb); + } - return perf_default_config(var, value, cb); + return 0; } struct clockid_map { diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 971ccba85464..91da12975642 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -68,6 +68,7 @@ struct report { bool header; bool header_only; bool nonany_branch_mode; + bool group_set; int max_stack; struct perf_read_values show_threads_values; const char *pretty_printing_style; @@ -193,6 +194,45 @@ out: return err; } +/* + * Events in data file are not collect in groups, but we still want + * the group display. Set the artificial group and set the leader's + * forced_leader flag to notify the display code. 
+ */ +static void setup_forced_leader(struct report *report, + struct perf_evlist *evlist) +{ + if (report->group_set && !evlist->nr_groups) { + struct perf_evsel *leader = perf_evlist__first(evlist); + + perf_evlist__set_leader(evlist); + leader->forced_leader = true; + } +} + +static int process_feature_event(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session __maybe_unused) +{ + struct report *rep = container_of(tool, struct report, tool); + + if (event->feat.feat_id < HEADER_LAST_FEATURE) + return perf_event__process_feature(tool, event, session); + + if (event->feat.feat_id != HEADER_LAST_FEATURE) { + pr_err("failed: wrong feature ID: %" PRIu64 "\n", + event->feat.feat_id); + return -1; + } + + /* + * All features are received, we can force the + * group if needed. + */ + setup_forced_leader(rep, session->evlist); + return 0; +} + static int process_sample_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -940,7 +980,6 @@ int cmd_report(int argc, const char **argv) "perf report [<options>]", NULL }; - bool group_set = false; struct report report = { .tool = { .sample = process_sample_event, @@ -958,7 +997,7 @@ int cmd_report(int argc, const char **argv) .id_index = perf_event__process_id_index, .auxtrace_info = perf_event__process_auxtrace_info, .auxtrace = perf_event__process_auxtrace, - .feature = perf_event__process_feature, + .feature = process_feature_event, .ordered_events = true, .ordering_requires_timestamps = true, }, @@ -1060,7 +1099,7 @@ int cmd_report(int argc, const char **argv) "Specify disassembler style (e.g. 
-M intel for intel syntax)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), - OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &group_set, + OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &report.group_set, "Show event group information together"), OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "", "use branch records for per branch histogram filling", @@ -1177,17 +1216,7 @@ repeat: has_br_stack = perf_header__has_feat(&session->header, HEADER_BRANCH_STACK); - /* - * Events in data file are not collect in groups, but we still want - * the group display. Set the artificial group and set the leader's - * forced_leader flag to notify the display code. - */ - if (group_set && !session->evlist->nr_groups) { - struct perf_evsel *leader = perf_evlist__first(session->evlist); - - perf_evlist__set_leader(session->evlist); - leader->forced_leader = true; - } + setup_forced_leader(&report, session->evlist); if (itrace_synth_opts.last_branch) has_br_stack = true; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index cce926aeb0c0..313c42423393 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2674,8 +2674,8 @@ static int list_available_scripts(const struct option *opt __maybe_unused, } for_each_lang(scripts_path, scripts_dir, lang_dirent) { - snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, - lang_dirent->d_name); + scnprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, + lang_dirent->d_name); lang_dir = opendir(lang_path); if (!lang_dir) continue; @@ -2684,8 +2684,8 @@ static int list_available_scripts(const struct option *opt __maybe_unused, script_root = get_script_root(script_dirent, REPORT_SUFFIX); if (script_root) { desc = script_desc__findnew(script_root); - snprintf(script_path, MAXPATHLEN, "%s/%s", - lang_path, script_dirent->d_name); + scnprintf(script_path, MAXPATHLEN, "%s/%s", + lang_path, script_dirent->d_name); 
read_script_info(desc, script_path); free(script_root); } @@ -2721,7 +2721,7 @@ static int check_ev_match(char *dir_name, char *scriptname, int match, len; FILE *fp; - sprintf(filename, "%s/bin/%s-record", dir_name, scriptname); + scnprintf(filename, MAXPATHLEN, "%s/bin/%s-record", dir_name, scriptname); fp = fopen(filename, "r"); if (!fp) @@ -2799,8 +2799,8 @@ int find_scripts(char **scripts_array, char **scripts_path_array) } for_each_lang(scripts_path, scripts_dir, lang_dirent) { - snprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path, - lang_dirent->d_name); + scnprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path, + lang_dirent->d_name); #ifdef NO_LIBPERL if (strstr(lang_path, "perl")) continue; @@ -2855,8 +2855,8 @@ static char *get_script_path(const char *script_root, const char *suffix) return NULL; for_each_lang(scripts_path, scripts_dir, lang_dirent) { - snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, - lang_dirent->d_name); + scnprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, + lang_dirent->d_name); lang_dir = opendir(lang_path); if (!lang_dir) continue; @@ -2867,8 +2867,8 @@ static char *get_script_path(const char *script_root, const char *suffix) free(__script_root); closedir(lang_dir); closedir(scripts_dir); - snprintf(script_path, MAXPATHLEN, "%s/%s", - lang_path, script_dirent->d_name); + scnprintf(script_path, MAXPATHLEN, "%s/%s", + lang_path, script_dirent->d_name); return strdup(script_path); } free(__script_root); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 0fa9ea3a6d92..f5c454855908 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2331,11 +2331,16 @@ static int add_default_attributes(void) return 0; if (transaction_run) { + struct parse_events_error errinfo; + if (pmu_have_event("cpu", "cycles-ct") && pmu_have_event("cpu", "el-start")) - err = parse_events(evsel_list, transaction_attrs, NULL); + err = parse_events(evsel_list, transaction_attrs, + &errinfo); else - err = 
parse_events(evsel_list, transaction_limited_attrs, NULL); + err = parse_events(evsel_list, + transaction_limited_attrs, + &errinfo); if (err) { fprintf(stderr, "Cannot set up transaction events\n"); return -1; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0a26b56afcc5..113c298ed38b 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1223,8 +1223,10 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset) static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused) { - if (!strcmp(var, "top.call-graph")) - var = "call-graph.record-mode"; /* fall-through */ + if (!strcmp(var, "top.call-graph")) { + var = "call-graph.record-mode"; + return perf_default_config(var, value, cb); + } if (!strcmp(var, "top.children")) { symbol_conf.cumulate_callchain = perf_config_bool(var, value); return 0; diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index 999a4e878162..17783913d330 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -1,10 +1,12 @@ hostprogs := jevents jevents-y += json.o jsmn.o jevents.o +CHOSTFLAGS_jevents.o = -I$(srctree)/tools/include pmu-events-y += pmu-events.o JDIR = pmu-events/arch/$(SRCARCH) JSON = $(shell [ -d $(JDIR) ] && \ find $(JDIR) -name '*.json' -o -name 'mapfile.csv') + # # Locate/process JSON files in pmu-events/arch/ # directory and create tables in pmu-events.c. diff --git a/tools/perf/pmu-events/README b/tools/perf/pmu-events/README index c2ee3e4417fe..e62b09b6a844 100644 --- a/tools/perf/pmu-events/README +++ b/tools/perf/pmu-events/README @@ -11,12 +11,17 @@ tree tools/perf/pmu-events/arch/foo. - Regular files with '.json' extension in the name are assumed to be JSON files, each of which describes a set of PMU events. - - Regular files with basename starting with 'mapfile.csv' are assumed - to be a CSV file that maps a specific CPU to its set of PMU events. 
- (see below for mapfile format) + - The CSV file that maps a specific CPU to its set of PMU events is to + be named 'mapfile.csv' (see below for mapfile format). - Directories are traversed, but all other files are ignored. + - To reduce JSON event duplication per architecture, platform JSONs may + use "ArchStdEvent" keyword to dereference an "Architecture standard + events", defined in architecture standard JSONs. + Architecture standard JSONs must be located in the architecture root + folder. Matching is based on the "EventName" field. + The PMU events supported by a CPU model are expected to grouped into topics such as Pipelining, Cache, Memory, Floating-point etc. All events for a topic should be placed in a separate JSON file - where the file name identifies @@ -29,6 +34,10 @@ sub directory. Thus for the Silvermont X86 CPU: Cache.json Memory.json Virtual-Memory.json Frontend.json Pipeline.json +The JSONs folder for a CPU model/family may be placed in the root arch +folder, or may be placed in a vendor sub-folder under the arch folder +for instances where the arch and vendor are not the same. 
+ Using the JSON files and the mapfile, 'jevents' generates the C source file, 'pmu-events.c', which encodes the two sets of tables: diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/branch.json index 3b6208763e50..0b0e6b26605b 100644 --- a/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/branch.json @@ -1,25 +1,23 @@ [ - {, - "EventCode": "0x7A", - "EventName": "BR_INDIRECT_SPEC", - "BriefDescription": "Branch speculatively executed - Indirect branch" + { + "ArchStdEvent": "BR_INDIRECT_SPEC", }, - {, + { "EventCode": "0xC9", "EventName": "BR_COND", "BriefDescription": "Conditional branch executed" }, - {, + { "EventCode": "0xCA", "EventName": "BR_INDIRECT_MISPRED", "BriefDescription": "Indirect branch mispredicted" }, - {, + { "EventCode": "0xCB", "EventName": "BR_INDIRECT_MISPRED_ADDR", "BriefDescription": "Indirect branch mispredicted because of address miscompare" }, - {, + { "EventCode": "0xCC", "EventName": "BR_COND_MISPRED", "BriefDescription": "Conditional branch mispredicted" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/bus.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/bus.json new file mode 100644 index 000000000000..ce33b2553277 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/bus.json @@ -0,0 +1,8 @@ +[ + { + "ArchStdEvent": "BUS_ACCESS_RD", + }, + { + "ArchStdEvent": "BUS_ACCESS_WR", + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/cache.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/cache.json new file mode 100644 index 000000000000..5dfbec43c9f9 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/cache.json @@ -0,0 +1,27 @@ +[ + { + "EventCode": "0xC2", + "EventName": "PREFETCH_LINEFILL", + "BriefDescription": "Linefill because of prefetch" + }, + { + "EventCode": "0xC3", + "EventName": "PREFETCH_LINEFILL_DROP", + "BriefDescription": 
"Instruction Cache Throttle occurred" + }, + { + "EventCode": "0xC4", + "EventName": "READ_ALLOC_ENTER", + "BriefDescription": "Entering read allocate mode" + }, + { + "EventCode": "0xC5", + "EventName": "READ_ALLOC", + "BriefDescription": "Read allocate mode" + }, + { + "EventCode": "0xC8", + "EventName": "EXT_SNOOP", + "BriefDescription": "SCU Snooped data from another CPU for this CPU" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/memory.json index 480d9f7460ab..25ae642ba381 100644 --- a/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/memory.json @@ -1,20 +1,10 @@ [ - {, - "EventCode": "0x60", - "EventName": "BUS_ACCESS_LD", - "BriefDescription": "Bus access - Read" - }, - {, - "EventCode": "0x61", - "EventName": "BUS_ACCESS_ST", - "BriefDescription": "Bus access - Write" - }, - {, + { "EventCode": "0xC0", "EventName": "EXT_MEM_REQ", "BriefDescription": "External memory request" }, - {, + { "EventCode": "0xC1", "EventName": "EXT_MEM_REQ_NC", "BriefDescription": "Non-cacheable external memory request" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/other.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/other.json new file mode 100644 index 000000000000..6cc6cbd7bf0b --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/other.json @@ -0,0 +1,28 @@ +[ + { + "ArchStdEvent": "EXC_IRQ", + }, + { + "ArchStdEvent": "EXC_FIQ", + }, + { + "EventCode": "0xC6", + "EventName": "PRE_DECODE_ERR", + "BriefDescription": "Pre-decode error" + }, + { + "EventCode": "0xD0", + "EventName": "L1I_CACHE_ERR", + "BriefDescription": "L1 Instruction Cache (data or tag) memory error" + }, + { + "EventCode": "0xD1", + "EventName": "L1D_CACHE_ERR", + "BriefDescription": "L1 Data Cache (data, tag or dirty) memory error, correctable or non-correctable" + }, + { + "EventCode": "0xD2", + "EventName": "TLB_ERR", + 
"BriefDescription": "TLB memory error" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/pipeline.json index 3149fb90555a..f45a6b5d0025 100644 --- a/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/pipeline.json @@ -1,50 +1,50 @@ [ - {, + { "EventCode": "0xC7", "EventName": "STALL_SB_FULL", "BriefDescription": "Data Write operation that stalls the pipeline because the store buffer is full" }, - {, + { "EventCode": "0xE0", "EventName": "OTHER_IQ_DEP_STALL", "BriefDescription": "Cycles that the DPU IQ is empty and that is not because of a recent micro-TLB miss, instruction cache miss or pre-decode error" }, - {, + { "EventCode": "0xE1", "EventName": "IC_DEP_STALL", "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction cache miss being processed" }, - {, + { "EventCode": "0xE2", "EventName": "IUTLB_DEP_STALL", "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction micro-TLB miss being processed" }, - {, + { "EventCode": "0xE3", "EventName": "DECODE_DEP_STALL", "BriefDescription": "Cycles the DPU IQ is empty and there is a pre-decode error being processed" }, - {, + { "EventCode": "0xE4", "EventName": "OTHER_INTERLOCK_STALL", "BriefDescription": "Cycles there is an interlock other than Advanced SIMD/Floating-point instructions or load/store instruction" }, - {, + { "EventCode": "0xE5", "EventName": "AGU_DEP_STALL", "BriefDescription": "Cycles there is an interlock for a load/store instruction waiting for data to calculate the address in the AGU" }, - {, + { "EventCode": "0xE6", "EventName": "SIMD_DEP_STALL", "BriefDescription": "Cycles there is an interlock for an Advanced SIMD/Floating-point operation." 
}, - {, + { "EventCode": "0xE7", "EventName": "LD_DEP_STALL", "BriefDescription": "Cycles there is a stall in the Wr stage because of a load miss" }, - {, + { "EventCode": "0xE8", "EventName": "ST_DEP_STALL", "BriefDescription": "Cycles there is a stall in the Wr stage because of a store" diff --git a/tools/perf/pmu-events/arch/arm64/armv8-recommended.json b/tools/perf/pmu-events/arch/arm64/armv8-recommended.json new file mode 100644 index 000000000000..6328828c018c --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/armv8-recommended.json @@ -0,0 +1,452 @@ +[ + { + "PublicDescription": "Attributable Level 1 data cache access, read", + "EventCode": "0x40", + "EventName": "L1D_CACHE_RD", + "BriefDescription": "L1D cache access, read" + }, + { + "PublicDescription": "Attributable Level 1 data cache access, write", + "EventCode": "0x41", + "EventName": "L1D_CACHE_WR", + "BriefDescription": "L1D cache access, write" + }, + { + "PublicDescription": "Attributable Level 1 data cache refill, read", + "EventCode": "0x42", + "EventName": "L1D_CACHE_REFILL_RD", + "BriefDescription": "L1D cache refill, read" + }, + { + "PublicDescription": "Attributable Level 1 data cache refill, write", + "EventCode": "0x43", + "EventName": "L1D_CACHE_REFILL_WR", + "BriefDescription": "L1D cache refill, write" + }, + { + "PublicDescription": "Attributable Level 1 data cache refill, inner", + "EventCode": "0x44", + "EventName": "L1D_CACHE_REFILL_INNER", + "BriefDescription": "L1D cache refill, inner" + }, + { + "PublicDescription": "Attributable Level 1 data cache refill, outer", + "EventCode": "0x45", + "EventName": "L1D_CACHE_REFILL_OUTER", + "BriefDescription": "L1D cache refill, outer" + }, + { + "PublicDescription": "Attributable Level 1 data cache Write-Back, victim", + "EventCode": "0x46", + "EventName": "L1D_CACHE_WB_VICTIM", + "BriefDescription": "L1D cache Write-Back, victim" + }, + { + "PublicDescription": "Level 1 data cache Write-Back, cleaning and coherency", + "EventCode": 
"0x47", + "EventName": "L1D_CACHE_WB_CLEAN", + "BriefDescription": "L1D cache Write-Back, cleaning and coherency" + }, + { + "PublicDescription": "Attributable Level 1 data cache invalidate", + "EventCode": "0x48", + "EventName": "L1D_CACHE_INVAL", + "BriefDescription": "L1D cache invalidate" + }, + { + "PublicDescription": "Attributable Level 1 data TLB refill, read", + "EventCode": "0x4C", + "EventName": "L1D_TLB_REFILL_RD", + "BriefDescription": "L1D tlb refill, read" + }, + { + "PublicDescription": "Attributable Level 1 data TLB refill, write", + "EventCode": "0x4D", + "EventName": "L1D_TLB_REFILL_WR", + "BriefDescription": "L1D tlb refill, write" + }, + { + "PublicDescription": "Attributable Level 1 data or unified TLB access, read", + "EventCode": "0x4E", + "EventName": "L1D_TLB_RD", + "BriefDescription": "L1D tlb access, read" + }, + { + "PublicDescription": "Attributable Level 1 data or unified TLB access, write", + "EventCode": "0x4F", + "EventName": "L1D_TLB_WR", + "BriefDescription": "L1D tlb access, write" + }, + { + "PublicDescription": "Attributable Level 2 data cache access, read", + "EventCode": "0x50", + "EventName": "L2D_CACHE_RD", + "BriefDescription": "L2D cache access, read" + }, + { + "PublicDescription": "Attributable Level 2 data cache access, write", + "EventCode": "0x51", + "EventName": "L2D_CACHE_WR", + "BriefDescription": "L2D cache access, write" + }, + { + "PublicDescription": "Attributable Level 2 data cache refill, read", + "EventCode": "0x52", + "EventName": "L2D_CACHE_REFILL_RD", + "BriefDescription": "L2D cache refill, read" + }, + { + "PublicDescription": "Attributable Level 2 data cache refill, write", + "EventCode": "0x53", + "EventName": "L2D_CACHE_REFILL_WR", + "BriefDescription": "L2D cache refill, write" + }, + { + "PublicDescription": "Attributable Level 2 data cache Write-Back, victim", + "EventCode": "0x56", + "EventName": "L2D_CACHE_WB_VICTIM", + "BriefDescription": "L2D cache Write-Back, victim" + }, + { + 
"PublicDescription": "Level 2 data cache Write-Back, cleaning and coherency", + "EventCode": "0x57", + "EventName": "L2D_CACHE_WB_CLEAN", + "BriefDescription": "L2D cache Write-Back, cleaning and coherency" + }, + { + "PublicDescription": "Attributable Level 2 data cache invalidate", + "EventCode": "0x58", + "EventName": "L2D_CACHE_INVAL", + "BriefDescription": "L2D cache invalidate" + }, + { + "PublicDescription": "Attributable Level 2 data or unified TLB refill, read", + "EventCode": "0x5c", + "EventName": "L2D_TLB_REFILL_RD", + "BriefDescription": "L2D cache refill, read" + }, + { + "PublicDescription": "Attributable Level 2 data or unified TLB refill, write", + "EventCode": "0x5d", + "EventName": "L2D_TLB_REFILL_WR", + "BriefDescription": "L2D cache refill, write" + }, + { + "PublicDescription": "Attributable Level 2 data or unified TLB access, read", + "EventCode": "0x5e", + "EventName": "L2D_TLB_RD", + "BriefDescription": "L2D cache access, read" + }, + { + "PublicDescription": "Attributable Level 2 data or unified TLB access, write", + "EventCode": "0x5f", + "EventName": "L2D_TLB_WR", + "BriefDescription": "L2D cache access, write" + }, + { + "PublicDescription": "Bus access read", + "EventCode": "0x60", + "EventName": "BUS_ACCESS_RD", + "BriefDescription": "Bus access read" + }, + { + "PublicDescription": "Bus access write", + "EventCode": "0x61", + "EventName": "BUS_ACCESS_WR", + "BriefDescription": "Bus access write" + } + { + "PublicDescription": "Bus access, Normal, Cacheable, Shareable", + "EventCode": "0x62", + "EventName": "BUS_ACCESS_SHARED", + "BriefDescription": "Bus access, Normal, Cacheable, Shareable" + } + { + "PublicDescription": "Bus access, not Normal, Cacheable, Shareable", + "EventCode": "0x63", + "EventName": "BUS_ACCESS_NOT_SHARED", + "BriefDescription": "Bus access, not Normal, Cacheable, Shareable" + } + { + "PublicDescription": "Bus access, Normal", + "EventCode": "0x64", + "EventName": "BUS_ACCESS_NORMAL", + "BriefDescription": "Bus 
access, Normal" + } + { + "PublicDescription": "Bus access, peripheral", + "EventCode": "0x65", + "EventName": "BUS_ACCESS_PERIPH", + "BriefDescription": "Bus access, peripheral" + } + { + "PublicDescription": "Data memory access, read", + "EventCode": "0x66", + "EventName": "MEM_ACCESS_RD", + "BriefDescription": "Data memory access, read" + } + { + "PublicDescription": "Data memory access, write", + "EventCode": "0x67", + "EventName": "MEM_ACCESS_WR", + "BriefDescription": "Data memory access, write" + } + { + "PublicDescription": "Unaligned access, read", + "EventCode": "0x68", + "EventName": "UNALIGNED_LD_SPEC", + "BriefDescription": "Unaligned access, read" + } + { + "PublicDescription": "Unaligned access, write", + "EventCode": "0x69", + "EventName": "UNALIGNED_ST_SPEC", + "BriefDescription": "Unaligned access, write" + } + { + "PublicDescription": "Unaligned access", + "EventCode": "0x6a", + "EventName": "UNALIGNED_LDST_SPEC", + "BriefDescription": "Unaligned access" + } + { + "PublicDescription": "Exclusive operation speculatively executed, LDREX or LDX", + "EventCode": "0x6c", + "EventName": "LDREX_SPEC", + "BriefDescription": "Exclusive operation speculatively executed, LDREX or LDX" + } + { + "PublicDescription": "Exclusive operation speculatively executed, STREX or STX pass", + "EventCode": "0x6d", + "EventName": "STREX_PASS_SPEC", + "BriefDescription": "Exclusive operation speculatively executed, STREX or STX pass" + } + { + "PublicDescription": "Exclusive operation speculatively executed, STREX or STX fail", + "EventCode": "0x6e", + "EventName": "STREX_FAIL_SPEC", + "BriefDescription": "Exclusive operation speculatively executed, STREX or STX fail" + } + { + "PublicDescription": "Exclusive operation speculatively executed, STREX or STX", + "EventCode": "0x6f", + "EventName": "STREX_SPEC", + "BriefDescription": "Exclusive operation speculatively executed, STREX or STX" + } + { + "PublicDescription": "Operation speculatively executed, load", + 
"EventCode": "0x70", + "EventName": "LD_SPEC", + "BriefDescription": "Operation speculatively executed, load" + } + { + "PublicDescription": "Operation speculatively executed, store" + "EventCode": "0x71", + "EventName": "ST_SPEC", + "BriefDescription": "Operation speculatively executed, store" + } + { + "PublicDescription": "Operation speculatively executed, load or store", + "EventCode": "0x72", + "EventName": "LDST_SPEC", + "BriefDescription": "Operation speculatively executed, load or store" + } + { + "PublicDescription": "Operation speculatively executed, integer data processing", + "EventCode": "0x73", + "EventName": "DP_SPEC", + "BriefDescription": "Operation speculatively executed, integer data processing" + } + { + "PublicDescription": "Operation speculatively executed, Advanced SIMD instruction", + "EventCode": "0x74", + "EventName": "ASE_SPEC", + "BriefDescription": "Operation speculatively executed, Advanced SIMD instruction", + } + { + "PublicDescription": "Operation speculatively executed, floating-point instruction", + "EventCode": "0x75", + "EventName": "VFP_SPEC", + "BriefDescription": "Operation speculatively executed, floating-point instruction" + } + { + "PublicDescription": "Operation speculatively executed, software change of the PC", + "EventCode": "0x76", + "EventName": "PC_WRITE_SPEC", + "BriefDescription": "Operation speculatively executed, software change of the PC" + } + { + "PublicDescription": "Operation speculatively executed, Cryptographic instruction", + "EventCode": "0x77", + "EventName": "CRYPTO_SPEC", + "BriefDescription": "Operation speculatively executed, Cryptographic instruction" + } + { + "PublicDescription": "Branch speculatively executed, immediate branch" + "EventCode": "0x78", + "EventName": "BR_IMMED_SPEC", + "BriefDescription": "Branch speculatively executed, immediate branch" + } + { + "PublicDescription": "Branch speculatively executed, procedure return" + "EventCode": "0x79", + "EventName": "BR_RETURN_SPEC", + 
"BriefDescription": "Branch speculatively executed, procedure return" + } + { + "PublicDescription": "Branch speculatively executed, indirect branch" + "EventCode": "0x7a", + "EventName": "BR_INDIRECT_SPEC", + "BriefDescription": "Branch speculatively executed, indirect branch" + } + { + "PublicDescription": "Barrier speculatively executed, ISB" + "EventCode": "0x7c", + "EventName": "ISB_SPEC", + "BriefDescription": "Barrier speculatively executed, ISB" + } + { + "PublicDescription": "Barrier speculatively executed, DSB" + "EventCode": "0x7d", + "EventName": "DSB_SPEC", + "BriefDescription": "Barrier speculatively executed, DSB" + } + { + "PublicDescription": "Barrier speculatively executed, DMB" + "EventCode": "0x7e", + "EventName": "DMB_SPEC", + "BriefDescription": "Barrier speculatively executed, DMB" + } + { + "PublicDescription": "Exception taken, Other synchronous" + "EventCode": "0x81", + "EventName": "EXC_UNDEF", + "BriefDescription": "Exception taken, Other synchronous" + } + { + "PublicDescription": "Exception taken, Supervisor Call" + "EventCode": "0x82", + "EventName": "EXC_SVC", + "BriefDescription": "Exception taken, Supervisor Call" + } + { + "PublicDescription": "Exception taken, Instruction Abort" + "EventCode": "0x83", + "EventName": "EXC_PABORT", + "BriefDescription": "Exception taken, Instruction Abort" + } + { + "PublicDescription": "Exception taken, Data Abort and SError" + "EventCode": "0x84", + "EventName": "EXC_DABORT", + "BriefDescription": "Exception taken, Data Abort and SError" + } + { + "PublicDescription": "Exception taken, IRQ" + "EventCode": "0x86", + "EventName": "EXC_IRQ", + "BriefDescription": "Exception taken, IRQ" + } + { + "PublicDescription": "Exception taken, FIQ" + "EventCode": "0x87", + "EventName": "EXC_FIQ", + "BriefDescription": "Exception taken, FIQ" + } + { + "PublicDescription": "Exception taken, Secure Monitor Call" + "EventCode": "0x88", + "EventName": "EXC_SMC", + "BriefDescription": "Exception taken, Secure 
Monitor Call" + } + { + "PublicDescription": "Exception taken, Hypervisor Call" + "EventCode": "0x8a", + "EventName": "EXC_HVC", + "BriefDescription": "Exception taken, Hypervisor Call" + } + { + "PublicDescription": "Exception taken, Instruction Abort not taken locally" + "EventCode": "0x8b", + "EventName": "EXC_TRAP_PABORT", + "BriefDescription": "Exception taken, Instruction Abort not taken locally" + } + { + "PublicDescription": "Exception taken, Data Abort or SError not taken locally" + "EventCode": "0x8c", + "EventName": "EXC_TRAP_DABORT", + "BriefDescription": "Exception taken, Data Abort or SError not taken locally" + } + { + "PublicDescription": "Exception taken, Other traps not taken locally" + "EventCode": "0x8d", + "EventName": "EXC_TRAP_OTHER", + "BriefDescription": "Exception taken, Other traps not taken locally" + } + { + "PublicDescription": "Exception taken, IRQ not taken locally" + "EventCode": "0x8e", + "EventName": "EXC_TRAP_IRQ", + "BriefDescription": "Exception taken, IRQ not taken locally" + } + { + "PublicDescription": "Exception taken, FIQ not taken locally" + "EventCode": "0x8f", + "EventName": "EXC_TRAP_FIQ", + "BriefDescription": "Exception taken, FIQ not taken locally" + } + { + "PublicDescription": "Release consistency operation speculatively executed, Load-Acquire" + "EventCode": "0x90", + "EventName": "RC_LD_SPEC", + "BriefDescription": "Release consistency operation speculatively executed, Load-Acquire" + } + { + "PublicDescription": "Release consistency operation speculatively executed, Store-Release" + "EventCode": "0x91", + "EventName": "RC_ST_SPEC", + "BriefDescription": "Release consistency operation speculatively executed, Store-Release" + } + { + "PublicDescription": "Attributable Level 3 data or unified cache access, read" + "EventCode": "0xa0", + "EventName": "L3D_CACHE_RD", + "BriefDescription": "Attributable Level 3 data or unified cache access, read" + } + { + "PublicDescription": "Attributable Level 3 data or unified 
cache access, write" + "EventCode": "0xa1", + "EventName": "L3D_CACHE_WR", + "BriefDescription": "Attributable Level 3 data or unified cache access, write" + } + { + "PublicDescription": "Attributable Level 3 data or unified cache refill, read" + "EventCode": "0xa2", + "EventName": "L3D_CACHE_REFILL_RD", + "BriefDescription": "Attributable Level 3 data or unified cache refill, read" + } + { + "PublicDescription": "Attributable Level 3 data or unified cache refill, write" + "EventCode": "0xa3", + "EventName": "L3D_CACHE_REFILL_WR", + "BriefDescription": "Attributable Level 3 data or unified cache refill, write" + } + { + "PublicDescription": "Attributable Level 3 data or unified cache Write-Back, victim" + "EventCode": "0xa6", + "EventName": "L3D_CACHE_WB_VICTIM", + "BriefDescription": "Attributable Level 3 data or unified cache Write-Back, victim" + } + { + "PublicDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean" + "EventCode": "0xa7", + "EventName": "L3D_CACHE_WB_CLEAN", + "BriefDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean" + } + { + "PublicDescription": "Attributable Level 3 data or unified cache access, invalidate" + "EventCode": "0xa8", + "EventName": "L3D_CACHE_INVAL", + "BriefDescription": "Attributable Level 3 data or unified cache access, invalidate" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json deleted file mode 100644 index 2db45c40ebc7..000000000000 --- a/tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json +++ /dev/null @@ -1,62 +0,0 @@ -[ - { - "PublicDescription": "Attributable Level 1 data cache access, read", - "EventCode": "0x40", - "EventName": "l1d_cache_rd", - "BriefDescription": "L1D cache read", - }, - { - "PublicDescription": "Attributable Level 1 data cache access, write ", - "EventCode": "0x41", - "EventName": "l1d_cache_wr", - "BriefDescription": "L1D cache 
write", - }, - { - "PublicDescription": "Attributable Level 1 data cache refill, read", - "EventCode": "0x42", - "EventName": "l1d_cache_refill_rd", - "BriefDescription": "L1D cache refill read", - }, - { - "PublicDescription": "Attributable Level 1 data cache refill, write", - "EventCode": "0x43", - "EventName": "l1d_cache_refill_wr", - "BriefDescription": "L1D refill write", - }, - { - "PublicDescription": "Attributable Level 1 data TLB refill, read", - "EventCode": "0x4C", - "EventName": "l1d_tlb_refill_rd", - "BriefDescription": "L1D tlb refill read", - }, - { - "PublicDescription": "Attributable Level 1 data TLB refill, write", - "EventCode": "0x4D", - "EventName": "l1d_tlb_refill_wr", - "BriefDescription": "L1D tlb refill write", - }, - { - "PublicDescription": "Attributable Level 1 data or unified TLB access, read", - "EventCode": "0x4E", - "EventName": "l1d_tlb_rd", - "BriefDescription": "L1D tlb read", - }, - { - "PublicDescription": "Attributable Level 1 data or unified TLB access, write", - "EventCode": "0x4F", - "EventName": "l1d_tlb_wr", - "BriefDescription": "L1D tlb write", - }, - { - "PublicDescription": "Bus access read", - "EventCode": "0x60", - "EventName": "bus_access_rd", - "BriefDescription": "Bus access read", - }, - { - "PublicDescription": "Bus access write", - "EventCode": "0x61", - "EventName": "bus_access_wr", - "BriefDescription": "Bus access write", - } -] diff --git a/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json new file mode 100644 index 000000000000..bc03c06c3918 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json @@ -0,0 +1,32 @@ +[ + { + "ArchStdEvent": "L1D_CACHE_RD", + }, + { + "ArchStdEvent": "L1D_CACHE_WR", + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD", + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_WR", + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD", + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR", 
+ }, + { + "ArchStdEvent": "L1D_TLB_RD", + }, + { + "ArchStdEvent": "L1D_TLB_WR", + }, + { + "ArchStdEvent": "BUS_ACCESS_RD", + }, + { + "ArchStdEvent": "BUS_ACCESS_WR", + } +] diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json deleted file mode 100644 index 480d9f7460ab..000000000000 --- a/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - {, - "EventCode": "0x60", - "EventName": "BUS_ACCESS_LD", - "BriefDescription": "Bus access - Read" - }, - {, - "EventCode": "0x61", - "EventName": "BUS_ACCESS_ST", - "BriefDescription": "Bus access - Write" - }, - {, - "EventCode": "0xC0", - "EventName": "EXT_MEM_REQ", - "BriefDescription": "External memory request" - }, - {, - "EventCode": "0xC1", - "EventName": "EXT_MEM_REQ_NC", - "BriefDescription": "Non-cacheable external memory request" - } -] diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json deleted file mode 100644 index 11baad6344b9..000000000000 --- a/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json +++ /dev/null @@ -1,27 +0,0 @@ -[ - {, - "EventCode": "0xC2", - "EventName": "PREFETCH_LINEFILL", - "BriefDescription": "Linefill because of prefetch" - }, - {, - "EventCode": "0xC3", - "EventName": "PREFETCH_LINEFILL_DROP", - "BriefDescription": "Instruction Cache Throttle occurred" - }, - {, - "EventCode": "0xC4", - "EventName": "READ_ALLOC_ENTER", - "BriefDescription": "Entering read allocate mode" - }, - {, - "EventCode": "0xC5", - "EventName": "READ_ALLOC", - "BriefDescription": "Read allocate mode" - }, - {, - "EventCode": "0xC8", - "EventName": "EXT_SNOOP", - "BriefDescription": "SCU Snooped data from another CPU for this CPU" - } -] diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json deleted file mode 100644 index 73a22402d003..000000000000 --- 
a/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json +++ /dev/null @@ -1,32 +0,0 @@ -[ - {, - "EventCode": "0x86", - "EventName": "EXC_IRQ", - "BriefDescription": "Exception taken, IRQ" - }, - {, - "EventCode": "0x87", - "EventName": "EXC_FIQ", - "BriefDescription": "Exception taken, FIQ" - }, - {, - "EventCode": "0xC6", - "EventName": "PRE_DECODE_ERR", - "BriefDescription": "Pre-decode error" - }, - {, - "EventCode": "0xD0", - "EventName": "L1I_CACHE_ERR", - "BriefDescription": "L1 Instruction Cache (data or tag) memory error" - }, - {, - "EventCode": "0xD1", - "EventName": "L1D_CACHE_ERR", - "BriefDescription": "L1 Data Cache (data, tag or dirty) memory error, correctable or non-correctable" - }, - {, - "EventCode": "0xD2", - "EventName": "TLB_ERR", - "BriefDescription": "TLB memory error" - } -] diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/core-imp-def.json new file mode 100644 index 000000000000..9f0f15d15f75 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/core-imp-def.json @@ -0,0 +1,122 @@ +[ + { + "ArchStdEvent": "L1D_CACHE_RD", + }, + { + "ArchStdEvent": "L1D_CACHE_WR", + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD", + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_WR", + }, + { + "ArchStdEvent": "L1D_CACHE_WB_VICTIM", + }, + { + "ArchStdEvent": "L1D_CACHE_WB_CLEAN", + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL", + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD", + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR", + }, + { + "ArchStdEvent": "L1D_TLB_RD", + }, + { + "ArchStdEvent": "L1D_TLB_WR", + }, + { + "ArchStdEvent": "L2D_CACHE_RD", + }, + { + "ArchStdEvent": "L2D_CACHE_WR", + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_RD", + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_WR", + }, + { + "ArchStdEvent": "L2D_CACHE_WB_VICTIM", + }, + { + "ArchStdEvent": "L2D_CACHE_WB_CLEAN", + }, + { + "ArchStdEvent": "L2D_CACHE_INVAL", + }, + { + "PublicDescription": "Level 
1 instruction cache prefetch access count", + "EventCode": "0x102e", + "EventName": "L1I_CACHE_PRF", + "BriefDescription": "L1I cache prefetch access count", + }, + { + "PublicDescription": "Level 1 instruction cache miss due to prefetch access count", + "EventCode": "0x102f", + "EventName": "L1I_CACHE_PRF_REFILL", + "BriefDescription": "L1I cache miss due to prefetch access count", + }, + { + "PublicDescription": "Instruction queue is empty", + "EventCode": "0x1043", + "EventName": "IQ_IS_EMPTY", + "BriefDescription": "Instruction queue is empty", + }, + { + "PublicDescription": "Instruction fetch stall cycles", + "EventCode": "0x1044", + "EventName": "IF_IS_STALL", + "BriefDescription": "Instruction fetch stall cycles", + }, + { + "PublicDescription": "Instructions can receive, but not send", + "EventCode": "0x2014", + "EventName": "FETCH_BUBBLE", + "BriefDescription": "Instructions can receive, but not send", + }, + { + "PublicDescription": "Prefetch request from LSU", + "EventCode": "0x6013", + "EventName": "PRF_REQ", + "BriefDescription": "Prefetch request from LSU", + }, + { + "PublicDescription": "Hit on prefetched data", + "EventCode": "0x6014", + "EventName": "HIT_ON_PRF", + "BriefDescription": "Hit on prefetched data", + }, + { + "PublicDescription": "Cycles of that the number of issuing micro operations are less than 4", + "EventCode": "0x7001", + "EventName": "EXE_STALL_CYCLE", + "BriefDescription": "Cycles of that the number of issue ups are less than 4", + }, + { + "PublicDescription": "No any micro operation is issued and meanwhile any load operation is not resolved", + "EventCode": "0x7004", + "EventName": "MEM_STALL_ANYLOAD", + "BriefDescription": "No any micro operation is issued and meanwhile any load operation is not resolved", + }, + { + "PublicDescription": "No any micro operation is issued and meanwhile there is any load operation missing L1 cache and pending data refill", + "EventCode": "0x7006", + "EventName": "MEM_STALL_L1MISS", + 
"BriefDescription": "No any micro operation is issued and meanwhile there is any load operation missing L1 cache and pending data refill", + }, + { + "PublicDescription": "No any micro operation is issued and meanwhile there is any load operation missing both L1 and L2 cache and pending data refill from L3 cache", + "EventCode": "0x7007", + "EventName": "MEM_STALL_L2MISS", + "BriefDescription": "No any micro operation is issued and meanwhile there is any load operation missing both L1 and L2 cache and pending data refill from L3 cache", + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index e61c9ca6cf9e..f03e26ecb658 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -12,5 +12,7 @@ # # #Family-model,Version,Filename,EventType -0x00000000420f5160,v1,cavium,core -0x00000000410fd03[[:xdigit:]],v1,cortex-a53,core +0x00000000410fd03[[:xdigit:]],v1,arm/cortex-a53,core +0x00000000420f5160,v1,cavium/thunderx2,core +0x00000000430f0af0,v1,cavium/thunderx2,core +0x00000000480fd010,v1,hisilicon/hip08,core diff --git a/tools/perf/pmu-events/arch/powerpc/power9/cache.json b/tools/perf/pmu-events/arch/powerpc/power9/cache.json index 7945c5196c43..851072105054 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/cache.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/cache.json @@ -20,11 +20,6 @@ "BriefDescription": "Finish stall due to a scalar fixed point or CR instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes" }, {, - "EventCode": "0x1D15C", - "EventName": "PM_MRK_DTLB_MISS_1G", - "BriefDescription": "Marked Data TLB reload (after a miss) page size 2M. 
Implies radix translation was used" - }, - {, "EventCode": "0x4D12A", "EventName": "PM_MRK_DATA_FROM_RL4_CYC", "BriefDescription": "Duration in cycles to reload from another chip's L4 on the same Node or Group ( Remote) due to a marked load" @@ -80,21 +75,6 @@ "BriefDescription": "Threshold counter exceed a count of 4096" }, {, - "EventCode": "0x3D156", - "EventName": "PM_MRK_DTLB_MISS_64K", - "BriefDescription": "Marked Data TLB Miss page size 64K" - }, - {, - "EventCode": "0x4C15E", - "EventName": "PM_MRK_DTLB_MISS_16M", - "BriefDescription": "Marked Data TLB Miss page size 16M" - }, - {, - "EventCode": "0x2D15E", - "EventName": "PM_MRK_DTLB_MISS_16G", - "BriefDescription": "Marked Data TLB Miss page size 16G" - }, - {, "EventCode": "0x3F14A", "EventName": "PM_MRK_DPTEG_FROM_RMEM", "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" @@ -123,10 +103,5 @@ "EventCode": "0x1002A", "EventName": "PM_CMPLU_STALL_LARX", "BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied" - }, - {, - "EventCode": "0x1C058", - "EventName": "PM_DTLB_MISS_16G", - "BriefDescription": "Data TLB Miss page size 16G" } ]
\ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json index bd8361b5fd6a..f9fa84b16fb5 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json @@ -155,11 +155,6 @@ "BriefDescription": "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load" }, {, - "EventCode": "0x3C056", - "EventName": "PM_DTLB_MISS_64K", - "BriefDescription": "Data TLB Miss page size 64K" - }, - {, "EventCode": "0x30060", "EventName": "PM_TM_TRANS_RUN_INST", "BriefDescription": "Run instructions completed in transactional state (gated by the run latch)" @@ -345,11 +340,6 @@ "BriefDescription": "Larx finished" }, {, - "EventCode": "0x4C056", - "EventName": "PM_DTLB_MISS_16M", - "BriefDescription": "Data TLB Miss page size 16M" - }, - {, "EventCode": "0x1003A", "EventName": "PM_CMPLU_STALL_LSU_FIN", "BriefDescription": "Finish stall because the NTF instruction was an LSU op (other than a load or a store) with all its dependencies met and just going through the LSU pipe to finish" diff --git a/tools/perf/pmu-events/arch/powerpc/power9/marked.json b/tools/perf/pmu-events/arch/powerpc/power9/marked.json index 22f9f32060a8..b1954c38bab1 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/marked.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/marked.json @@ -530,11 +530,6 @@ "BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch" }, {, - "EventCode": "0x4003C", - "EventName": "PM_DISP_HELD_SYNC_HOLD", - "BriefDescription": "Cycles in which dispatch is held because of a synchronizing instruction in the pipeline" - }, - {, "EventCode": "0x3003C", "EventName": "PM_CMPLU_STALL_NESTED_TEND", "BriefDescription": "Completion stall because the ISU is updating 
the TEXASR to keep track of the nested tend and decrement the TEXASR nested level. This is a short delay" diff --git a/tools/perf/pmu-events/arch/powerpc/power9/memory.json b/tools/perf/pmu-events/arch/powerpc/power9/memory.json index 9960d1c0dd44..2e2ebc700c74 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/memory.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/memory.json @@ -45,11 +45,6 @@ "BriefDescription": "count of Loads completed" }, {, - "EventCode": "0x2D156", - "EventName": "PM_MRK_DTLB_MISS_4K", - "BriefDescription": "Marked Data TLB Miss page size 4k" - }, - {, "EventCode": "0x4C042", "EventName": "PM_DATA_FROM_L3", "BriefDescription": "The processor's data cache was reloaded from local core's L3 due to a demand load" diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json b/tools/perf/pmu-events/arch/powerpc/power9/other.json index 5ce312973f1e..48cf4f920b3f 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/other.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json @@ -70,6 +70,11 @@ "BriefDescription": "Cycles thread running at priority level 0 or 1" }, {, + "EventCode": "0x4C054", + "EventName": "PM_DERAT_MISS_16G_1G", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16G (hpt mode) or 1G (radix mode)" + }, + {, "EventCode": "0x2084", "EventName": "PM_FLUSH_HB_RESTORE_CYC", "BriefDescription": "Cycles in which no new instructions can be dispatched to the ICT after a flush. History buffer recovery" @@ -107,12 +112,12 @@ {, "EventCode": "0x360B2", "EventName": "PM_L3_GRP_GUESS_WRONG_LOW", - "BriefDescription": "Initial scope=group (GS or NNS) but data from outside group (far or rem). 
Prediction too Low" + "BriefDescription": "Prefetch scope predictor selected GS or NNS, but was wrong because scope was LNS" }, {, "EventCode": "0x168A6", "EventName": "PM_TM_CAM_OVERFLOW", - "BriefDescription": "L3 TM cam overflow during L2 co of SC" + "BriefDescription": "L3 TM CAM is full when a L2 castout of TM_SC line occurs. Line is pushed to memory" }, {, "EventCode": "0xE8B0", @@ -150,11 +155,6 @@ "BriefDescription": "All ISU rejects" }, {, - "EventCode": "0x460A6", - "EventName": "PM_RD_FORMING_SC", - "BriefDescription": "Read forming SC" - }, - {, "EventCode": "0x468A0", "EventName": "PM_L3_PF_OFF_CHIP_MEM", "BriefDescription": "L3 PF from Off chip memory" @@ -187,7 +187,7 @@ {, "EventCode": "0x368A6", "EventName": "PM_SNP_TM_HIT_T", - "BriefDescription": "Snp TM sthit T/Tn/Te" + "BriefDescription": "TM snoop that is a store hits line in L3 in T, Tn or Te state (shared modified)" }, {, "EventCode": "0x3001A", @@ -205,6 +205,11 @@ "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's ECO L3 on the same chip due to a marked load" }, {, + "EventCode": "0xF0B4", + "EventName": "PM_DC_PREF_CONS_ALLOC", + "BriefDescription": "Prefetch stream allocated in the conservative phase by either the hardware prefetch mechanism or software prefetch. 
The sum of this pair subtracted from the total number of allocs will give the total allocs in normal phase" + }, + {, "EventCode": "0xF894", "EventName": "PM_LSU3_L1_CAM_CANCEL", "BriefDescription": "ls3 l1 tm cam cancel" @@ -227,7 +232,12 @@ {, "EventCode": "0x468A6", "EventName": "PM_RD_CLEARING_SC", - "BriefDescription": "Read clearing SC" + "BriefDescription": "Core TM load hits line in L3 in TM_SC state and causes it to be invalidated" + }, + {, + "EventCode": "0xD0B0", + "EventName": "PM_HWSYNC", + "BriefDescription": "" }, {, "EventCode": "0x168B0", @@ -265,6 +275,11 @@ "BriefDescription": "Prefetch stream allocated by the hardware prefetch mechanism" }, {, + "EventCode": "0xF0BC", + "EventName": "PM_LS2_UNALIGNED_ST", + "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, + {, "EventCode": "0xD0AC", "EventName": "PM_SRQ_SYNC_CYC", "BriefDescription": "A sync is in the S2Q (edge detect to count)" @@ -275,6 +290,11 @@ "BriefDescription": "Marked instruction was reloaded from a location beyond the local chiplet" }, {, + "EventCode": "0x58A8", + "EventName": "PM_DECODE_HOLD_ICT_FULL", + "BriefDescription": "Counts the number of cycles in which the IFU was not able to decode and transmit one or more instructions because all itags were in use. 
This means the ICT is full for this thread" + }, + {, "EventCode": "0x26082", "EventName": "PM_L2_IC_INV", "BriefDescription": "I-cache Invalidates sent over the realod bus to the core" @@ -365,6 +385,16 @@ "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load" }, {, + "EventCode": "0xF888", + "EventName": "PM_LSU1_STORE_REJECT", + "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met" + }, + {, + "EventCode": "0xC098", + "EventName": "PM_LS2_UNALIGNED_LD", + "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, + {, "EventCode": "0x20058", "EventName": "PM_DARQ1_10_12_ENTRIES", "BriefDescription": "Cycles in which 10 or more DARQ1 entries (out of 12) are in use" @@ -372,7 +402,7 @@ {, "EventCode": "0x360A6", "EventName": "PM_SNP_TM_HIT_M", - "BriefDescription": "Snp TM st hit M/Mu" + "BriefDescription": "TM snoop that is a store hits line in L3 in M or Mu state (exclusive modified)" }, {, "EventCode": "0x5898", @@ -395,9 +425,9 @@ "BriefDescription": "A data line was written to the L1 due to a hardware or software prefetch" }, {, - "EventCode": "0xF888", - "EventName": "PM_LSU1_STORE_REJECT", - "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met" + "EventCode": "0x2608E", + "EventName": "PM_TM_LD_CONF", + "BriefDescription": "TM Load (fav or non-fav) ran into conflict (failed)" }, {, "EventCode": "0x1D144", @@ -422,7 +452,7 @@ {, "EventCode": "0x26884", "EventName": "PM_DSIDE_MRU_TOUCH", - 
"BriefDescription": "D-side L2 MRU touch sent to L2" + "BriefDescription": "D-side L2 MRU touch commands sent to the L2" }, {, "EventCode": "0x30134", @@ -440,6 +470,16 @@ "BriefDescription": "XL-form branch was mispredicted due to the predicted target address missing from EAT. The EAT forces a mispredict in this case since there is no predicated target to validate. This is a rare case that may occur when the EAT is full and a branch is issued" }, {, + "EventCode": "0xC094", + "EventName": "PM_LS0_UNALIGNED_LD", + "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, + {, + "EventCode": "0xF8BC", + "EventName": "PM_LS3_UNALIGNED_ST", + "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. 
If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, + {, "EventCode": "0x460AE", "EventName": "PM_L3_P2_CO_RTY", "BriefDescription": "L3 CO received retry port 2 (memory only), every retry counted" @@ -492,7 +532,7 @@ {, "EventCode": "0xC880", "EventName": "PM_LS1_LD_VECTOR_FIN", - "BriefDescription": "" + "BriefDescription": "LS1 finished load vector op" }, {, "EventCode": "0x2894", @@ -515,6 +555,11 @@ "BriefDescription": "Marked derat reload (miss) for any page size" }, {, + "EventCode": "0x160A0", + "EventName": "PM_L3_PF_MISS_L3", + "BriefDescription": "L3 PF missed in L3" + }, + {, "EventCode": "0x1C04A", "EventName": "PM_DATA_FROM_RL2L3_SHR", "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load" @@ -565,11 +610,21 @@ "BriefDescription": "L2 guess local (LNS) and guess was not correct (ie data not on chip)" }, {, + "EventCode": "0xC888", + "EventName": "PM_LSU_DTLB_MISS_64K", + "BriefDescription": "Data TLB Miss page size 64K" + }, + {, "EventCode": "0xE0A4", "EventName": "PM_TMA_REQ_L2", "BriefDescription": "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding" }, {, + "EventCode": "0xC088", + "EventName": "PM_LSU_DTLB_MISS_4K", + "BriefDescription": "Data TLB Miss page size 4K" + }, + {, "EventCode": "0x3C042", "EventName": "PM_DATA_FROM_L3_DISP_CONFLICT", "BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a demand load" @@ -602,7 +657,7 @@ {, "EventCode": "0x26084", "EventName": "PM_L2_RCLD_DISP_FAIL_OTHER", - "BriefDescription": "All I-or-D side load dispatch attempts for this thread that failed due to reason other than address collision (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All D-side-Ld or I-side-instruction-fetch dispatch attempts for this thread that failed due to reasons other 
than an address collision conflicts with an L2 machines (e.g. Read-Claim/Snoop machine not available)" }, {, "EventCode": "0x101E4", @@ -647,12 +702,12 @@ {, "EventCode": "0x46080", "EventName": "PM_L2_DISP_ALL_L2MISS", - "BriefDescription": "All successful Ld/St dispatches for this thread that were an L2 miss (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All successful D-side-Ld/St or I-side-instruction-fetch dispatches for this thread that were an L2 miss" }, {, - "EventCode": "0x160A0", - "EventName": "PM_L3_PF_MISS_L3", - "BriefDescription": "L3 PF missed in L3" + "EventCode": "0xF8B8", + "EventName": "PM_LS1_UNALIGNED_ST", + "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" }, {, "EventCode": "0x408C", @@ -667,7 +722,7 @@ {, "EventCode": "0x160B2", "EventName": "PM_L3_LOC_GUESS_CORRECT", - "BriefDescription": "initial scope=node/chip (LNS) and data from local node (local) (pred successful) - always PFs only" + "BriefDescription": "Prefetch scope predictor selected LNS and was correct" }, {, "EventCode": "0x48B4", @@ -767,7 +822,7 @@ {, "EventCode": "0x36082", "EventName": "PM_L2_LD_DISP", - "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All successful D-side-Ld or I-side-instruction-fetch dispatches for this thread" }, {, "EventCode": "0xF8B0", @@ -787,7 +842,7 @@ {, "EventCode": "0x16884", "EventName": "PM_L2_RCLD_DISP_FAIL_ADDR", - "BriefDescription": "All I-od-D side load dispatch attempts for this thread that failed due to address collision with RC/CO/SN/SQ machine (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All D-side-Ld or I-side-instruction-fetch dispatch attempts for this thread that failed due to an address collision conflicts 
with an L2 machines already working on this line (e.g. ld-hit-stq or Read-claim/Castout/Snoop machines)" }, {, "EventCode": "0x460A0", @@ -830,6 +885,11 @@ "BriefDescription": "Instruction prefetch requests" }, {, + "EventCode": "0xC898", + "EventName": "PM_LS3_UNALIGNED_LD", + "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, + {, "EventCode": "0x488C", "EventName": "PM_IC_PREF_WRITE", "BriefDescription": "Instruction prefetch written into IL1" @@ -837,7 +897,7 @@ {, "EventCode": "0xF89C", "EventName": "PM_XLATE_MISS", - "BriefDescription": "The LSU requested a line from L2 for translation. It may be satisfied from any source beyond L2. Includes speculative instructions" + "BriefDescription": "The LSU requested a line from L2 for translation. It may be satisfied from any source beyond L2. Includes speculative instructions. 
Includes instruction, prefetch and demand" }, {, "EventCode": "0x14158", @@ -850,9 +910,14 @@ "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L3 on the same chip due to a marked load" }, {, + "EventCode": "0xC88C", + "EventName": "PM_LSU_DTLB_MISS_16G_1G", + "BriefDescription": "Data TLB Miss page size 16G (HPT) or 1G (Radix)" + }, + {, "EventCode": "0x268A6", "EventName": "PM_TM_RST_SC", - "BriefDescription": "TM-snp rst RM SC" + "BriefDescription": "TM snoop hits line in L3 that is TM_SC state and causes it to be invalidated" }, {, "EventCode": "0x468A4", @@ -917,7 +982,7 @@ {, "EventCode": "0x46086", "EventName": "PM_L2_SN_M_RD_DONE", - "BriefDescription": "SNP dispatched for a read and was M (true M)" + "BriefDescription": "Snoop dispatched for a read and was M (true M)" }, {, "EventCode": "0x40154", @@ -980,14 +1045,9 @@ "BriefDescription": "Link stack predicts right address" }, {, - "EventCode": "0x4C05A", - "EventName": "PM_DTLB_MISS_1G", - "BriefDescription": "Data TLB reload (after a miss) page size 1G. Implies radix translation was used" - }, - {, "EventCode": "0x36886", "EventName": "PM_L2_SN_SX_I_DONE", - "BriefDescription": "SNP dispatched and went from Sx to Ix" + "BriefDescription": "Snoop dispatched and went from Sx to Ix" }, {, "EventCode": "0x4E04A", @@ -1000,11 +1060,6 @@ "BriefDescription": "Duration in cycles to reload from another chip's L4 on a different Node or Group (Distant) due to a marked load" }, {, - "EventCode": "0x2608E", - "EventName": "PM_TM_LD_CONF", - "BriefDescription": "TM Load (fav or non-fav) ran into conflict (failed)" - }, - {, "EventCode": "0x4080", "EventName": "PM_INST_FROM_L1", "BriefDescription": "Instruction fetches from L1. 
L1 instruction hit" @@ -1037,7 +1092,7 @@ {, "EventCode": "0x260A6", "EventName": "PM_NON_TM_RST_SC", - "BriefDescription": "Non-TM snp rst TM SC" + "BriefDescription": "Non-TM snoop hits line in L3 that is TM_SC state and causes it to be invalidated" }, {, "EventCode": "0x3608A", @@ -1065,11 +1120,6 @@ "BriefDescription": "Branch mispredict flushes. Includes target and address misprecition" }, {, - "EventCode": "0x508C", - "EventName": "PM_SHL_CREATED", - "BriefDescription": "Store-Hit-Load Table Entry Created" - }, - {, "EventCode": "0x1504C", "EventName": "PM_IPTEG_FROM_LL4", "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a instruction side request" @@ -1107,7 +1157,7 @@ {, "EventCode": "0x2608A", "EventName": "PM_ISIDE_DISP_FAIL_ADDR", - "BriefDescription": "All I-side dispatch attempts for this thread that failed due to a addr collision with another machine (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All I-side-instruction-fetch dispatch attempts for this thread that failed due to an address collision conflict with an L2 machine already working on this line (e.g. ld-hit-stq or RC/CO/SN machines)" }, {, "EventCode": "0x50B4", @@ -1180,9 +1230,9 @@ "BriefDescription": "Number of stcx instructions finished. This includes instructions in the speculative path of a branch that may be flushed" }, {, - "EventCode": "0xE0B8", - "EventName": "PM_LS2_TM_DISALLOW", - "BriefDescription": "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it" + "EventCode": "0xD8AC", + "EventName": "PM_LWSYNC", + "BriefDescription": "" }, {, "EventCode": "0x2094", @@ -1210,6 +1260,11 @@ "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. 
Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)" }, {, + "EventCode": "0xC894", + "EventName": "PM_LS1_UNALIGNED_LD", + "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, + {, "EventCode": "0x360A2", "EventName": "PM_L3_L2_CO_HIT", "BriefDescription": "L2 CO hits" @@ -1292,7 +1347,7 @@ {, "EventCode": "0xC084", "EventName": "PM_LS2_LD_VECTOR_FIN", - "BriefDescription": "" + "BriefDescription": "LS2 finished load vector op" }, {, "EventCode": "0x1608E", @@ -1345,6 +1400,11 @@ "BriefDescription": "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each SN machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running" }, {, + "EventCode": "0x36084", + "EventName": "PM_L2_RCST_DISP", + "BriefDescription": "All D-side store dispatch attempts for this thread" + }, + {, "EventCode": "0x46084", "EventName": "PM_L2_RCST_DISP_FAIL_OTHER", "BriefDescription": "All D-side store dispatch attempts for this thread that failed due to reason other than address collision" @@ -1355,11 +1415,6 @@ "BriefDescription": "A demand load referenced a line in an active strided prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software." }, {, - "EventCode": "0x36084", - "EventName": "PM_L2_RCST_DISP", - "BriefDescription": "All D-side store dispatch attempts for this thread" - }, - {, "EventCode": "0x45054", "EventName": "PM_FMA_CMPL", "BriefDescription": "two flops operation completed (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only. 
" @@ -1372,7 +1427,7 @@ {, "EventCode": "0x36080", "EventName": "PM_L2_INST", - "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)" + "BriefDescription": "All successful I-side-instruction-fetch (e.g. i-demand, i-prefetch) dispatches for this thread" }, {, "EventCode": "0x3504C", @@ -1387,7 +1442,7 @@ {, "EventCode": "0x1688A", "EventName": "PM_ISIDE_DISP", - "BriefDescription": "All I-side dispatch attempts for this thread (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All I-side-instruction-fetch dispatch attempts for this thread" }, {, "EventCode": "0x468AA", @@ -1420,6 +1475,11 @@ "BriefDescription": "Load tm hit in L1" }, {, + "EventCode": "0xE0B8", + "EventName": "PM_LS2_TM_DISALLOW", + "BriefDescription": "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it" + }, + {, "EventCode": "0x44044", "EventName": "PM_INST_FROM_L31_ECO_MOD", "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)" @@ -1467,7 +1527,7 @@ {, "EventCode": "0x36086", "EventName": "PM_L2_RC_ST_DONE", - "BriefDescription": "RC did store to line that was Tx or Sx" + "BriefDescription": "Read-claim machine did store to line that was in Tx or Sx (Tagged or Shared state)" }, {, "EventCode": "0xE8AC", @@ -1500,6 +1560,11 @@ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a instruction side request" }, {, + "EventCode": "0x460A6", + "EventName": "PM_RD_FORMING_SC", + "BriefDescription": "Doesn't occur" + }, + {, "EventCode": "0x35042", "EventName": "PM_IPTEG_FROM_L3_DISP_CONFLICT", "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a instruction side request" @@ -1527,7 +1592,7 @@ {, "EventCode": "0x36882", "EventName": "PM_L2_LD_HIT", - "BriefDescription": "All 
successful I-or-D side load dispatches for this thread that were L2 hits (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All successful D-side-Ld or I-side-instruction-fetch dispatches for this thread that were L2 hits" }, {, "EventCode": "0x168AC", @@ -1555,11 +1620,6 @@ "BriefDescription": "ProbeNops dispatched" }, {, - "EventCode": "0x58A8", - "EventName": "PM_DECODE_HOLD_ICT_FULL", - "BriefDescription": "Counts the number of cycles in which the IFU was not able to decode and transmit one or more instructions because all itags were in use. This means the ICT is full for this thread" - }, - {, "EventCode": "0x10052", "EventName": "PM_GRP_PUMP_MPRED_RTY", "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)" @@ -1572,7 +1632,7 @@ {, "EventCode": "0x2688A", "EventName": "PM_ISIDE_DISP_FAIL_OTHER", - "BriefDescription": "All I-side dispatch attempts for this thread that failed due to a reason other than addrs collision (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All I-side-instruction-fetch dispatch attempts for this thread that failed due to reasons other than an address collision conflict with an L2 machine (e.g. no available RC/CO machines)" }, {, "EventCode": "0x2001A", @@ -1652,12 +1712,12 @@ {, "EventCode": "0x46880", "EventName": "PM_ISIDE_MRU_TOUCH", - "BriefDescription": "I-side L2 MRU touch sent to L2 for this thread" + "BriefDescription": "I-side L2 MRU touch sent to L2 for this thread I-side L2 MRU touch commands sent to the L2 for this thread" }, {, - "EventCode": "0x1C05C", - "EventName": "PM_DTLB_MISS_2M", - "BriefDescription": "Data TLB reload (after a miss) page size 2M. 
Implies radix translation was used" + "EventCode": "0x508C", + "EventName": "PM_SHL_CREATED", + "BriefDescription": "Store-Hit-Load Table Entry Created" }, {, "EventCode": "0x50B8", @@ -1672,7 +1732,7 @@ {, "EventCode": "0x268B2", "EventName": "PM_L3_LOC_GUESS_WRONG", - "BriefDescription": "Initial scope=node (LNS) but data from out side local node (near or far or rem). Prediction too Low" + "BriefDescription": "Prefetch scope predictor selected LNS, but was wrong" }, {, "EventCode": "0x36088", @@ -1685,6 +1745,11 @@ "BriefDescription": "L3 PF received retry port 2, every retry counted" }, {, + "EventCode": "0xD8B0", + "EventName": "PM_PTESYNC", + "BriefDescription": "" + }, + {, "EventCode": "0x26086", "EventName": "PM_CO_TM_SC_FOOTPRINT", "BriefDescription": "L2 did a cleanifdirty CO to the L3 (ie created an SC line in the L3) OR L2 TM_store hit dirty HPC line and L3 indicated SC line formed in L3 on RDR bus" @@ -1740,6 +1805,11 @@ "BriefDescription": "All successful D-Side Store dispatches that were an L2 miss for this thread" }, {, + "EventCode": "0xF8B4", + "EventName": "PM_DC_PREF_XCONS_ALLOC", + "BriefDescription": "Prefetch stream allocated in the Ultra conservative phase by either the hardware prefetch mechanism or software prefetch" + }, + {, "EventCode": "0x35048", "EventName": "PM_IPTEG_FROM_DL2L3_SHR", "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request" @@ -1782,7 +1852,7 @@ {, "EventCode": "0x460B2", "EventName": "PM_L3_SYS_GUESS_WRONG", - "BriefDescription": "Initial scope=system (VGS or RNS) but data from local or near. 
Prediction too high" + "BriefDescription": "Prefetch scope predictor selected VGS or RNS, but was wrong" }, {, "EventCode": "0x58B8", @@ -1800,11 +1870,6 @@ "BriefDescription": "Completion time tabortnoncd, tabortcd, treclaim" }, {, - "EventCode": "0x4C054", - "EventName": "PM_DERAT_MISS_16G", - "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16G" - }, - {, "EventCode": "0x268A0", "EventName": "PM_L3_CO_L31", "BriefDescription": "L3 CO to L3.1 OR of port 0 and 1 (lossy = may undercount if two cresps come in the same cyc)" @@ -1862,7 +1927,7 @@ {, "EventCode": "0x368B2", "EventName": "PM_L3_GRP_GUESS_WRONG_HIGH", - "BriefDescription": "Initial scope=group (GS or NNS) but data from local node. Prediction too high" + "BriefDescription": "Prefetch scope predictor selected GS or NNS, but was wrong because scope was VGS or RNS" }, {, "EventCode": "0xE8BC", @@ -1897,7 +1962,7 @@ {, "EventCode": "0x260B2", "EventName": "PM_L3_SYS_GUESS_CORRECT", - "BriefDescription": "Initial scope=system (VGS or RNS) and data from outside group (far or rem)(pred successful)" + "BriefDescription": "Prefetch scope predictor selected VGS or RNS and was correct" }, {, "EventCode": "0x1D146", @@ -1915,6 +1980,11 @@ "BriefDescription": "RC requests that were on group (aka nodel) pump attempts" }, {, + "EventCode": "0xC08C", + "EventName": "PM_LSU_DTLB_MISS_16M_2M", + "BriefDescription": "Data TLB Miss page size 16M (HPT) or 2M (Radix)" + }, + {, "EventCode": "0x16080", "EventName": "PM_L2_LD", "BriefDescription": "All successful D-side Load dispatches for this thread (L2 miss + L2 hits)" @@ -1927,7 +1997,7 @@ {, "EventCode": "0xC080", "EventName": "PM_LS0_LD_VECTOR_FIN", - "BriefDescription": "" + "BriefDescription": "LS0 finished load vector op" }, {, "EventCode": "0x368B0", @@ -2000,6 +2070,11 @@ "BriefDescription": "Conditional Branch Completed in which the HW correctly predicted the direction as taken. 
Counted at completion time" }, {, + "EventCode": "0xF0B8", + "EventName": "PM_LS0_UNALIGNED_ST", + "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, + {, "EventCode": "0x20132", "EventName": "PM_MRK_DFU_FIN", "BriefDescription": "Decimal Unit marked Instruction Finish" @@ -2007,7 +2082,7 @@ {, "EventCode": "0x160A6", "EventName": "PM_TM_SC_CO", - "BriefDescription": "L3 castout TM SC line" + "BriefDescription": "L3 castout of line that was StoreCopy (original value of speculatively written line) in a Transaction" }, {, "EventCode": "0xC8B0", @@ -2017,7 +2092,7 @@ {, "EventCode": "0x16084", "EventName": "PM_L2_RCLD_DISP", - "BriefDescription": "All I-or-D side load dispatch attempts for this thread (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All D-side-Ld or I-side-instruction-fetch dispatch attempts for this thread" }, {, "EventCode": "0x3F150", @@ -2122,12 +2197,12 @@ {, "EventCode": "0x46082", "EventName": "PM_L2_ST_DISP", - "BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)" + "BriefDescription": "All successful D-side store dispatches for this thread" }, {, "EventCode": "0x36880", "EventName": "PM_L2_INST_MISS", - "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)" + "BriefDescription": "All successful I-side-instruction-fetch (e.g. 
i-demand, i-prefetch) dispatches for this thread that were an L2 miss" }, {, "EventCode": "0xE084", @@ -2217,7 +2292,7 @@ {, "EventCode": "0xC884", "EventName": "PM_LS3_LD_VECTOR_FIN", - "BriefDescription": "" + "BriefDescription": "LS3 finished load vector op" }, {, "EventCode": "0x360A8", @@ -2242,7 +2317,7 @@ {, "EventCode": "0x168B2", "EventName": "PM_L3_GRP_GUESS_CORRECT", - "BriefDescription": "Initial scope=group (GS or NNS) and data from same group (near) (pred successful)" + "BriefDescription": "Prefetch scope predictor selected GS or NNS and was correct" }, {, "EventCode": "0x48A4", diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json index 5af1abbe82c4..b4772f54a271 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json @@ -65,11 +65,6 @@ "BriefDescription": "Dispatch Held" }, {, - "EventCode": "0x3D154", - "EventName": "PM_MRK_DERAT_MISS_16M", - "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16M" - }, - {, "EventCode": "0x200F8", "EventName": "PM_EXT_INT", "BriefDescription": "external interrupt" @@ -120,6 +115,11 @@ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. 
Only PTE reloads are included" }, {, + "EventCode": "0x4C15C", + "EventName": "PM_MRK_DERAT_MISS_16G_1G", + "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16G (hpt mode) and 1G (radix mode)" + }, + {, "EventCode": "0x10024", "EventName": "PM_PMC5_OVERFLOW", "BriefDescription": "Overflow from counter 5" @@ -155,11 +155,6 @@ "BriefDescription": "Ict empty for this thread due to Icache Miss" }, {, - "EventCode": "0x3D152", - "EventName": "PM_MRK_DERAT_MISS_1G", - "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 1G. Implies radix translation" - }, - {, "EventCode": "0x4F14A", "EventName": "PM_MRK_DPTEG_FROM_OFF_CHIP_CACHE", "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" @@ -185,11 +180,6 @@ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" }, {, - "EventCode": "0x2C05A", - "EventName": "PM_DERAT_MISS_1G", - "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 1G. Implies radix translation" - }, - {, "EventCode": "0x1F058", "EventName": "PM_RADIX_PWC_L2_PTE_FROM_L2", "BriefDescription": "A Page Table Entry was reloaded to a level 2 page walk cache from the core's L2 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation" @@ -240,11 +230,6 @@ "BriefDescription": "Data PTEG reload" }, {, - "EventCode": "0x2D152", - "EventName": "PM_MRK_DERAT_MISS_2M", - "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 2M. 
Implies radix translation" - }, - {, "EventCode": "0x2C046", "EventName": "PM_DATA_FROM_RL2L3_MOD", "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load" @@ -290,6 +275,11 @@ "BriefDescription": "Finish stall because the NTF instruction was issued to the Decimal Floating Point execution pipe and waiting to finish. Includes decimal floating point instructions + 128 bit binary floating point instructions. Not qualified by multicycle" }, {, + "EventCode": "0x3C054", + "EventName": "PM_DERAT_MISS_16M_2M", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M (HPT mode) or 2M (Radix mode)" + }, + {, "EventCode": "0x4C04C", "EventName": "PM_DATA_FROM_DMEM", "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a demand load" @@ -360,11 +350,6 @@ "BriefDescription": "The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to an instruction fetch (not prefetch)" }, {, - "EventCode": "0x1C05A", - "EventName": "PM_DERAT_MISS_2M", - "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation" - }, - {, "EventCode": "0x30024", "EventName": "PM_PMC6_OVERFLOW", "BriefDescription": "Overflow from counter 6" @@ -375,6 +360,11 @@ "BriefDescription": "Branch Instruction Finished" }, {, + "EventCode": "0x3D154", + "EventName": "PM_MRK_DERAT_MISS_16M_2M", + "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16M (hpt mode) or 2M (radix mode)" + }, + {, "EventCode": "0x30020", "EventName": "PM_PMC2_REWIND", "BriefDescription": "PMC2 Rewind Event (did not match condition)" @@ -410,11 +400,6 @@ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a marked data side request. 
When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" }, {, - "EventCode": "0x4C15C", - "EventName": "PM_MRK_DERAT_MISS_16G", - "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16G" - }, - {, "EventCode": "0x14052", "EventName": "PM_INST_GRP_PUMP_MPRED_RTY", "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch" @@ -445,11 +430,6 @@ "BriefDescription": "Icache miss demand cycles" }, {, - "EventCode": "0x3C054", - "EventName": "PM_DERAT_MISS_16M", - "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M" - }, - {, "EventCode": "0x2D14E", "EventName": "PM_MRK_DATA_FROM_L21_SHR", "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a marked load" diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json index d0b89f930567..8b3b0f3be664 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json @@ -10,11 +10,6 @@ "BriefDescription": "Local memory above threshold for LSU medium" }, {, - "EventCode": "0x2C056", - "EventName": "PM_DTLB_MISS_4K", - "BriefDescription": "Data TLB Miss page size 4k" - }, - {, "EventCode": "0x40118", "EventName": "PM_MRK_DCACHE_RELOAD_INTV", "BriefDescription": "Combined Intervention event" diff --git a/tools/perf/pmu-events/arch/powerpc/power9/translation.json b/tools/perf/pmu-events/arch/powerpc/power9/translation.json index bc8e03d7a6b0..b27642676244 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/translation.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/translation.json @@ -30,11 +30,6 @@ "BriefDescription": "Store finish count. 
Includes speculative activity" }, {, - "EventCode": "0x44042", - "EventName": "PM_INST_FROM_L3", - "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 due to an instruction fetch (not prefetch)" - }, - {, "EventCode": "0x1504A", "EventName": "PM_IPTEG_FROM_RL2L3_SHR", "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request" @@ -125,6 +120,11 @@ "BriefDescription": "PMC1 Rewind Value saved" }, {, + "EventCode": "0x44042", + "EventName": "PM_INST_FROM_L3", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 due to an instruction fetch (not prefetch)" + }, + {, "EventCode": "0x200FE", "EventName": "PM_DATA_FROM_L2MISS", "BriefDescription": "Demand LD - L2 Miss (not L2 hit)" diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index b578aa26e375..db3a594ee1e4 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -39,11 +39,13 @@ #include <unistd.h> #include <stdarg.h> #include <libgen.h> +#include <limits.h> #include <dirent.h> #include <sys/time.h> /* getrlimit */ #include <sys/resource.h> /* getrlimit */ #include <ftw.h> #include <sys/stat.h> +#include <linux/list.h> #include "jsmn.h" #include "json.h" #include "jevents.h" @@ -249,31 +251,25 @@ static const char *field_to_perf(struct map *table, char *map, jsmntok_t *val) jsmntok_t *loc = (t); \ if (!(t)->start && (t) > tokens) \ loc = (t) - 1; \ - pr_err("%s:%d: " m ", got %s\n", fn, \ - json_line(map, loc), \ - json_name(t)); \ + pr_err("%s:%d: " m ", got %s\n", fn, \ + json_line(map, loc), \ + json_name(t)); \ + err = -EIO; \ goto out_free; \ } } while (0) -#define TOPIC_DEPTH 256 -static char *topic_array[TOPIC_DEPTH]; -static int topic_level; +static char *topic; static char *get_topic(void) { - char *tp_old, *tp = NULL; + char *tp; int i; - 
for (i = 0; i < topic_level + 1; i++) { - int n; - - tp_old = tp; - n = asprintf(&tp, "%s%s", tp ?: "", topic_array[i]); - if (n < 0) { - pr_info("%s: asprintf() error %s\n", prog); - return NULL; - } - free(tp_old); + /* tp is free'd in process_one_file() */ + i = asprintf(&tp, "%s", topic); + if (i < 0) { + pr_info("%s: asprintf() error %s\n", prog); + return NULL; } for (i = 0; i < (int) strlen(tp); i++) { @@ -290,25 +286,15 @@ static char *get_topic(void) return tp; } -static int add_topic(int level, char *bname) +static int add_topic(char *bname) { - char *topic; - - level -= 2; - - if (level >= TOPIC_DEPTH) - return -EINVAL; - + free(topic); topic = strdup(bname); if (!topic) { pr_info("%s: strdup() error %s for file %s\n", prog, strerror(errno), bname); return -ENOMEM; } - - free(topic_array[topic_level]); - topic_array[topic_level] = topic; - topic_level = level; return 0; } @@ -366,6 +352,81 @@ static int print_events_table_entry(void *data, char *name, char *event, return 0; } +struct event_struct { + struct list_head list; + char *name; + char *event; + char *desc; + char *long_desc; + char *pmu; + char *unit; + char *perpkg; + char *metric_expr; + char *metric_name; + char *metric_group; +}; + +#define ADD_EVENT_FIELD(field) do { if (field) { \ + es->field = strdup(field); \ + if (!es->field) \ + goto out_free; \ +} } while (0) + +#define FREE_EVENT_FIELD(field) free(es->field) + +#define TRY_FIXUP_FIELD(field) do { if (es->field && !*field) {\ + *field = strdup(es->field); \ + if (!*field) \ + return -ENOMEM; \ +} } while (0) + +#define FOR_ALL_EVENT_STRUCT_FIELDS(op) do { \ + op(name); \ + op(event); \ + op(desc); \ + op(long_desc); \ + op(pmu); \ + op(unit); \ + op(perpkg); \ + op(metric_expr); \ + op(metric_name); \ + op(metric_group); \ +} while (0) + +static LIST_HEAD(arch_std_events); + +static void free_arch_std_events(void) +{ + struct event_struct *es, *next; + + list_for_each_entry_safe(es, next, &arch_std_events, list) { + 
FOR_ALL_EVENT_STRUCT_FIELDS(FREE_EVENT_FIELD); + list_del(&es->list); + free(es); + } +} + +static int save_arch_std_events(void *data, char *name, char *event, + char *desc, char *long_desc, char *pmu, + char *unit, char *perpkg, char *metric_expr, + char *metric_name, char *metric_group) +{ + struct event_struct *es; + struct stat *sb = data; + + es = malloc(sizeof(*es)); + if (!es) + return -ENOMEM; + memset(es, 0, sizeof(*es)); + FOR_ALL_EVENT_STRUCT_FIELDS(ADD_EVENT_FIELD); + list_add_tail(&es->list, &arch_std_events); + return 0; +out_free: + FOR_ALL_EVENT_STRUCT_FIELDS(FREE_EVENT_FIELD); + free(es); + return -ENOMEM; +} + static void print_events_table_suffix(FILE *outfp) { fprintf(outfp, "{\n"); @@ -407,6 +468,32 @@ static char *real_event(const char *name, char *event) return event; } +static int +try_fixup(const char *fn, char *arch_std, char **event, char **desc, + char **name, char **long_desc, char **pmu, char **filter, + char **perpkg, char **unit, char **metric_expr, char **metric_name, + char **metric_group, unsigned long long eventcode) +{ + /* try to find matching event from arch standard values */ + struct event_struct *es; + + list_for_each_entry(es, &arch_std_events, list) { + if (!strcmp(arch_std, es->name)) { + if (!eventcode && es->event) { + /* allow EventCode to be overridden */ + free(*event); + *event = NULL; + } + FOR_ALL_EVENT_STRUCT_FIELDS(TRY_FIXUP_FIELD); + return 0; + } + } + + pr_err("%s: could not find matching %s for %s\n", + prog, arch_std, fn); + return -1; +} + /* Call func with each event in the json file */ int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, @@ -416,7 +503,7 @@ int json_events(const char *fn, char *metric_name, char *metric_group), void *data) { - int err = -EIO; + int err; size_t size; jsmntok_t *tokens, *tok; int i, j, len; @@ -442,6 +529,7 @@ int json_events(const char *fn, char *metric_expr = NULL; char *metric_name = NULL; char *metric_group = NULL; + char 
*arch_std = NULL; unsigned long long eventcode = 0; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; @@ -527,6 +615,10 @@ int json_events(const char *fn, addfield(map, &metric_expr, "", "", val); for (s = metric_expr; *s; s++) *s = tolower(*s); + } else if (json_streq(map, field, "ArchStdEvent")) { + addfield(map, &arch_std, "", "", val); + for (s = arch_std; *s; s++) + *s = tolower(*s); } /* ignore unknown fields */ } @@ -551,8 +643,21 @@ int json_events(const char *fn, if (name) fixname(name); + if (arch_std) { + /* + * An arch standard event is referenced, so try to + * fixup any unassigned values. + */ + err = try_fixup(fn, arch_std, &event, &desc, &name, + &long_desc, &pmu, &filter, &perpkg, + &unit, &metric_expr, &metric_name, + &metric_group, eventcode); + if (err) + goto free_strings; + } err = func(data, name, real_event(name, event), desc, long_desc, pmu, unit, perpkg, metric_expr, metric_name, metric_group); +free_strings: free(event); free(desc); free(name); @@ -565,6 +670,8 @@ int json_events(const char *fn, free(metric_expr); free(metric_name); free(metric_group); + free(arch_std); + if (err) break; tok += j; @@ -588,7 +695,7 @@ static char *file_name_to_table_name(char *fname) * Derive rest of table name from basename of the JSON file, * replacing hyphens and stripping out .json suffix. 
*/ - n = asprintf(&tblname, "pme_%s", basename(fname)); + n = asprintf(&tblname, "pme_%s", fname); if (n < 0) { pr_info("%s: asprintf() error %s for file %s\n", prog, strerror(errno), fname); @@ -598,7 +705,7 @@ static char *file_name_to_table_name(char *fname) for (i = 0; i < strlen(tblname); i++) { c = tblname[i]; - if (c == '-') + if (c == '-' || c == '/') tblname[i] = '_'; else if (c == '.') { tblname[i] = '\0'; @@ -755,25 +862,106 @@ static int get_maxfds(void) static FILE *eventsfp; static char *mapfile; +static int is_leaf_dir(const char *fpath) +{ + DIR *d; + struct dirent *dir; + int res = 1; + + d = opendir(fpath); + if (!d) + return 0; + + while ((dir = readdir(d)) != NULL) { + if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, "..")) + continue; + + if (dir->d_type == DT_DIR) { + res = 0; + break; + } else if (dir->d_type == DT_UNKNOWN) { + char path[PATH_MAX]; + struct stat st; + + sprintf(path, "%s/%s", fpath, dir->d_name); + if (stat(path, &st)) + break; + + if (S_ISDIR(st.st_mode)) { + res = 0; + break; + } + } + } + + closedir(d); + + return res; +} + +static int is_json_file(const char *name) +{ + const char *suffix; + + if (strlen(name) < 5) + return 0; + + suffix = name + strlen(name) - 5; + + if (strncmp(suffix, ".json", 5) == 0) + return 1; + return 0; +} + +static int preprocess_arch_std_files(const char *fpath, const struct stat *sb, + int typeflag, struct FTW *ftwbuf) +{ + int level = ftwbuf->level; + int is_file = typeflag == FTW_F; + + if (level == 1 && is_file && is_json_file(fpath)) + return json_events(fpath, save_arch_std_events, (void *)sb); + + return 0; +} + static int process_one_file(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf) { - char *tblname, *bname = (char *) fpath + ftwbuf->base; + char *tblname, *bname; int is_dir = typeflag == FTW_D; int is_file = typeflag == FTW_F; int level = ftwbuf->level; int err = 0; + if (level == 2 && is_dir) { + /* + * For level 2 directory, bname will include 
parent name, + * like vendor/platform. So search back from platform dir + * to find this. + */ + bname = (char *) fpath + ftwbuf->base - 2; + for (;;) { + if (*bname == '/') + break; + bname--; + } + bname++; + } else + bname = (char *) fpath + ftwbuf->base; + pr_debug("%s %d %7jd %-20s %s\n", is_file ? "f" : is_dir ? "d" : "x", level, sb->st_size, bname, fpath); - /* base dir */ - if (level == 0) + /* base dir or too deep */ + if (level == 0 || level > 3) return 0; + /* model directory, reset topic */ - if (level == 1 && is_dir) { + if ((level == 1 && is_dir && is_leaf_dir(fpath)) || + (level == 2 && is_dir)) { if (close_table) print_events_table_suffix(eventsfp); @@ -798,16 +986,10 @@ static int process_one_file(const char *fpath, const struct stat *sb, * after processing all JSON files (so we can write out the * mapping table after all PMU events tables). * - * TODO: Allow for multiple mapfiles? Punt for now. */ if (level == 1 && is_file) { - if (!strncmp(bname, "mapfile.csv", 11)) { - if (mapfile) { - pr_info("%s: Many mapfiles? Using %s, ignoring %s\n", - prog, mapfile, fpath); - } else { - mapfile = strdup(fpath); - } + if (!strcmp(bname, "mapfile.csv")) { + mapfile = strdup(fpath); return 0; } @@ -820,16 +1002,14 @@ static int process_one_file(const char *fpath, const struct stat *sb, * ignore it. It could be a readme.txt for instance. 
*/ if (is_file) { - char *suffix = bname + strlen(bname) - 5; - - if (strncmp(suffix, ".json", 5)) { + if (!is_json_file(bname)) { pr_info("%s: Ignoring file without .json suffix %s\n", prog, fpath); return 0; } } - if (level > 1 && add_topic(level, bname)) + if (level > 1 && add_topic(bname)) return -ENOMEM; /* @@ -928,12 +1108,26 @@ int main(int argc, char *argv[]) maxfds = get_maxfds(); mapfile = NULL; + rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0); + if (rc && verbose) { + pr_info("%s: Error preprocessing arch standard files %s\n", + prog, ldirname); + goto empty_map; + } else if (rc < 0) { + /* Make build fail */ + free_arch_std_events(); + return 1; + } else if (rc) { + goto empty_map; + } + rc = nftw(ldirname, process_one_file, maxfds, 0); if (rc && verbose) { pr_info("%s: Error walking file tree %s\n", prog, ldirname); goto empty_map; } else if (rc < 0) { /* Make build fail */ + free_arch_std_events(); return 1; } else if (rc) { goto empty_map; @@ -958,5 +1152,6 @@ int main(int argc, char *argv[]) empty_map: fclose(eventsfp); create_empty_mapping(output_file); + free_arch_std_events(); return 0; } diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 62ca0174d5e1..6c108fa79ae3 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -48,6 +48,7 @@ perf-y += bitmap.o perf-y += perf-hooks.o perf-y += clang.o perf-y += unit_number__scnprintf.o +perf-y += mem2node.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 97f64ad7fa08..05dfe11c2f9e 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -170,8 +170,8 @@ static int run_dir(const char *d, const char *perf) if (verbose > 0) vcnt++; - snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s", - d, d, perf, vcnt, v); + scnprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s", + d, d, perf, vcnt, v); return system(cmd) ? 
TEST_FAIL : TEST_OK; } diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c index 2f75fa0c4fef..a20cbc445426 100644 --- a/tools/perf/tests/bp_account.c +++ b/tools/perf/tests/bp_account.c @@ -103,20 +103,18 @@ static int bp_accounting(int wp_cnt, int share) static int detect_cnt(bool is_x) { struct perf_event_attr attr; - void *addr = is_x ? test_function : (void *) &the_var; + void *addr = is_x ? (void *)test_function : (void *)&the_var; int fd[100], cnt = 0, i; while (1) { - fd[cnt] = __event(is_x, addr, &attr); - - if (fd[cnt] < 0) - break; - if (cnt == 100) { pr_debug("way too many debug registers, fix the test\n"); return 0; } + fd[cnt] = __event(is_x, addr, &attr); + if (fd[cnt] < 0) + break; cnt++; } diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 38bf109ce106..625f5a6772af 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -275,6 +275,10 @@ static struct test generic_tests[] = { .func = test__unit_number__scnprint, }, { + .desc = "mem2node", + .func = test__mem2node, + }, + { .func = NULL, }, }; diff --git a/tools/perf/tests/mem.c b/tools/perf/tests/mem.c index 21952e1e6e6d..0f82ee9fd3f7 100644 --- a/tools/perf/tests/mem.c +++ b/tools/perf/tests/mem.c @@ -16,7 +16,7 @@ static int check(union perf_mem_data_src data_src, n = perf_mem__snp_scnprintf(out, sizeof out, &mi); n += perf_mem__lvl_scnprintf(out + n, sizeof out - n, &mi); - snprintf(failure, sizeof failure, "unexpected %s", out); + scnprintf(failure, sizeof failure, "unexpected %s", out); TEST_ASSERT_VAL(failure, !strcmp(string, out)); return 0; } diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c new file mode 100644 index 000000000000..0c3c87f86e03 --- /dev/null +++ b/tools/perf/tests/mem2node.c @@ -0,0 +1,75 @@ +#include <linux/compiler.h> +#include <linux/bitmap.h> +#include "cpumap.h" +#include "mem2node.h" +#include "tests.h" + +static struct node { + int node; + const char *map; +} 
test_nodes[] = { + { .node = 0, .map = "0" }, + { .node = 1, .map = "1-2" }, + { .node = 3, .map = "5-7,9" }, +}; + +#define T TEST_ASSERT_VAL + +static unsigned long *get_bitmap(const char *str, int nbits) +{ + struct cpu_map *map = cpu_map__new(str); + unsigned long *bm = NULL; + int i; + + bm = bitmap_alloc(nbits); + + if (map && bm) { + bitmap_zero(bm, nbits); + + for (i = 0; i < map->nr; i++) { + set_bit(map->map[i], bm); + } + } + + if (map) + cpu_map__put(map); + else + free(bm); + + return bm && map ? bm : NULL; +} + +int test__mem2node(struct test *t __maybe_unused, int subtest __maybe_unused) +{ + struct mem2node map; + struct memory_node nodes[3]; + struct perf_env env = { + .memory_nodes = (struct memory_node *) &nodes[0], + .nr_memory_nodes = ARRAY_SIZE(nodes), + .memory_bsize = 0x100, + }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(nodes); i++) { + nodes[i].node = test_nodes[i].node; + nodes[i].size = 10; + + T("failed: alloc bitmap", + (nodes[i].set = get_bitmap(test_nodes[i].map, 10))); + } + + T("failed: mem2node__init", !mem2node__init(&map, &env)); + T("failed: mem2node__node", 0 == mem2node__node(&map, 0x50)); + T("failed: mem2node__node", 1 == mem2node__node(&map, 0x100)); + T("failed: mem2node__node", 1 == mem2node__node(&map, 0x250)); + T("failed: mem2node__node", 3 == mem2node__node(&map, 0x500)); + T("failed: mem2node__node", 3 == mem2node__node(&map, 0x650)); + T("failed: mem2node__node", -1 == mem2node__node(&map, 0x450)); + T("failed: mem2node__node", -1 == mem2node__node(&map, 0x1050)); + + for (i = 0; i < ARRAY_SIZE(nodes); i++) + free(nodes[i].set); + + mem2node__exit(&map); + return 0; +} diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index 9abca267afa9..7bedf8608fdd 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -98,7 +98,7 @@ static char *test_format_dir_get(void) struct test_format *format = &test_formats[i]; FILE *file; - snprintf(name, PATH_MAX, "%s/%s", dir, format->name); + 
scnprintf(name, PATH_MAX, "%s/%s", dir, format->name); file = fopen(name, "w"); if (!file) diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index 52c3ee701a89..1ecc1f0ff84a 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -47,7 +47,10 @@ trace_libc_inet_pton_backtrace() { [ -z "${expected[$idx]}" ] && break done - rm -f $file + # If any statements are executed from this point onwards, + # the exit code of the last among these will be reflected + # in err below. If the exit code is 0, the test will pass + # even if the perf script output does not match. } # Check for IPv6 interface existence diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 9f51edac44ae..a9760e790563 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -103,6 +103,7 @@ int test__clang(struct test *test, int subtest); const char *test__clang_subtest_get_desc(int subtest); int test__clang_subtest_get_nr(void); int test__unit_number__scnprint(struct test *test, int subtest); +int test__mem2node(struct test *t, int subtest); bool test__bp_signal_is_supported(void); diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 25dd1e0ecc58..6832fcb2e6ff 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -840,15 +840,11 @@ size_t events_stats__fprintf(struct events_stats *stats, FILE *fp) for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { const char *name; - if (stats->nr_events[i] == 0) - continue; - name = perf_event__name(i); if (!strcmp(name, "UNKNOWN")) continue; - ret += fprintf(fp, "%16s events: %10d\n", name, - stats->nr_events[i]); + ret += fprintf(fp, "%16s events: %10d\n", name, stats->nr_events[i]); } return ret; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index ea0a452550b0..8052373bcd6a 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ 
-106,6 +106,7 @@ libperf-y += units.o libperf-y += time-utils.o libperf-y += expr-bison.o libperf-y += branch.o +libperf-y += mem2node.o libperf-$(CONFIG_LIBBPF) += bpf-loader.o libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index bc3302da702b..535357c6ce02 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -238,6 +238,9 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size, if (ops->target.addr == 0) return ins__raw_scnprintf(ins, bf, size, ops); + if (ops->target.name) + return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name); + return scnprintf(bf, size, "%-6s *%" PRIx64, ins->name, ops->target.addr); } @@ -1427,7 +1430,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { struct map *map = args->map; struct dso *dso = map->dso; - char command[PATH_MAX * 2]; + char *command; FILE *file; char symfs_filename[PATH_MAX]; struct kcore_extract kce; @@ -1468,7 +1471,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) strcpy(symfs_filename, tmp); } - snprintf(command, sizeof(command), + err = asprintf(&command, "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand", @@ -1481,12 +1484,17 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) symbol_conf.annotate_src ? "-S" : "", symfs_filename, symfs_filename); + if (err < 0) { + pr_err("Failure allocating memory for the command to run\n"); + goto out_remove_tmp; + } + pr_debug("Executing: %s\n", command); err = -1; if (pipe(stdout_fd) < 0) { pr_err("Failure creating the pipe to run %s\n", command); - goto out_remove_tmp; + goto out_free_command; } pid = fork(); @@ -1513,7 +1521,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) * If we were using debug info should retry with * original binary. 
*/ - goto out_remove_tmp; + goto out_free_command; } nline = 0; @@ -1541,6 +1549,8 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) fclose(file); err = 0; +out_free_command: + free(command); out_remove_tmp: close(stdout_fd[0]); @@ -1554,7 +1564,7 @@ out: out_close_stdout: close(stdout_fd[1]); - goto out_remove_tmp; + goto out_free_command; } static void calc_percent(struct sym_hist *hist, diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 78408f5c4bad..decb91f9da82 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -81,7 +81,7 @@ static int open_cgroup(const char *name) if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1)) return -1; - snprintf(path, PATH_MAX, "%s/%s", mnt, name); + scnprintf(path, PATH_MAX, "%s/%s", mnt, name); fd = open(path, O_RDONLY); if (fd == -1) diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index f3a71db83947..3d6459626c2a 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -232,7 +232,6 @@ int perf_quiet_option(void) var++; } - quiet = true; return 0; } diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6d311868d850..4c842762e3f2 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -32,6 +32,10 @@ void perf_env__exit(struct perf_env *env) for (i = 0; i < env->caches_cnt; i++) cpu_cache_level__free(&env->caches[i]); zfree(&env->caches); + + for (i = 0; i < env->nr_memory_nodes; i++) + free(env->memory_nodes[i].set); + zfree(&env->memory_nodes); } int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e14b3f7c7212..121df1683c36 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3415,8 +3415,17 @@ int perf_event__synthesize_features(struct perf_tool *tool, return ret; } } + + /* Send HEADER_LAST_FEATURE mark. 
*/ + fe = ff.buf; + fe->feat_id = HEADER_LAST_FEATURE; + fe->header.type = PERF_RECORD_HEADER_FEATURE; + fe->header.size = sizeof(*fe); + + ret = process(tool, ff.buf, NULL, NULL); + free(ff.buf); - return 0; + return ret; } int perf_event__process_feature(struct perf_tool *tool, diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 4952b429caa7..1cca0a2fa641 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -433,6 +433,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, char serr[STRERR_BUFSIZE]; char *kbuild_dir = NULL, *kbuild_include_opts = NULL; const char *template = llvm_param.clang_bpf_cmd_template; + char *command_echo, *command_out; if (path[0] != '-' && realpath(path, abspath) == NULL) { err = errno; @@ -487,6 +488,16 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, (path[0] == '-') ? path : abspath); pr_debug("llvm compiling command template: %s\n", template); + + if (asprintf(&command_echo, "echo -n \"%s\"", template) < 0) + goto errout; + + err = read_from_pipe(command_echo, (void **) &command_out, NULL); + if (err) + goto errout; + + pr_debug("llvm compiling command : %s\n", command_out); + err = read_from_pipe(template, &obj_buf, &obj_buf_sz); if (err) { pr_err("ERROR:\tunable to compile %s\n", path); @@ -497,6 +508,8 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, goto errout; } + free(command_echo); + free(command_out); free(kbuild_dir); free(kbuild_include_opts); @@ -509,6 +522,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, *p_obj_buf_sz = obj_buf_sz; return 0; errout: + free(command_echo); free(kbuild_dir); free(kbuild_include_opts); free(obj_buf); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 43fbbee409ec..2eca8478e24f 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -50,21 +50,13 @@ static void machine__threads_init(struct machine *machine) static int machine__set_mmap_name(struct 
machine *machine) { - if (machine__is_host(machine)) { - if (symbol_conf.vmlinux_name) - machine->mmap_name = strdup(symbol_conf.vmlinux_name); - else - machine->mmap_name = strdup("[kernel.kallsyms]"); - } else if (machine__is_default_guest(machine)) { - if (symbol_conf.default_guest_vmlinux_name) - machine->mmap_name = strdup(symbol_conf.default_guest_vmlinux_name); - else - machine->mmap_name = strdup("[guest.kernel.kallsyms]"); - } else { - if (asprintf(&machine->mmap_name, "[guest.kernel.kallsyms.%d]", - machine->pid) < 0) - machine->mmap_name = NULL; - } + if (machine__is_host(machine)) + machine->mmap_name = strdup("[kernel.kallsyms]"); + else if (machine__is_default_guest(machine)) + machine->mmap_name = strdup("[guest.kernel.kallsyms]"); + else if (asprintf(&machine->mmap_name, "[guest.kernel.kallsyms.%d]", + machine->pid) < 0) + machine->mmap_name = NULL; return machine->mmap_name ? 0 : -ENOMEM; } @@ -794,9 +786,15 @@ static struct dso *machine__get_kernel(struct machine *machine) struct dso *kernel; if (machine__is_host(machine)) { + if (symbol_conf.vmlinux_name) + vmlinux_name = symbol_conf.vmlinux_name; + kernel = machine__findnew_kernel(machine, vmlinux_name, "[kernel]", DSO_TYPE_KERNEL); } else { + if (symbol_conf.default_guest_vmlinux_name) + vmlinux_name = symbol_conf.default_guest_vmlinux_name; + kernel = machine__findnew_kernel(machine, vmlinux_name, "[guest.kernel]", DSO_TYPE_GUEST_KERNEL); diff --git a/tools/perf/util/mem2node.c b/tools/perf/util/mem2node.c new file mode 100644 index 000000000000..c6fd81c02586 --- /dev/null +++ b/tools/perf/util/mem2node.c @@ -0,0 +1,134 @@ +#include <errno.h> +#include <inttypes.h> +#include <linux/bitmap.h> +#include "mem2node.h" +#include "util.h" + +struct phys_entry { + struct rb_node rb_node; + u64 start; + u64 end; + u64 node; +}; + +static void phys_entry__insert(struct phys_entry *entry, struct rb_root *root) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct phys_entry 
*e; + + while (*p != NULL) { + parent = *p; + e = rb_entry(parent, struct phys_entry, rb_node); + + if (entry->start < e->start) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&entry->rb_node, parent, p); + rb_insert_color(&entry->rb_node, root); +} + +static void +phys_entry__init(struct phys_entry *entry, u64 start, u64 bsize, u64 node) +{ + entry->start = start; + entry->end = start + bsize; + entry->node = node; + RB_CLEAR_NODE(&entry->rb_node); +} + +int mem2node__init(struct mem2node *map, struct perf_env *env) +{ + struct memory_node *n, *nodes = &env->memory_nodes[0]; + struct phys_entry *entries, *tmp_entries; + u64 bsize = env->memory_bsize; + int i, j = 0, max = 0; + + memset(map, 0x0, sizeof(*map)); + map->root = RB_ROOT; + + for (i = 0; i < env->nr_memory_nodes; i++) { + n = &nodes[i]; + max += bitmap_weight(n->set, n->size); + } + + entries = zalloc(sizeof(*entries) * max); + if (!entries) + return -ENOMEM; + + for (i = 0; i < env->nr_memory_nodes; i++) { + u64 bit; + + n = &nodes[i]; + + for (bit = 0; bit < n->size; bit++) { + u64 start; + + if (!test_bit(bit, n->set)) + continue; + + start = bit * bsize; + + /* + * Merge nearby areas, we walk in order + * through the bitmap, so no need to sort. + */ + if (j > 0) { + struct phys_entry *prev = &entries[j - 1]; + + if ((prev->end == start) && + (prev->node == n->node)) { + prev->end += bsize; + continue; + } + } + + phys_entry__init(&entries[j++], start, bsize, n->node); + } + } + + /* Cut unused entries, due to merging. 
*/ + tmp_entries = realloc(entries, sizeof(*entries) * j); + if (tmp_entries) + entries = tmp_entries; + + for (i = 0; i < j; i++) { + pr_debug("mem2node %03" PRIu64 " [0x%016" PRIx64 "-0x%016" PRIx64 "]\n", + entries[i].node, entries[i].start, entries[i].end); + + phys_entry__insert(&entries[i], &map->root); + } + + map->entries = entries; + return 0; +} + +void mem2node__exit(struct mem2node *map) +{ + zfree(&map->entries); +} + +int mem2node__node(struct mem2node *map, u64 addr) +{ + struct rb_node **p, *parent = NULL; + struct phys_entry *entry; + + p = &map->root.rb_node; + while (*p != NULL) { + parent = *p; + entry = rb_entry(parent, struct phys_entry, rb_node); + if (addr < entry->start) + p = &(*p)->rb_left; + else if (addr >= entry->end) + p = &(*p)->rb_right; + else + goto out; + } + + entry = NULL; +out: + return entry ? (int) entry->node : -1; +} diff --git a/tools/perf/util/mem2node.h b/tools/perf/util/mem2node.h new file mode 100644 index 000000000000..59c4752a2181 --- /dev/null +++ b/tools/perf/util/mem2node.h @@ -0,0 +1,19 @@ +#ifndef __MEM2NODE_H +#define __MEM2NODE_H + +#include <linux/rbtree.h> +#include "env.h" + +struct phys_entry; + +struct mem2node { + struct rb_root root; + struct phys_entry *entries; + int cnt; +}; + +int mem2node__init(struct mem2node *map, struct perf_env *env); +void mem2node__exit(struct mem2node *map); +int mem2node__node(struct mem2node *map, u64 addr); + +#endif /* __MEM2NODE_H */ diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 074c4fd3b67e..38ca3ffb9d61 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -199,19 +199,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd) return 0; } -static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) +static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) { struct perf_event_header *pheader; - u64 evt_head = head; + u64 evt_head = *start; int size = mask + 1; - 
pr_debug2("overwrite_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); - pheader = (struct perf_event_header *)(buf + (head & mask)); - *start = head; + pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start); + pheader = (struct perf_event_header *)(buf + (*start & mask)); while (true) { - if (evt_head - head >= (unsigned int)size) { + if (evt_head - *start >= (unsigned int)size) { pr_debug("Finished reading overwrite ring buffer: rewind\n"); - if (evt_head - head > (unsigned int)size) + if (evt_head - *start > (unsigned int)size) evt_head -= pheader->size; *end = evt_head; return 0; @@ -262,7 +261,7 @@ int perf_mmap__read_init(struct perf_mmap *md) * Backward ring buffer is full. We still have a chance to read * most of data from it. */ - if (overwrite_rb_find_range(data, md->mask, head, &md->start, &md->end)) + if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end)) return -EINVAL; } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4e80ca320399..2fb0272146d8 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -206,8 +206,8 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) for_each_event(sys_dirent, evt_dir, evt_dirent) { - snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, - evt_dirent->d_name); + scnprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, + evt_dirent->d_name); fd = open(evt_path, O_RDONLY); if (fd < 0) continue; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 1111d5bf15ca..064bdcb7bd78 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -351,7 +351,7 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) if (pmu_alias_info_file(name)) continue; - snprintf(path, PATH_MAX, "%s/%s", dir, name); + scnprintf(path, PATH_MAX, "%s/%s", dir, name); file = fopen(path, "r"); if (!file) { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index a5731de0e5eb..c37fbef1711d 100644 --- 
a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -423,20 +423,20 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, pr_warning("Failed to get the type of %s.\n", varname); return -ENOENT; } - pr_debug2("Var real type: (%x)\n", (unsigned)dwarf_dieoffset(&type)); + pr_debug2("Var real type: %s (%x)\n", dwarf_diename(&type), + (unsigned)dwarf_dieoffset(&type)); tag = dwarf_tag(&type); if (field->name[0] == '[' && (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)) { - if (field->next) - /* Save original type for next field */ - memcpy(die_mem, &type, sizeof(*die_mem)); + /* Save original type for next field or type */ + memcpy(die_mem, &type, sizeof(*die_mem)); /* Get the type of this array */ if (die_get_real_type(&type, &type) == NULL) { pr_warning("Failed to get the type of %s.\n", varname); return -ENOENT; } - pr_debug2("Array real type: (%x)\n", + pr_debug2("Array real type: %s (%x)\n", dwarf_diename(&type), (unsigned)dwarf_dieoffset(&type)); if (tag == DW_TAG_pointer_type) { ref = zalloc(sizeof(struct probe_trace_arg_ref)); @@ -448,9 +448,6 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, *ref_ptr = ref; } ref->offset += dwarf_bytesize(&type) * field->index; - if (!field->next) - /* Save vr_die for converting types */ - memcpy(die_mem, vr_die, sizeof(*die_mem)); goto next; } else if (tag == DW_TAG_pointer_type) { /* Check the pointer and dereference */ diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 6891635b50c3..001be4f9d3b9 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -28,6 +28,8 @@ class install_lib(_install_lib): cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ] +if cc != "clang": + cflags += ['-Wno-cast-function-type' ] src_perf = getenv('srctree') + '/tools/perf' build_lib = 
getenv('PYTHON_EXTBUILD_LIB') diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 32235657c1ac..a0061e0b0fad 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -92,7 +92,7 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { }; #undef ID -void perf_stat_evsel_id_init(struct perf_evsel *evsel) +static void perf_stat_evsel_id_init(struct perf_evsel *evsel) { struct perf_stat_evsel *ps = evsel->stats; int i; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 2f44e386a0e8..8f56ba4fd258 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -128,8 +128,6 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel, #define perf_stat_evsel__is(evsel, id) \ __perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id) -void perf_stat_evsel_id_init(struct perf_evsel *evsel); - extern struct runtime_stat rt_stat; extern struct stats walltime_nsecs_stats; diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 1e9c974faf67..8e969f28cc59 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -50,7 +50,7 @@ static int __report_module(struct addr_location *al, u64 ip, if (!mod) mod = dwfl_report_elf(ui->dwfl, dso->short_name, - dso->long_name, -1, al->map->start, + (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start, false); return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1; |