commit 9b82f05f869a823d43ea4186f5f732f2924d3693
tree 6aaa625789d7d345d0694ebe20276f0b42e5a149
parent 4b9fd8a829a1eec7442e38afff21d610604de56a
parent 629b3df7ecb01fddfdf71cb5d3c563d143117c33
author Linus Torvalds <torvalds@linux-foundation.org> 2020-03-31 02:40:08 +0300
committer Linus Torvalds <torvalds@linux-foundation.org> 2020-03-31 02:40:08 +0300
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
 "The main changes in this cycle were:

  Kernel side changes:

   - A couple of x86/cpu cleanups and changes were grandfathered in due
     to patch dependencies. These clean up the set of CPU model/family
     matching macros with a consistent namespace and C99 initializer
     style.

   - A bunch of updates to various low level PMU drivers:
      * AMD Family 19h L3 uncore PMU
      * Intel Tiger Lake uncore support
      * misc fixes to LBR TOS sampling

   - optprobe fixes

   - perf/cgroup: optimize cgroup event sched-in processing

   - misc cleanups and fixes

  Tooling side changes are to:

   - perf {annotate,expr,record,report,stat,test}
   - perl scripting
   - libapi, libperf and libtraceevent
   - vendor events on Intel and S390, ARM cs-etm
   - Intel PT updates
   - Documentation changes and updates to core facilities
   - misc cleanups, fixes and other enhancements"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (89 commits)
  cpufreq/intel_pstate: Fix wrong macro conversion
  x86/cpu: Cleanup the now unused CPU match macros
  hwrng: via_rng: Convert to new X86 CPU match macros
  crypto: Convert to new CPU match macros
  ASoC: Intel: Convert to new X86 CPU match macros
  powercap/intel_rapl: Convert to new X86 CPU match macros
  PCI: intel-mid: Convert to new X86 CPU match macros
  mmc: sdhci-acpi: Convert to new X86 CPU match macros
  intel_idle: Convert to new X86 CPU match macros
  extcon: axp288: Convert to new X86 CPU match macros
  thermal: Convert to new X86 CPU match macros
  hwmon: Convert to new X86 CPU match macros
  platform/x86: Convert to new CPU match macros
  EDAC: Convert to new X86 CPU match macros
  cpufreq: Convert to new X86 CPU match macros
  ACPI: Convert to new X86 CPU match macros
  x86/platform: Convert to new CPU match macros
  x86/kernel: Convert to new CPU match macros
  x86/kvm: Convert to new CPU match macros
  x86/perf/events: Convert to new CPU match macros
  ...
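To illustrate the "consistent namespace and C99 initializer style" mentioned above, a new-style match table looks roughly like the sketch below. This is not taken from the commit itself; the X86_MATCH_INTEL_FAM6_MODEL() helper and x86_match_cpu() come from this series, while the table name and driver data here are hypothetical.

    #include <asm/cpu_device_id.h>	/* X86_MATCH_INTEL_FAM6_MODEL(), x86_match_cpu() */
    #include <asm/intel-family.h>

    /* Hypothetical per-model driver data, for illustration only. */
    static const unsigned long skl_quirks = 0x1, icl_quirks = 0x2;

    static const struct x86_cpu_id example_cpu_ids[] = {
    	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,	&skl_quirks),
    	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,	&icl_quirks),
    	{}
    };

    /* x86_match_cpu() returns the matching table entry or NULL. */
    static bool example_cpu_supported(void)
    {
    	return x86_match_cpu(example_cpu_ids) != NULL;
    }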
Diffstat (limited to 'tools/perf/util')
-rw-r--r--  tools/perf/util/Build | 11
-rw-r--r--  tools/perf/util/annotate.c | 2
-rw-r--r--  tools/perf/util/annotate.h | 1
-rw-r--r--  tools/perf/util/block-info.c | 106
-rw-r--r--  tools/perf/util/block-info.h | 9
-rw-r--r--  tools/perf/util/branch.h | 22
-rw-r--r--  tools/perf/util/cgroup.c | 63
-rw-r--r--  tools/perf/util/cs-etm.c | 159
-rw-r--r--  tools/perf/util/event.h | 1
-rw-r--r--  tools/perf/util/evsel.c | 20
-rw-r--r--  tools/perf/util/evsel.h | 6
-rw-r--r--  tools/perf/util/expr.c | 112
-rw-r--r--  tools/perf/util/expr.h | 8
-rw-r--r--  tools/perf/util/expr.l | 114
-rw-r--r--  tools/perf/util/expr.y | 185
-rw-r--r--  tools/perf/util/header.c | 37
-rw-r--r--  tools/perf/util/hist.c | 3
-rw-r--r--  tools/perf/util/intel-pt.c | 2
-rw-r--r--  tools/perf/util/llvm-utils.c | 2
-rw-r--r--  tools/perf/util/machine.c | 35
-rw-r--r--  tools/perf/util/map.c | 8
-rw-r--r--  tools/perf/util/metricgroup.c | 109
-rw-r--r--  tools/perf/util/mmap.c | 21
-rw-r--r--  tools/perf/util/perf_event_attr_fprintf.c | 1
-rw-r--r--  tools/perf/util/scripting-engines/trace-event-python.c | 30
-rw-r--r--  tools/perf/util/session.c | 8
-rw-r--r--  tools/perf/util/stat-display.c | 39
-rw-r--r--  tools/perf/util/stat-shadow.c | 4
-rw-r--r--  tools/perf/util/stat.h | 1
-rw-r--r--  tools/perf/util/synthetic-events.c | 7
-rw-r--r--  tools/perf/util/util.c | 18
-rw-r--r--  tools/perf/util/util.h | 2
32 files changed, 746 insertions, 400 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 07da6c790b63..c0cf8dff694e 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -121,7 +121,9 @@ perf-y += mem-events.o
perf-y += vsprintf.o
perf-y += units.o
perf-y += time-utils.o
+perf-y += expr-flex.o
perf-y += expr-bison.o
+perf-y += expr.o
perf-y += branch.o
perf-y += mem2node.o
@@ -189,9 +191,13 @@ $(OUTPUT)util/parse-events-bison.c: util/parse-events.y
$(call rule_mkdir)
$(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
+$(OUTPUT)util/expr-flex.c: util/expr.l $(OUTPUT)util/expr-bison.c
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) util/expr.l
+
$(OUTPUT)util/expr-bison.c: util/expr.y
$(call rule_mkdir)
- $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr__
+ $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr_
$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
$(call rule_mkdir)
@@ -203,12 +209,14 @@ $(OUTPUT)util/pmu-bison.c: util/pmu.y
CFLAGS_parse-events-flex.o += -w
CFLAGS_pmu-flex.o += -w
+CFLAGS_expr-flex.o += -w
CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w
CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
+$(OUTPUT)util/expr.o: $(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-bison.c
CFLAGS_bitmap.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_find_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
@@ -216,6 +224,7 @@ CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET
CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_parse-events.o += -Wno-redundant-decls
+CFLAGS_expr.o += -Wno-redundant-decls
CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE
$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 0ea95be84b3b..f1ea0d61eb5b 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -2611,8 +2611,6 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym)
if (++al->jump_sources > notes->max_jump_sources)
notes->max_jump_sources = al->jump_sources;
-
- ++notes->nr_jumps;
}
}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 001258601a37..07c775938d46 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -279,7 +279,6 @@ struct annotation {
struct annotation_options *options;
struct annotation_line **offsets;
int nr_events;
- int nr_jumps;
int max_jump_sources;
int nr_entries;
int nr_asm_entries;
diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c
index fbbb6d640dad..423ec69bda6c 100644
--- a/tools/perf/util/block-info.c
+++ b/tools/perf/util/block-info.c
@@ -65,8 +65,7 @@ struct block_info *block_info__new(void)
return bi;
}
-int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
- struct hist_entry *left, struct hist_entry *right)
+int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right)
{
struct block_info *bi_l = left->block_info;
struct block_info *bi_r = right->block_info;
@@ -74,30 +73,27 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
if (!bi_l->sym || !bi_r->sym) {
if (!bi_l->sym && !bi_r->sym)
- return 0;
+ return -1;
else if (!bi_l->sym)
return -1;
else
return 1;
}
- if (bi_l->sym == bi_r->sym) {
- if (bi_l->start == bi_r->start) {
- if (bi_l->end == bi_r->end)
- return 0;
- else
- return (int64_t)(bi_r->end - bi_l->end);
- } else
- return (int64_t)(bi_r->start - bi_l->start);
- } else {
- cmp = strcmp(bi_l->sym->name, bi_r->sym->name);
+ cmp = strcmp(bi_l->sym->name, bi_r->sym->name);
+ if (cmp)
return cmp;
- }
- if (bi_l->sym->start != bi_r->sym->start)
- return (int64_t)(bi_r->sym->start - bi_l->sym->start);
+ if (bi_l->start != bi_r->start)
+ return (int64_t)(bi_r->start - bi_l->start);
- return (int64_t)(bi_r->sym->end - bi_l->sym->end);
+ return (int64_t)(bi_r->end - bi_l->end);
+}
+
+int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ return __block_info__cmp(left, right);
}
static void init_block_info(struct block_info *bi, struct symbol *sym,
@@ -185,6 +181,17 @@ static int block_column_width(struct perf_hpp_fmt *fmt,
return block_fmt->width;
}
+static int color_pct(struct perf_hpp *hpp, int width, double pct)
+{
+#ifdef HAVE_SLANG_SUPPORT
+ if (use_browser) {
+ return __hpp__slsmg_color_printf(hpp, "%*.2f%%",
+ width - 1, pct);
+ }
+#endif
+ return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, pct);
+}
+
static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt,
struct perf_hpp *hpp,
struct hist_entry *he)
@@ -192,14 +199,11 @@ static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt,
struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
struct block_info *bi = he->block_info;
double ratio = 0.0;
- char buf[16];
if (block_fmt->total_cycles)
ratio = (double)bi->cycles / (double)block_fmt->total_cycles;
- sprintf(buf, "%.2f%%", 100.0 * ratio);
-
- return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+ return color_pct(hpp, block_fmt->width, 100.0 * ratio);
}
static int64_t block_total_cycles_pct_sort(struct perf_hpp_fmt *fmt,
@@ -252,16 +256,13 @@ static int block_cycles_pct_entry(struct perf_hpp_fmt *fmt,
struct block_info *bi = he->block_info;
double ratio = 0.0;
u64 avg;
- char buf[16];
if (block_fmt->block_cycles && bi->num_aggr) {
avg = bi->cycles_aggr / bi->num_aggr;
ratio = (double)avg / (double)block_fmt->block_cycles;
}
- sprintf(buf, "%.2f%%", 100.0 * ratio);
-
- return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+ return color_pct(hpp, block_fmt->width, 100.0 * ratio);
}
static int block_avg_cycles_entry(struct perf_hpp_fmt *fmt,
@@ -349,7 +350,7 @@ static void hpp_register(struct block_fmt *block_fmt, int idx,
switch (idx) {
case PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT:
- fmt->entry = block_total_cycles_pct_entry;
+ fmt->color = block_total_cycles_pct_entry;
fmt->cmp = block_info__cmp;
fmt->sort = block_total_cycles_pct_sort;
break;
@@ -357,7 +358,7 @@ static void hpp_register(struct block_fmt *block_fmt, int idx,
fmt->entry = block_cycles_lbr_entry;
break;
case PERF_HPP_REPORT__BLOCK_CYCLES_PCT:
- fmt->entry = block_cycles_pct_entry;
+ fmt->color = block_cycles_pct_entry;
break;
case PERF_HPP_REPORT__BLOCK_AVG_CYCLES:
fmt->entry = block_avg_cycles_entry;
@@ -377,33 +378,41 @@ static void hpp_register(struct block_fmt *block_fmt, int idx,
}
static void register_block_columns(struct perf_hpp_list *hpp_list,
- struct block_fmt *block_fmts)
+ struct block_fmt *block_fmts,
+ int *block_hpps, int nr_hpps)
{
- for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++)
- hpp_register(&block_fmts[i], i, hpp_list);
+ for (int i = 0; i < nr_hpps; i++)
+ hpp_register(&block_fmts[i], block_hpps[i], hpp_list);
}
-static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts)
+static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts,
+ int *block_hpps, int nr_hpps)
{
__hists__init(&bh->block_hists, &bh->block_list);
perf_hpp_list__init(&bh->block_list);
bh->block_list.nr_header_lines = 1;
- register_block_columns(&bh->block_list, block_fmts);
+ register_block_columns(&bh->block_list, block_fmts,
+ block_hpps, nr_hpps);
- perf_hpp_list__register_sort_field(&bh->block_list,
- &block_fmts[PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT].fmt);
+ /* Sort by the first fmt */
+ perf_hpp_list__register_sort_field(&bh->block_list, &block_fmts[0].fmt);
}
-static void process_block_report(struct hists *hists,
- struct block_report *block_report,
- u64 total_cycles)
+static int process_block_report(struct hists *hists,
+ struct block_report *block_report,
+ u64 total_cycles, int *block_hpps,
+ int nr_hpps)
{
struct rb_node *next = rb_first_cached(&hists->entries);
struct block_hist *bh = &block_report->hist;
struct hist_entry *he;
- init_block_hist(bh, block_report->fmts);
+ if (nr_hpps > PERF_HPP_REPORT__BLOCK_MAX_INDEX)
+ return -1;
+
+ block_report->nr_fmts = nr_hpps;
+ init_block_hist(bh, block_report->fmts, block_hpps, nr_hpps);
while (next) {
he = rb_entry(next, struct hist_entry, rb_node);
@@ -412,16 +421,19 @@ static void process_block_report(struct hists *hists,
next = rb_next(&he->rb_node);
}
- for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++) {
+ for (int i = 0; i < nr_hpps; i++) {
block_report->fmts[i].total_cycles = total_cycles;
block_report->fmts[i].block_cycles = block_report->cycles;
}
hists__output_resort(&bh->block_hists, NULL);
+ return 0;
}
struct block_report *block_info__create_report(struct evlist *evlist,
- u64 total_cycles)
+ u64 total_cycles,
+ int *block_hpps, int nr_hpps,
+ int *nr_reps)
{
struct block_report *block_reports;
int nr_hists = evlist->core.nr_entries, i = 0;
@@ -434,13 +446,23 @@ struct block_report *block_info__create_report(struct evlist *evlist,
evlist__for_each_entry(evlist, pos) {
struct hists *hists = evsel__hists(pos);
- process_block_report(hists, &block_reports[i], total_cycles);
+ process_block_report(hists, &block_reports[i], total_cycles,
+ block_hpps, nr_hpps);
i++;
}
+ *nr_reps = nr_hists;
return block_reports;
}
+void block_info__free_report(struct block_report *reps, int nr_reps)
+{
+ for (int i = 0; i < nr_reps; i++)
+ hists__delete_entries(&reps[i].hist.block_hists);
+
+ free(reps);
+}
+
int report__browse_block_hists(struct block_hist *bh, float min_percent,
struct evsel *evsel, struct perf_env *env,
struct annotation_options *annotation_opts)
@@ -452,13 +474,11 @@ int report__browse_block_hists(struct block_hist *bh, float min_percent,
symbol_conf.report_individual_block = true;
hists__fprintf(&bh->block_hists, true, 0, 0, min_percent,
stdout, true);
- hists__delete_entries(&bh->block_hists);
return 0;
case 1:
symbol_conf.report_individual_block = true;
ret = block_hists_tui_browse(bh, evsel, min_percent,
env, annotation_opts);
- hists__delete_entries(&bh->block_hists);
return ret;
default:
return -1;
diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h
index bef0d75e9819..42e9dcc4cf0a 100644
--- a/tools/perf/util/block-info.h
+++ b/tools/perf/util/block-info.h
@@ -45,6 +45,7 @@ struct block_report {
struct block_hist hist;
u64 cycles;
struct block_fmt fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX];
+ int nr_fmts;
};
struct block_hist;
@@ -61,6 +62,8 @@ static inline void __block_info__zput(struct block_info **bi)
#define block_info__zput(bi) __block_info__zput(&bi)
+int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right);
+
int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *left, struct hist_entry *right);
@@ -68,7 +71,11 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh,
u64 *block_cycles_aggr, u64 total_cycles);
struct block_report *block_info__create_report(struct evlist *evlist,
- u64 total_cycles);
+ u64 total_cycles,
+ int *block_hpps, int nr_hpps,
+ int *nr_reps);
+
+void block_info__free_report(struct block_report *reps, int nr_reps);
int report__browse_block_hists(struct block_hist *bh, float min_percent,
struct evsel *evsel, struct perf_env *env,
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 88e00d268f6f..154a05cd03af 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -12,6 +12,7 @@
#include <linux/stddef.h>
#include <linux/perf_event.h>
#include <linux/types.h>
+#include "event.h"
struct branch_flags {
u64 mispred:1;
@@ -39,9 +40,30 @@ struct branch_entry {
struct branch_stack {
u64 nr;
+ u64 hw_idx;
struct branch_entry entries[0];
};
+/*
+ * The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied.
+ * Otherwise, the output format of a sample with branch stack is
+ * struct branch_stack {
+ * u64 nr;
+ * struct branch_entry entries[0];
+ * }
+ * Check whether the hw_idx is available,
+ * and return the corresponding pointer of entries[0].
+ */
+static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample)
+{
+ u64 *entry = (u64 *)sample->branch_stack;
+
+ entry++;
+ if (sample->no_hw_idx)
+ return (struct branch_entry *)entry;
+ return (struct branch_entry *)(++entry);
+}
+
struct branch_type_stat {
bool branch_to;
u64 counts[PERF_BR_MAX];
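The tooling hunks further down (hist.c, machine.c, session.c and the Python scripting engine) all switch from bs->entries[i] to the accessor added above. A minimal sketch of that consumer pattern, assuming perf's internal headers; illustrative only, not part of the commit:

    #include <stdio.h>
    #include <inttypes.h>
    #include "branch.h"		/* perf_sample__branch_entries() */
    #include "event.h"		/* struct perf_sample */

    /*
     * Walk a sample's branch stack via the new accessor, which skips the
     * leading hw_idx word when the kernel provided it.
     */
    static void dump_branch_stack(struct perf_sample *sample)
    {
    	struct branch_stack *bs = sample->branch_stack;
    	struct branch_entry *entries = perf_sample__branch_entries(sample);
    	u64 i;

    	for (i = 0; i < bs->nr; i++)
    		printf("%" PRIx64 " -> %" PRIx64 "\n",
    		       entries[i].from, entries[i].to);
    }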
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 4881d4af3381..5bc9d3b01bd9 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -3,75 +3,16 @@
#include "evsel.h"
#include "cgroup.h"
#include "evlist.h"
-#include <linux/stringify.h>
#include <linux/zalloc.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
+#include <api/fs/fs.h>
int nr_cgroups;
-static int
-cgroupfs_find_mountpoint(char *buf, size_t maxlen)
-{
- FILE *fp;
- char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1];
- char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path;
- char *token, *saved_ptr = NULL;
-
- fp = fopen("/proc/mounts", "r");
- if (!fp)
- return -1;
-
- /*
- * in order to handle split hierarchy, we need to scan /proc/mounts
- * and inspect every cgroupfs mount point to find one that has
- * perf_event subsystem
- */
- path_v1[0] = '\0';
- path_v2[0] = '\0';
-
- while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %"
- __stringify(PATH_MAX)"s %*d %*d\n",
- mountpoint, type, tokens) == 3) {
-
- if (!path_v1[0] && !strcmp(type, "cgroup")) {
-
- token = strtok_r(tokens, ",", &saved_ptr);
-
- while (token != NULL) {
- if (!strcmp(token, "perf_event")) {
- strcpy(path_v1, mountpoint);
- break;
- }
- token = strtok_r(NULL, ",", &saved_ptr);
- }
- }
-
- if (!path_v2[0] && !strcmp(type, "cgroup2"))
- strcpy(path_v2, mountpoint);
-
- if (path_v1[0] && path_v2[0])
- break;
- }
- fclose(fp);
-
- if (path_v1[0])
- path = path_v1;
- else if (path_v2[0])
- path = path_v2;
- else
- return -1;
-
- if (strlen(path) < maxlen) {
- strcpy(buf, path);
- return 0;
- }
- return -1;
-}
-
static int open_cgroup(const char *name)
{
char path[PATH_MAX + 1];
@@ -79,7 +20,7 @@ static int open_cgroup(const char *name)
int fd;
- if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1))
+ if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, "perf_event"))
return -1;
scnprintf(path, PATH_MAX, "%s/%s", mnt, name);
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 5471045ebf5c..62d2f9b9ce1b 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -363,6 +363,23 @@ struct cs_etm_packet_queue
return NULL;
}
+static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
+ struct cs_etm_traceid_queue *tidq)
+{
+ struct cs_etm_packet *tmp;
+
+ if (etm->sample_branches || etm->synth_opts.last_branch ||
+ etm->sample_instructions) {
+ /*
+ * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
+ * the next incoming packet.
+ */
+ tmp = tidq->packet;
+ tidq->packet = tidq->prev_packet;
+ tidq->prev_packet = tmp;
+ }
+}
+
static void cs_etm__packet_dump(const char *pkt_string)
{
const char *color = PERF_COLOR_BLUE;
@@ -945,7 +962,7 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
if (packet->isa == CS_ETM_ISA_T32) {
u64 addr = packet->start_addr;
- while (offset > 0) {
+ while (offset) {
addr += cs_etm__t32_instr_size(etmq,
trace_chan_id, addr);
offset--;
@@ -1134,10 +1151,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
- if (etm->synth_opts.last_branch) {
- cs_etm__copy_last_branch_rb(etmq, tidq);
+ if (etm->synth_opts.last_branch)
sample.branch_stack = tidq->last_branch;
- }
if (etm->synth_opts.inject) {
ret = cs_etm__inject_event(event, &sample,
@@ -1153,9 +1168,6 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
"CS ETM Trace: failed to deliver instruction event, error %d\n",
ret);
- if (etm->synth_opts.last_branch)
- cs_etm__reset_last_branch_rb(tidq);
-
return ret;
}
@@ -1172,6 +1184,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
union perf_event *event = tidq->event_buf;
struct dummy_branch_stack {
u64 nr;
+ u64 hw_idx;
struct branch_entry entries;
} dummy_bs;
u64 ip;
@@ -1202,6 +1215,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
if (etm->synth_opts.last_branch) {
dummy_bs = (struct dummy_branch_stack){
.nr = 1,
+ .hw_idx = -1ULL,
.entries = {
.from = sample.ip,
.to = sample.addr,
@@ -1340,12 +1354,14 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
struct cs_etm_traceid_queue *tidq)
{
struct cs_etm_auxtrace *etm = etmq->etm;
- struct cs_etm_packet *tmp;
int ret;
u8 trace_chan_id = tidq->trace_chan_id;
- u64 instrs_executed = tidq->packet->instr_count;
+ u64 instrs_prev;
+
+ /* Get instructions remainder from previous packet */
+ instrs_prev = tidq->period_instructions;
- tidq->period_instructions += instrs_executed;
+ tidq->period_instructions += tidq->packet->instr_count;
/*
* Record a branch when the last instruction in
@@ -1363,26 +1379,80 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
* TODO: allow period to be defined in cycles and clock time
*/
- /* Get number of instructions executed after the sample point */
- u64 instrs_over = tidq->period_instructions -
- etm->instructions_sample_period;
+ /*
+ * Below diagram demonstrates the instruction samples
+ * generation flows:
+ *
+ * Instrs Instrs Instrs Instrs
+ * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3)
+ * | | | |
+ * V V V V
+ * --------------------------------------------------
+ * ^ ^
+ * | |
+ * Period Period
+ * instructions(Pi) instructions(Pi')
+ *
+ * | |
+ * \---------------- -----------------/
+ * V
+ * tidq->packet->instr_count
+ *
+ * Instrs Sample(n...) are the synthesised samples occurring
+ * every etm->instructions_sample_period instructions - as
+ * defined on the perf command line. Sample(n) is being the
+ * last sample before the current etm packet, n+1 to n+3
+ * samples are generated from the current etm packet.
+ *
+ * tidq->packet->instr_count represents the number of
+ * instructions in the current etm packet.
+ *
+ * Period instructions (Pi) contains the the number of
+ * instructions executed after the sample point(n) from the
+ * previous etm packet. This will always be less than
+ * etm->instructions_sample_period.
+ *
+ * When generate new samples, it combines with two parts
+ * instructions, one is the tail of the old packet and another
+ * is the head of the new coming packet, to generate
+ * sample(n+1); sample(n+2) and sample(n+3) consume the
+ * instructions with sample period. After sample(n+3), the rest
+ * instructions will be used by later packet and it is assigned
+ * to tidq->period_instructions for next round calculation.
+ */
/*
- * Calculate the address of the sampled instruction (-1 as
- * sample is reported as though instruction has just been
- * executed, but PC has not advanced to next instruction)
+ * Get the initial offset into the current packet instructions;
+ * entry conditions ensure that instrs_prev is less than
+ * etm->instructions_sample_period.
*/
- u64 offset = (instrs_executed - instrs_over - 1);
- u64 addr = cs_etm__instr_addr(etmq, trace_chan_id,
- tidq->packet, offset);
+ u64 offset = etm->instructions_sample_period - instrs_prev;
+ u64 addr;
- ret = cs_etm__synth_instruction_sample(
- etmq, tidq, addr, etm->instructions_sample_period);
- if (ret)
- return ret;
+ /* Prepare last branches for instruction sample */
+ if (etm->synth_opts.last_branch)
+ cs_etm__copy_last_branch_rb(etmq, tidq);
- /* Carry remaining instructions into next sample period */
- tidq->period_instructions = instrs_over;
+ while (tidq->period_instructions >=
+ etm->instructions_sample_period) {
+ /*
+ * Calculate the address of the sampled instruction (-1
+ * as sample is reported as though instruction has just
+ * been executed, but PC has not advanced to next
+ * instruction)
+ */
+ addr = cs_etm__instr_addr(etmq, trace_chan_id,
+ tidq->packet, offset - 1);
+ ret = cs_etm__synth_instruction_sample(
+ etmq, tidq, addr,
+ etm->instructions_sample_period);
+ if (ret)
+ return ret;
+
+ offset += etm->instructions_sample_period;
+ tidq->period_instructions -=
+ etm->instructions_sample_period;
+ }
}
if (etm->sample_branches) {
@@ -1404,15 +1474,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
}
}
- if (etm->sample_branches || etm->synth_opts.last_branch) {
- /*
- * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
- * the next incoming packet.
- */
- tmp = tidq->packet;
- tidq->packet = tidq->prev_packet;
- tidq->prev_packet = tmp;
- }
+ cs_etm__packet_swap(etm, tidq);
return 0;
}
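A worked example of the sampling loop above, with made-up numbers (not from the commit): with instructions_sample_period = 1000, a carried-over period_instructions of 300 and a packet of 2500 instructions, period_instructions becomes 2800 and offset starts at 1000 - 300 = 700. The loop then synthesises samples at packet offsets 699 and 1699 (offset - 1 on each pass), subtracting 1000 instructions per iteration, and the remaining 800 instructions stay in tidq->period_instructions for the next packet.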
@@ -1441,7 +1503,6 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
{
int err = 0;
struct cs_etm_auxtrace *etm = etmq->etm;
- struct cs_etm_packet *tmp;
/* Handle start tracing packet */
if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
@@ -1449,6 +1510,11 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
if (etmq->etm->synth_opts.last_branch &&
tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+ u64 addr;
+
+ /* Prepare last branches for instruction sample */
+ cs_etm__copy_last_branch_rb(etmq, tidq);
+
/*
* Generate a last branch event for the branches left in the
* circular buffer at the end of the trace.
@@ -1456,7 +1522,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
* Use the address of the end of the last reported execution
* range
*/
- u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
+ addr = cs_etm__last_executed_instr(tidq->prev_packet);
err = cs_etm__synth_instruction_sample(
etmq, tidq, addr,
@@ -1476,15 +1542,11 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
}
swap_packet:
- if (etm->sample_branches || etm->synth_opts.last_branch) {
- /*
- * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
- * the next incoming packet.
- */
- tmp = tidq->packet;
- tidq->packet = tidq->prev_packet;
- tidq->prev_packet = tmp;
- }
+ cs_etm__packet_swap(etm, tidq);
+
+ /* Reset last branches after flush the trace */
+ if (etm->synth_opts.last_branch)
+ cs_etm__reset_last_branch_rb(tidq);
return err;
}
@@ -1505,11 +1567,16 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq,
*/
if (etmq->etm->synth_opts.last_branch &&
tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+ u64 addr;
+
+ /* Prepare last branches for instruction sample */
+ cs_etm__copy_last_branch_rb(etmq, tidq);
+
/*
* Use the address of the end of the last reported execution
* range.
*/
- u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
+ addr = cs_etm__last_executed_instr(tidq->prev_packet);
err = cs_etm__synth_instruction_sample(
etmq, tidq, addr,
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 85223159737c..3cda40a2fafc 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -139,6 +139,7 @@ struct perf_sample {
u16 insn_len;
u8 cpumode;
u16 misc;
+ bool no_hw_idx; /* No hw_idx collected in branch_stack */
char insn[MAX_INSN];
void *raw_data;
struct ip_callchain *callchain;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c8dc4450884c..816d930d774e 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -712,7 +712,8 @@ static void __perf_evsel__config_callchain(struct evsel *evsel,
attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
PERF_SAMPLE_BRANCH_CALL_STACK |
PERF_SAMPLE_BRANCH_NO_CYCLES |
- PERF_SAMPLE_BRANCH_NO_FLAGS;
+ PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_HW_INDEX;
}
} else
pr_warning("Cannot use LBR callstack with branch stack. "
@@ -763,7 +764,8 @@ perf_evsel__reset_callgraph(struct evsel *evsel,
if (param->record_mode == CALLCHAIN_LBR) {
perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
- PERF_SAMPLE_BRANCH_CALL_STACK);
+ PERF_SAMPLE_BRANCH_CALL_STACK |
+ PERF_SAMPLE_BRANCH_HW_INDEX);
}
if (param->record_mode == CALLCHAIN_DWARF) {
perf_evsel__reset_sample_bit(evsel, REGS_USER);
@@ -1673,6 +1675,8 @@ fallback_missing_features:
evsel->core.attr.ksymbol = 0;
if (perf_missing_features.bpf)
evsel->core.attr.bpf_event = 0;
+ if (perf_missing_features.branch_hw_idx)
+ evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX;
retry_sample_id:
if (perf_missing_features.sample_id_all)
evsel->core.attr.sample_id_all = 0;
@@ -1784,7 +1788,12 @@ try_fallback:
* Must probe features in the order they were added to the
* perf_event_attr interface.
*/
- if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
+ if (!perf_missing_features.branch_hw_idx &&
+ (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) {
+ perf_missing_features.branch_hw_idx = true;
+ pr_debug2("switching off branch HW index support\n");
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
perf_missing_features.aux_output = true;
pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n");
goto out_close;
@@ -2169,7 +2178,12 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
if (data->branch_stack->nr > max_branch_nr)
return -EFAULT;
+
sz = data->branch_stack->nr * sizeof(struct branch_entry);
+ if (perf_evsel__has_branch_hw_idx(evsel))
+ sz += sizeof(u64);
+ else
+ data->no_hw_idx = true;
OVERFLOW_CHECK(array, sz, max_size);
array = (void *)array + sz;
}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index dc14f4a823cd..33804740e2ca 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -119,6 +119,7 @@ struct perf_missing_features {
bool ksymbol;
bool bpf;
bool aux_output;
+ bool branch_hw_idx;
};
extern struct perf_missing_features perf_missing_features;
@@ -389,6 +390,11 @@ static inline bool perf_evsel__has_branch_callstack(const struct evsel *evsel)
return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
}
+static inline bool perf_evsel__has_branch_hw_idx(const struct evsel *evsel)
+{
+ return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
+}
+
static inline bool evsel__has_callchain(const struct evsel *evsel)
{
return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0;
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
new file mode 100644
index 000000000000..fd192ddf93c1
--- /dev/null
+++ b/tools/perf/util/expr.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <assert.h>
+#include "expr.h"
+#include "expr-bison.h"
+#define YY_EXTRA_TYPE int
+#include "expr-flex.h"
+
+#ifdef PARSER_DEBUG
+extern int expr_debug;
+#endif
+
+/* Caller must make sure id is allocated */
+void expr__add_id(struct parse_ctx *ctx, const char *name, double val)
+{
+ int idx;
+
+ assert(ctx->num_ids < MAX_PARSE_ID);
+ idx = ctx->num_ids++;
+ ctx->ids[idx].name = name;
+ ctx->ids[idx].val = val;
+}
+
+void expr__ctx_init(struct parse_ctx *ctx)
+{
+ ctx->num_ids = 0;
+}
+
+static int
+__expr__parse(double *val, struct parse_ctx *ctx, const char *expr,
+ int start)
+{
+ YY_BUFFER_STATE buffer;
+ void *scanner;
+ int ret;
+
+ ret = expr_lex_init_extra(start, &scanner);
+ if (ret)
+ return ret;
+
+ buffer = expr__scan_string(expr, scanner);
+
+#ifdef PARSER_DEBUG
+ expr_debug = 1;
+#endif
+
+ ret = expr_parse(val, ctx, scanner);
+
+ expr__flush_buffer(buffer, scanner);
+ expr__delete_buffer(buffer, scanner);
+ expr_lex_destroy(scanner);
+ return ret;
+}
+
+int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr)
+{
+ return __expr__parse(final_val, ctx, expr, EXPR_PARSE) ? -1 : 0;
+}
+
+static bool
+already_seen(const char *val, const char *one, const char **other,
+ int num_other)
+{
+ int i;
+
+ if (one && !strcasecmp(one, val))
+ return true;
+ for (i = 0; i < num_other; i++)
+ if (!strcasecmp(other[i], val))
+ return true;
+ return false;
+}
+
+int expr__find_other(const char *expr, const char *one, const char ***other,
+ int *num_other)
+{
+ int err, i = 0, j = 0;
+ struct parse_ctx ctx;
+
+ expr__ctx_init(&ctx);
+ err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER);
+ if (err)
+ return -1;
+
+ *other = malloc((ctx.num_ids + 1) * sizeof(char *));
+ if (!*other)
+ return -ENOMEM;
+
+ for (i = 0, j = 0; i < ctx.num_ids; i++) {
+ const char *str = ctx.ids[i].name;
+
+ if (already_seen(str, one, *other, j))
+ continue;
+
+ str = strdup(str);
+ if (!str)
+ goto out;
+ (*other)[j++] = str;
+ }
+ (*other)[j] = NULL;
+
+out:
+ if (i != ctx.num_ids) {
+ while (--j)
+ free((char *) (*other)[i]);
+ free(*other);
+ err = -1;
+ }
+
+ *num_other = j;
+ return err;
+}
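The declarations in expr.h (next hunk) show how these entry points are meant to be driven. A minimal usage sketch, with made-up metric names and error handling trimmed; illustrative only, not part of the commit:

    #include <string.h>
    #include "expr.h"

    static int example_eval(void)
    {
    	struct parse_ctx ctx;
    	double result;

    	expr__ctx_init(&ctx);
    	/* expr__add_id() stores the pointer, so names must stay allocated. */
    	expr__add_id(&ctx, strdup("FOO"), 2.0);
    	expr__add_id(&ctx, strdup("BAR"), 3.0);

    	/* expr__parse() returns 0 on success, -1 on a parse error. */
    	if (expr__parse(&result, &ctx, "FOO + BAR * 2"))
    		return -1;

    	/* result == 8.0 */
    	return 0;
    }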
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index 046160831f90..9377538f4097 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -2,7 +2,7 @@
#ifndef PARSE_CTX_H
#define PARSE_CTX_H 1
-#define EXPR_MAX_OTHER 15
+#define EXPR_MAX_OTHER 20
#define MAX_PARSE_ID EXPR_MAX_OTHER
struct parse_id {
@@ -17,10 +17,8 @@ struct parse_ctx {
void expr__ctx_init(struct parse_ctx *ctx);
void expr__add_id(struct parse_ctx *ctx, const char *id, double val);
-#ifndef IN_EXPR_Y
-int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp);
-#endif
-int expr__find_other(const char *p, const char *one, const char ***other,
+int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr);
+int expr__find_other(const char *expr, const char *one, const char ***other,
int *num_other);
#endif
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
new file mode 100644
index 000000000000..eaad29243c23
--- /dev/null
+++ b/tools/perf/util/expr.l
@@ -0,0 +1,114 @@
+%option prefix="expr_"
+%option reentrant
+%option bison-bridge
+
+%{
+#include <linux/compiler.h>
+#include "expr.h"
+#include "expr-bison.h"
+
+char *expr_get_text(yyscan_t yyscanner);
+YYSTYPE *expr_get_lval(yyscan_t yyscanner);
+
+static int __value(YYSTYPE *yylval, char *str, int base, int token)
+{
+ u64 num;
+
+ errno = 0;
+ num = strtoull(str, NULL, base);
+ if (errno)
+ return EXPR_ERROR;
+
+ yylval->num = num;
+ return token;
+}
+
+static int value(yyscan_t scanner, int base)
+{
+ YYSTYPE *yylval = expr_get_lval(scanner);
+ char *text = expr_get_text(scanner);
+
+ return __value(yylval, text, base, NUMBER);
+}
+
+/*
+ * Allow @ instead of / to be able to specify pmu/event/ without
+ * conflicts with normal division.
+ */
+static char *normalize(char *str)
+{
+ char *ret = str;
+ char *dst = str;
+
+ while (*str) {
+ if (*str == '@')
+ *dst++ = '/';
+ else if (*str == '\\')
+ *dst++ = *++str;
+ else
+ *dst++ = *str;
+ str++;
+ }
+
+ *dst = 0x0;
+ return ret;
+}
+
+static int str(yyscan_t scanner, int token)
+{
+ YYSTYPE *yylval = expr_get_lval(scanner);
+ char *text = expr_get_text(scanner);
+
+ yylval->str = normalize(strdup(text));
+ if (!yylval->str)
+ return EXPR_ERROR;
+
+ yylval->str = normalize(yylval->str);
+ return token;
+}
+%}
+
+number [0-9]+
+
+sch [-,=]
+spec \\{sch}
+sym [0-9a-zA-Z_\.:@]+
+symbol {spec}*{sym}*{spec}*{sym}*
+
+%%
+ {
+ int start_token;
+
+ start_token = expr_get_extra(yyscanner);
+
+ if (start_token) {
+ expr_set_extra(NULL, yyscanner);
+ return start_token;
+ }
+ }
+
+max { return MAX; }
+min { return MIN; }
+if { return IF; }
+else { return ELSE; }
+#smt_on { return SMT_ON; }
+{number} { return value(yyscanner, 10); }
+{symbol} { return str(yyscanner, ID); }
+"|" { return '|'; }
+"^" { return '^'; }
+"&" { return '&'; }
+"-" { return '-'; }
+"+" { return '+'; }
+"*" { return '*'; }
+"/" { return '/'; }
+"%" { return '%'; }
+"(" { return '('; }
+")" { return ')'; }
+"," { return ','; }
+. { }
+%%
+
+int expr_wrap(void *scanner __maybe_unused)
+{
+ return 1;
+}
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index 7d226241f1d7..4720cbe79357 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -1,31 +1,32 @@
/* Simple expression parser */
%{
+#define YYDEBUG 1
+#include <stdio.h>
#include "util.h"
#include "util/debug.h"
#include <stdlib.h> // strtod()
#define IN_EXPR_Y 1
#include "expr.h"
#include "smt.h"
-#include <assert.h>
#include <string.h>
-#define MAXIDLEN 256
%}
%define api.pure full
%parse-param { double *final_val }
%parse-param { struct parse_ctx *ctx }
-%parse-param { const char **pp }
-%lex-param { const char **pp }
+%parse-param {void *scanner}
+%lex-param {void* scanner}
%union {
- double num;
- char id[MAXIDLEN+1];
+ double num;
+ char *str;
}
+%token EXPR_PARSE EXPR_OTHER EXPR_ERROR
%token <num> NUMBER
-%token <id> ID
+%token <str> ID
%token MIN MAX IF ELSE SMT_ON
%left MIN MAX IF
%left '|'
@@ -37,11 +38,9 @@
%type <num> expr if_expr
%{
-static int expr__lex(YYSTYPE *res, const char **pp);
-
-static void expr__error(double *final_val __maybe_unused,
+static void expr_error(double *final_val __maybe_unused,
struct parse_ctx *ctx __maybe_unused,
- const char **pp __maybe_unused,
+ void *scanner,
const char *s)
{
pr_debug("%s\n", s);
@@ -63,6 +62,27 @@ static int lookup_id(struct parse_ctx *ctx, char *id, double *val)
%}
%%
+start:
+EXPR_PARSE all_expr
+|
+EXPR_OTHER all_other
+
+all_other: all_other other
+|
+
+other: ID
+{
+ if (ctx->num_ids + 1 >= EXPR_MAX_OTHER) {
+ pr_err("failed: way too many variables");
+ YYABORT;
+ }
+
+ ctx->ids[ctx->num_ids++].name = $1;
+}
+|
+MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')'
+
+
all_expr: if_expr { *final_val = $1; }
;
@@ -93,146 +113,3 @@ expr: NUMBER
;
%%
-
-static int expr__symbol(YYSTYPE *res, const char *p, const char **pp)
-{
- char *dst = res->id;
- const char *s = p;
-
- if (*p == '#')
- *dst++ = *p++;
-
- while (isalnum(*p) || *p == '_' || *p == '.' || *p == ':' || *p == '@' || *p == '\\') {
- if (p - s >= MAXIDLEN)
- return -1;
- /*
- * Allow @ instead of / to be able to specify pmu/event/ without
- * conflicts with normal division.
- */
- if (*p == '@')
- *dst++ = '/';
- else if (*p == '\\')
- *dst++ = *++p;
- else
- *dst++ = *p;
- p++;
- }
- *dst = 0;
- *pp = p;
- dst = res->id;
- switch (dst[0]) {
- case 'm':
- if (!strcmp(dst, "min"))
- return MIN;
- if (!strcmp(dst, "max"))
- return MAX;
- break;
- case 'i':
- if (!strcmp(dst, "if"))
- return IF;
- break;
- case 'e':
- if (!strcmp(dst, "else"))
- return ELSE;
- break;
- case '#':
- if (!strcasecmp(dst, "#smt_on"))
- return SMT_ON;
- break;
- }
- return ID;
-}
-
-static int expr__lex(YYSTYPE *res, const char **pp)
-{
- int tok;
- const char *s;
- const char *p = *pp;
-
- while (isspace(*p))
- p++;
- s = p;
- switch (*p++) {
- case '#':
- case 'a' ... 'z':
- case 'A' ... 'Z':
- return expr__symbol(res, p - 1, pp);
- case '0' ... '9': case '.':
- res->num = strtod(s, (char **)&p);
- tok = NUMBER;
- break;
- default:
- tok = *s;
- break;
- }
- *pp = p;
- return tok;
-}
-
-/* Caller must make sure id is allocated */
-void expr__add_id(struct parse_ctx *ctx, const char *name, double val)
-{
- int idx;
- assert(ctx->num_ids < MAX_PARSE_ID);
- idx = ctx->num_ids++;
- ctx->ids[idx].name = name;
- ctx->ids[idx].val = val;
-}
-
-void expr__ctx_init(struct parse_ctx *ctx)
-{
- ctx->num_ids = 0;
-}
-
-static bool already_seen(const char *val, const char *one, const char **other,
- int num_other)
-{
- int i;
-
- if (one && !strcasecmp(one, val))
- return true;
- for (i = 0; i < num_other; i++)
- if (!strcasecmp(other[i], val))
- return true;
- return false;
-}
-
-int expr__find_other(const char *p, const char *one, const char ***other,
- int *num_otherp)
-{
- const char *orig = p;
- int err = -1;
- int num_other;
-
- *other = malloc((EXPR_MAX_OTHER + 1) * sizeof(char *));
- if (!*other)
- return -1;
-
- num_other = 0;
- for (;;) {
- YYSTYPE val;
- int tok = expr__lex(&val, &p);
- if (tok == 0) {
- err = 0;
- break;
- }
- if (tok == ID && !already_seen(val.id, one, *other, num_other)) {
- if (num_other >= EXPR_MAX_OTHER - 1) {
- pr_debug("Too many extra events in %s\n", orig);
- break;
- }
- (*other)[num_other] = strdup(val.id);
- if (!(*other)[num_other])
- return -1;
- num_other++;
- }
- }
- (*other)[num_other] = NULL;
- *num_otherp = num_other;
- if (err) {
- *num_otherp = 0;
- free(*other);
- *other = NULL;
- }
- return err;
-}
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 4246e7447e54..acbd046bf95c 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1590,6 +1590,40 @@ static void free_event_desc(struct evsel *events)
free(events);
}
+static bool perf_attr_check(struct perf_event_attr *attr)
+{
+ if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) {
+ pr_warning("Reserved bits are set unexpectedly. "
+ "Please update perf tool.\n");
+ return false;
+ }
+
+ if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) {
+ pr_warning("Unknown sample type (0x%llx) is detected. "
+ "Please update perf tool.\n",
+ attr->sample_type);
+ return false;
+ }
+
+ if (attr->read_format & ~(PERF_FORMAT_MAX-1)) {
+ pr_warning("Unknown read format (0x%llx) is detected. "
+ "Please update perf tool.\n",
+ attr->read_format);
+ return false;
+ }
+
+ if ((attr->sample_type & PERF_SAMPLE_BRANCH_STACK) &&
+ (attr->branch_sample_type & ~(PERF_SAMPLE_BRANCH_MAX-1))) {
+ pr_warning("Unknown branch sample type (0x%llx) is detected. "
+ "Please update perf tool.\n",
+ attr->branch_sample_type);
+
+ return false;
+ }
+
+ return true;
+}
+
static struct evsel *read_event_desc(struct feat_fd *ff)
{
struct evsel *evsel, *events = NULL;
@@ -1634,6 +1668,9 @@ static struct evsel *read_event_desc(struct feat_fd *ff)
memcpy(&evsel->core.attr, buf, msz);
+ if (!perf_attr_check(&evsel->core.attr))
+ goto error;
+
if (do_read_u32(ff, &nr))
goto error;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index ca5a8f4d007e..e74a5acf66d9 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -2584,9 +2584,10 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
u64 *total_cycles)
{
struct branch_info *bi;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
/* If we have branch cycles always annotate them. */
- if (bs && bs->nr && bs->entries[0].flags.cycles) {
+ if (bs && bs->nr && entries[0].flags.cycles) {
int i;
bi = sample__resolve_bstack(sample, al);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 33cf8928cf05..23c8289c2472 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1295,6 +1295,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
struct perf_sample sample = { .ip = 0, };
struct dummy_branch_stack {
u64 nr;
+ u64 hw_idx;
struct branch_entry entries;
} dummy_bs;
@@ -1316,6 +1317,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
dummy_bs = (struct dummy_branch_stack){
.nr = 1,
+ .hw_idx = -1ULL,
.entries = {
.from = sample.ip,
.to = sample.addr,
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index b5af680fc667..dbdffb6673fe 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -265,6 +265,8 @@ static int detect_kbuild_dir(char **kbuild_dir)
return -ENOMEM;
return 0;
}
+ pr_debug("%s: Couldn't find \"%s\", missing kernel-devel package?.\n",
+ __func__, autoconf_path);
free(autoconf_path);
return -ENOENT;
}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index fb5c2cd44d30..fd14f1489802 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2081,15 +2081,16 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
{
unsigned int i;
const struct branch_stack *bs = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info));
if (!bi)
return NULL;
for (i = 0; i < bs->nr; i++) {
- ip__resolve_ams(al->thread, &bi[i].to, bs->entries[i].to);
- ip__resolve_ams(al->thread, &bi[i].from, bs->entries[i].from);
- bi[i].flags = bs->entries[i].flags;
+ ip__resolve_ams(al->thread, &bi[i].to, entries[i].to);
+ ip__resolve_ams(al->thread, &bi[i].from, entries[i].from);
+ bi[i].flags = entries[i].flags;
}
return bi;
}
@@ -2185,6 +2186,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
/* LBR only affects the user callchain */
if (i != chain_nr) {
struct branch_stack *lbr_stack = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
int lbr_nr = lbr_stack->nr, j, k;
bool branch;
struct branch_flags *flags;
@@ -2210,31 +2212,29 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
ip = chain->ips[j];
else if (j > i + 1) {
k = j - i - 2;
- ip = lbr_stack->entries[k].from;
+ ip = entries[k].from;
branch = true;
- flags = &lbr_stack->entries[k].flags;
+ flags = &entries[k].flags;
} else {
- ip = lbr_stack->entries[0].to;
+ ip = entries[0].to;
branch = true;
- flags = &lbr_stack->entries[0].flags;
- branch_from =
- lbr_stack->entries[0].from;
+ flags = &entries[0].flags;
+ branch_from = entries[0].from;
}
} else {
if (j < lbr_nr) {
k = lbr_nr - j - 1;
- ip = lbr_stack->entries[k].from;
+ ip = entries[k].from;
branch = true;
- flags = &lbr_stack->entries[k].flags;
+ flags = &entries[k].flags;
}
else if (j > lbr_nr)
ip = chain->ips[i + 1 - (j - lbr_nr)];
else {
- ip = lbr_stack->entries[0].to;
+ ip = entries[0].to;
branch = true;
- flags = &lbr_stack->entries[0].flags;
- branch_from =
- lbr_stack->entries[0].from;
+ flags = &entries[0].flags;
+ branch_from = entries[0].from;
}
}
@@ -2281,6 +2281,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
int max_stack)
{
struct branch_stack *branch = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
struct ip_callchain *chain = sample->callchain;
int chain_nr = 0;
u8 cpumode = PERF_RECORD_MISC_USER;
@@ -2328,7 +2329,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
for (i = 0; i < nr; i++) {
if (callchain_param.order == ORDER_CALLEE) {
- be[i] = branch->entries[i];
+ be[i] = entries[i];
if (chain == NULL)
continue;
@@ -2347,7 +2348,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
be[i].from >= chain->ips[first_call] - 8)
first_call++;
} else
- be[i] = branch->entries[branch->nr - i - 1];
+ be[i] = entries[branch->nr - i - 1];
}
memset(iter, 0, sizeof(struct iterations) * nr);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index b342f744b1fc..53d96611e6a6 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -44,8 +44,8 @@ static inline int is_no_dso_memory(const char *filename)
static inline int is_android_lib(const char *filename)
{
- return !strncmp(filename, "/data/app-lib", 13) ||
- !strncmp(filename, "/system/lib", 11);
+ return strstarts(filename, "/data/app-lib/") ||
+ strstarts(filename, "/system/lib/");
}
static inline bool replace_android_lib(const char *filename, char *newfilename)
@@ -65,7 +65,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
app_abi_length = strlen(app_abi);
- if (!strncmp(filename, "/data/app-lib", 13)) {
+ if (strstarts(filename, "/data/app-lib/")) {
char *apk_path;
if (!app_abi_length)
@@ -89,7 +89,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
return true;
}
- if (!strncmp(filename, "/system/lib/", 12)) {
+ if (strstarts(filename, "/system/lib/")) {
char *ndk, *app;
const char *arch;
size_t ndk_length;
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 02aee946b6c1..c3a8c701609a 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -22,6 +22,8 @@
#include <linux/string.h>
#include <linux/zalloc.h>
#include <subcmd/parse-options.h>
+#include <api/fs/fs.h>
+#include "util.h"
struct metric_event *metricgroup__lookup(struct rblist *metric_events,
struct evsel *evsel,
@@ -399,13 +401,85 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
strlist__delete(metriclist);
}
+static void metricgroup__add_metric_weak_group(struct strbuf *events,
+ const char **ids,
+ int idnum)
+{
+ bool no_group = false;
+ int i;
+
+ for (i = 0; i < idnum; i++) {
+ pr_debug("found event %s\n", ids[i]);
+ /*
+ * Duration time maps to a software event and can make
+ * groups not count. Always use it outside a
+ * group.
+ */
+ if (!strcmp(ids[i], "duration_time")) {
+ if (i > 0)
+ strbuf_addf(events, "}:W,");
+ strbuf_addf(events, "duration_time");
+ no_group = true;
+ continue;
+ }
+ strbuf_addf(events, "%s%s",
+ i == 0 || no_group ? "{" : ",",
+ ids[i]);
+ no_group = false;
+ }
+ if (!no_group)
+ strbuf_addf(events, "}:W");
+}
+
+static void metricgroup__add_metric_non_group(struct strbuf *events,
+ const char **ids,
+ int idnum)
+{
+ int i;
+
+ for (i = 0; i < idnum; i++)
+ strbuf_addf(events, ",%s", ids[i]);
+}
+
+static void metricgroup___watchdog_constraint_hint(const char *name, bool foot)
+{
+ static bool violate_nmi_constraint;
+
+ if (!foot) {
+ pr_warning("Splitting metric group %s into standalone metrics.\n", name);
+ violate_nmi_constraint = true;
+ return;
+ }
+
+ if (!violate_nmi_constraint)
+ return;
+
+ pr_warning("Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint:\n"
+ " echo 0 > /proc/sys/kernel/nmi_watchdog\n"
+ " perf stat ...\n"
+ " echo 1 > /proc/sys/kernel/nmi_watchdog\n");
+}
+
+static bool metricgroup__has_constraint(struct pmu_event *pe)
+{
+ if (!pe->metric_constraint)
+ return false;
+
+ if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") &&
+ sysctl__nmi_watchdog_enabled()) {
+ metricgroup___watchdog_constraint_hint(pe->metric_name, false);
+ return true;
+ }
+
+ return false;
+}
+
static int metricgroup__add_metric(const char *metric, struct strbuf *events,
struct list_head *group_list)
{
struct pmu_events_map *map = perf_pmu__find_map(NULL);
struct pmu_event *pe;
- int ret = -EINVAL;
- int i, j;
+ int i, ret = -EINVAL;
if (!map)
return 0;
@@ -422,7 +496,6 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events,
const char **ids;
int idnum;
struct egroup *eg;
- bool no_group = false;
pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
@@ -431,27 +504,11 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events,
continue;
if (events->len > 0)
strbuf_addf(events, ",");
- for (j = 0; j < idnum; j++) {
- pr_debug("found event %s\n", ids[j]);
- /*
- * Duration time maps to a software event and can make
- * groups not count. Always use it outside a
- * group.
- */
- if (!strcmp(ids[j], "duration_time")) {
- if (j > 0)
- strbuf_addf(events, "}:W,");
- strbuf_addf(events, "duration_time");
- no_group = true;
- continue;
- }
- strbuf_addf(events, "%s%s",
- j == 0 || no_group ? "{" : ",",
- ids[j]);
- no_group = false;
- }
- if (!no_group)
- strbuf_addf(events, "}:W");
+
+ if (metricgroup__has_constraint(pe))
+ metricgroup__add_metric_non_group(events, ids, idnum);
+ else
+ metricgroup__add_metric_weak_group(events, ids, idnum);
eg = malloc(sizeof(struct egroup));
if (!eg) {
@@ -493,6 +550,10 @@ static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
}
}
free(nlist);
+
+ if (!ret)
+ metricgroup___watchdog_constraint_hint(NULL, true);
+
return ret;
}
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 3b664fa673a6..ab7108d22428 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -98,20 +98,29 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity)
{
void *data;
size_t mmap_len;
- unsigned long node_mask;
+ unsigned long *node_mask;
+ unsigned long node_index;
+ int err = 0;
if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) {
data = map->aio.data[idx];
mmap_len = mmap__mmap_len(map);
- node_mask = 1UL << cpu__get_node(cpu);
- if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) {
- pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n",
- data, data + mmap_len, cpu__get_node(cpu));
+ node_index = cpu__get_node(cpu);
+ node_mask = bitmap_alloc(node_index + 1);
+ if (!node_mask) {
+ pr_err("Failed to allocate node mask for mbind: error %m\n");
return -1;
}
+ set_bit(node_index, node_mask);
+ if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1, 0)) {
+ pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n",
+ data, data + mmap_len, node_index);
+ err = -1;
+ }
+ bitmap_free(node_mask);
}
- return 0;
+ return err;
}
#else /* !HAVE_LIBNUMA_SUPPORT */
static int perf_mmap__aio_alloc(struct mmap *map, int idx)
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 651203126c71..355d3458d4e6 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -50,6 +50,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
+ bit_name(HW_INDEX),
{ .name = NULL, }
};
#undef bit_name
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 80ca5d0ab7fe..8c1b27cd8b99 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -464,6 +464,7 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
struct thread *thread)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
PyObject *pylist;
u64 i;
@@ -484,28 +485,28 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
Py_FatalError("couldn't create Python dictionary");
pydict_set_item_string_decref(pyelem, "from",
- PyLong_FromUnsignedLongLong(br->entries[i].from));
+ PyLong_FromUnsignedLongLong(entries[i].from));
pydict_set_item_string_decref(pyelem, "to",
- PyLong_FromUnsignedLongLong(br->entries[i].to));
+ PyLong_FromUnsignedLongLong(entries[i].to));
pydict_set_item_string_decref(pyelem, "mispred",
- PyBool_FromLong(br->entries[i].flags.mispred));
+ PyBool_FromLong(entries[i].flags.mispred));
pydict_set_item_string_decref(pyelem, "predicted",
- PyBool_FromLong(br->entries[i].flags.predicted));
+ PyBool_FromLong(entries[i].flags.predicted));
pydict_set_item_string_decref(pyelem, "in_tx",
- PyBool_FromLong(br->entries[i].flags.in_tx));
+ PyBool_FromLong(entries[i].flags.in_tx));
pydict_set_item_string_decref(pyelem, "abort",
- PyBool_FromLong(br->entries[i].flags.abort));
+ PyBool_FromLong(entries[i].flags.abort));
pydict_set_item_string_decref(pyelem, "cycles",
- PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles));
+ PyLong_FromUnsignedLongLong(entries[i].flags.cycles));
thread__find_map_fb(thread, sample->cpumode,
- br->entries[i].from, &al);
+ entries[i].from, &al);
dsoname = get_dsoname(al.map);
pydict_set_item_string_decref(pyelem, "from_dsoname",
_PyUnicode_FromString(dsoname));
thread__find_map_fb(thread, sample->cpumode,
- br->entries[i].to, &al);
+ entries[i].to, &al);
dsoname = get_dsoname(al.map);
pydict_set_item_string_decref(pyelem, "to_dsoname",
_PyUnicode_FromString(dsoname));
@@ -561,6 +562,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
struct thread *thread)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
PyObject *pylist;
u64 i;
char bf[512];
@@ -581,22 +583,22 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
Py_FatalError("couldn't create Python dictionary");
thread__find_symbol_fb(thread, sample->cpumode,
- br->entries[i].from, &al);
+ entries[i].from, &al);
get_symoff(al.sym, &al, true, bf, sizeof(bf));
pydict_set_item_string_decref(pyelem, "from",
_PyUnicode_FromString(bf));
thread__find_symbol_fb(thread, sample->cpumode,
- br->entries[i].to, &al);
+ entries[i].to, &al);
get_symoff(al.sym, &al, true, bf, sizeof(bf));
pydict_set_item_string_decref(pyelem, "to",
_PyUnicode_FromString(bf));
- get_br_mspred(&br->entries[i].flags, bf, sizeof(bf));
+ get_br_mspred(&entries[i].flags, bf, sizeof(bf));
pydict_set_item_string_decref(pyelem, "pred",
_PyUnicode_FromString(bf));
- if (br->entries[i].flags.in_tx) {
+ if (entries[i].flags.in_tx) {
pydict_set_item_string_decref(pyelem, "in_tx",
_PyUnicode_FromString("X"));
} else {
@@ -604,7 +606,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
_PyUnicode_FromString("-"));
}
- if (br->entries[i].flags.abort) {
+ if (entries[i].flags.abort) {
pydict_set_item_string_decref(pyelem, "abort",
_PyUnicode_FromString("A"));
} else {
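
These scripting conversions stop indexing br->entries[] directly and go through perf_sample__branch_entries() instead; the branch-stack payload may now carry an extra hw_idx word after nr (see the 2 * sizeof(u64) sizing change in synthetic-events.c further down), and the helper hides whether that word is present. A minimal sketch of the idea, assuming the helper lives in branch.h (per the diffstat) and that perf_sample grew a no_hw_idx flag; neither detail is shown in this excerpt:

static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample)
{
	u64 *p = (u64 *)sample->branch_stack;

	p++;				/* skip nr			  */
	if (!sample->no_hw_idx)		/* assumed: kernel emitted hw_idx */
		p++;			/* skip hw_idx			  */
	return (struct branch_entry *)p;
}
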
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index d0d7d25b23e3..055b00abd56d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1007,6 +1007,7 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample)
{
struct ip_callchain *callchain = sample->callchain;
struct branch_stack *lbr_stack = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
u64 kernel_callchain_nr = callchain->nr;
unsigned int i;
@@ -1043,10 +1044,10 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample)
i, callchain->ips[i]);
printf("..... %2d: %016" PRIx64 "\n",
- (int)(kernel_callchain_nr), lbr_stack->entries[0].to);
+ (int)(kernel_callchain_nr), entries[0].to);
for (i = 0; i < lbr_stack->nr; i++)
printf("..... %2d: %016" PRIx64 "\n",
- (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from);
+ (int)(i + kernel_callchain_nr + 1), entries[i].from);
}
}
@@ -1068,6 +1069,7 @@ static void callchain__printf(struct evsel *evsel,
static void branch_stack__printf(struct perf_sample *sample, bool callstack)
{
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
uint64_t i;
printf("%s: nr:%" PRIu64 "\n",
@@ -1075,7 +1077,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
sample->branch_stack->nr);
for (i = 0; i < sample->branch_stack->nr; i++) {
- struct branch_entry *e = &sample->branch_stack->entries[i];
+ struct branch_entry *e = &entries[i];
if (!callstack) {
printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bc31fccc0057..76c6052b12e2 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -16,6 +16,7 @@
#include <linux/ctype.h>
#include "cgroup.h"
#include <api/fs/fs.h>
+#include "util.h"
#define CNTR_NOT_SUPPORTED "<not supported>"
#define CNTR_NOT_COUNTED "<not counted>"
@@ -110,7 +111,7 @@ static void aggr_printout(struct perf_stat_config *config,
config->csv_sep);
break;
case AGGR_NONE:
- if (evsel->percore) {
+ if (evsel->percore && !config->percore_show_thread) {
fprintf(config->output, "S%d-D%d-C%*d%s",
cpu_map__id_to_socket(id),
cpu_map__id_to_die(id),
@@ -628,7 +629,7 @@ static void aggr_cb(struct perf_stat_config *config,
static void print_counter_aggrdata(struct perf_stat_config *config,
struct evsel *counter, int s,
char *prefix, bool metric_only,
- bool *first)
+ bool *first, int cpu)
{
struct aggr_data ad;
FILE *output = config->output;
@@ -654,7 +655,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
uval = val * counter->scale;
- printout(config, id, nr, counter, uval, prefix,
+ printout(config, cpu != -1 ? cpu : id, nr, counter, uval, prefix,
run, ena, 1.0, &rt_stat);
if (!metric_only)
fputc('\n', output);
@@ -687,7 +688,7 @@ static void print_aggr(struct perf_stat_config *config,
evlist__for_each_entry(evlist, counter) {
print_counter_aggrdata(config, counter, s,
prefix, metric_only,
- &first);
+ &first, -1);
}
if (metric_only)
fputc('\n', output);
@@ -1097,7 +1098,6 @@ static void print_footer(struct perf_stat_config *config)
{
double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
FILE *output = config->output;
- int n;
if (!config->null_run)
fprintf(output, "\n");
@@ -1131,9 +1131,7 @@ static void print_footer(struct perf_stat_config *config)
}
fprintf(output, "\n\n");
- if (config->print_free_counters_hint &&
- sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 &&
- n > 0)
+ if (config->print_free_counters_hint && sysctl__nmi_watchdog_enabled())
fprintf(output,
"Some events weren't counted. Try disabling the NMI watchdog:\n"
" echo 0 > /proc/sys/kernel/nmi_watchdog\n"
@@ -1146,6 +1144,26 @@ static void print_footer(struct perf_stat_config *config)
"the same PMU. Try reorganizing the group.\n");
}
+static void print_percore_thread(struct perf_stat_config *config,
+ struct evsel *counter, char *prefix)
+{
+ int s, s2, id;
+ bool first = true;
+
+ for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) {
+ s2 = config->aggr_get_id(config, evsel__cpus(counter), i);
+ for (s = 0; s < config->aggr_map->nr; s++) {
+ id = config->aggr_map->map[s];
+ if (s2 == id)
+ break;
+ }
+
+ print_counter_aggrdata(config, counter, s,
+ prefix, false,
+ &first, i);
+ }
+}
+
static void print_percore(struct perf_stat_config *config,
struct evsel *counter, char *prefix)
{
@@ -1157,13 +1175,16 @@ static void print_percore(struct perf_stat_config *config,
if (!(config->aggr_map || config->aggr_get_id))
return;
+ if (config->percore_show_thread)
+ return print_percore_thread(config, counter, prefix);
+
for (s = 0; s < config->aggr_map->nr; s++) {
if (prefix && metric_only)
fprintf(output, "%s", prefix);
print_counter_aggrdata(config, counter, s,
prefix, metric_only,
- &first);
+ &first, -1);
}
if (metric_only)
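
The stat-display changes thread a new percore_show_thread flag through the print paths: per-core events (evsel->percore) are normally printed once per core with an S%d-D%d-C%d label, but with the flag set print_percore() defers to print_percore_thread(), which walks every CPU of the evsel and passes the CPU index down so printout() labels each line per CPU/thread instead. A hypothetical sketch of how a perf-stat style front end might set the flag; the option name is an assumption, only the struct field and the display paths appear in this diff:

/* Hypothetical wiring for the new flag (option name assumed). */
#include <subcmd/parse-options.h>

static struct perf_stat_config stat_config;

static const struct option stat_options[] = {
	OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
		    "print per-core counts on every CPU/thread line"),
	OPT_END()
};
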
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 90d23cc3c8d4..0fd713d3674f 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -777,9 +777,7 @@ static void generic_metric(struct perf_stat_config *config,
}
if (!metric_events[i]) {
- const char *p = metric_expr;
-
- if (expr__parse(&ratio, &pctx, &p) == 0) {
+ if (expr__parse(&ratio, &pctx, metric_expr) == 0) {
char *unit;
char metric_bf[64];
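
The stat-shadow hunk reflects an API change in the rewritten expr parser (see the expr.c/expr.l/expr.y entries in the diffstat): expr__parse() now takes the whole metric expression string rather than a char ** cursor it advances, so the temporary const char *p is gone. A rough usage sketch under the new shape; the context type name and the setup helpers are assumptions, only the expr__parse(&ratio, &pctx, metric_expr) call itself is visible in this hunk:

#include <stdio.h>
#include "expr.h"

void ipc_example(void)
{
	struct parse_ctx pctx;				/* type name assumed	*/
	double ratio;

	expr__ctx_init(&pctx);				/* assumed setup helper	*/
	expr__add_id(&pctx, "instructions", 2000);	/* assumed		*/
	expr__add_id(&pctx, "cycles", 1000);		/* assumed		*/

	if (expr__parse(&ratio, &pctx, "instructions / cycles") == 0)
		printf("IPC: %f\n", ratio);		/* 2.0			*/
}
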
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index fb990efa54a8..b4fdfaa7f2c0 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -109,6 +109,7 @@ struct perf_stat_config {
bool walltime_run_table;
bool all_kernel;
bool all_user;
+ bool percore_show_thread;
FILE *output;
unsigned int interval;
unsigned int timeout;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index c423298fe62d..3f28af39f9c6 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -345,6 +345,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
continue;
event->mmap2.ino = (u64)ino;
+ event->mmap2.ino_generation = 0;
/*
* Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
@@ -1183,7 +1184,8 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
if (type & PERF_SAMPLE_BRANCH_STACK) {
sz = sample->branch_stack->nr * sizeof(struct branch_entry);
- sz += sizeof(u64);
+ /* nr, hw_idx */
+ sz += 2 * sizeof(u64);
result += sz;
}
@@ -1344,7 +1346,8 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
if (type & PERF_SAMPLE_BRANCH_STACK) {
sz = sample->branch_stack->nr * sizeof(struct branch_entry);
- sz += sizeof(u64);
+ /* nr, hw_idx */
+ sz += 2 * sizeof(u64);
memcpy(array, sample->branch_stack, sz);
array = (void *)array + sz;
}
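
The two sizing hunks account for the extra hw_idx word when measuring and re-emitting PERF_SAMPLE_BRANCH_STACK data: the payload is now nr plus hw_idx (two u64s) followed by nr branch_entry records. A self-contained back-of-the-envelope check; treating struct branch_entry as from/to/flags at 8 bytes each is an assumption not spelled out in this diff:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	size_t nr = 32;				/* e.g. a full 32-entry LBR stack */
	size_t entry_sz = 3 * sizeof(uint64_t);	/* from, to, flags: 24 bytes	  */
	size_t old_sz = nr * entry_sz + 1 * sizeof(uint64_t);	/* nr only	  */
	size_t new_sz = nr * entry_sz + 2 * sizeof(uint64_t);	/* nr + hw_idx	  */

	printf("old=%zu new=%zu\n", old_sz, new_sz);	/* old=776 new=784	  */
	return 0;
}
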
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 969ae560dad9..d707c9624dd9 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -55,6 +55,24 @@ int sysctl__max_stack(void)
return sysctl_perf_event_max_stack;
}
+bool sysctl__nmi_watchdog_enabled(void)
+{
+ static bool cached;
+ static bool nmi_watchdog;
+ int value;
+
+ if (cached)
+ return nmi_watchdog;
+
+ if (sysctl__read_int("kernel/nmi_watchdog", &value) < 0)
+ return false;
+
+ nmi_watchdog = (value > 0) ? true : false;
+ cached = true;
+
+ return nmi_watchdog;
+}
+
bool test_attr__enabled;
bool perf_host = true;
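
sysctl__nmi_watchdog_enabled() caches the kernel/nmi_watchdog sysctl value, so the footer hint above (and any other caller) only touches procfs once per perf invocation; note that a failed sysctl__read_int() returns false without setting cached, so the read is simply retried on the next call. A hypothetical extra caller, purely illustrative, to show why the caching makes the helper cheap to consult from hot print paths:

#include "util.h"
#include "debug.h"

static void warn_if_watchdog_claims_a_counter(void)
{
	if (sysctl__nmi_watchdog_enabled())
		pr_warning("NMI watchdog is on; it pins one hardware counter\n");
}
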
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 9969b8b46f7c..f486fdd3a538 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -29,6 +29,8 @@ size_t hex_width(u64 v);
int sysctl__max_stack(void);
+bool sysctl__nmi_watchdog_enabled(void);
+
int fetch_kernel_version(unsigned int *puint,
char *str, size_t str_sz);
#define KVER_VERSION(x) (((x) >> 16) & 0xff)