summaryrefslogtreecommitdiff
path: root/tools/perf
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/.gitignore2
-rw-r--r--tools/perf/Makefile.config2
-rw-r--r--tools/perf/Makefile.perf3
-rw-r--r--tools/perf/builtin-record.c2
-rw-r--r--tools/perf/builtin-sched.c99
-rw-r--r--tools/perf/builtin-trace.c9
-rwxr-xr-xtools/perf/scripts/python/exported-sql-viewer.py5
-rw-r--r--tools/perf/tests/bp_account.c1
-rw-r--r--tools/perf/tests/switch-tracking.c2
-rw-r--r--tools/perf/tests/tests-scripts.c1
-rw-r--r--tools/perf/ui/browsers/hists.c2
-rw-r--r--tools/perf/util/build-id.c2
-rw-r--r--tools/perf/util/evsel.c11
-rw-r--r--tools/perf/util/evsel.h2
-rw-r--r--tools/perf/util/intel-pt.c205
-rw-r--r--tools/perf/util/machine.c6
-rw-r--r--tools/perf/util/print-events.c1
-rw-r--r--tools/perf/util/symbol-minimal.c160
-rw-r--r--tools/perf/util/symbol.c1
-rw-r--r--tools/perf/util/thread.c8
-rw-r--r--tools/perf/util/thread.h2
21 files changed, 387 insertions, 139 deletions
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index f5b81d439387..1ef45d803024 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -48,8 +48,6 @@ libbpf/
libperf/
libsubcmd/
libsymbol/
-libtraceevent/
-libtraceevent_plugins/
fixdep
Documentation/doc.dep
python_ext_build/
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index b102a4c525e4..a2034fa18325 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -569,6 +569,8 @@ ifndef NO_LIBELF
ifeq ($(feature-libdebuginfod), 1)
CFLAGS += -DHAVE_DEBUGINFOD_SUPPORT
EXTLIBS += -ldebuginfod
+ else
+ $(warning No elfutils/debuginfod.h found, no debuginfo server support, please install libdebuginfod-dev/elfutils-debuginfod-client-devel or equivalent)
endif
endif
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 8ee59ecb1411..b61c355fbdee 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -1143,7 +1143,8 @@ install-tests: all install-gtk
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_probe'; \
$(INSTALL) tests/shell/base_probe/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_probe'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \
- $(INSTALL) tests/shell/base_probe/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \
+ $(INSTALL) tests/shell/base_report/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \
+ $(INSTALL) tests/shell/base_report/*.txt '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/coresight' ; \
$(INSTALL) tests/shell/coresight/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/coresight'
$(Q)$(MAKE) -C tests/shell/coresight install-tests
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index adbaf80b398c..ab9035573a15 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -3471,7 +3471,7 @@ static struct option __record_options[] = {
"sample selected machine registers on interrupt,"
" use '-I?' to list register names", parse_intr_regs),
OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
- "sample selected machine registers on interrupt,"
+ "sample selected machine registers in user space,"
" use '--user-regs=?' to list register names", parse_user_regs),
OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
"Record running/enabled time of read (:S) events"),
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 5981cc51abc8..64bf3ac237f2 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -999,7 +999,7 @@ thread_atoms_search(struct rb_root_cached *root, struct thread *thread,
else if (cmp < 0)
node = node->rb_right;
else {
- BUG_ON(thread != atoms->thread);
+ BUG_ON(!RC_CHK_EQUAL(thread, atoms->thread));
return atoms;
}
}
@@ -1116,6 +1116,21 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
atoms->nb_atoms++;
}
+static void free_work_atoms(struct work_atoms *atoms)
+{
+ struct work_atom *atom, *tmp;
+
+ if (atoms == NULL)
+ return;
+
+ list_for_each_entry_safe(atom, tmp, &atoms->work_list, list) {
+ list_del(&atom->list);
+ free(atom);
+ }
+ thread__zput(atoms->thread);
+ free(atoms);
+}
+
static int latency_switch_event(struct perf_sched *sched,
struct evsel *evsel,
struct perf_sample *sample,
@@ -1639,6 +1654,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
const char *color = PERF_COLOR_NORMAL;
char stimestamp[32];
const char *str;
+ int ret = -1;
BUG_ON(this_cpu.cpu >= MAX_CPUS || this_cpu.cpu < 0);
@@ -1669,17 +1685,20 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
sched_in = map__findnew_thread(sched, machine, -1, next_pid);
sched_out = map__findnew_thread(sched, machine, -1, prev_pid);
if (sched_in == NULL || sched_out == NULL)
- return -1;
+ goto out;
tr = thread__get_runtime(sched_in);
- if (tr == NULL) {
- thread__put(sched_in);
- return -1;
- }
+ if (tr == NULL)
+ goto out;
+
+ thread__put(sched->curr_thread[this_cpu.cpu]);
+ thread__put(sched->curr_out_thread[this_cpu.cpu]);
sched->curr_thread[this_cpu.cpu] = thread__get(sched_in);
sched->curr_out_thread[this_cpu.cpu] = thread__get(sched_out);
+ ret = 0;
+
str = thread__comm_str(sched_in);
new_shortname = 0;
if (!tr->shortname[0]) {
@@ -1774,12 +1793,10 @@ sched_out:
color_fprintf(stdout, color, "\n");
out:
- if (sched->map.task_name)
- thread__put(sched_out);
-
+ thread__put(sched_out);
thread__put(sched_in);
- return 0;
+ return ret;
}
static int process_sched_switch_event(const struct perf_tool *tool,
@@ -2023,6 +2040,16 @@ static u64 evsel__get_time(struct evsel *evsel, u32 cpu)
return r->last_time[cpu];
}
+static void timehist__evsel_priv_destructor(void *priv)
+{
+ struct evsel_runtime *r = priv;
+
+ if (r) {
+ free(r->last_time);
+ free(r);
+ }
+}
+
static int comm_width = 30;
static char *timehist_get_commstr(struct thread *thread)
@@ -3276,6 +3303,8 @@ static int perf_sched__timehist(struct perf_sched *sched)
setup_pager();
+ evsel__set_priv_destructor(timehist__evsel_priv_destructor);
+
/* prefer sched_waking if it is captured */
if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking"))
handlers[1].handler = timehist_sched_wakeup_ignore;
@@ -3376,13 +3405,13 @@ static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *d
this->total_runtime += data->total_runtime;
this->nb_atoms += data->nb_atoms;
this->total_lat += data->total_lat;
- list_splice(&data->work_list, &this->work_list);
+ list_splice_init(&data->work_list, &this->work_list);
if (this->max_lat < data->max_lat) {
this->max_lat = data->max_lat;
this->max_lat_start = data->max_lat_start;
this->max_lat_end = data->max_lat_end;
}
- zfree(&data);
+ free_work_atoms(data);
return;
}
}
@@ -3461,7 +3490,6 @@ static int perf_sched__lat(struct perf_sched *sched)
work_list = rb_entry(next, struct work_atoms, node);
output_lat_thread(sched, work_list);
next = rb_next(next);
- thread__zput(work_list->thread);
}
printf(" -----------------------------------------------------------------------------------------------------------------\n");
@@ -3475,6 +3503,13 @@ static int perf_sched__lat(struct perf_sched *sched)
rc = 0;
+ while ((next = rb_first_cached(&sched->sorted_atom_root))) {
+ struct work_atoms *data;
+
+ data = rb_entry(next, struct work_atoms, node);
+ rb_erase_cached(next, &sched->sorted_atom_root);
+ free_work_atoms(data);
+ }
out_free_cpus_switch_event:
free_cpus_switch_event(sched);
return rc;
@@ -3546,10 +3581,10 @@ static int perf_sched__map(struct perf_sched *sched)
sched->curr_out_thread = calloc(MAX_CPUS, sizeof(*(sched->curr_out_thread)));
if (!sched->curr_out_thread)
- return rc;
+ goto out_free_curr_thread;
if (setup_cpus_switch_event(sched))
- goto out_free_curr_thread;
+ goto out_free_curr_out_thread;
if (setup_map_cpus(sched))
goto out_free_cpus_switch_event;
@@ -3580,7 +3615,14 @@ out_put_map_cpus:
out_free_cpus_switch_event:
free_cpus_switch_event(sched);
+out_free_curr_out_thread:
+ for (int i = 0; i < MAX_CPUS; i++)
+ thread__put(sched->curr_out_thread[i]);
+ zfree(&sched->curr_out_thread);
+
out_free_curr_thread:
+ for (int i = 0; i < MAX_CPUS; i++)
+ thread__put(sched->curr_thread[i]);
zfree(&sched->curr_thread);
return rc;
}
@@ -3887,13 +3929,15 @@ int cmd_sched(int argc, const char **argv)
if (!argc)
usage_with_options(sched_usage, sched_options);
+ thread__set_priv_destructor(free);
+
/*
* Aliased to 'perf script' for now:
*/
if (!strcmp(argv[0], "script")) {
- return cmd_script(argc, argv);
+ ret = cmd_script(argc, argv);
} else if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
- return __cmd_record(argc, argv);
+ ret = __cmd_record(argc, argv);
} else if (strlen(argv[0]) > 2 && strstarts("latency", argv[0])) {
sched.tp_handler = &lat_ops;
if (argc > 1) {
@@ -3902,7 +3946,7 @@ int cmd_sched(int argc, const char **argv)
usage_with_options(latency_usage, latency_options);
}
setup_sorting(&sched, latency_options, latency_usage);
- return perf_sched__lat(&sched);
+ ret = perf_sched__lat(&sched);
} else if (!strcmp(argv[0], "map")) {
if (argc) {
argc = parse_options(argc, argv, map_options, map_usage, 0);
@@ -3913,13 +3957,14 @@ int cmd_sched(int argc, const char **argv)
sched.map.task_names = strlist__new(sched.map.task_name, NULL);
if (sched.map.task_names == NULL) {
fprintf(stderr, "Failed to parse task names\n");
- return -1;
+ ret = -1;
+ goto out;
}
}
}
sched.tp_handler = &map_ops;
setup_sorting(&sched, latency_options, latency_usage);
- return perf_sched__map(&sched);
+ ret = perf_sched__map(&sched);
} else if (strlen(argv[0]) > 2 && strstarts("replay", argv[0])) {
sched.tp_handler = &replay_ops;
if (argc) {
@@ -3927,7 +3972,7 @@ int cmd_sched(int argc, const char **argv)
if (argc)
usage_with_options(replay_usage, replay_options);
}
- return perf_sched__replay(&sched);
+ ret = perf_sched__replay(&sched);
} else if (!strcmp(argv[0], "timehist")) {
if (argc) {
argc = parse_options(argc, argv, timehist_options,
@@ -3943,19 +3988,19 @@ int cmd_sched(int argc, const char **argv)
parse_options_usage(NULL, timehist_options, "w", true);
if (sched.show_next)
parse_options_usage(NULL, timehist_options, "n", true);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
ret = symbol__validate_sym_arguments();
- if (ret)
- return ret;
-
- return perf_sched__timehist(&sched);
+ if (!ret)
+ ret = perf_sched__timehist(&sched);
} else {
usage_with_options(sched_usage, sched_options);
}
+out:
/* free usage string allocated by parse_options_subcommand */
free((void *)sched_usage[0]);
- return 0;
+ return ret;
}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index ecd26e058baf..f77e4f4b6f03 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1327,7 +1327,7 @@ static const struct syscall_fmt syscall_fmts[] = {
.arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
[2] = { .scnprintf = SCA_FDAT, /* newdirfd */ },
[4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, /* flags */ }, }, },
- { .name = "rseq", .errpid = true,
+ { .name = "rseq",
.arg = { [0] = { .from_user = true /* rseq */, }, }, },
{ .name = "rt_sigaction",
.arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
@@ -1351,7 +1351,7 @@ static const struct syscall_fmt syscall_fmts[] = {
{ .name = "sendto",
.arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ },
[4] = SCA_SOCKADDR_FROM_USER(addr), }, },
- { .name = "set_robust_list", .errpid = true,
+ { .name = "set_robust_list",
.arg = { [0] = { .from_user = true /* head */, }, }, },
{ .name = "set_tid_address", .errpid = true, },
{ .name = "setitimer",
@@ -2873,8 +2873,8 @@ errno_print: {
else if (sc->fmt->errpid) {
struct thread *child = machine__find_thread(trace->host, ret, ret);
+ fprintf(trace->output, "%ld", ret);
if (child != NULL) {
- fprintf(trace->output, "%ld", ret);
if (thread__comm_set(child))
fprintf(trace->output, " (%s)", thread__comm_str(child));
thread__put(child);
@@ -3986,10 +3986,13 @@ static int trace__set_filter_loop_pids(struct trace *trace)
if (!strcmp(thread__comm_str(parent), "sshd") ||
strstarts(thread__comm_str(parent), "gnome-terminal")) {
pids[nr++] = thread__tid(parent);
+ thread__put(parent);
break;
}
+ thread__put(thread);
thread = parent;
}
+ thread__put(thread);
err = evlist__append_tp_filter_pids(trace->evlist, nr, pids);
if (!err && trace->filter_pids.map)
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 121cf61ba1b3..e0b2e7268ef6 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -680,7 +680,10 @@ class CallGraphModelBase(TreeModel):
s = value.replace("%", "\\%")
s = s.replace("_", "\\_")
# Translate * and ? into SQL LIKE pattern characters % and _
- trans = string.maketrans("*?", "%_")
+ if sys.version_info[0] == 3:
+ trans = str.maketrans("*?", "%_")
+ else:
+ trans = string.maketrans("*?", "%_")
match = " LIKE '" + str(s).translate(trans) + "'"
else:
match = " GLOB '" + str(value) + "'"
diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c
index 4cb7d486b5c1..047433c977bc 100644
--- a/tools/perf/tests/bp_account.c
+++ b/tools/perf/tests/bp_account.c
@@ -104,6 +104,7 @@ static int bp_accounting(int wp_cnt, int share)
fd_wp = wp_event((void *)&the_var, &attr_new);
TEST_ASSERT_VAL("failed to create max wp\n", fd_wp != -1);
pr_debug("wp max created\n");
+ close(fd_wp);
}
for (i = 0; i < wp_cnt; i++)
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 5cab17a1942e..ee43d8fa2ed6 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -258,7 +258,7 @@ static int compar(const void *a, const void *b)
const struct event_node *nodeb = b;
s64 cmp = nodea->event_time - nodeb->event_time;
- return cmp;
+ return cmp < 0 ? -1 : (cmp > 0 ? 1 : 0);
}
static int process_events(struct evlist *evlist,
diff --git a/tools/perf/tests/tests-scripts.c b/tools/perf/tests/tests-scripts.c
index ed114b044293..b6986d50dde6 100644
--- a/tools/perf/tests/tests-scripts.c
+++ b/tools/perf/tests/tests-scripts.c
@@ -255,6 +255,7 @@ static void append_scripts_in_dir(int dir_fd,
continue; /* Skip scripts that have a separate driver. */
fd = openat(dir_fd, ent->d_name, O_PATH);
append_scripts_in_dir(fd, result, result_sz);
+ close(fd);
}
for (i = 0; i < n_dirs; i++) /* Clean up */
zfree(&entlist[i]);
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 49ba82bf3391..3283b6313bab 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -3267,10 +3267,10 @@ do_hotkey: // key came straight from options ui__popup_menu()
/*
* No need to set actions->dso here since
* it's just to remove the current filter.
- * Ditto for thread below.
*/
do_zoom_dso(browser, actions);
} else if (top == &browser->hists->thread_filter) {
+ actions->thread = thread;
do_zoom_thread(browser, actions);
} else if (top == &browser->hists->socket_filter) {
do_zoom_socket(browser, actions);
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index e763e8d99a43..ee00313d5d7e 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -864,7 +864,7 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine,
char *allocated_name = NULL;
int ret = 0;
- if (!dso__has_build_id(dso))
+ if (!dso__has_build_id(dso) || !dso__hit(dso))
return 0;
if (dso__is_kcore(dso)) {
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index dbf9c8cee3c5..6d7249cc1a99 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1477,6 +1477,15 @@ static void evsel__free_config_terms(struct evsel *evsel)
free_config_terms(&evsel->config_terms);
}
+static void (*evsel__priv_destructor)(void *priv);
+
+void evsel__set_priv_destructor(void (*destructor)(void *priv))
+{
+ assert(evsel__priv_destructor == NULL);
+
+ evsel__priv_destructor = destructor;
+}
+
void evsel__exit(struct evsel *evsel)
{
assert(list_empty(&evsel->core.node));
@@ -1502,6 +1511,8 @@ void evsel__exit(struct evsel *evsel)
hashmap__free(evsel->per_pkg_mask);
evsel->per_pkg_mask = NULL;
zfree(&evsel->metric_events);
+ if (evsel__priv_destructor)
+ evsel__priv_destructor(evsel->priv);
perf_evsel__object.fini(evsel);
if (evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME ||
evsel__tool_event(evsel) == PERF_TOOL_USER_TIME)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 15e745a9a798..26574a33a725 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -282,6 +282,8 @@ void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx);
void evsel__exit(struct evsel *evsel);
void evsel__delete(struct evsel *evsel);
+void evsel__set_priv_destructor(void (*destructor)(void *priv));
+
struct callchain_param;
void evsel__config(struct evsel *evsel, struct record_opts *opts,
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index fd2597613f3d..61f10578e121 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -127,6 +127,7 @@ struct intel_pt {
bool single_pebs;
bool sample_pebs;
+ int pebs_data_src_fmt;
struct evsel *pebs_evsel;
u64 evt_sample_type;
@@ -175,6 +176,7 @@ enum switch_state {
struct intel_pt_pebs_event {
struct evsel *evsel;
u64 id;
+ int data_src_fmt;
};
struct intel_pt_queue {
@@ -2232,7 +2234,146 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack,
}
}
-static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id)
+#define P(a, b) PERF_MEM_S(a, b)
+#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
+#define LEVEL(x) P(LVLNUM, x)
+#define REM P(REMOTE, REMOTE)
+#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
+
+#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10
+#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)
+
+/* Based on kernel __intel_pmu_pebs_data_source_grt() and pebs_data_source */
+static const u64 pebs_data_source_grt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = {
+ P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */
+ OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */
+ OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */
+ OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP Hit */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP Fwd */
+ OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */
+ OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* RAM hit|SNP Hit */
+ OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* Remote L3 hit|SNP Hit */
+ OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* RAM hit|SNP None or Miss */
+ OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* Remote RAM hit|SNP None or Miss */
+ OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */
+ OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */
+};
+
+/* Based on kernel __intel_pmu_pebs_data_source_cmt() and pebs_data_source */
+static const u64 pebs_data_source_cmt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = {
+ P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */
+ OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */
+ OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */
+ OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* L3 hit|SNP Hit */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP HitM */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP HitM */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP Fwd */
+ OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */
+ OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* RAM hit|SNP Hit */
+ OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE), /* Remote L3 hit|SNP Hit */
+ OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD), /* RAM hit|SNP None or Miss */
+ OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM), /* Remote RAM hit|SNP None or Miss */
+ OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */
+ OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */
+};
+
+/* Based on kernel pebs_set_tlb_lock() */
+static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
+{
+ /*
+ * TLB access
+ * 0 = did not miss 2nd level TLB
+ * 1 = missed 2nd level TLB
+ */
+ if (tlb)
+ *val |= P(TLB, MISS) | P(TLB, L2);
+ else
+ *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+ /* locked prefix */
+ if (lock)
+ *val |= P(LOCK, LOCKED);
+}
+
+/* Based on kernel __grt_latency_data() */
+static u64 intel_pt_grt_latency_data(u8 dse, bool tlb, bool lock, bool blk,
+ const u64 *pebs_data_source)
+{
+ u64 val;
+
+ dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK;
+ val = pebs_data_source[dse];
+
+ pebs_set_tlb_lock(&val, tlb, lock);
+
+ if (blk)
+ val |= P(BLK, DATA);
+ else
+ val |= P(BLK, NA);
+
+ return val;
+}
+
+/* Default value for data source */
+#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\
+ PERF_MEM_S(LVL, NA) |\
+ PERF_MEM_S(SNOOP, NA) |\
+ PERF_MEM_S(LOCK, NA) |\
+ PERF_MEM_S(TLB, NA) |\
+ PERF_MEM_S(LVLNUM, NA))
+
+enum DATA_SRC_FORMAT {
+ DATA_SRC_FORMAT_ERR = -1,
+ DATA_SRC_FORMAT_NA = 0,
+ DATA_SRC_FORMAT_GRT = 1,
+ DATA_SRC_FORMAT_CMT = 2,
+};
+
+/* Based on kernel grt_latency_data() and cmt_latency_data */
+static u64 intel_pt_get_data_src(u64 mem_aux_info, int data_src_fmt)
+{
+ switch (data_src_fmt) {
+ case DATA_SRC_FORMAT_GRT: {
+ union {
+ u64 val;
+ struct {
+ unsigned int dse:4;
+ unsigned int locked:1;
+ unsigned int stlb_miss:1;
+ unsigned int fwd_blk:1;
+ unsigned int reserved:25;
+ };
+ } x = {.val = mem_aux_info};
+ return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk,
+ pebs_data_source_grt);
+ }
+ case DATA_SRC_FORMAT_CMT: {
+ union {
+ u64 val;
+ struct {
+ unsigned int dse:5;
+ unsigned int locked:1;
+ unsigned int stlb_miss:1;
+ unsigned int fwd_blk:1;
+ unsigned int reserved:24;
+ };
+ } x = {.val = mem_aux_info};
+ return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk,
+ pebs_data_source_cmt);
+ }
+ default:
+ return PERF_MEM_NA;
+ }
+}
+
+static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel,
+ u64 id, int data_src_fmt)
{
const struct intel_pt_blk_items *items = &ptq->state->items;
struct perf_sample sample = { .ip = 0, };
@@ -2350,6 +2491,18 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse
}
}
+ if (sample_type & PERF_SAMPLE_DATA_SRC) {
+ if (items->has_mem_aux_info && data_src_fmt) {
+ if (data_src_fmt < 0) {
+ pr_err("Intel PT missing data_src info\n");
+ return -1;
+ }
+ sample.data_src = intel_pt_get_data_src(items->mem_aux_info, data_src_fmt);
+ } else {
+ sample.data_src = PERF_MEM_NA;
+ }
+ }
+
if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
u64 ax = items->has_rax ? items->rax : 0;
/* Refer kernel's intel_hsw_transaction() */
@@ -2368,9 +2521,10 @@ static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
struct evsel *evsel = pt->pebs_evsel;
+ int data_src_fmt = pt->pebs_data_src_fmt;
u64 id = evsel->core.id[0];
- return intel_pt_do_synth_pebs_sample(ptq, evsel, id);
+ return intel_pt_do_synth_pebs_sample(ptq, evsel, id, data_src_fmt);
}
static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
@@ -2395,7 +2549,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
hw_id);
return intel_pt_synth_single_pebs_sample(ptq);
}
- err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id);
+ err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id, pe->data_src_fmt);
if (err)
return err;
}
@@ -3355,6 +3509,49 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt,
event->itrace_start.tid);
}
+/*
+ * Events with data_src are identified by L1_Hit_Indication
+ * refer https://github.com/intel/perfmon
+ */
+static int intel_pt_data_src_fmt(struct intel_pt *pt, struct evsel *evsel)
+{
+ struct perf_env *env = pt->machine->env;
+ int fmt = DATA_SRC_FORMAT_NA;
+
+ if (!env->cpuid)
+ return DATA_SRC_FORMAT_ERR;
+
+ /*
+ * PEBS-via-PT is only supported on E-core non-hybrid. Of those only
+ * Gracemont and Crestmont have data_src. Check for:
+ * Alderlake N (Gracemont)
+ * Sierra Forest (Crestmont)
+ * Grand Ridge (Crestmont)
+ */
+
+ if (!strncmp(env->cpuid, "GenuineIntel,6,190,", 19))
+ fmt = DATA_SRC_FORMAT_GRT;
+
+ if (!strncmp(env->cpuid, "GenuineIntel,6,175,", 19) ||
+ !strncmp(env->cpuid, "GenuineIntel,6,182,", 19))
+ fmt = DATA_SRC_FORMAT_CMT;
+
+ if (fmt == DATA_SRC_FORMAT_NA)
+ return fmt;
+
+ /*
+ * Only data_src events are:
+ * mem-loads event=0xd0,umask=0x5
+ * mem-stores event=0xd0,umask=0x6
+ */
+ if (evsel->core.attr.type == PERF_TYPE_RAW &&
+ ((evsel->core.attr.config & 0xffff) == 0x5d0 ||
+ (evsel->core.attr.config & 0xffff) == 0x6d0))
+ return fmt;
+
+ return DATA_SRC_FORMAT_NA;
+}
+
static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt,
union perf_event *event,
struct perf_sample *sample)
@@ -3375,6 +3572,7 @@ static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt,
ptq->pebs[hw_id].evsel = evsel;
ptq->pebs[hw_id].id = sample->id;
+ ptq->pebs[hw_id].data_src_fmt = intel_pt_data_src_fmt(pt, evsel);
return 0;
}
@@ -3924,6 +4122,7 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt)
}
pt->single_pebs = true;
pt->sample_pebs = true;
+ pt->pebs_data_src_fmt = intel_pt_data_src_fmt(pt, evsel);
pt->pebs_evsel = evsel;
}
}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 9be2f4479f52..20fd742984e3 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1974,7 +1974,7 @@ static void ip__resolve_ams(struct thread *thread,
* Thus, we have to try consecutively until we find a match
* or else, the symbol is unknown
*/
- thread__find_cpumode_addr_location(thread, ip, &al);
+ thread__find_cpumode_addr_location(thread, ip, /*symbols=*/true, &al);
ams->addr = ip;
ams->al_addr = al.addr;
@@ -2076,7 +2076,7 @@ static int add_callchain_ip(struct thread *thread,
al.sym = NULL;
al.srcline = NULL;
if (!cpumode) {
- thread__find_cpumode_addr_location(thread, ip, &al);
+ thread__find_cpumode_addr_location(thread, ip, symbols, &al);
} else {
if (ip >= PERF_CONTEXT_MAX) {
switch (ip) {
@@ -2104,6 +2104,8 @@ static int add_callchain_ip(struct thread *thread,
}
if (symbols)
thread__find_symbol(thread, *cpumode, ip, &al);
+ else
+ thread__find_map(thread, *cpumode, ip, &al);
}
if (al.sym != NULL) {
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index 81e0135cddf0..a1c71d9793bd 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -282,6 +282,7 @@ bool is_event_supported(u8 type, u64 config)
ret = evsel__open(evsel, NULL, tmap) >= 0;
}
+ evsel__close(evsel);
evsel__delete(evsel);
}
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index c6f369b5d893..36c1d3090689 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -90,11 +90,23 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
{
FILE *fp;
int ret = -1;
- bool need_swap = false;
+ bool need_swap = false, elf32;
u8 e_ident[EI_NIDENT];
- size_t buf_size;
- void *buf;
int i;
+ union {
+ struct {
+ Elf32_Ehdr ehdr32;
+ Elf32_Phdr *phdr32;
+ };
+ struct {
+ Elf64_Ehdr ehdr64;
+ Elf64_Phdr *phdr64;
+ };
+ } hdrs;
+ void *phdr;
+ size_t phdr_size;
+ void *buf = NULL;
+ size_t buf_size = 0;
fp = fopen(filename, "r");
if (fp == NULL)
@@ -108,117 +120,79 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
goto out;
need_swap = check_need_swap(e_ident[EI_DATA]);
+ elf32 = e_ident[EI_CLASS] == ELFCLASS32;
- /* for simplicity */
- fseek(fp, 0, SEEK_SET);
-
- if (e_ident[EI_CLASS] == ELFCLASS32) {
- Elf32_Ehdr ehdr;
- Elf32_Phdr *phdr;
-
- if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
- goto out;
+ if (fread(elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64,
+ elf32 ? sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64),
+ 1, fp) != 1)
+ goto out;
- if (need_swap) {
- ehdr.e_phoff = bswap_32(ehdr.e_phoff);
- ehdr.e_phentsize = bswap_16(ehdr.e_phentsize);
- ehdr.e_phnum = bswap_16(ehdr.e_phnum);
+ if (need_swap) {
+ if (elf32) {
+ hdrs.ehdr32.e_phoff = bswap_32(hdrs.ehdr32.e_phoff);
+ hdrs.ehdr32.e_phentsize = bswap_16(hdrs.ehdr32.e_phentsize);
+ hdrs.ehdr32.e_phnum = bswap_16(hdrs.ehdr32.e_phnum);
+ } else {
+ hdrs.ehdr64.e_phoff = bswap_64(hdrs.ehdr64.e_phoff);
+ hdrs.ehdr64.e_phentsize = bswap_16(hdrs.ehdr64.e_phentsize);
+ hdrs.ehdr64.e_phnum = bswap_16(hdrs.ehdr64.e_phnum);
}
+ }
+ phdr_size = elf32 ? hdrs.ehdr32.e_phentsize * hdrs.ehdr32.e_phnum
+ : hdrs.ehdr64.e_phentsize * hdrs.ehdr64.e_phnum;
+ phdr = malloc(phdr_size);
+ if (phdr == NULL)
+ goto out;
- buf_size = ehdr.e_phentsize * ehdr.e_phnum;
- buf = malloc(buf_size);
- if (buf == NULL)
- goto out;
-
- fseek(fp, ehdr.e_phoff, SEEK_SET);
- if (fread(buf, buf_size, 1, fp) != 1)
- goto out_free;
-
- for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
- void *tmp;
- long offset;
-
- if (need_swap) {
- phdr->p_type = bswap_32(phdr->p_type);
- phdr->p_offset = bswap_32(phdr->p_offset);
- phdr->p_filesz = bswap_32(phdr->p_filesz);
- }
-
- if (phdr->p_type != PT_NOTE)
- continue;
-
- buf_size = phdr->p_filesz;
- offset = phdr->p_offset;
- tmp = realloc(buf, buf_size);
- if (tmp == NULL)
- goto out_free;
-
- buf = tmp;
- fseek(fp, offset, SEEK_SET);
- if (fread(buf, buf_size, 1, fp) != 1)
- goto out_free;
+ fseek(fp, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET);
+ if (fread(phdr, phdr_size, 1, fp) != 1)
+ goto out_free;
- ret = read_build_id(buf, buf_size, bid, need_swap);
- if (ret == 0) {
- ret = bid->size;
- break;
- }
- }
- } else {
- Elf64_Ehdr ehdr;
- Elf64_Phdr *phdr;
+ if (elf32)
+ hdrs.phdr32 = phdr;
+ else
+ hdrs.phdr64 = phdr;
- if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
- goto out;
+ for (i = 0; i < elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum; i++) {
+ size_t p_filesz;
if (need_swap) {
- ehdr.e_phoff = bswap_64(ehdr.e_phoff);
- ehdr.e_phentsize = bswap_16(ehdr.e_phentsize);
- ehdr.e_phnum = bswap_16(ehdr.e_phnum);
+ if (elf32) {
+ hdrs.phdr32[i].p_type = bswap_32(hdrs.phdr32[i].p_type);
+ hdrs.phdr32[i].p_offset = bswap_32(hdrs.phdr32[i].p_offset);
+ hdrs.phdr32[i].p_filesz = bswap_32(hdrs.phdr32[i].p_offset);
+ } else {
+ hdrs.phdr64[i].p_type = bswap_32(hdrs.phdr64[i].p_type);
+ hdrs.phdr64[i].p_offset = bswap_64(hdrs.phdr64[i].p_offset);
+ hdrs.phdr64[i].p_filesz = bswap_64(hdrs.phdr64[i].p_filesz);
+ }
}
+ if ((elf32 ? hdrs.phdr32[i].p_type : hdrs.phdr64[i].p_type) != PT_NOTE)
+ continue;
- buf_size = ehdr.e_phentsize * ehdr.e_phnum;
- buf = malloc(buf_size);
- if (buf == NULL)
- goto out;
-
- fseek(fp, ehdr.e_phoff, SEEK_SET);
- if (fread(buf, buf_size, 1, fp) != 1)
- goto out_free;
-
- for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
+ p_filesz = elf32 ? hdrs.phdr32[i].p_filesz : hdrs.phdr64[i].p_filesz;
+ if (p_filesz > buf_size) {
void *tmp;
- long offset;
-
- if (need_swap) {
- phdr->p_type = bswap_32(phdr->p_type);
- phdr->p_offset = bswap_64(phdr->p_offset);
- phdr->p_filesz = bswap_64(phdr->p_filesz);
- }
- if (phdr->p_type != PT_NOTE)
- continue;
-
- buf_size = phdr->p_filesz;
- offset = phdr->p_offset;
+ buf_size = p_filesz;
tmp = realloc(buf, buf_size);
if (tmp == NULL)
goto out_free;
-
buf = tmp;
- fseek(fp, offset, SEEK_SET);
- if (fread(buf, buf_size, 1, fp) != 1)
- goto out_free;
+ }
+ fseek(fp, elf32 ? hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET);
+ if (fread(buf, p_filesz, 1, fp) != 1)
+ goto out_free;
- ret = read_build_id(buf, buf_size, bid, need_swap);
- if (ret == 0) {
- ret = bid->size;
- break;
- }
+ ret = read_build_id(buf, p_filesz, bid, need_swap);
+ if (ret == 0) {
+ ret = bid->size;
+ break;
}
}
out_free:
free(buf);
+ free(phdr);
out:
fclose(fp);
return ret;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 3bbf173ad822..c0ec5ed4f1aa 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1405,6 +1405,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
goto out_err;
}
}
+ map__zput(new_node->map);
free(new_node);
}
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 0ffdd52d86d7..309d573eac9a 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -406,7 +406,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bo
}
void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
- struct addr_location *al)
+ bool symbols, struct addr_location *al)
{
size_t i;
const u8 cpumodes[] = {
@@ -417,7 +417,11 @@ void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
};
for (i = 0; i < ARRAY_SIZE(cpumodes); i++) {
- thread__find_symbol(thread, cpumodes[i], addr, al);
+ if (symbols)
+ thread__find_symbol(thread, cpumodes[i], addr, al);
+ else
+ thread__find_map(thread, cpumodes[i], addr, al);
+
if (al->map)
break;
}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 6cbf6eb2812e..1fb32e7d62a4 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -122,7 +122,7 @@ struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode,
u64 addr, struct addr_location *al);
void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
- struct addr_location *al);
+ bool symbols, struct addr_location *al);
int thread__memcpy(struct thread *thread, struct machine *machine,
void *buf, u64 ip, int len, bool *is64bit);