summaryrefslogtreecommitdiff
path: root/tools/perf/util
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/util')
-rw-r--r--tools/perf/util/Build3
-rwxr-xr-xtools/perf/util/PERF-VERSION-GEN10
-rw-r--r--tools/perf/util/annotate.c34
-rw-r--r--tools/perf/util/annotate.h8
-rw-r--r--tools/perf/util/auxtrace.c27
-rw-r--r--tools/perf/util/auxtrace.h4
-rw-r--r--tools/perf/util/bpf-event.c5
-rw-r--r--tools/perf/util/bpf-event.h1
-rw-r--r--tools/perf/util/bpf-loader.c24
-rw-r--r--tools/perf/util/bpf_lock_contention.c26
-rw-r--r--tools/perf/util/bpf_skel/bperf_cgroup.bpf.c29
-rw-r--r--tools/perf/util/bpf_skel/lock_contention.bpf.c5
-rw-r--r--tools/perf/util/branch.c70
-rw-r--r--tools/perf/util/branch.h7
-rw-r--r--tools/perf/util/build-id.c12
-rw-r--r--tools/perf/util/callchain.c12
-rw-r--r--tools/perf/util/config.c31
-rw-r--r--tools/perf/util/config.h1
-rw-r--r--tools/perf/util/cpumap.c39
-rw-r--r--tools/perf/util/cpumap.h2
-rw-r--r--tools/perf/util/cputopo.c61
-rw-r--r--tools/perf/util/cputopo.h5
-rw-r--r--tools/perf/util/dso.c19
-rw-r--r--tools/perf/util/dso.h4
-rw-r--r--tools/perf/util/events_stats.h1
-rw-r--r--tools/perf/util/evlist.c316
-rw-r--r--tools/perf/util/evlist.h13
-rw-r--r--tools/perf/util/evsel.c30
-rw-r--r--tools/perf/util/evsel.h1
-rw-r--r--tools/perf/util/expr.c40
-rw-r--r--tools/perf/util/expr.h25
-rw-r--r--tools/perf/util/expr.l6
-rw-r--r--tools/perf/util/expr.y2
-rw-r--r--tools/perf/util/genelf.c1
-rw-r--r--tools/perf/util/genelf.h4
-rw-r--r--tools/perf/util/header.c24
-rw-r--r--tools/perf/util/hisi-ptt-decoder/Build1
-rw-r--r--tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.c164
-rw-r--r--tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.h31
-rw-r--r--tools/perf/util/hisi-ptt.c192
-rw-r--r--tools/perf/util/hisi-ptt.h19
-rw-r--r--tools/perf/util/hist.c22
-rw-r--r--tools/perf/util/hist.h6
-rw-r--r--tools/perf/util/include/linux/linkage.h13
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-log.c117
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-log.h3
-rw-r--r--tools/perf/util/intel-pt.c32
-rw-r--r--tools/perf/util/jitdump.c7
-rw-r--r--tools/perf/util/lock-contention.h5
-rw-r--r--tools/perf/util/machine.c4
-rw-r--r--tools/perf/util/map.c3
-rw-r--r--tools/perf/util/mem-events.c17
-rw-r--r--tools/perf/util/metricgroup.c145
-rw-r--r--tools/perf/util/metricgroup.h4
-rw-r--r--tools/perf/util/mmap.h1
-rw-r--r--tools/perf/util/mutex.c119
-rw-r--r--tools/perf/util/mutex.h108
-rw-r--r--tools/perf/util/parse-branch-options.c5
-rw-r--r--tools/perf/util/parse-events.c11
-rw-r--r--tools/perf/util/perf_event_attr_fprintf.c4
-rw-r--r--tools/perf/util/pmu.c19
-rw-r--r--tools/perf/util/pmu.h2
-rw-r--r--tools/perf/util/pmu.l2
-rw-r--r--tools/perf/util/pmu.y17
-rw-r--r--tools/perf/util/probe-event.c3
-rw-r--r--tools/perf/util/session.c7
-rw-r--r--tools/perf/util/smt.c110
-rw-r--r--tools/perf/util/smt.h19
-rw-r--r--tools/perf/util/sort.c38
-rw-r--r--tools/perf/util/sort.h3
-rw-r--r--tools/perf/util/stat-display.c42
-rw-r--r--tools/perf/util/stat-shadow.c338
-rw-r--r--tools/perf/util/stat.c29
-rw-r--r--tools/perf/util/stat.h12
-rw-r--r--tools/perf/util/string.c1
-rw-r--r--tools/perf/util/symbol.c4
-rw-r--r--tools/perf/util/synthetic-events.c184
-rw-r--r--tools/perf/util/top.h5
78 files changed, 2087 insertions, 683 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 485e1a343165..e315ecaec323 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -118,6 +118,8 @@ perf-$(CONFIG_AUXTRACE) += intel-pt.o
perf-$(CONFIG_AUXTRACE) += intel-bts.o
perf-$(CONFIG_AUXTRACE) += arm-spe.o
perf-$(CONFIG_AUXTRACE) += arm-spe-decoder/
+perf-$(CONFIG_AUXTRACE) += hisi-ptt.o
+perf-$(CONFIG_AUXTRACE) += hisi-ptt-decoder/
perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o
ifdef CONFIG_LIBOPENCSD
@@ -143,6 +145,7 @@ perf-y += branch.o
perf-y += mem2node.o
perf-y += clockid.o
perf-y += list_sort.o
+perf-y += mutex.o
perf-$(CONFIG_LIBBPF) += bpf-loader.o
perf-$(CONFIG_LIBBPF) += bpf_map.o
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
index 0ee5af529238..3cc42821d9b3 100755
--- a/tools/perf/util/PERF-VERSION-GEN
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -11,7 +11,8 @@ LF='
'
#
-# Always try first to get the version from the kernel Makefile
+# Use version from kernel Makefile unless not in a git repository and
+# PERF-VERSION-FILE exists
#
CID=
TAG=
@@ -19,9 +20,14 @@ if test -d ../../.git -o -f ../../.git
then
TAG=$(MAKEFLAGS= make -sC ../.. kernelversion)
CID=$(git log -1 --abbrev=12 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID"
-else
+elif test -f ../../PERF-VERSION-FILE
+then
TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g')
fi
+if test -z "$TAG"
+then
+ TAG=$(MAKEFLAGS= make -sC ../.. kernelversion)
+fi
VN="$TAG$CID"
if test -n "$CID"
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 2c6a485c3de5..db475e44f42f 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -35,7 +35,6 @@
#include "arch/common.h"
#include "namespaces.h"
#include <regex.h>
-#include <pthread.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/string.h>
@@ -821,7 +820,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym)
{
struct annotation *notes = symbol__annotation(sym);
- pthread_mutex_lock(&notes->lock);
+ mutex_lock(&notes->lock);
if (notes->src != NULL) {
memset(notes->src->histograms, 0,
notes->src->nr_histograms * notes->src->sizeof_sym_hist);
@@ -829,7 +828,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym)
memset(notes->src->cycles_hist, 0,
symbol__size(sym) * sizeof(struct cyc_hist));
}
- pthread_mutex_unlock(&notes->lock);
+ mutex_unlock(&notes->lock);
}
static int __symbol__account_cycles(struct cyc_hist *ch,
@@ -1086,7 +1085,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size)
notes->hit_insn = 0;
notes->cover_insn = 0;
- pthread_mutex_lock(&notes->lock);
+ mutex_lock(&notes->lock);
for (offset = size - 1; offset >= 0; --offset) {
struct cyc_hist *ch;
@@ -1105,7 +1104,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size)
notes->have_cycles = true;
}
}
- pthread_mutex_unlock(&notes->lock);
+ mutex_unlock(&notes->lock);
}
int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
@@ -1258,13 +1257,13 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r
void annotation__init(struct annotation *notes)
{
- pthread_mutex_init(&notes->lock, NULL);
+ mutex_init(&notes->lock);
}
void annotation__exit(struct annotation *notes)
{
annotated_source__delete(notes->src);
- pthread_mutex_destroy(&notes->lock);
+ mutex_destroy(&notes->lock);
}
static void annotation_line__add(struct annotation_line *al, struct list_head *head)
@@ -1698,6 +1697,7 @@ fallback:
*/
__symbol__join_symfs(filename, filename_size, dso->long_name);
+ mutex_lock(&dso->lock);
if (access(filename, R_OK) && errno == ENOENT && dso->nsinfo) {
char *new_name = filename_with_chroot(dso->nsinfo->pid,
filename);
@@ -1706,6 +1706,7 @@ fallback:
free(new_name);
}
}
+ mutex_unlock(&dso->lock);
}
free(build_id_path);
@@ -2238,7 +2239,10 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
}
args.ms = *ms;
- notes->start = map__rip_2objdump(ms->map, sym->start);
+ if (notes->options && notes->options->full_addr)
+ notes->start = map__objdump_2mem(ms->map, ms->sym->start);
+ else
+ notes->start = map__rip_2objdump(ms->map, ms->sym->start);
return symbol__disassemble(sym, &args);
}
@@ -2761,6 +2765,8 @@ void annotation__update_column_widths(struct annotation *notes)
{
if (notes->options->use_offset)
notes->widths.target = notes->widths.min_addr;
+ else if (notes->options->full_addr)
+ notes->widths.target = BITS_PER_LONG / 4;
else
notes->widths.target = notes->widths.max_addr;
@@ -2770,6 +2776,18 @@ void annotation__update_column_widths(struct annotation *notes)
notes->widths.addr += notes->widths.jumps + 1;
}
+void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms)
+{
+ notes->options->full_addr = !notes->options->full_addr;
+
+ if (notes->options->full_addr)
+ notes->start = map__objdump_2mem(ms->map, ms->sym->start);
+ else
+ notes->start = map__rip_2objdump(ms->map, ms->sym->start);
+
+ annotation__update_column_widths(notes);
+}
+
static void annotation__calc_lines(struct annotation *notes, struct map *map,
struct rb_root *root,
struct annotation_options *opts)
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 986f2bbe4870..8934072c39e6 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -8,9 +8,9 @@
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
-#include <pthread.h>
#include <asm/bug.h>
#include "symbol_conf.h"
+#include "mutex.h"
#include "spark.h"
struct hist_browser_timer;
@@ -88,7 +88,8 @@ struct annotation_options {
show_nr_jumps,
show_minmax_cycle,
show_asm_raw,
- annotate_src;
+ annotate_src,
+ full_addr;
u8 offset_level;
int min_pcnt;
int max_lines;
@@ -273,7 +274,7 @@ struct annotated_source {
};
struct annotation {
- pthread_mutex_t lock;
+ struct mutex lock;
u64 max_coverage;
u64 start;
u64 hit_cycles;
@@ -325,6 +326,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size);
void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym);
void annotation__update_column_widths(struct annotation *notes);
void annotation__init_column_widths(struct annotation *notes, struct symbol *sym);
+void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms);
static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx)
{
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 6edab8a16de6..46ada5ec3f9a 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -26,6 +26,7 @@
#include <linux/list.h>
#include <linux/zalloc.h>
+#include "config.h"
#include "evlist.h"
#include "dso.h"
#include "map.h"
@@ -51,6 +52,7 @@
#include "intel-pt.h"
#include "intel-bts.h"
#include "arm-spe.h"
+#include "hisi-ptt.h"
#include "s390-cpumsf.h"
#include "util/mmap.h"
@@ -1319,6 +1321,9 @@ int perf_event__process_auxtrace_info(struct perf_session *session,
case PERF_AUXTRACE_S390_CPUMSF:
err = s390_cpumsf_process_auxtrace_info(event, session);
break;
+ case PERF_AUXTRACE_HISI_PTT:
+ err = hisi_ptt_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_UNKNOWN:
default:
return -EINVAL;
@@ -1434,6 +1439,16 @@ static int get_flags(const char **ptr, unsigned int *plus_flags, unsigned int *m
}
}
+#define ITRACE_DFLT_LOG_ON_ERROR_SZ 16384
+
+static unsigned int itrace_log_on_error_size(void)
+{
+ unsigned int sz = 0;
+
+ perf_config_scan("itrace.debug-log-buffer-size", "%u", &sz);
+ return sz ?: ITRACE_DFLT_LOG_ON_ERROR_SZ;
+}
+
/*
* Please check tools/perf/Documentation/perf-script.txt for information
* about the options parsed here, which is introduced after this cset,
@@ -1532,6 +1547,8 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
if (get_flags(&p, &synth_opts->log_plus_flags,
&synth_opts->log_minus_flags))
goto out_err;
+ if (synth_opts->log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR)
+ synth_opts->log_on_error_size = itrace_log_on_error_size();
break;
case 'c':
synth_opts->branches = true;
@@ -2308,11 +2325,19 @@ struct sym_args {
bool near;
};
+static bool kern_sym_name_match(const char *kname, const char *name)
+{
+ size_t n = strlen(name);
+
+ return !strcmp(kname, name) ||
+ (!strncmp(kname, name, n) && kname[n] == '\t');
+}
+
static bool kern_sym_match(struct sym_args *args, const char *name, char type)
{
/* A function with the same name, and global or the n'th found or any */
return kallsyms__is_function(type) &&
- !strcmp(name, args->name) &&
+ kern_sym_name_match(name, args->name) &&
((args->global && isupper(type)) ||
(args->selected && ++(args->cnt) == args->idx) ||
(!args->global && !args->selected));
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 6a4fbfd34c6b..6a0f9b98f059 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -48,6 +48,7 @@ enum auxtrace_type {
PERF_AUXTRACE_CS_ETM,
PERF_AUXTRACE_ARM_SPE,
PERF_AUXTRACE_S390_CPUMSF,
+ PERF_AUXTRACE_HISI_PTT,
};
enum itrace_period_type {
@@ -60,6 +61,7 @@ enum itrace_period_type {
#define AUXTRACE_ERR_FLG_DATA_LOST (1 << ('l' - 'a'))
#define AUXTRACE_LOG_FLG_ALL_PERF_EVTS (1 << ('a' - 'a'))
+#define AUXTRACE_LOG_FLG_ON_ERROR (1 << ('e' - 'a'))
#define AUXTRACE_LOG_FLG_USE_STDOUT (1 << ('o' - 'a'))
/**
@@ -110,6 +112,7 @@ enum itrace_period_type {
* @log_plus_flags: flags to affect what is logged
* @log_minus_flags: flags to affect what is logged
* @quick: quicker (less detailed) decoding
+ * @log_on_error_size: size of log to keep for outputting log only on errors
*/
struct itrace_synth_opts {
bool set;
@@ -155,6 +158,7 @@ struct itrace_synth_opts {
unsigned int log_plus_flags;
unsigned int log_minus_flags;
unsigned int quick;
+ unsigned int log_on_error_size;
};
/**
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index eee64ddb766d..cc7c1f90cf62 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -36,6 +36,11 @@ struct btf *btf__load_from_kernel_by_id(__u32 id)
#endif
#ifndef HAVE_LIBBPF_BPF_PROG_LOAD
+LIBBPF_API int bpf_load_program(enum bpf_prog_type type,
+ const struct bpf_insn *insns, size_t insns_cnt,
+ const char *license, __u32 kern_version,
+ char *log_buf, size_t log_buf_sz);
+
int bpf_prog_load(enum bpf_prog_type prog_type,
const char *prog_name __maybe_unused,
const char *license,
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
index 144a8a24cc69..1bcbd4fb6c66 100644
--- a/tools/perf/util/bpf-event.h
+++ b/tools/perf/util/bpf-event.h
@@ -4,7 +4,6 @@
#include <linux/compiler.h>
#include <linux/rbtree.h>
-#include <pthread.h>
#include <api/fd/array.h>
#include <stdio.h>
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index e2052f4fed33..f4adeccdbbcb 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -27,11 +27,33 @@
#include "util.h"
#include "llvm-utils.h"
#include "c++/clang-c.h"
-#include "hashmap.h"
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/hashmap.h>
+#else
+#include "util/hashmap.h"
+#endif
#include "asm/bug.h"
#include <internal/xyarray.h>
+#ifndef HAVE_LIBBPF_BPF_PROGRAM__SET_INSNS
+int bpf_program__set_insns(struct bpf_program *prog __maybe_unused,
+ struct bpf_insn *new_insns __maybe_unused, size_t new_insn_cnt __maybe_unused)
+{
+ pr_err("%s: not support, update libbpf\n", __func__);
+ return -ENOTSUP;
+}
+
+int libbpf_register_prog_handler(const char *sec __maybe_unused,
+ enum bpf_prog_type prog_type __maybe_unused,
+ enum bpf_attach_type exp_attach_type __maybe_unused,
+ const struct libbpf_prog_handler_opts *opts __maybe_unused)
+{
+ pr_err("%s: not support, update libbpf\n", __func__);
+ return -ENOTSUP;
+}
+#endif
+
/* temporarily disable libbpf deprecation warnings */
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index c591a66733ef..fc4d613cb979 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -8,17 +8,13 @@
#include "util/thread_map.h"
#include "util/lock-contention.h"
#include <linux/zalloc.h>
+#include <linux/string.h>
#include <bpf/bpf.h>
#include "bpf_skel/lock_contention.skel.h"
static struct lock_contention_bpf *skel;
-/* should be same as bpf_skel/lock_contention.bpf.c */
-struct lock_contention_key {
- s32 stack_id;
-};
-
struct lock_contention_data {
u64 total_time;
u64 min_time;
@@ -40,6 +36,7 @@ int lock_contention_prepare(struct lock_contention *con)
return -1;
}
+ bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64));
bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries);
@@ -91,6 +88,8 @@ int lock_contention_prepare(struct lock_contention *con)
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
}
+ skel->bss->stack_skip = con->stack_skip;
+
lock_contention_bpf__attach(skel);
return 0;
}
@@ -114,7 +113,7 @@ int lock_contention_read(struct lock_contention *con)
struct lock_contention_data data;
struct lock_stat *st;
struct machine *machine = con->machine;
- u64 stack_trace[CONTENTION_STACK_DEPTH];
+ u64 stack_trace[con->max_stack];
fd = bpf_map__fd(skel->maps.lock_stat);
stack = bpf_map__fd(skel->maps.stacks);
@@ -125,7 +124,7 @@ int lock_contention_read(struct lock_contention *con)
while (!bpf_map_get_next_key(fd, &prev_key, &key)) {
struct map *kmap;
struct symbol *sym;
- int idx;
+ int idx = 0;
bpf_map_lookup_elem(fd, &key, &data);
st = zalloc(sizeof(*st));
@@ -144,10 +143,9 @@ int lock_contention_read(struct lock_contention *con)
bpf_map_lookup_elem(stack, &key, stack_trace);
- /* skip BPF + lock internal functions */
- idx = CONTENTION_STACK_SKIP;
+ /* skip lock internal functions */
while (is_lock_function(machine, stack_trace[idx]) &&
- idx < CONTENTION_STACK_DEPTH - 1)
+ idx < con->max_stack - 1)
idx++;
st->addr = stack_trace[idx];
@@ -171,6 +169,14 @@ int lock_contention_read(struct lock_contention *con)
return -1;
}
+ if (verbose) {
+ st->callstack = memdup(stack_trace, sizeof(stack_trace));
+ if (st->callstack == NULL) {
+ free(st);
+ return -1;
+ }
+ }
+
hlist_add_head(&st->hash_entry, con->result);
prev_key = key;
}
diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
index 9aa8cdd93de4..6a438e0102c5 100644
--- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
+++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
@@ -43,6 +43,18 @@ struct {
__uint(value_size, sizeof(struct bpf_perf_event_value));
} cgrp_readings SEC(".maps");
+/* new kernel cgroup definition */
+struct cgroup___new {
+ int level;
+ struct cgroup *ancestors[];
+} __attribute__((preserve_access_index));
+
+/* old kernel cgroup definition */
+struct cgroup___old {
+ int level;
+ u64 ancestor_ids[];
+} __attribute__((preserve_access_index));
+
const volatile __u32 num_events = 1;
const volatile __u32 num_cpus = 1;
@@ -50,6 +62,21 @@ int enabled = 0;
int use_cgroup_v2 = 0;
int perf_subsys_id = -1;
+static inline __u64 get_cgroup_v1_ancestor_id(struct cgroup *cgrp, int level)
+{
+ /* recast pointer to capture new type for compiler */
+ struct cgroup___new *cgrp_new = (void *)cgrp;
+
+ if (bpf_core_field_exists(cgrp_new->ancestors)) {
+ return BPF_CORE_READ(cgrp_new, ancestors[level], kn, id);
+ } else {
+ /* recast pointer to capture old type for compiler */
+ struct cgroup___old *cgrp_old = (void *)cgrp;
+
+ return BPF_CORE_READ(cgrp_old, ancestor_ids[level]);
+ }
+}
+
static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
{
struct task_struct *p = (void *)bpf_get_current_task();
@@ -77,7 +104,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
break;
// convert cgroup-id to a map index
- cgrp_id = BPF_CORE_READ(cgrp, ancestor_ids[i]);
+ cgrp_id = get_cgroup_v1_ancestor_id(cgrp, i);
elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
if (!elem)
continue;
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 9e8b94eb6320..1bb8628e7c9f 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -72,9 +72,10 @@ struct {
int enabled;
int has_cpu;
int has_task;
+int stack_skip;
/* error stat */
-unsigned long lost;
+int lost;
static inline int can_record(void)
{
@@ -117,7 +118,7 @@ int contention_begin(u64 *ctx)
pelem->timestamp = bpf_ktime_get_ns();
pelem->lock = (__u64)ctx[0];
pelem->flags = (__u32)ctx[1];
- pelem->stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_FAST_STACK_CMP);
+ pelem->stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_FAST_STACK_CMP | stack_skip);
if (pelem->stack_id < 0)
lost++;
diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c
index a9a909db8cc7..6d38238481d3 100644
--- a/tools/perf/util/branch.c
+++ b/tools/perf/util/branch.c
@@ -21,7 +21,10 @@ void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
if (flags->type == PERF_BR_UNKNOWN || from == 0)
return;
- st->counts[flags->type]++;
+ if (flags->type == PERF_BR_EXTEND_ABI)
+ st->new_counts[flags->new_type]++;
+ else
+ st->counts[flags->type]++;
if (flags->type == PERF_BR_COND) {
if (to > from)
@@ -36,6 +39,38 @@ void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
st->cross_4k++;
}
+const char *branch_new_type_name(int new_type)
+{
+ const char *branch_new_names[PERF_BR_NEW_MAX] = {
+ "FAULT_ALGN",
+ "FAULT_DATA",
+ "FAULT_INST",
+/*
+ * TODO: This switch should happen on 'session->header.env.arch'
+ * instead, because an arm64 platform perf recording could be
+ * opened for analysis on other platforms as well.
+ */
+#ifdef __aarch64__
+ "ARM64_FIQ",
+ "ARM64_DEBUG_HALT",
+ "ARM64_DEBUG_EXIT",
+ "ARM64_DEBUG_INST",
+ "ARM64_DEBUG_DATA"
+#else
+ "ARCH_1",
+ "ARCH_2",
+ "ARCH_3",
+ "ARCH_4",
+ "ARCH_5"
+#endif
+ };
+
+ if (new_type >= 0 && new_type < PERF_BR_NEW_MAX)
+ return branch_new_names[new_type];
+
+ return NULL;
+}
+
const char *branch_type_name(int type)
{
const char *branch_names[PERF_BR_MAX] = {
@@ -51,7 +86,10 @@ const char *branch_type_name(int type)
"COND_CALL",
"COND_RET",
"ERET",
- "IRQ"
+ "IRQ",
+ "SERROR",
+ "NO_TX",
+ "", // Needed for PERF_BR_EXTEND_ABI that ends up triggering some compiler warnings about NULL deref
};
if (type >= 0 && type < PERF_BR_MAX)
@@ -60,6 +98,17 @@ const char *branch_type_name(int type)
return NULL;
}
+const char *get_branch_type(struct branch_entry *e)
+{
+ if (e->flags.type == PERF_BR_UNKNOWN)
+ return "";
+
+ if (e->flags.type == PERF_BR_EXTEND_ABI)
+ return branch_new_type_name(e->flags.new_type);
+
+ return branch_type_name(e->flags.type);
+}
+
void branch_type_stat_display(FILE *fp, struct branch_type_stat *st)
{
u64 total = 0;
@@ -106,6 +155,15 @@ void branch_type_stat_display(FILE *fp, struct branch_type_stat *st)
100.0 *
(double)st->counts[i] / (double)total);
}
+
+ for (i = 0; i < PERF_BR_NEW_MAX; i++) {
+ if (st->new_counts[i] > 0)
+ fprintf(fp, "\n%8s: %5.1f%%",
+ branch_new_type_name(i),
+ 100.0 *
+ (double)st->new_counts[i] / (double)total);
+ }
+
}
static int count_str_scnprintf(int idx, const char *str, char *bf, int size)
@@ -121,6 +179,9 @@ int branch_type_str(struct branch_type_stat *st, char *bf, int size)
for (i = 0; i < PERF_BR_MAX; i++)
total += st->counts[i];
+ for (i = 0; i < PERF_BR_NEW_MAX; i++)
+ total += st->new_counts[i];
+
if (total == 0)
return 0;
@@ -138,6 +199,11 @@ int branch_type_str(struct branch_type_stat *st, char *bf, int size)
printed += count_str_scnprintf(j++, branch_type_name(i), bf + printed, size - printed);
}
+ for (i = 0; i < PERF_BR_NEW_MAX; i++) {
+ if (st->new_counts[i] > 0)
+ printed += count_str_scnprintf(j++, branch_new_type_name(i), bf + printed, size - printed);
+ }
+
if (st->cross_4k > 0)
printed += count_str_scnprintf(j++, "CROSS_4K", bf + printed, size - printed);
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 17b2ccc61094..f838b23db180 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -24,7 +24,9 @@ struct branch_flags {
u64 abort:1;
u64 cycles:16;
u64 type:4;
- u64 reserved:40;
+ u64 new_type:4;
+ u64 priv:3;
+ u64 reserved:33;
};
};
};
@@ -72,6 +74,7 @@ static inline struct branch_entry *perf_sample__branch_entries(struct perf_sampl
struct branch_type_stat {
bool branch_to;
u64 counts[PERF_BR_MAX];
+ u64 new_counts[PERF_BR_NEW_MAX];
u64 cond_fwd;
u64 cond_bwd;
u64 cross_4k;
@@ -82,6 +85,8 @@ void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
u64 from, u64 to);
const char *branch_type_name(int type);
+const char *branch_new_type_name(int new_type);
+const char *get_branch_type(struct branch_entry *e);
void branch_type_stat_display(FILE *fp, struct branch_type_stat *st);
int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize);
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index ec18ed5caf3e..a839b30c981b 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -898,11 +898,15 @@ static int filename__read_build_id_ns(const char *filename,
static bool dso__build_id_mismatch(struct dso *dso, const char *name)
{
struct build_id bid;
+ bool ret = false;
- if (filename__read_build_id_ns(name, &bid, dso->nsinfo) < 0)
- return false;
+ mutex_lock(&dso->lock);
+ if (filename__read_build_id_ns(name, &bid, dso->nsinfo) >= 0)
+ ret = !dso__build_id_equal(dso, &bid);
- return !dso__build_id_equal(dso, &bid);
+ mutex_unlock(&dso->lock);
+
+ return ret;
}
static int dso__cache_build_id(struct dso *dso, struct machine *machine,
@@ -941,8 +945,10 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine,
if (!is_kallsyms && dso__build_id_mismatch(dso, name))
goto out_free;
+ mutex_lock(&dso->lock);
ret = build_id_cache__add_b(&dso->bid, name, dso->nsinfo,
is_kallsyms, is_vdso, proper_name, root_dir);
+ mutex_unlock(&dso->lock);
out_free:
free(allocated_name);
return ret;
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 7e663673f79f..a093a15f048f 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1307,24 +1307,16 @@ int callchain_branch_counts(struct callchain_root *root,
static int count_pri64_printf(int idx, const char *str, u64 value, char *bf, int bfsize)
{
- int printed;
-
- printed = scnprintf(bf, bfsize, "%s%s:%" PRId64 "", (idx) ? " " : " (", str, value);
-
- return printed;
+ return scnprintf(bf, bfsize, "%s%s:%" PRId64 "", (idx) ? " " : " (", str, value);
}
static int count_float_printf(int idx, const char *str, float value,
char *bf, int bfsize, float threshold)
{
- int printed;
-
if (threshold != 0.0 && value < threshold)
return 0;
- printed = scnprintf(bf, bfsize, "%s%s:%.1f%%", (idx) ? " " : " (", str, value);
-
- return printed;
+ return scnprintf(bf, bfsize, "%s%s:%.1f%%", (idx) ? " " : " (", str, value);
}
static int branch_to_str(char *bf, int bfsize,
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 60ce5908c664..3f2ae19a1dd4 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -908,3 +908,34 @@ void set_buildid_dir(const char *dir)
/* for communicating with external commands */
setenv("PERF_BUILDID_DIR", buildid_dir, 1);
}
+
+struct perf_config_scan_data {
+ const char *name;
+ const char *fmt;
+ va_list args;
+ int ret;
+};
+
+static int perf_config_scan_cb(const char *var, const char *value, void *data)
+{
+ struct perf_config_scan_data *d = data;
+
+ if (!strcmp(var, d->name))
+ d->ret = vsscanf(value, d->fmt, d->args);
+
+ return 0;
+}
+
+int perf_config_scan(const char *name, const char *fmt, ...)
+{
+ struct perf_config_scan_data d = {
+ .name = name,
+ .fmt = fmt,
+ };
+
+ va_start(d.args, fmt);
+ perf_config(perf_config_scan_cb, &d);
+ va_end(d.args);
+
+ return d.ret;
+}
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index 2fd77aaff4d2..2e5e808928a5 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -29,6 +29,7 @@ typedef int (*config_fn_t)(const char *, const char *, void *);
int perf_default_config(const char *, const char *, void *);
int perf_config(config_fn_t fn, void *);
+int perf_config_scan(const char *name, const char *fmt, ...) __scanf(2, 3);
int perf_config_set(struct perf_config_set *set,
config_fn_t fn, void *data);
int perf_config_int(int *dest, const char *, const char *);
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index ae43fb88f444..8486ca3bec75 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -112,12 +112,39 @@ static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_
}
+static struct perf_cpu_map *cpu_map__from_range(const struct perf_record_cpu_map_data *data)
+{
+ struct perf_cpu_map *map;
+ unsigned int i = 0;
+
+ map = perf_cpu_map__empty_new(data->range_cpu_data.end_cpu -
+ data->range_cpu_data.start_cpu + 1 + data->range_cpu_data.any_cpu);
+ if (!map)
+ return NULL;
+
+ if (data->range_cpu_data.any_cpu)
+ map->map[i++].cpu = -1;
+
+ for (int cpu = data->range_cpu_data.start_cpu; cpu <= data->range_cpu_data.end_cpu;
+ i++, cpu++)
+ map->map[i].cpu = cpu;
+
+ return map;
+}
+
struct perf_cpu_map *cpu_map__new_data(const struct perf_record_cpu_map_data *data)
{
- if (data->type == PERF_CPU_MAP__CPUS)
+ switch (data->type) {
+ case PERF_CPU_MAP__CPUS:
return cpu_map__from_entries(data);
- else
+ case PERF_CPU_MAP__MASK:
return cpu_map__from_mask(data);
+ case PERF_CPU_MAP__RANGE_CPUS:
+ return cpu_map__from_range(data);
+ default:
+ pr_err("cpu_map__new_data unknown type %d\n", data->type);
+ return NULL;
+ }
}
size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp)
@@ -202,7 +229,7 @@ static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer)
else if (a->core != b->core)
return a->core - b->core;
else
- return a->thread - b->thread;
+ return a->thread_idx - b->thread_idx;
}
struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
@@ -640,7 +667,7 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b)
{
- return a->thread == b->thread &&
+ return a->thread_idx == b->thread_idx &&
a->node == b->node &&
a->socket == b->socket &&
a->die == b->die &&
@@ -650,7 +677,7 @@ bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b
bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a)
{
- return a->thread == -1 &&
+ return a->thread_idx == -1 &&
a->node == -1 &&
a->socket == -1 &&
a->die == -1 &&
@@ -661,7 +688,7 @@ bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a)
struct aggr_cpu_id aggr_cpu_id__empty(void)
{
struct aggr_cpu_id ret = {
- .thread = -1,
+ .thread_idx = -1,
.node = -1,
.socket = -1,
.die = -1,
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index fa8a5acdcae1..4a6d029576ee 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -10,7 +10,7 @@
/** Identify where counts are aggregated, -1 implies not to aggregate. */
struct aggr_cpu_id {
/** A value in the range 0 to number of threads. */
- int thread;
+ int thread_idx;
/** The numa node X as read from /sys/devices/system/node/nodeX. */
int node;
/**
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
index d275d843c155..1a3ff6449158 100644
--- a/tools/perf/util/cputopo.c
+++ b/tools/perf/util/cputopo.c
@@ -157,6 +157,67 @@ void cpu_topology__delete(struct cpu_topology *tp)
free(tp);
}
+bool cpu_topology__smt_on(const struct cpu_topology *topology)
+{
+ for (u32 i = 0; i < topology->core_cpus_lists; i++) {
+ const char *cpu_list = topology->core_cpus_list[i];
+
+ /*
+ * If there is a need to separate siblings in a core then SMT is
+ * enabled.
+ */
+ if (strchr(cpu_list, ',') || strchr(cpu_list, '-'))
+ return true;
+ }
+ return false;
+}
+
+bool cpu_topology__core_wide(const struct cpu_topology *topology,
+ const char *user_requested_cpu_list)
+{
+ struct perf_cpu_map *user_requested_cpus;
+
+ /*
+ * If user_requested_cpu_list is empty then all CPUs are recorded and so
+ * core_wide is true.
+ */
+ if (!user_requested_cpu_list)
+ return true;
+
+ user_requested_cpus = perf_cpu_map__new(user_requested_cpu_list);
+ /* Check that every user requested CPU is the complete set of SMT threads on a core. */
+ for (u32 i = 0; i < topology->core_cpus_lists; i++) {
+ const char *core_cpu_list = topology->core_cpus_list[i];
+ struct perf_cpu_map *core_cpus = perf_cpu_map__new(core_cpu_list);
+ struct perf_cpu cpu;
+ int idx;
+ bool has_first, first = true;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, core_cpus) {
+ if (first) {
+ has_first = perf_cpu_map__has(user_requested_cpus, cpu);
+ first = false;
+ } else {
+ /*
+ * If the first core CPU is user requested then
+ * all subsequent CPUs in the core must be user
+ * requested too. If the first CPU isn't user
+ * requested then none of the others must be
+ * too.
+ */
+ if (perf_cpu_map__has(user_requested_cpus, cpu) != has_first) {
+ perf_cpu_map__put(core_cpus);
+ perf_cpu_map__put(user_requested_cpus);
+ return false;
+ }
+ }
+ }
+ perf_cpu_map__put(core_cpus);
+ }
+ perf_cpu_map__put(user_requested_cpus);
+ return true;
+}
+
static bool has_die_topology(void)
{
char filename[MAXPATHLEN];
diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h
index 854e18f9041e..969e5920a00e 100644
--- a/tools/perf/util/cputopo.h
+++ b/tools/perf/util/cputopo.h
@@ -58,6 +58,11 @@ struct hybrid_topology {
struct cpu_topology *cpu_topology__new(void);
void cpu_topology__delete(struct cpu_topology *tp);
+/* Determine from the core list whether SMT was enabled. */
+bool cpu_topology__smt_on(const struct cpu_topology *topology);
+/* Are the sets of SMT siblings all enabled or all disabled in user_requested_cpus. */
+bool cpu_topology__core_wide(const struct cpu_topology *topology,
+ const char *user_requested_cpu_list);
struct numa_topology *numa_topology__new(void);
void numa_topology__delete(struct numa_topology *tp);
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 5ac13958d1bd..f1a14c0ad26d 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -501,6 +501,7 @@ static int __open_dso(struct dso *dso, struct machine *machine)
if (!name)
return -ENOMEM;
+ mutex_lock(&dso->lock);
if (machine)
root_dir = machine->root_dir;
@@ -541,6 +542,7 @@ static int __open_dso(struct dso *dso, struct machine *machine)
unlink(name);
out:
+ mutex_unlock(&dso->lock);
free(name);
return fd;
}
@@ -559,8 +561,11 @@ static int open_dso(struct dso *dso, struct machine *machine)
int fd;
struct nscookie nsc;
- if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE)
+ if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE) {
+ mutex_lock(&dso->lock);
nsinfo__mountns_enter(dso->nsinfo, &nsc);
+ mutex_unlock(&dso->lock);
+ }
fd = __open_dso(dso, machine);
if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE)
nsinfo__mountns_exit(&nsc);
@@ -795,7 +800,7 @@ dso_cache__free(struct dso *dso)
struct rb_root *root = &dso->data.cache;
struct rb_node *next = rb_first(root);
- pthread_mutex_lock(&dso->lock);
+ mutex_lock(&dso->lock);
while (next) {
struct dso_cache *cache;
@@ -804,7 +809,7 @@ dso_cache__free(struct dso *dso)
rb_erase(&cache->rb_node, root);
free(cache);
}
- pthread_mutex_unlock(&dso->lock);
+ mutex_unlock(&dso->lock);
}
static struct dso_cache *__dso_cache__find(struct dso *dso, u64 offset)
@@ -841,7 +846,7 @@ dso_cache__insert(struct dso *dso, struct dso_cache *new)
struct dso_cache *cache;
u64 offset = new->offset;
- pthread_mutex_lock(&dso->lock);
+ mutex_lock(&dso->lock);
while (*p != NULL) {
u64 end;
@@ -862,7 +867,7 @@ dso_cache__insert(struct dso *dso, struct dso_cache *new)
cache = NULL;
out:
- pthread_mutex_unlock(&dso->lock);
+ mutex_unlock(&dso->lock);
return cache;
}
@@ -1297,7 +1302,7 @@ struct dso *dso__new_id(const char *name, struct dso_id *id)
dso->root = NULL;
INIT_LIST_HEAD(&dso->node);
INIT_LIST_HEAD(&dso->data.open_entry);
- pthread_mutex_init(&dso->lock, NULL);
+ mutex_init(&dso->lock);
refcount_set(&dso->refcnt, 1);
}
@@ -1336,7 +1341,7 @@ void dso__delete(struct dso *dso)
dso__free_a2l(dso);
zfree(&dso->symsrc_filename);
nsinfo__zput(dso->nsinfo);
- pthread_mutex_destroy(&dso->lock);
+ mutex_destroy(&dso->lock);
free(dso);
}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 66981c7a9a18..58d94175e714 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -2,7 +2,6 @@
#ifndef __PERF_DSO
#define __PERF_DSO
-#include <pthread.h>
#include <linux/refcount.h>
#include <linux/types.h>
#include <linux/rbtree.h>
@@ -11,6 +10,7 @@
#include <stdio.h>
#include <linux/bitops.h>
#include "build-id.h"
+#include "mutex.h"
struct machine;
struct map;
@@ -145,7 +145,7 @@ struct dso_cache {
struct auxtrace_cache;
struct dso {
- pthread_mutex_t lock;
+ struct mutex lock;
struct list_head node;
struct rb_node rb_node; /* rbtree node sorted by long name */
struct rb_root *root; /* root of rbtree that rb_node is in */
diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h
index 040ab9d0a803..8fecc9fbaecc 100644
--- a/tools/perf/util/events_stats.h
+++ b/tools/perf/util/events_stats.h
@@ -47,6 +47,7 @@ struct hists_stats {
u64 total_non_filtered_period;
u32 nr_samples;
u32 nr_non_filtered_samples;
+ u32 nr_lost_samples;
};
void events_stats__inc(struct events_stats *stats, u32 type);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 48167f3941a6..6612b00949e7 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -15,6 +15,7 @@
#include "target.h"
#include "evlist.h"
#include "evsel.h"
+#include "record.h"
#include "debug.h"
#include "units.h"
#include "bpf_counter.h"
@@ -40,12 +41,14 @@
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/prctl.h>
+#include <sys/timerfd.h>
#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>
#include <linux/string.h>
+#include <linux/time64.h>
#include <linux/zalloc.h>
#include <perf/evlist.h>
#include <perf/evsel.h>
@@ -147,6 +150,7 @@ static void evlist__purge(struct evlist *evlist)
void evlist__exit(struct evlist *evlist)
{
+ event_enable_timer__exit(&evlist->eet);
zfree(&evlist->mmap);
zfree(&evlist->overwrite_mmap);
perf_evlist__exit(&evlist->core);
@@ -264,28 +268,6 @@ int evlist__add_dummy(struct evlist *evlist)
return 0;
}
-static void evlist__add_on_all_cpus(struct evlist *evlist, struct evsel *evsel)
-{
- evsel->core.system_wide = true;
-
- /*
- * All CPUs.
- *
- * Note perf_event_open() does not accept CPUs that are not online, so
- * in fact this CPU list will include only all online CPUs.
- */
- perf_cpu_map__put(evsel->core.own_cpus);
- evsel->core.own_cpus = perf_cpu_map__new(NULL);
- perf_cpu_map__put(evsel->core.cpus);
- evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
-
- /* No threads */
- perf_thread_map__put(evsel->core.threads);
- evsel->core.threads = perf_thread_map__new_dummy();
-
- evlist__add(evlist, evsel);
-}
-
struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide)
{
struct evsel *evsel = evlist__dummy_event(evlist);
@@ -298,17 +280,31 @@ struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide)
evsel->core.attr.exclude_hv = 1;
evsel->core.attr.freq = 0;
evsel->core.attr.sample_period = 1;
+ evsel->core.system_wide = system_wide;
evsel->no_aux_samples = true;
evsel->name = strdup("dummy:u");
- if (system_wide)
- evlist__add_on_all_cpus(evlist, evsel);
- else
- evlist__add(evlist, evsel);
-
+ evlist__add(evlist, evsel);
return evsel;
}
+struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide)
+{
+ struct evsel *evsel = evsel__newtp_idx("sched", "sched_switch", 0);
+
+ if (IS_ERR(evsel))
+ return evsel;
+
+ evsel__set_sample_bit(evsel, CPU);
+ evsel__set_sample_bit(evsel, TIME);
+
+ evsel->core.system_wide = system_wide;
+ evsel->no_aux_samples = true;
+
+ evlist__add(evlist, evsel);
+ return evsel;
+};
+
int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
{
struct evsel *evsel, *n;
@@ -480,7 +476,7 @@ static int evlist__is_enabled(struct evlist *evlist)
return false;
}
-static void __evlist__disable(struct evlist *evlist, char *evsel_name)
+static void __evlist__disable(struct evlist *evlist, char *evsel_name, bool excl_dummy)
{
struct evsel *pos;
struct evlist_cpu_iterator evlist_cpu_itr;
@@ -502,6 +498,8 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name)
continue;
if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
continue;
+ if (excl_dummy && evsel__is_dummy_event(pos))
+ continue;
if (pos->immediate)
has_imm = true;
if (pos->immediate != imm)
@@ -518,6 +516,8 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name)
continue;
if (!evsel__is_group_leader(pos) || !pos->core.fd)
continue;
+ if (excl_dummy && evsel__is_dummy_event(pos))
+ continue;
pos->disabled = true;
}
@@ -533,15 +533,20 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name)
void evlist__disable(struct evlist *evlist)
{
- __evlist__disable(evlist, NULL);
+ __evlist__disable(evlist, NULL, false);
+}
+
+void evlist__disable_non_dummy(struct evlist *evlist)
+{
+ __evlist__disable(evlist, NULL, true);
}
void evlist__disable_evsel(struct evlist *evlist, char *evsel_name)
{
- __evlist__disable(evlist, evsel_name);
+ __evlist__disable(evlist, evsel_name, false);
}
-static void __evlist__enable(struct evlist *evlist, char *evsel_name)
+static void __evlist__enable(struct evlist *evlist, char *evsel_name, bool excl_dummy)
{
struct evsel *pos;
struct evlist_cpu_iterator evlist_cpu_itr;
@@ -560,6 +565,8 @@ static void __evlist__enable(struct evlist *evlist, char *evsel_name)
continue;
if (!evsel__is_group_leader(pos) || !pos->core.fd)
continue;
+ if (excl_dummy && evsel__is_dummy_event(pos))
+ continue;
evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
}
affinity__cleanup(affinity);
@@ -568,6 +575,8 @@ static void __evlist__enable(struct evlist *evlist, char *evsel_name)
continue;
if (!evsel__is_group_leader(pos) || !pos->core.fd)
continue;
+ if (excl_dummy && evsel__is_dummy_event(pos))
+ continue;
pos->disabled = false;
}
@@ -581,12 +590,17 @@ static void __evlist__enable(struct evlist *evlist, char *evsel_name)
void evlist__enable(struct evlist *evlist)
{
- __evlist__enable(evlist, NULL);
+ __evlist__enable(evlist, NULL, false);
+}
+
+void evlist__enable_non_dummy(struct evlist *evlist)
+{
+ __evlist__enable(evlist, NULL, true);
}
void evlist__enable_evsel(struct evlist *evlist, char *evsel_name)
{
- __evlist__enable(evlist, evsel_name);
+ __evlist__enable(evlist, evsel_name, false);
}
void evlist__toggle_enable(struct evlist *evlist)
@@ -608,7 +622,8 @@ int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
int evlist__add_wakeup_eventfd(struct evlist *evlist, int fd)
{
return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN,
- fdarray_flag__nonfilterable);
+ fdarray_flag__nonfilterable |
+ fdarray_flag__non_perf_event);
}
#endif
@@ -1897,7 +1912,8 @@ int evlist__initialize_ctlfd(struct evlist *evlist, int fd, int ack)
}
evlist->ctl_fd.pos = perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN,
- fdarray_flag__nonfilterable);
+ fdarray_flag__nonfilterable |
+ fdarray_flag__non_perf_event);
if (evlist->ctl_fd.pos < 0) {
evlist->ctl_fd.pos = -1;
pr_err("Failed to add ctl fd entry: %m\n");
@@ -2147,20 +2163,234 @@ int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd)
return err;
}
-int evlist__ctlfd_update(struct evlist *evlist, struct pollfd *update)
+/**
+ * struct event_enable_time - perf record -D/--delay single time range.
+ * @start: start of time range to enable events in milliseconds
+ * @end: end of time range to enable events in milliseconds
+ *
+ * N.B. this structure is also accessed as an array of int.
+ */
+struct event_enable_time {
+ int start;
+ int end;
+};
+
+static int parse_event_enable_time(const char *str, struct event_enable_time *range, bool first)
{
- int ctlfd_pos = evlist->ctl_fd.pos;
- struct pollfd *entries = evlist->core.pollfd.entries;
+ const char *fmt = first ? "%u - %u %n" : " , %u - %u %n";
+ int ret, start, end, n;
- if (!evlist__ctlfd_initialized(evlist))
+ ret = sscanf(str, fmt, &start, &end, &n);
+ if (ret != 2 || end <= start)
+ return -EINVAL;
+ if (range) {
+ range->start = start;
+ range->end = end;
+ }
+ return n;
+}
+
+static ssize_t parse_event_enable_times(const char *str, struct event_enable_time *range)
+{
+ int incr = !!range;
+ bool first = true;
+ ssize_t ret, cnt;
+
+ for (cnt = 0; *str; cnt++) {
+ ret = parse_event_enable_time(str, range, first);
+ if (ret < 0)
+ return ret;
+ /* Check no overlap */
+ if (!first && range && range->start <= range[-1].end)
+ return -EINVAL;
+ str += ret;
+ range += incr;
+ first = false;
+ }
+ return cnt;
+}
+
+/**
+ * struct event_enable_timer - control structure for perf record -D/--delay.
+ * @evlist: event list
+ * @times: time ranges that events are enabled (N.B. this is also accessed as an
+ * array of int)
+ * @times_cnt: number of time ranges
+ * @timerfd: timer file descriptor
+ * @pollfd_pos: position in @evlist array of file descriptors to poll (fdarray)
+ * @times_step: current position in (int *)@times)[],
+ * refer event_enable_timer__process()
+ *
+ * Note, this structure is only used when there are time ranges, not when there
+ * is only an initial delay.
+ */
+struct event_enable_timer {
+ struct evlist *evlist;
+ struct event_enable_time *times;
+ size_t times_cnt;
+ int timerfd;
+ int pollfd_pos;
+ size_t times_step;
+};
+
+static int str_to_delay(const char *str)
+{
+ char *endptr;
+ long d;
+
+ d = strtol(str, &endptr, 10);
+ if (*endptr || d > INT_MAX || d < -1)
return 0;
+ return d;
+}
- if (entries[ctlfd_pos].fd != update->fd ||
- entries[ctlfd_pos].events != update->events)
- return -1;
+int evlist__parse_event_enable_time(struct evlist *evlist, struct record_opts *opts,
+ const char *str, int unset)
+{
+ enum fdarray_flags flags = fdarray_flag__nonfilterable | fdarray_flag__non_perf_event;
+ struct event_enable_timer *eet;
+ ssize_t times_cnt;
+ ssize_t ret;
+ int err;
+
+ if (unset)
+ return 0;
+
+ opts->initial_delay = str_to_delay(str);
+ if (opts->initial_delay)
+ return 0;
+
+ ret = parse_event_enable_times(str, NULL);
+ if (ret < 0)
+ return ret;
+
+ times_cnt = ret;
+ if (times_cnt == 0)
+ return -EINVAL;
+
+ eet = zalloc(sizeof(*eet));
+ if (!eet)
+ return -ENOMEM;
+
+ eet->times = calloc(times_cnt, sizeof(*eet->times));
+ if (!eet->times) {
+ err = -ENOMEM;
+ goto free_eet;
+ }
+
+ if (parse_event_enable_times(str, eet->times) != times_cnt) {
+ err = -EINVAL;
+ goto free_eet_times;
+ }
+
+ eet->times_cnt = times_cnt;
+
+ eet->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
+ if (eet->timerfd == -1) {
+ err = -errno;
+ pr_err("timerfd_create failed: %s\n", strerror(errno));
+ goto free_eet_times;
+ }
+
+ eet->pollfd_pos = perf_evlist__add_pollfd(&evlist->core, eet->timerfd, NULL, POLLIN, flags);
+ if (eet->pollfd_pos < 0) {
+ err = eet->pollfd_pos;
+ goto close_timerfd;
+ }
+
+ eet->evlist = evlist;
+ evlist->eet = eet;
+ opts->initial_delay = eet->times[0].start;
- entries[ctlfd_pos].revents = update->revents;
return 0;
+
+close_timerfd:
+ close(eet->timerfd);
+free_eet_times:
+ free(eet->times);
+free_eet:
+ free(eet);
+ return err;
+}
+
+static int event_enable_timer__set_timer(struct event_enable_timer *eet, int ms)
+{
+ struct itimerspec its = {
+ .it_value.tv_sec = ms / MSEC_PER_SEC,
+ .it_value.tv_nsec = (ms % MSEC_PER_SEC) * NSEC_PER_MSEC,
+ };
+ int err = 0;
+
+ if (timerfd_settime(eet->timerfd, 0, &its, NULL) < 0) {
+ err = -errno;
+ pr_err("timerfd_settime failed: %s\n", strerror(errno));
+ }
+ return err;
+}
+
+int event_enable_timer__start(struct event_enable_timer *eet)
+{
+ int ms;
+
+ if (!eet)
+ return 0;
+
+ ms = eet->times[0].end - eet->times[0].start;
+ eet->times_step = 1;
+
+ return event_enable_timer__set_timer(eet, ms);
+}
+
+int event_enable_timer__process(struct event_enable_timer *eet)
+{
+ struct pollfd *entries;
+ short revents;
+
+ if (!eet)
+ return 0;
+
+ entries = eet->evlist->core.pollfd.entries;
+ revents = entries[eet->pollfd_pos].revents;
+ entries[eet->pollfd_pos].revents = 0;
+
+ if (revents & POLLIN) {
+ size_t step = eet->times_step;
+ size_t pos = step / 2;
+
+ if (step & 1) {
+ evlist__disable_non_dummy(eet->evlist);
+ pr_info(EVLIST_DISABLED_MSG);
+ if (pos >= eet->times_cnt - 1) {
+ /* Disarm timer */
+ event_enable_timer__set_timer(eet, 0);
+ return 1; /* Stop */
+ }
+ } else {
+ evlist__enable_non_dummy(eet->evlist);
+ pr_info(EVLIST_ENABLED_MSG);
+ }
+
+ step += 1;
+ pos = step / 2;
+
+ if (pos < eet->times_cnt) {
+ int *times = (int *)eet->times; /* Accessing 'times' as array of int */
+ int ms = times[step] - times[step - 1];
+
+ eet->times_step = step;
+ return event_enable_timer__set_timer(eet, ms);
+ }
+ }
+
+ return 0;
+}
+
+void event_enable_timer__exit(struct event_enable_timer **ep)
+{
+ if (!ep || !*ep)
+ return;
+ free((*ep)->times);
+ zfree(ep);
}
struct evsel *evlist__find_evsel(struct evlist *evlist, int idx)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 351ba2887a79..16734c6756b3 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -48,6 +48,8 @@ enum bkw_mmap_state {
BKW_MMAP_EMPTY,
};
+struct event_enable_timer;
+
struct evlist {
struct perf_evlist core;
bool enabled;
@@ -79,6 +81,7 @@ struct evlist {
int ack; /* ack file descriptor for control commands */
int pos; /* index at evlist core object to check signals */
} ctl_fd;
+ struct event_enable_timer *eet;
};
struct evsel_str_handler {
@@ -124,6 +127,7 @@ static inline struct evsel *evlist__add_dummy_on_all_cpus(struct evlist *evlist)
{
return evlist__add_aux_dummy(evlist, true);
}
+struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide);
int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
evsel__sb_cb_t cb, void *data);
@@ -205,6 +209,8 @@ void evlist__enable(struct evlist *evlist);
void evlist__toggle_enable(struct evlist *evlist);
void evlist__disable_evsel(struct evlist *evlist, char *evsel_name);
void evlist__enable_evsel(struct evlist *evlist, char *evsel_name);
+void evlist__disable_non_dummy(struct evlist *evlist);
+void evlist__enable_non_dummy(struct evlist *evlist);
void evlist__set_selected(struct evlist *evlist, struct evsel *evsel);
@@ -418,13 +424,18 @@ void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close);
int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack);
int evlist__finalize_ctlfd(struct evlist *evlist);
bool evlist__ctlfd_initialized(struct evlist *evlist);
-int evlist__ctlfd_update(struct evlist *evlist, struct pollfd *update);
int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd);
int evlist__ctlfd_ack(struct evlist *evlist);
#define EVLIST_ENABLED_MSG "Events enabled\n"
#define EVLIST_DISABLED_MSG "Events disabled\n"
+int evlist__parse_event_enable_time(struct evlist *evlist, struct record_opts *opts,
+ const char *str, int unset);
+int event_enable_timer__start(struct event_enable_timer *eet);
+void event_enable_timer__exit(struct event_enable_timer **ep);
+int event_enable_timer__process(struct event_enable_timer *eet);
+
struct evsel *evlist__find_evsel(struct evlist *evlist, int idx);
int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 18c3eb864d55..76605fde3507 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -46,7 +46,11 @@
#include "string2.h"
#include "memswap.h"
#include "util.h"
-#include "hashmap.h"
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/hashmap.h>
+#else
+#include "util/hashmap.h"
+#endif
#include "pmu-hybrid.h"
#include "off_cpu.h"
#include "../perf-sys.h"
@@ -1157,6 +1161,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
attr->inherit = !opts->no_inherit;
attr->write_backward = opts->overwrite ? 1 : 0;
+ attr->read_format = PERF_FORMAT_LOST;
evsel__set_sample_bit(evsel, IP);
evsel__set_sample_bit(evsel, TID);
@@ -1808,7 +1813,7 @@ static struct perf_thread_map *empty_thread_map;
static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads)
{
- int nthreads;
+ int nthreads = perf_thread_map__nr(threads);
if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) ||
(perf_missing_features.aux_output && evsel->core.attr.aux_output))
@@ -1834,11 +1839,6 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
threads = empty_thread_map;
}
- if (evsel->core.system_wide)
- nthreads = 1;
- else
- nthreads = threads->nr;
-
if (evsel->core.fd == NULL &&
perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0)
return -ENOMEM;
@@ -1852,6 +1852,8 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
static void evsel__disable_missing_features(struct evsel *evsel)
{
+ if (perf_missing_features.read_lost)
+ evsel->core.attr.read_format &= ~PERF_FORMAT_LOST;
if (perf_missing_features.weight_struct) {
evsel__set_sample_bit(evsel, WEIGHT);
evsel__reset_sample_bit(evsel, WEIGHT_STRUCT);
@@ -1903,7 +1905,12 @@ bool evsel__detect_missing_features(struct evsel *evsel)
* Must probe features in the order they were added to the
* perf_event_attr interface.
*/
- if (!perf_missing_features.weight_struct &&
+ if (!perf_missing_features.read_lost &&
+ (evsel->core.attr.read_format & PERF_FORMAT_LOST)) {
+ perf_missing_features.read_lost = true;
+ pr_debug2("switching off PERF_FORMAT_LOST support\n");
+ return true;
+ } else if (!perf_missing_features.weight_struct &&
(evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) {
perf_missing_features.weight_struct = true;
pr_debug2("switching off weight struct support\n");
@@ -2049,10 +2056,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
if (threads == NULL)
threads = empty_thread_map;
- if (evsel->core.system_wide)
- nthreads = 1;
- else
- nthreads = threads->nr;
+ nthreads = perf_thread_map__nr(threads);
if (evsel->cgrp)
pid = evsel->cgrp->fd;
@@ -2077,6 +2081,7 @@ retry_open:
test_attr__ready();
+ /* Debug message used by test scripts */
pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags);
@@ -2102,6 +2107,7 @@ retry_open:
fd, group_fd, evsel->open_flags);
}
+ /* Debug message used by test scripts */
pr_debug2_peo(" = %d\n", fd);
if (evsel->bpf_fd >= 0) {
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index d927713b513e..989865e16aad 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -188,6 +188,7 @@ struct perf_missing_features {
bool data_page_size;
bool code_page_size;
bool weight_struct;
+ bool read_lost;
};
extern struct perf_missing_features perf_missing_features;
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index c15a9852fa41..aaacf514dc09 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -182,7 +182,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
{
struct expr_id_data *data_ptr = NULL, *old_data = NULL;
char *old_key = NULL;
- char *name, *p;
+ char *name;
int ret;
data_ptr = zalloc(sizeof(*data_ptr));
@@ -196,15 +196,6 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
}
/*
- * The jevents tool converts all metric expressions
- * to lowercase, including metric references, hence
- * we need to add lowercase name for metric, so it's
- * properly found.
- */
- for (p = name; *p; p++)
- *p = tolower(*p);
-
- /*
* Intentionally passing just const char pointers,
* originally from 'struct pmu_event' object.
* We don't need to change them, so there's no
@@ -310,7 +301,9 @@ struct expr_parse_ctx *expr__ctx_new(void)
free(ctx);
return NULL;
}
- ctx->runtime = 0;
+ ctx->sctx.user_requested_cpu_list = NULL;
+ ctx->sctx.runtime = 0;
+ ctx->sctx.system_wide = false;
return ctx;
}
@@ -332,6 +325,10 @@ void expr__ctx_free(struct expr_parse_ctx *ctx)
struct hashmap_entry *cur;
size_t bkt;
+ if (!ctx)
+ return;
+
+ free(ctx->sctx.user_requested_cpu_list);
hashmap__for_each_entry(ctx->ids, cur, bkt) {
free((char *)cur->key);
free(cur->value);
@@ -344,16 +341,13 @@ static int
__expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
bool compute_ids)
{
- struct expr_scanner_ctx scanner_ctx = {
- .runtime = ctx->runtime,
- };
YY_BUFFER_STATE buffer;
void *scanner;
int ret;
pr_debug2("parsing metric: %s\n", expr);
- ret = expr_lex_init_extra(&scanner_ctx, &scanner);
+ ret = expr_lex_init_extra(&ctx->sctx, &scanner);
if (ret)
return ret;
@@ -410,16 +404,11 @@ double arch_get_tsc_freq(void)
}
#endif
-double expr__get_literal(const char *literal)
+double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx)
{
static struct cpu_topology *topology;
double result = NAN;
- if (!strcasecmp("#smt_on", literal)) {
- result = smt_on() > 0 ? 1.0 : 0.0;
- goto out;
- }
-
if (!strcmp("#num_cpus", literal)) {
result = cpu__max_present_cpu().cpu;
goto out;
@@ -443,6 +432,15 @@ double expr__get_literal(const char *literal)
goto out;
}
}
+ if (!strcasecmp("#smt_on", literal)) {
+ result = smt_on(topology) ? 1.0 : 0.0;
+ goto out;
+ }
+ if (!strcmp("#core_wide", literal)) {
+ result = core_wide(ctx->system_wide, ctx->user_requested_cpu_list, topology)
+ ? 1.0 : 0.0;
+ goto out;
+ }
if (!strcmp("#num_packages", literal)) {
result = topology->package_cpus_lists;
goto out;
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index bd2116983bbb..d6c1668dc1a0 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -2,28 +2,27 @@
#ifndef PARSE_CTX_H
#define PARSE_CTX_H 1
-// There are fixes that need to land upstream before we can use libbpf's headers,
-// for now use our copy unconditionally, since the data structures at this point
-// are exactly the same, no problem.
-//#ifdef HAVE_LIBBPF_SUPPORT
-//#include <bpf/hashmap.h>
-//#else
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/hashmap.h>
+#else
#include "util/hashmap.h"
-//#endif
+#endif
struct metric_ref;
+struct expr_scanner_ctx {
+ char *user_requested_cpu_list;
+ int runtime;
+ bool system_wide;
+};
+
struct expr_parse_ctx {
struct hashmap *ids;
- int runtime;
+ struct expr_scanner_ctx sctx;
};
struct expr_id_data;
-struct expr_scanner_ctx {
- int runtime;
-};
-
struct hashmap *ids__new(void);
void ids__free(struct hashmap *ids);
int ids__insert(struct hashmap *ids, const char *id);
@@ -58,6 +57,6 @@ int expr__find_ids(const char *expr, const char *one,
double expr_id_data__value(const struct expr_id_data *data);
double expr_id_data__source_count(const struct expr_id_data *data);
-double expr__get_literal(const char *literal);
+double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx);
#endif
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
index 4dc8edbfd9ce..0168a9637330 100644
--- a/tools/perf/util/expr.l
+++ b/tools/perf/util/expr.l
@@ -79,11 +79,11 @@ static int str(yyscan_t scanner, int token, int runtime)
return token;
}
-static int literal(yyscan_t scanner)
+static int literal(yyscan_t scanner, const struct expr_scanner_ctx *sctx)
{
YYSTYPE *yylval = expr_get_lval(scanner);
- yylval->num = expr__get_literal(expr_get_text(scanner));
+ yylval->num = expr__get_literal(expr_get_text(scanner), sctx);
if (isnan(yylval->num))
return EXPR_ERROR;
@@ -108,7 +108,7 @@ min { return MIN; }
if { return IF; }
else { return ELSE; }
source_count { return SOURCE_COUNT; }
-{literal} { return literal(yyscanner); }
+{literal} { return literal(yyscanner, sctx); }
{number} { return value(yyscanner); }
{symbol} { return str(yyscanner, ID, sctx->runtime); }
"|" { return '|'; }
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index a30b825adb7b..635e562350c5 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -156,7 +156,7 @@ start: if_expr
}
;
-if_expr: expr IF expr ELSE expr
+if_expr: expr IF expr ELSE if_expr
{
if (fpclassify($3.val) == FP_ZERO) {
/*
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
index d81b54563e96..fefc72066c4e 100644
--- a/tools/perf/util/genelf.c
+++ b/tools/perf/util/genelf.c
@@ -345,6 +345,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym,
eh_frame_base_offset);
if (retval)
goto error;
+ retval = -1;
}
/*
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index b5c909546e3f..6af062d1c452 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -2,6 +2,8 @@
#ifndef __GENELF_H__
#define __GENELF_H__
+#include <linux/math.h>
+
/* genelf.c */
int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
const void *code, int csize, void *debug, int nr_debug_entries,
@@ -76,6 +78,6 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
#endif
/* The .text section is directly after the ELF header */
-#define GEN_ELF_TEXT_OFFSET sizeof(Elf_Ehdr)
+#define GEN_ELF_TEXT_OFFSET round_up(sizeof(Elf_Ehdr) + sizeof(Elf_Phdr), 16)
#endif
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index c30c29c51410..98dfaf84bd13 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -4295,8 +4295,6 @@ out:
size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
{
struct perf_record_event_update *ev = &event->event_update;
- struct perf_record_event_update_scale *ev_scale;
- struct perf_record_event_update_cpus *ev_cpus;
struct perf_cpu_map *map;
size_t ret;
@@ -4304,20 +4302,18 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
switch (ev->type) {
case PERF_EVENT_UPDATE__SCALE:
- ev_scale = (struct perf_record_event_update_scale *)ev->data;
- ret += fprintf(fp, "... scale: %f\n", ev_scale->scale);
+ ret += fprintf(fp, "... scale: %f\n", ev->scale.scale);
break;
case PERF_EVENT_UPDATE__UNIT:
- ret += fprintf(fp, "... unit: %s\n", ev->data);
+ ret += fprintf(fp, "... unit: %s\n", ev->unit);
break;
case PERF_EVENT_UPDATE__NAME:
- ret += fprintf(fp, "... name: %s\n", ev->data);
+ ret += fprintf(fp, "... name: %s\n", ev->name);
break;
case PERF_EVENT_UPDATE__CPUS:
- ev_cpus = (struct perf_record_event_update_cpus *)ev->data;
ret += fprintf(fp, "... ");
- map = cpu_map__new_data(&ev_cpus->cpus);
+ map = cpu_map__new_data(&ev->cpus.cpus);
if (map)
ret += cpu_map__fprintf(map, fp);
else
@@ -4374,8 +4370,6 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
struct evlist **pevlist)
{
struct perf_record_event_update *ev = &event->event_update;
- struct perf_record_event_update_scale *ev_scale;
- struct perf_record_event_update_cpus *ev_cpus;
struct evlist *evlist;
struct evsel *evsel;
struct perf_cpu_map *map;
@@ -4395,19 +4389,17 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
switch (ev->type) {
case PERF_EVENT_UPDATE__UNIT:
free((char *)evsel->unit);
- evsel->unit = strdup(ev->data);
+ evsel->unit = strdup(ev->unit);
break;
case PERF_EVENT_UPDATE__NAME:
free(evsel->name);
- evsel->name = strdup(ev->data);
+ evsel->name = strdup(ev->name);
break;
case PERF_EVENT_UPDATE__SCALE:
- ev_scale = (struct perf_record_event_update_scale *)ev->data;
- evsel->scale = ev_scale->scale;
+ evsel->scale = ev->scale.scale;
break;
case PERF_EVENT_UPDATE__CPUS:
- ev_cpus = (struct perf_record_event_update_cpus *)ev->data;
- map = cpu_map__new_data(&ev_cpus->cpus);
+ map = cpu_map__new_data(&ev->cpus.cpus);
if (map) {
perf_cpu_map__put(evsel->core.own_cpus);
evsel->core.own_cpus = map;
diff --git a/tools/perf/util/hisi-ptt-decoder/Build b/tools/perf/util/hisi-ptt-decoder/Build
new file mode 100644
index 000000000000..db3db8b75033
--- /dev/null
+++ b/tools/perf/util/hisi-ptt-decoder/Build
@@ -0,0 +1 @@
+perf-$(CONFIG_AUXTRACE) += hisi-ptt-pkt-decoder.o
diff --git a/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.c b/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.c
new file mode 100644
index 000000000000..a17c423a526d
--- /dev/null
+++ b/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * HiSilicon PCIe Trace and Tuning (PTT) support
+ * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+#include <linux/bitops.h>
+#include <stdarg.h>
+
+#include "../color.h"
+#include "hisi-ptt-pkt-decoder.h"
+
+/*
+ * For 8DW format, the bit[31:11] of DW0 is always 0x1fffff, which can be
+ * used to distinguish the data format.
+ * 8DW format is like:
+ * bits [ 31:11 ][ 10:0 ]
+ * |---------------------------------------|-------------------|
+ * DW0 [ 0x1fffff ][ Reserved (0x7ff) ]
+ * DW1 [ Prefix ]
+ * DW2 [ Header DW0 ]
+ * DW3 [ Header DW1 ]
+ * DW4 [ Header DW2 ]
+ * DW5 [ Header DW3 ]
+ * DW6 [ Reserved (0x0) ]
+ * DW7 [ Time ]
+ *
+ * 4DW format is like:
+ * bits [31:30] [ 29:25 ][24][23][22][21][ 20:11 ][ 10:0 ]
+ * |-----|---------|---|---|---|---|-------------|-------------|
+ * DW0 [ Fmt ][ Type ][T9][T8][TH][SO][ Length ][ Time ]
+ * DW1 [ Header DW1 ]
+ * DW2 [ Header DW2 ]
+ * DW3 [ Header DW3 ]
+ */
+
+enum hisi_ptt_8dw_pkt_field_type {
+ HISI_PTT_8DW_CHK_AND_RSV0,
+ HISI_PTT_8DW_PREFIX,
+ HISI_PTT_8DW_HEAD0,
+ HISI_PTT_8DW_HEAD1,
+ HISI_PTT_8DW_HEAD2,
+ HISI_PTT_8DW_HEAD3,
+ HISI_PTT_8DW_RSV1,
+ HISI_PTT_8DW_TIME,
+ HISI_PTT_8DW_TYPE_MAX
+};
+
+enum hisi_ptt_4dw_pkt_field_type {
+ HISI_PTT_4DW_HEAD1,
+ HISI_PTT_4DW_HEAD2,
+ HISI_PTT_4DW_HEAD3,
+ HISI_PTT_4DW_TYPE_MAX
+};
+
+static const char * const hisi_ptt_8dw_pkt_field_name[] = {
+ [HISI_PTT_8DW_PREFIX] = "Prefix",
+ [HISI_PTT_8DW_HEAD0] = "Header DW0",
+ [HISI_PTT_8DW_HEAD1] = "Header DW1",
+ [HISI_PTT_8DW_HEAD2] = "Header DW2",
+ [HISI_PTT_8DW_HEAD3] = "Header DW3",
+ [HISI_PTT_8DW_TIME] = "Time"
+};
+
+static const char * const hisi_ptt_4dw_pkt_field_name[] = {
+ [HISI_PTT_4DW_HEAD1] = "Header DW1",
+ [HISI_PTT_4DW_HEAD2] = "Header DW2",
+ [HISI_PTT_4DW_HEAD3] = "Header DW3",
+};
+
+union hisi_ptt_4dw {
+ struct {
+ uint32_t format : 2;
+ uint32_t type : 5;
+ uint32_t t9 : 1;
+ uint32_t t8 : 1;
+ uint32_t th : 1;
+ uint32_t so : 1;
+ uint32_t len : 10;
+ uint32_t time : 11;
+ };
+ uint32_t value;
+};
+
+static void hisi_ptt_print_pkt(const unsigned char *buf, int pos, const char *desc)
+{
+ const char *color = PERF_COLOR_BLUE;
+ int i;
+
+ printf(".");
+ color_fprintf(stdout, color, " %08x: ", pos);
+ for (i = 0; i < HISI_PTT_FIELD_LENTH; i++)
+ color_fprintf(stdout, color, "%02x ", buf[pos + i]);
+ for (i = 0; i < HISI_PTT_MAX_SPACE_LEN; i++)
+ color_fprintf(stdout, color, " ");
+ color_fprintf(stdout, color, " %s\n", desc);
+}
+
+static int hisi_ptt_8dw_kpt_desc(const unsigned char *buf, int pos)
+{
+ int i;
+
+ for (i = 0; i < HISI_PTT_8DW_TYPE_MAX; i++) {
+ /* Do not show 8DW check field and reserved fields */
+ if (i == HISI_PTT_8DW_CHK_AND_RSV0 || i == HISI_PTT_8DW_RSV1) {
+ pos += HISI_PTT_FIELD_LENTH;
+ continue;
+ }
+
+ hisi_ptt_print_pkt(buf, pos, hisi_ptt_8dw_pkt_field_name[i]);
+ pos += HISI_PTT_FIELD_LENTH;
+ }
+
+ return hisi_ptt_pkt_size[HISI_PTT_8DW_PKT];
+}
+
+static void hisi_ptt_4dw_print_dw0(const unsigned char *buf, int pos)
+{
+ const char *color = PERF_COLOR_BLUE;
+ union hisi_ptt_4dw dw0;
+ int i;
+
+ dw0.value = *(uint32_t *)(buf + pos);
+ printf(".");
+ color_fprintf(stdout, color, " %08x: ", pos);
+ for (i = 0; i < HISI_PTT_FIELD_LENTH; i++)
+ color_fprintf(stdout, color, "%02x ", buf[pos + i]);
+ for (i = 0; i < HISI_PTT_MAX_SPACE_LEN; i++)
+ color_fprintf(stdout, color, " ");
+
+ color_fprintf(stdout, color,
+ " %s %x %s %x %s %x %s %x %s %x %s %x %s %x %s %x\n",
+ "Format", dw0.format, "Type", dw0.type, "T9", dw0.t9,
+ "T8", dw0.t8, "TH", dw0.th, "SO", dw0.so, "Length",
+ dw0.len, "Time", dw0.time);
+}
+
+static int hisi_ptt_4dw_kpt_desc(const unsigned char *buf, int pos)
+{
+ int i;
+
+ hisi_ptt_4dw_print_dw0(buf, pos);
+ pos += HISI_PTT_FIELD_LENTH;
+
+ for (i = 0; i < HISI_PTT_4DW_TYPE_MAX; i++) {
+ hisi_ptt_print_pkt(buf, pos, hisi_ptt_4dw_pkt_field_name[i]);
+ pos += HISI_PTT_FIELD_LENTH;
+ }
+
+ return hisi_ptt_pkt_size[HISI_PTT_4DW_PKT];
+}
+
+int hisi_ptt_pkt_desc(const unsigned char *buf, int pos, enum hisi_ptt_pkt_type type)
+{
+ if (type == HISI_PTT_8DW_PKT)
+ return hisi_ptt_8dw_kpt_desc(buf, pos);
+
+ return hisi_ptt_4dw_kpt_desc(buf, pos);
+}
diff --git a/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.h b/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.h
new file mode 100644
index 000000000000..e78f1b5bc836
--- /dev/null
+++ b/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * HiSilicon PCIe Trace and Tuning (PTT) support
+ * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
+ */
+
+#ifndef INCLUDE__HISI_PTT_PKT_DECODER_H__
+#define INCLUDE__HISI_PTT_PKT_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define HISI_PTT_8DW_CHECK_MASK GENMASK(31, 11)
+#define HISI_PTT_IS_8DW_PKT GENMASK(31, 11)
+#define HISI_PTT_MAX_SPACE_LEN 10
+#define HISI_PTT_FIELD_LENTH 4
+
+enum hisi_ptt_pkt_type {
+ HISI_PTT_4DW_PKT,
+ HISI_PTT_8DW_PKT,
+ HISI_PTT_PKT_MAX
+};
+
+static int hisi_ptt_pkt_size[] = {
+ [HISI_PTT_4DW_PKT] = 16,
+ [HISI_PTT_8DW_PKT] = 32,
+};
+
+int hisi_ptt_pkt_desc(const unsigned char *buf, int pos, enum hisi_ptt_pkt_type type);
+
+#endif
diff --git a/tools/perf/util/hisi-ptt.c b/tools/perf/util/hisi-ptt.c
new file mode 100644
index 000000000000..45b614bb73bf
--- /dev/null
+++ b/tools/perf/util/hisi-ptt.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * HiSilicon PCIe Trace and Tuning (PTT) support
+ * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
+ */
+
+#include <byteswap.h>
+#include <endian.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/types.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "auxtrace.h"
+#include "color.h"
+#include "debug.h"
+#include "evsel.h"
+#include "hisi-ptt.h"
+#include "hisi-ptt-decoder/hisi-ptt-pkt-decoder.h"
+#include "machine.h"
+#include "session.h"
+#include "tool.h"
+#include <internal/lib.h>
+
+struct hisi_ptt {
+ struct auxtrace auxtrace;
+ u32 auxtrace_type;
+ struct perf_session *session;
+ struct machine *machine;
+ u32 pmu_type;
+};
+
+struct hisi_ptt_queue {
+ struct hisi_ptt *ptt;
+ struct auxtrace_buffer *buffer;
+};
+
+static enum hisi_ptt_pkt_type hisi_ptt_check_packet_type(unsigned char *buf)
+{
+ uint32_t head = *(uint32_t *)buf;
+
+ if ((HISI_PTT_8DW_CHECK_MASK & head) == HISI_PTT_IS_8DW_PKT)
+ return HISI_PTT_8DW_PKT;
+
+ return HISI_PTT_4DW_PKT;
+}
+
+static void hisi_ptt_dump(struct hisi_ptt *ptt __maybe_unused,
+ unsigned char *buf, size_t len)
+{
+ const char *color = PERF_COLOR_BLUE;
+ enum hisi_ptt_pkt_type type;
+ size_t pos = 0;
+ int pkt_len;
+
+ type = hisi_ptt_check_packet_type(buf);
+ len = round_down(len, hisi_ptt_pkt_size[type]);
+ color_fprintf(stdout, color, ". ... HISI PTT data: size %zu bytes\n",
+ len);
+
+ while (len > 0) {
+ pkt_len = hisi_ptt_pkt_desc(buf, pos, type);
+ if (!pkt_len)
+ color_fprintf(stdout, color, " Bad packet!\n");
+
+ pos += pkt_len;
+ len -= pkt_len;
+ }
+}
+
+static void hisi_ptt_dump_event(struct hisi_ptt *ptt, unsigned char *buf,
+ size_t len)
+{
+ printf(".\n");
+
+ hisi_ptt_dump(ptt, buf, len);
+}
+
+static int hisi_ptt_process_event(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ return 0;
+}
+
+static int hisi_ptt_process_auxtrace_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt,
+ auxtrace);
+ int fd = perf_data__fd(session->data);
+ int size = event->auxtrace.size;
+ void *data = malloc(size);
+ off_t data_offset;
+ int err;
+
+ if (!data)
+ return -errno;
+
+ if (perf_data__is_pipe(session->data)) {
+ data_offset = 0;
+ } else {
+ data_offset = lseek(fd, 0, SEEK_CUR);
+ if (data_offset == -1)
+ return -errno;
+ }
+
+ err = readn(fd, data, size);
+ if (err != (ssize_t)size) {
+ free(data);
+ return -errno;
+ }
+
+ if (dump_trace)
+ hisi_ptt_dump_event(ptt, data, size);
+
+ return 0;
+}
+
+static int hisi_ptt_flush(struct perf_session *session __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ return 0;
+}
+
+static void hisi_ptt_free_events(struct perf_session *session __maybe_unused)
+{
+}
+
+static void hisi_ptt_free(struct perf_session *session)
+{
+ struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt,
+ auxtrace);
+
+ session->auxtrace = NULL;
+ free(ptt);
+}
+
+static bool hisi_ptt_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt, auxtrace);
+
+ return evsel->core.attr.type == ptt->pmu_type;
+}
+
+static void hisi_ptt_print_info(__u64 type)
+{
+ if (!dump_trace)
+ return;
+
+ fprintf(stdout, " PMU Type %" PRId64 "\n", (s64) type);
+}
+
+int hisi_ptt_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session)
+{
+ struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
+ struct hisi_ptt *ptt;
+
+ if (auxtrace_info->header.size < HISI_PTT_AUXTRACE_PRIV_SIZE +
+ sizeof(struct perf_record_auxtrace_info))
+ return -EINVAL;
+
+ ptt = zalloc(sizeof(*ptt));
+ if (!ptt)
+ return -ENOMEM;
+
+ ptt->session = session;
+ ptt->machine = &session->machines.host; /* No kvm support */
+ ptt->auxtrace_type = auxtrace_info->type;
+ ptt->pmu_type = auxtrace_info->priv[0];
+
+ ptt->auxtrace.process_event = hisi_ptt_process_event;
+ ptt->auxtrace.process_auxtrace_event = hisi_ptt_process_auxtrace_event;
+ ptt->auxtrace.flush_events = hisi_ptt_flush;
+ ptt->auxtrace.free_events = hisi_ptt_free_events;
+ ptt->auxtrace.free = hisi_ptt_free;
+ ptt->auxtrace.evsel_is_auxtrace = hisi_ptt_evsel_is_auxtrace;
+ session->auxtrace = &ptt->auxtrace;
+
+ hisi_ptt_print_info(auxtrace_info->priv[0]);
+
+ return 0;
+}
diff --git a/tools/perf/util/hisi-ptt.h b/tools/perf/util/hisi-ptt.h
new file mode 100644
index 000000000000..2db9b4056214
--- /dev/null
+++ b/tools/perf/util/hisi-ptt.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * HiSilicon PCIe Trace and Tuning (PTT) support
+ * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
+ */
+
+#ifndef INCLUDE__PERF_HISI_PTT_H__
+#define INCLUDE__PERF_HISI_PTT_H__
+
+#define HISI_PTT_PMU_NAME "hisi_ptt"
+#define HISI_PTT_AUXTRACE_PRIV_SIZE sizeof(u64)
+
+struct auxtrace_record *hisi_ptt_recording_init(int *err,
+ struct perf_pmu *hisi_ptt_pmu);
+
+int hisi_ptt_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session);
+
+#endif
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 1c085ab56534..17a05e943b44 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -215,6 +215,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
+ hists__new_col_len(hists, HISTC_ADDR, BITS_PER_LONG / 4 + 2);
if (symbol_conf.nanosecs)
hists__new_col_len(hists, HISTC_TIME, 16);
@@ -1622,13 +1623,13 @@ struct rb_root_cached *hists__get_rotate_entries_in(struct hists *hists)
{
struct rb_root_cached *root;
- pthread_mutex_lock(&hists->lock);
+ mutex_lock(&hists->lock);
root = hists->entries_in;
if (++hists->entries_in > &hists->entries_in_array[1])
hists->entries_in = &hists->entries_in_array[0];
- pthread_mutex_unlock(&hists->lock);
+ mutex_unlock(&hists->lock);
return root;
}
@@ -2335,6 +2336,11 @@ void hists__inc_nr_samples(struct hists *hists, bool filtered)
hists->stats.nr_non_filtered_samples++;
}
+void hists__inc_nr_lost_samples(struct hists *hists, u32 lost)
+{
+ hists->stats.nr_lost_samples += lost;
+}
+
static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
struct hist_entry *pair)
{
@@ -2678,12 +2684,16 @@ size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp,
evlist__for_each_entry(evlist, pos) {
struct hists *hists = evsel__hists(pos);
- if (skip_empty && !hists->stats.nr_samples)
+ if (skip_empty && !hists->stats.nr_samples && !hists->stats.nr_lost_samples)
continue;
ret += fprintf(fp, "%s stats:\n", evsel__name(pos));
- ret += fprintf(fp, "%16s events: %10d\n",
- "SAMPLE", hists->stats.nr_samples);
+ if (hists->stats.nr_samples)
+ ret += fprintf(fp, "%16s events: %10d\n",
+ "SAMPLE", hists->stats.nr_samples);
+ if (hists->stats.nr_lost_samples)
+ ret += fprintf(fp, "%16s events: %10d\n",
+ "LOST_SAMPLES", hists->stats.nr_lost_samples);
}
return ret;
@@ -2805,7 +2815,7 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
hists->entries_in = &hists->entries_in_array[0];
hists->entries_collapsed = RB_ROOT_CACHED;
hists->entries = RB_ROOT_CACHED;
- pthread_mutex_init(&hists->lock, NULL);
+ mutex_init(&hists->lock);
hists->socket_filter = -1;
hists->hpp_list = hpp_list;
INIT_LIST_HEAD(&hists->hpp_formats);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 7ed4648d2fc2..ebd8a8f783ee 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -4,10 +4,10 @@
#include <linux/rbtree.h>
#include <linux/types.h>
-#include <pthread.h>
#include "evsel.h"
#include "color.h"
#include "events_stats.h"
+#include "mutex.h"
struct hist_entry;
struct hist_entry_ops;
@@ -79,6 +79,7 @@ enum hist_column {
HISTC_GLOBAL_P_STAGE_CYC,
HISTC_ADDR_FROM,
HISTC_ADDR_TO,
+ HISTC_ADDR,
HISTC_NR_COLS, /* Last entry */
};
@@ -98,7 +99,7 @@ struct hists {
const struct dso *dso_filter;
const char *uid_filter_str;
const char *symbol_filter_str;
- pthread_mutex_t lock;
+ struct mutex lock;
struct hists_stats stats;
u64 event_stream;
u16 col_len[HISTC_NR_COLS];
@@ -201,6 +202,7 @@ void hists__reset_stats(struct hists *hists);
void hists__inc_stats(struct hists *hists, struct hist_entry *h);
void hists__inc_nr_events(struct hists *hists);
void hists__inc_nr_samples(struct hists *hists, bool filtered);
+void hists__inc_nr_lost_samples(struct hists *hists, u32 lost);
size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
int max_cols, float min_pcnt, FILE *fp,
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
index aa0c5179836d..75e2248416f5 100644
--- a/tools/perf/util/include/linux/linkage.h
+++ b/tools/perf/util/include/linux/linkage.h
@@ -115,4 +115,17 @@
SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK)
#endif
+// In the kernel sources (include/linux/cfi_types.h), this has a different
+// definition when CONFIG_CFI_CLANG is used, for tools/ just use the !clang
+// definition:
+#ifndef SYM_TYPED_START
+#define SYM_TYPED_START(name, linkage, align...) \
+ SYM_START(name, linkage, align)
+#endif
+
+#ifndef SYM_TYPED_FUNC_START
+#define SYM_TYPED_FUNC_START(name) \
+ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
+#endif
+
#endif /* PERF_LINUX_LINKAGE_H_ */
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
index 5f5dfc8753f3..ef55d6232cf0 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
@@ -5,12 +5,16 @@
*/
#include <stdio.h>
+#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdarg.h>
#include <stdbool.h>
#include <string.h>
+#include <linux/zalloc.h>
+#include <linux/kernel.h>
+
#include "intel-pt-log.h"
#include "intel-pt-insn-decoder.h"
@@ -18,18 +22,33 @@
#define MAX_LOG_NAME 256
+#define DFLT_BUF_SZ (16 * 1024)
+
+struct log_buf {
+ char *buf;
+ size_t buf_sz;
+ size_t head;
+ bool wrapped;
+ FILE *backend;
+};
+
static FILE *f;
static char log_name[MAX_LOG_NAME];
bool intel_pt_enable_logging;
+static bool intel_pt_dump_log_on_error;
+static unsigned int intel_pt_log_on_error_size;
+static struct log_buf log_buf;
void *intel_pt_log_fp(void)
{
return f;
}
-void intel_pt_log_enable(void)
+void intel_pt_log_enable(bool dump_log_on_error, unsigned int log_on_error_size)
{
intel_pt_enable_logging = true;
+ intel_pt_dump_log_on_error = dump_log_on_error;
+ intel_pt_log_on_error_size = log_on_error_size;
}
void intel_pt_log_disable(void)
@@ -74,6 +93,100 @@ static void intel_pt_print_no_data(uint64_t pos, int indent)
fprintf(f, " ");
}
+static ssize_t log_buf__write(void *cookie, const char *buf, size_t size)
+{
+ struct log_buf *b = cookie;
+ size_t sz = size;
+
+ if (!b->buf)
+ return size;
+
+ while (sz) {
+ size_t space = b->buf_sz - b->head;
+ size_t n = min(space, sz);
+
+ memcpy(b->buf + b->head, buf, n);
+ sz -= n;
+ buf += n;
+ b->head += n;
+ if (sz && b->head >= b->buf_sz) {
+ b->head = 0;
+ b->wrapped = true;
+ }
+ }
+ return size;
+}
+
+static int log_buf__close(void *cookie)
+{
+ struct log_buf *b = cookie;
+
+ zfree(&b->buf);
+ return 0;
+}
+
+static FILE *log_buf__open(struct log_buf *b, FILE *backend, unsigned int sz)
+{
+ cookie_io_functions_t fns = {
+ .write = log_buf__write,
+ .close = log_buf__close,
+ };
+ FILE *file;
+
+ memset(b, 0, sizeof(*b));
+ b->buf_sz = sz;
+ b->buf = malloc(b->buf_sz);
+ b->backend = backend;
+ file = fopencookie(b, "a", fns);
+ if (!file)
+ zfree(&b->buf);
+ return file;
+}
+
+static bool remove_first_line(const char **p, size_t *n)
+{
+ for (; *n && **p != '\n'; ++*p, --*n)
+ ;
+ if (*n) {
+ *p += 1;
+ *n -= 1;
+ return true;
+ }
+ return false;
+}
+
+static void write_lines(const char *p, size_t n, FILE *fp, bool *remove_first)
+{
+ if (*remove_first)
+ *remove_first = !remove_first_line(&p, &n);
+ fwrite(p, n, 1, fp);
+}
+
+static void log_buf__dump(struct log_buf *b)
+{
+ bool remove_first = false;
+
+ if (!b->buf)
+ return;
+
+ fflush(f); /* Could update b->head and b->wrapped */
+ fprintf(b->backend, "Dumping debug log buffer\n");
+ if (b->wrapped) {
+ remove_first = true;
+ write_lines(b->buf + b->head, b->buf_sz - b->head, b->backend, &remove_first);
+ }
+ write_lines(b->buf, b->head, b->backend, &remove_first);
+ fprintf(b->backend, "End of debug log buffer dump\n");
+
+ b->head = 0;
+ b->wrapped = false;
+}
+
+void intel_pt_log_dump_buf(void)
+{
+ log_buf__dump(&log_buf);
+}
+
static int intel_pt_log_open(void)
{
if (!intel_pt_enable_logging)
@@ -86,6 +199,8 @@ static int intel_pt_log_open(void)
f = fopen(log_name, "w+");
else
f = stdout;
+ if (f && intel_pt_dump_log_on_error)
+ f = log_buf__open(&log_buf, f, intel_pt_log_on_error_size);
if (!f) {
intel_pt_enable_logging = false;
return -1;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
index d900aab24b21..354d7d23fc81 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
@@ -14,9 +14,10 @@
struct intel_pt_pkt;
void *intel_pt_log_fp(void);
-void intel_pt_log_enable(void);
+void intel_pt_log_enable(bool dump_log_on_error, unsigned int log_on_error_size);
void intel_pt_log_disable(void);
void intel_pt_log_set_name(const char *name);
+void intel_pt_log_dump_buf(void);
void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
uint64_t pos, const unsigned char *buf);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index d5e9fc8106dd..e3548ddef254 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -842,7 +842,8 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
offset, buf,
INTEL_PT_INSN_BUF_SZ);
if (len <= 0) {
- intel_pt_log("ERROR: failed to read at %" PRIu64 " ", offset);
+ intel_pt_log("ERROR: failed to read at offset %#" PRIx64 " ",
+ offset);
if (intel_pt_enable_logging)
dso__fprintf(al.map->dso, intel_pt_log_fp());
return -EINVAL;
@@ -2418,6 +2419,8 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
pid_t pid, pid_t tid, u64 ip, u64 timestamp,
pid_t machine_pid, int vcpu)
{
+ bool dump_log_on_error = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR;
+ bool log_on_stdout = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT;
union perf_event event;
char msg[MAX_AUXTRACE_ERROR_MSG];
int err;
@@ -2437,6 +2440,16 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
code, cpu, pid, tid, ip, msg, timestamp,
machine_pid, vcpu);
+ if (intel_pt_enable_logging && !log_on_stdout) {
+ FILE *fp = intel_pt_log_fp();
+
+ if (fp)
+ perf_event__fprintf_auxtrace_error(&event, fp);
+ }
+
+ if (code != INTEL_PT_ERR_LOST && dump_log_on_error)
+ intel_pt_log_dump_buf();
+
err = perf_session__deliver_synth_event(pt->session, &event, NULL);
if (err)
pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
@@ -4033,6 +4046,7 @@ static const char * const intel_pt_info_fmts[] = {
[INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
[INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n",
[INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n",
+ [INTEL_PT_MTC_FREQ_BITS] = " MTC freq bits %#"PRIx64"\n",
[INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n",
[INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n",
[INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n",
@@ -4047,8 +4061,12 @@ static void intel_pt_print_info(__u64 *arr, int start, int finish)
if (!dump_trace)
return;
- for (i = start; i <= finish; i++)
- fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
+ for (i = start; i <= finish; i++) {
+ const char *fmt = intel_pt_info_fmts[i];
+
+ if (fmt)
+ fprintf(stdout, fmt, arr[i]);
+ }
}
static void intel_pt_print_info_str(const char *name, const char *str)
@@ -4271,8 +4289,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
goto err_delete_thread;
}
- if (pt->synth_opts.log)
- intel_pt_log_enable();
+ if (pt->synth_opts.log) {
+ bool log_on_error = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR;
+ unsigned int log_on_error_size = pt->synth_opts.log_on_error_size;
+
+ intel_pt_log_enable(log_on_error, log_on_error_size);
+ }
/* Maximum non-turbo ratio is TSC freq / 100 MHz */
if (pt->tc.time_mult) {
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 4e6632203704..0e033278fa12 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -56,13 +56,6 @@ struct jit_buf_desc {
char dir[PATH_MAX];
};
-struct debug_line_info {
- unsigned long vma;
- unsigned int lineno;
- /* The filename format is unspecified, absolute path, relative etc. */
- char const filename[];
-};
-
struct jit_tool {
struct perf_tool tool;
struct perf_data output;
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
index 2146efc33396..b8cb8830b7bc 100644
--- a/tools/perf/util/lock-contention.h
+++ b/tools/perf/util/lock-contention.h
@@ -11,6 +11,7 @@ struct lock_stat {
u64 addr; /* address of lockdep_map, used as ID */
char *name; /* for strcpy(), we cannot use const */
+ u64 *callstack;
unsigned int nr_acquire;
unsigned int nr_acquired;
@@ -113,7 +114,9 @@ struct lock_contention {
struct machine *machine;
struct hlist_head *result;
unsigned long map_nr_entries;
- unsigned long lost;
+ int lost;
+ int max_stack;
+ int stack_skip;
};
#ifdef HAVE_BPF_SKEL
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 2a16cae28407..76316e459c3d 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1128,10 +1128,6 @@ static struct dso *machine__get_kernel(struct machine *machine)
return kernel;
}
-struct process_args {
- u64 start;
-};
-
void machine__get_kallsyms_filename(struct machine *machine, char *buf,
size_t bufsz)
{
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index e0aa4a254583..f3a3d9b3a40d 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -181,7 +181,10 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
if (!(prot & PROT_EXEC))
dso__set_loaded(dso);
}
+ mutex_lock(&dso->lock);
+ nsinfo__put(dso->nsinfo);
dso->nsinfo = nsi;
+ mutex_unlock(&dso->lock);
if (build_id__is_defined(bid)) {
dso__set_build_id(dso, bid);
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 764883183519..b3a91093069a 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -156,11 +156,12 @@ void perf_mem_events__list(void)
for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
struct perf_mem_event *e = perf_mem_events__ptr(j);
- fprintf(stderr, "%-13s%-*s%s\n",
- e->tag ?: "",
- verbose > 0 ? 25 : 0,
- verbose > 0 ? perf_mem_events__name(j, NULL) : "",
- e->supported ? ": available" : "");
+ fprintf(stderr, "%-*s%-*s%s",
+ e->tag ? 13 : 0,
+ e->tag ? : "",
+ e->tag && verbose > 0 ? 25 : 0,
+ e->tag && verbose > 0 ? perf_mem_events__name(j, NULL) : "",
+ e->supported ? ": available\n" : "");
}
}
@@ -281,7 +282,7 @@ static const char * const mem_lvl[] = {
"HIT",
"MISS",
"L1",
- "LFB",
+ "LFB/MAB",
"L2",
"L3",
"Local RAM",
@@ -294,8 +295,10 @@ static const char * const mem_lvl[] = {
};
static const char * const mem_lvlnum[] = {
+ [PERF_MEM_LVLNUM_CXL] = "CXL",
+ [PERF_MEM_LVLNUM_IO] = "I/O",
[PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache",
- [PERF_MEM_LVLNUM_LFB] = "LFB",
+ [PERF_MEM_LVLNUM_LFB] = "LFB/MAB",
[PERF_MEM_LVLNUM_RAM] = "RAM",
[PERF_MEM_LVLNUM_PMEM] = "PMEM",
[PERF_MEM_LVLNUM_NA] = "N/A",
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index c93bcaf6d55d..4c98ac29ee13 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -22,6 +22,7 @@
#include <linux/list_sort.h>
#include <linux/string.h>
#include <linux/zalloc.h>
+#include <perf/cpumap.h>
#include <subcmd/parse-options.h>
#include <api/fs/fs.h>
#include "util.h"
@@ -108,17 +109,6 @@ void metricgroup__rblist_exit(struct rblist *metric_events)
rblist__exit(metric_events);
}
-/*
- * A node in the list of referenced metrics. metric_expr
- * is held as a convenience to avoid a search through the
- * metric list.
- */
-struct metric_ref_node {
- const char *metric_name;
- const char *metric_expr;
- struct list_head list;
-};
-
/**
* The metric under construction. The data held here will be placed in a
* metric_expr.
@@ -189,10 +179,24 @@ static bool metricgroup__has_constraint(const struct pmu_event *pe)
return false;
}
+static void metric__free(struct metric *m)
+{
+ if (!m)
+ return;
+
+ free(m->metric_refs);
+ expr__ctx_free(m->pctx);
+ free((char *)m->modifier);
+ evlist__delete(m->evlist);
+ free(m);
+}
+
static struct metric *metric__new(const struct pmu_event *pe,
const char *modifier,
bool metric_no_group,
- int runtime)
+ int runtime,
+ const char *user_requested_cpu_list,
+ bool system_wide)
{
struct metric *m;
@@ -201,35 +205,34 @@ static struct metric *metric__new(const struct pmu_event *pe,
return NULL;
m->pctx = expr__ctx_new();
- if (!m->pctx) {
- free(m);
- return NULL;
- }
+ if (!m->pctx)
+ goto out_err;
m->metric_name = pe->metric_name;
- m->modifier = modifier ? strdup(modifier) : NULL;
- if (modifier && !m->modifier) {
- expr__ctx_free(m->pctx);
- free(m);
- return NULL;
+ m->modifier = NULL;
+ if (modifier) {
+ m->modifier = strdup(modifier);
+ if (!m->modifier)
+ goto out_err;
}
m->metric_expr = pe->metric_expr;
m->metric_unit = pe->unit;
- m->pctx->runtime = runtime;
+ m->pctx->sctx.user_requested_cpu_list = NULL;
+ if (user_requested_cpu_list) {
+ m->pctx->sctx.user_requested_cpu_list = strdup(user_requested_cpu_list);
+ if (!m->pctx->sctx.user_requested_cpu_list)
+ goto out_err;
+ }
+ m->pctx->sctx.runtime = runtime;
+ m->pctx->sctx.system_wide = system_wide;
m->has_constraint = metric_no_group || metricgroup__has_constraint(pe);
m->metric_refs = NULL;
m->evlist = NULL;
return m;
-}
-
-static void metric__free(struct metric *m)
-{
- free(m->metric_refs);
- expr__ctx_free(m->pctx);
- free((char *)m->modifier);
- evlist__delete(m->evlist);
- free(m);
+out_err:
+ metric__free(m);
+ return NULL;
}
static bool contains_metric_id(struct evsel **metric_events, int num_events,
@@ -874,6 +877,8 @@ struct metricgroup_add_iter_data {
int *ret;
bool *has_match;
bool metric_no_group;
+ const char *user_requested_cpu_list;
+ bool system_wide;
struct metric *root_metric;
const struct visited_metric *visited;
const struct pmu_events_table *table;
@@ -887,6 +892,8 @@ static int add_metric(struct list_head *metric_list,
const struct pmu_event *pe,
const char *modifier,
bool metric_no_group,
+ const char *user_requested_cpu_list,
+ bool system_wide,
struct metric *root_metric,
const struct visited_metric *visited,
const struct pmu_events_table *table);
@@ -899,6 +906,8 @@ static int add_metric(struct list_head *metric_list,
* @metric_no_group: Should events written to events be grouped "{}" or
* global. Grouping is the default but due to multiplexing the
* user may override.
+ * @user_requested_cpu_list: Command line specified CPUs to record on.
+ * @system_wide: Are events for all processes recorded.
* @root_metric: Metrics may reference other metrics to form a tree. In this
* case the root_metric holds all the IDs and a list of referenced
* metrics. When adding a root this argument is NULL.
@@ -910,6 +919,8 @@ static int add_metric(struct list_head *metric_list,
static int resolve_metric(struct list_head *metric_list,
const char *modifier,
bool metric_no_group,
+ const char *user_requested_cpu_list,
+ bool system_wide,
struct metric *root_metric,
const struct visited_metric *visited,
const struct pmu_events_table *table)
@@ -956,7 +967,8 @@ static int resolve_metric(struct list_head *metric_list,
*/
for (i = 0; i < pending_cnt; i++) {
ret = add_metric(metric_list, &pending[i].pe, modifier, metric_no_group,
- root_metric, visited, table);
+ user_requested_cpu_list, system_wide, root_metric, visited,
+ table);
if (ret)
break;
}
@@ -974,6 +986,8 @@ static int resolve_metric(struct list_head *metric_list,
* global. Grouping is the default but due to multiplexing the
* user may override.
* @runtime: A special argument for the parser only known at runtime.
+ * @user_requested_cpu_list: Command line specified CPUs to record on.
+ * @system_wide: Are events for all processes recorded.
* @root_metric: Metrics may reference other metrics to form a tree. In this
* case the root_metric holds all the IDs and a list of referenced
* metrics. When adding a root this argument is NULL.
@@ -987,6 +1001,8 @@ static int __add_metric(struct list_head *metric_list,
const char *modifier,
bool metric_no_group,
int runtime,
+ const char *user_requested_cpu_list,
+ bool system_wide,
struct metric *root_metric,
const struct visited_metric *visited,
const struct pmu_events_table *table)
@@ -1011,7 +1027,8 @@ static int __add_metric(struct list_head *metric_list,
* This metric is the root of a tree and may reference other
* metrics that are added recursively.
*/
- root_metric = metric__new(pe, modifier, metric_no_group, runtime);
+ root_metric = metric__new(pe, modifier, metric_no_group, runtime,
+ user_requested_cpu_list, system_wide);
if (!root_metric)
return -ENOMEM;
@@ -1060,8 +1077,9 @@ static int __add_metric(struct list_head *metric_list,
ret = -EINVAL;
} else {
/* Resolve referenced metrics. */
- ret = resolve_metric(metric_list, modifier, metric_no_group, root_metric,
- &visited_node, table);
+ ret = resolve_metric(metric_list, modifier, metric_no_group,
+ user_requested_cpu_list, system_wide,
+ root_metric, &visited_node, table);
}
if (ret) {
@@ -1109,6 +1127,8 @@ static int add_metric(struct list_head *metric_list,
const struct pmu_event *pe,
const char *modifier,
bool metric_no_group,
+ const char *user_requested_cpu_list,
+ bool system_wide,
struct metric *root_metric,
const struct visited_metric *visited,
const struct pmu_events_table *table)
@@ -1119,7 +1139,8 @@ static int add_metric(struct list_head *metric_list,
if (!strstr(pe->metric_expr, "?")) {
ret = __add_metric(metric_list, pe, modifier, metric_no_group, 0,
- root_metric, visited, table);
+ user_requested_cpu_list, system_wide, root_metric,
+ visited, table);
} else {
int j, count;
@@ -1132,7 +1153,8 @@ static int add_metric(struct list_head *metric_list,
for (j = 0; j < count && !ret; j++)
ret = __add_metric(metric_list, pe, modifier, metric_no_group, j,
- root_metric, visited, table);
+ user_requested_cpu_list, system_wide,
+ root_metric, visited, table);
}
return ret;
@@ -1149,6 +1171,7 @@ static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe,
return 0;
ret = add_metric(d->metric_list, pe, d->modifier, d->metric_no_group,
+ d->user_requested_cpu_list, d->system_wide,
d->root_metric, d->visited, d->table);
if (ret)
goto out;
@@ -1191,7 +1214,9 @@ struct metricgroup__add_metric_data {
struct list_head *list;
const char *metric_name;
const char *modifier;
+ const char *user_requested_cpu_list;
bool metric_no_group;
+ bool system_wide;
bool has_match;
};
@@ -1208,8 +1233,8 @@ static int metricgroup__add_metric_callback(const struct pmu_event *pe,
data->has_match = true;
ret = add_metric(data->list, pe, data->modifier, data->metric_no_group,
- /*root_metric=*/NULL,
- /*visited_metrics=*/NULL, table);
+ data->user_requested_cpu_list, data->system_wide,
+ /*root_metric=*/NULL, /*visited_metrics=*/NULL, table);
}
return ret;
}
@@ -1223,12 +1248,16 @@ static int metricgroup__add_metric_callback(const struct pmu_event *pe,
* @metric_no_group: Should events written to events be grouped "{}" or
* global. Grouping is the default but due to multiplexing the
* user may override.
+ * @user_requested_cpu_list: Command line specified CPUs to record on.
+ * @system_wide: Are events for all processes recorded.
* @metric_list: The list that the metric or metric group are added to.
* @table: The table that is searched for metrics, most commonly the table for the
* architecture perf is running upon.
*/
static int metricgroup__add_metric(const char *metric_name, const char *modifier,
bool metric_no_group,
+ const char *user_requested_cpu_list,
+ bool system_wide,
struct list_head *metric_list,
const struct pmu_events_table *table)
{
@@ -1242,6 +1271,8 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier
.metric_name = metric_name,
.modifier = modifier,
.metric_no_group = metric_no_group,
+ .user_requested_cpu_list = user_requested_cpu_list,
+ .system_wide = system_wide,
.has_match = false,
};
/*
@@ -1263,6 +1294,8 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier
.metric_name = metric_name,
.modifier = modifier,
.metric_no_group = metric_no_group,
+ .user_requested_cpu_list = user_requested_cpu_list,
+ .system_wide = system_wide,
.has_match = &has_match,
.ret = &ret,
.table = table,
@@ -1293,12 +1326,15 @@ out:
* @metric_no_group: Should events written to events be grouped "{}" or
* global. Grouping is the default but due to multiplexing the
* user may override.
+ * @user_requested_cpu_list: Command line specified CPUs to record on.
+ * @system_wide: Are events for all processes recorded.
* @metric_list: The list that metrics are added to.
* @table: The table that is searched for metrics, most commonly the table for the
* architecture perf is running upon.
*/
static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
- struct list_head *metric_list,
+ const char *user_requested_cpu_list,
+ bool system_wide, struct list_head *metric_list,
const struct pmu_events_table *table)
{
char *list_itr, *list_copy, *metric_name, *modifier;
@@ -1315,8 +1351,8 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
*modifier++ = '\0';
ret = metricgroup__add_metric(metric_name, modifier,
- metric_no_group, metric_list,
- table);
+ metric_no_group, user_requested_cpu_list,
+ system_wide, metric_list, table);
if (ret == -EINVAL)
pr_err("Cannot find metric or group `%s'\n", metric_name);
@@ -1505,6 +1541,8 @@ err_out:
static int parse_groups(struct evlist *perf_evlist, const char *str,
bool metric_no_group,
bool metric_no_merge,
+ const char *user_requested_cpu_list,
+ bool system_wide,
struct perf_pmu *fake_pmu,
struct rblist *metric_events_list,
const struct pmu_events_table *table)
@@ -1518,7 +1556,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
if (metric_events_list->nr_entries == 0)
metricgroup__rblist_init(metric_events_list);
ret = metricgroup__add_metric_list(str, metric_no_group,
- &metric_list, table);
+ user_requested_cpu_list,
+ system_wide, &metric_list, table);
if (ret)
goto out;
@@ -1626,7 +1665,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
}
expr->metric_unit = m->metric_unit;
expr->metric_events = metric_events;
- expr->runtime = m->pctx->runtime;
+ expr->runtime = m->pctx->sctx.runtime;
list_add(&expr->nd, &me->head);
}
@@ -1646,20 +1685,22 @@ out:
return ret;
}
-int metricgroup__parse_groups(const struct option *opt,
+int metricgroup__parse_groups(struct evlist *perf_evlist,
const char *str,
bool metric_no_group,
bool metric_no_merge,
+ const char *user_requested_cpu_list,
+ bool system_wide,
struct rblist *metric_events)
{
- struct evlist *perf_evlist = *(struct evlist **)opt->value;
const struct pmu_events_table *table = pmu_events_table__find();
if (!table)
return -EINVAL;
- return parse_groups(perf_evlist, str, metric_no_group,
- metric_no_merge, NULL, metric_events, table);
+ return parse_groups(perf_evlist, str, metric_no_group, metric_no_merge,
+ user_requested_cpu_list, system_wide,
+ /*fake_pmu=*/NULL, metric_events, table);
}
int metricgroup__parse_groups_test(struct evlist *evlist,
@@ -1669,8 +1710,10 @@ int metricgroup__parse_groups_test(struct evlist *evlist,
bool metric_no_merge,
struct rblist *metric_events)
{
- return parse_groups(evlist, str, metric_no_group,
- metric_no_merge, &perf_pmu__fake, metric_events, table);
+ return parse_groups(evlist, str, metric_no_group, metric_no_merge,
+ /*user_requested_cpu_list=*/NULL,
+ /*system_wide=*/false,
+ &perf_pmu__fake, metric_events, table);
}
static int metricgroup__has_metric_callback(const struct pmu_event *pe,
@@ -1703,7 +1746,7 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
struct rblist *new_metric_events,
struct rblist *old_metric_events)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < rblist__nr_entries(old_metric_events); i++) {
struct rb_node *nd;
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 016b3b1a289a..732d3a0d3334 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -64,10 +64,12 @@ struct metric_expr {
struct metric_event *metricgroup__lookup(struct rblist *metric_events,
struct evsel *evsel,
bool create);
-int metricgroup__parse_groups(const struct option *opt,
+int metricgroup__parse_groups(struct evlist *perf_evlist,
const char *str,
bool metric_no_group,
bool metric_no_merge,
+ const char *user_requested_cpu_list,
+ bool system_wide,
struct rblist *metric_events);
int metricgroup__parse_groups_test(struct evlist *evlist,
const struct pmu_events_table *table,
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index cd8b0777473b..cd4ccec7f361 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -9,7 +9,6 @@
#include <linux/bitops.h>
#include <perf/cpumap.h>
#include <stdbool.h>
-#include <pthread.h> // for cpu_set_t
#ifdef HAVE_AIO_SUPPORT
#include <aio.h>
#endif
diff --git a/tools/perf/util/mutex.c b/tools/perf/util/mutex.c
new file mode 100644
index 000000000000..bca7f0717f35
--- /dev/null
+++ b/tools/perf/util/mutex.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "mutex.h"
+
+#include "debug.h"
+#include <linux/string.h>
+#include <errno.h>
+
+static void check_err(const char *fn, int err)
+{
+ char sbuf[STRERR_BUFSIZE];
+
+ if (err == 0)
+ return;
+
+ pr_err("%s error: '%s'\n", fn, str_error_r(err, sbuf, sizeof(sbuf)));
+}
+
+#define CHECK_ERR(err) check_err(__func__, err)
+
+static void __mutex_init(struct mutex *mtx, bool pshared)
+{
+ pthread_mutexattr_t attr;
+
+ CHECK_ERR(pthread_mutexattr_init(&attr));
+
+#ifndef NDEBUG
+ /* In normal builds enable error checking, such as recursive usage. */
+ CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK));
+#endif
+ if (pshared)
+ CHECK_ERR(pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED));
+
+ CHECK_ERR(pthread_mutex_init(&mtx->lock, &attr));
+ CHECK_ERR(pthread_mutexattr_destroy(&attr));
+}
+
+void mutex_init(struct mutex *mtx)
+{
+ __mutex_init(mtx, /*pshared=*/false);
+}
+
+void mutex_init_pshared(struct mutex *mtx)
+{
+ __mutex_init(mtx, /*pshared=*/true);
+}
+
+void mutex_destroy(struct mutex *mtx)
+{
+ CHECK_ERR(pthread_mutex_destroy(&mtx->lock));
+}
+
+void mutex_lock(struct mutex *mtx)
+ NO_THREAD_SAFETY_ANALYSIS
+{
+ CHECK_ERR(pthread_mutex_lock(&mtx->lock));
+}
+
+void mutex_unlock(struct mutex *mtx)
+ NO_THREAD_SAFETY_ANALYSIS
+{
+ CHECK_ERR(pthread_mutex_unlock(&mtx->lock));
+}
+
+bool mutex_trylock(struct mutex *mtx)
+{
+ int ret = pthread_mutex_trylock(&mtx->lock);
+
+ if (ret == 0)
+ return true; /* Lock acquired. */
+
+ if (ret == EBUSY)
+ return false; /* Lock busy. */
+
+ /* Print error. */
+ CHECK_ERR(ret);
+ return false;
+}
+
+static void __cond_init(struct cond *cnd, bool pshared)
+{
+ pthread_condattr_t attr;
+
+ CHECK_ERR(pthread_condattr_init(&attr));
+ if (pshared)
+ CHECK_ERR(pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED));
+
+ CHECK_ERR(pthread_cond_init(&cnd->cond, &attr));
+ CHECK_ERR(pthread_condattr_destroy(&attr));
+}
+
+void cond_init(struct cond *cnd)
+{
+ __cond_init(cnd, /*pshared=*/false);
+}
+
+void cond_init_pshared(struct cond *cnd)
+{
+ __cond_init(cnd, /*pshared=*/true);
+}
+
+void cond_destroy(struct cond *cnd)
+{
+ CHECK_ERR(pthread_cond_destroy(&cnd->cond));
+}
+
+void cond_wait(struct cond *cnd, struct mutex *mtx)
+{
+ CHECK_ERR(pthread_cond_wait(&cnd->cond, &mtx->lock));
+}
+
+void cond_signal(struct cond *cnd)
+{
+ CHECK_ERR(pthread_cond_signal(&cnd->cond));
+}
+
+void cond_broadcast(struct cond *cnd)
+{
+ CHECK_ERR(pthread_cond_broadcast(&cnd->cond));
+}
diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h
new file mode 100644
index 000000000000..40661120cacc
--- /dev/null
+++ b/tools/perf/util/mutex.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MUTEX_H
+#define __PERF_MUTEX_H
+
+#include <pthread.h>
+#include <stdbool.h>
+
+/*
+ * A function-like feature checking macro that is a wrapper around
+ * `__has_attribute`, which is defined by GCC 5+ and Clang and evaluates to a
+ * nonzero constant integer if the attribute is supported or 0 if not.
+ */
+#ifdef __has_attribute
+#define HAVE_ATTRIBUTE(x) __has_attribute(x)
+#else
+#define HAVE_ATTRIBUTE(x) 0
+#endif
+
+#if HAVE_ATTRIBUTE(guarded_by) && HAVE_ATTRIBUTE(pt_guarded_by) && \
+ HAVE_ATTRIBUTE(lockable) && HAVE_ATTRIBUTE(exclusive_lock_function) && \
+ HAVE_ATTRIBUTE(exclusive_trylock_function) && HAVE_ATTRIBUTE(exclusive_locks_required) && \
+ HAVE_ATTRIBUTE(no_thread_safety_analysis)
+
+/* Documents if a shared field or global variable needs to be protected by a mutex. */
+#define GUARDED_BY(x) __attribute__((guarded_by(x)))
+
+/*
+ * Documents if the memory location pointed to by a pointer should be guarded by
+ * a mutex when dereferencing the pointer.
+ */
+#define PT_GUARDED_BY(x) __attribute__((pt_guarded_by(x)))
+
+/* Documents if a type is a lockable type. */
+#define LOCKABLE __attribute__((lockable))
+
+/* Documents functions that acquire a lock in the body of a function, and do not release it. */
+#define EXCLUSIVE_LOCK_FUNCTION(...) __attribute__((exclusive_lock_function(__VA_ARGS__)))
+
+/*
+ * Documents functions that expect a lock to be held on entry to the function,
+ * and release it in the body of the function.
+ */
+#define UNLOCK_FUNCTION(...) __attribute__((unlock_function(__VA_ARGS__)))
+
+/* Documents functions that try to acquire a lock, and return success or failure. */
+#define EXCLUSIVE_TRYLOCK_FUNCTION(...) \
+ __attribute__((exclusive_trylock_function(__VA_ARGS__)))
+
+/* Documents a function that expects a mutex to be held prior to entry. */
+#define EXCLUSIVE_LOCKS_REQUIRED(...) __attribute__((exclusive_locks_required(__VA_ARGS__)))
+
+/* Turns off thread safety checking within the body of a particular function. */
+#define NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis))
+
+#else
+
+#define GUARDED_BY(x)
+#define PT_GUARDED_BY(x)
+#define LOCKABLE
+#define EXCLUSIVE_LOCK_FUNCTION(...)
+#define UNLOCK_FUNCTION(...)
+#define EXCLUSIVE_TRYLOCK_FUNCTION(...)
+#define EXCLUSIVE_LOCKS_REQUIRED(...)
+#define NO_THREAD_SAFETY_ANALYSIS
+
+#endif
+
+/*
+ * A wrapper around the mutex implementation that allows perf to error check
+ * usage, etc.
+ */
+struct LOCKABLE mutex {
+ pthread_mutex_t lock;
+};
+
+/* A wrapper around the condition variable implementation. */
+struct cond {
+ pthread_cond_t cond;
+};
+
+/* Default initialize the mtx struct. */
+void mutex_init(struct mutex *mtx);
+/*
+ * Initialize the mtx struct and set the process-shared rather than default
+ * process-private attribute.
+ */
+void mutex_init_pshared(struct mutex *mtx);
+void mutex_destroy(struct mutex *mtx);
+
+void mutex_lock(struct mutex *mtx) EXCLUSIVE_LOCK_FUNCTION(*mtx);
+void mutex_unlock(struct mutex *mtx) UNLOCK_FUNCTION(*mtx);
+/* Tries to acquire the lock and returns true on success. */
+bool mutex_trylock(struct mutex *mtx) EXCLUSIVE_TRYLOCK_FUNCTION(true, *mtx);
+
+/* Default initialize the cond struct. */
+void cond_init(struct cond *cnd);
+/*
+ * Initialize the cond struct and specify the process-shared rather than default
+ * process-private attribute.
+ */
+void cond_init_pshared(struct cond *cnd);
+void cond_destroy(struct cond *cnd);
+
+void cond_wait(struct cond *cnd, struct mutex *mtx) EXCLUSIVE_LOCKS_REQUIRED(mtx);
+void cond_signal(struct cond *cnd);
+void cond_broadcast(struct cond *cnd);
+
+#endif /* __PERF_MUTEX_H */
diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
index bb4aa88c50a8..31faf2bb49ff 100644
--- a/tools/perf/util/parse-branch-options.c
+++ b/tools/perf/util/parse-branch-options.c
@@ -32,6 +32,7 @@ static const struct branch_mode branch_modes[] = {
BRANCH_OPT("call", PERF_SAMPLE_BRANCH_CALL),
BRANCH_OPT("save_type", PERF_SAMPLE_BRANCH_TYPE_SAVE),
BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK),
+ BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE),
BRANCH_END
};
@@ -101,8 +102,10 @@ parse_branch_stack(const struct option *opt, const char *str, int unset)
/*
* cannot set it twice, -b + --branch-filter for instance
*/
- if (*mode)
+ if (*mode) {
+ pr_err("Error: Can't use --branch-any (-b) with --branch-filter (-j).\n");
return -1;
+ }
return parse_branch_str(str, mode);
}
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index f3b2c2a87456..5973f46c2375 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -150,14 +150,6 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
},
};
-#define __PERF_EVENT_FIELD(config, name) \
- ((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT)
-
-#define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW)
-#define PERF_EVENT_CONFIG(config) __PERF_EVENT_FIELD(config, CONFIG)
-#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE)
-#define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT)
-
bool is_event_supported(u8 type, u64 config)
{
bool ret = true;
@@ -254,6 +246,9 @@ __add_event(struct list_head *list, int *idx,
struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) :
cpu_list ? perf_cpu_map__new(cpu_list) : NULL;
+ if (pmu)
+ perf_pmu__warn_invalid_formats(pmu);
+
if (pmu && attr->type == PERF_TYPE_RAW)
perf_pmu__warn_invalid_config(pmu, attr->config, name);
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 98af3fa4ea35..7e5e7b30510d 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -52,7 +52,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
- bit_name(TYPE_SAVE), bit_name(HW_INDEX),
+ bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE),
{ .name = NULL, }
};
#undef bit_name
@@ -64,7 +64,7 @@ static void __p_read_format(char *buf, size_t size, u64 value)
#define bit_name(n) { PERF_FORMAT_##n, #n }
struct bit_names bits[] = {
bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
- bit_name(ID), bit_name(GROUP),
+ bit_name(ID), bit_name(GROUP), bit_name(LOST),
{ .name = NULL, }
};
#undef bit_name
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 89655d53117a..03284059175f 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1005,6 +1005,23 @@ err:
return NULL;
}
+void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu)
+{
+ struct perf_pmu_format *format;
+
+ /* fake pmu doesn't have format list */
+ if (pmu == &perf_pmu__fake)
+ return;
+
+ list_for_each_entry(format, &pmu->format, list)
+ if (format->value >= PERF_PMU_FORMAT_VALUE_CONFIG_END) {
+ pr_warning("WARNING: '%s' format '%s' requires 'perf_event_attr::config%d'"
+ "which is not supported by this version of perf!\n",
+ pmu->name, format->name, format->value);
+ return;
+ }
+}
+
static struct perf_pmu *pmu_find(const char *name)
{
struct perf_pmu *pmu;
@@ -1182,7 +1199,7 @@ static char *pmu_formats_string(struct list_head *formats)
struct perf_pmu_format *format;
char *str = NULL;
struct strbuf buf = STRBUF_INIT;
- unsigned i = 0;
+ unsigned int i = 0;
if (!formats)
return NULL;
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index a7b0f9507510..68e15c38ae71 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -17,6 +17,7 @@ enum {
PERF_PMU_FORMAT_VALUE_CONFIG,
PERF_PMU_FORMAT_VALUE_CONFIG1,
PERF_PMU_FORMAT_VALUE_CONFIG2,
+ PERF_PMU_FORMAT_VALUE_CONFIG_END,
};
#define PERF_PMU_FORMAT_BITS 64
@@ -139,6 +140,7 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu);
void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
const char *name);
+void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
bool perf_pmu__has_hybrid(void);
int perf_pmu__match(char *pattern, char *name, char *tok);
diff --git a/tools/perf/util/pmu.l b/tools/perf/util/pmu.l
index a15d9fbd7c0e..58b4926cfaca 100644
--- a/tools/perf/util/pmu.l
+++ b/tools/perf/util/pmu.l
@@ -27,8 +27,6 @@ num_dec [0-9]+
{num_dec} { return value(10); }
config { return PP_CONFIG; }
-config1 { return PP_CONFIG1; }
-config2 { return PP_CONFIG2; }
- { return '-'; }
: { return ':'; }
, { return ','; }
diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y
index bfd7e8509869..e675d79a0274 100644
--- a/tools/perf/util/pmu.y
+++ b/tools/perf/util/pmu.y
@@ -10,8 +10,6 @@
#include <string.h>
#include "pmu.h"
-extern int perf_pmu_lex (void);
-
#define ABORT_ON(val) \
do { \
if (val) \
@@ -20,7 +18,7 @@ do { \
%}
-%token PP_CONFIG PP_CONFIG1 PP_CONFIG2
+%token PP_CONFIG
%token PP_VALUE PP_ERROR
%type <num> PP_VALUE
%type <bits> bit_term
@@ -47,18 +45,11 @@ PP_CONFIG ':' bits
$3));
}
|
-PP_CONFIG1 ':' bits
+PP_CONFIG PP_VALUE ':' bits
{
ABORT_ON(perf_pmu__new_format(format, name,
- PERF_PMU_FORMAT_VALUE_CONFIG1,
- $3));
-}
-|
-PP_CONFIG2 ':' bits
-{
- ABORT_ON(perf_pmu__new_format(format, name,
- PERF_PMU_FORMAT_VALUE_CONFIG2,
- $3));
+ $2,
+ $4));
}
bits:
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 785246ff4179..0c24bc7afbca 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -29,6 +29,7 @@
#include "color.h"
#include "map.h"
#include "maps.h"
+#include "mutex.h"
#include "symbol.h"
#include <api/fs/fs.h>
#include "trace-event.h" /* For __maybe_unused */
@@ -180,8 +181,10 @@ struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user)
map = dso__new_map(target);
if (map && map->dso) {
+ mutex_lock(&map->dso->lock);
nsinfo__put(map->dso->nsinfo);
map->dso->nsinfo = nsinfo__get(nsi);
+ mutex_unlock(&map->dso->lock);
}
return map;
} else {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 192c9274f7ad..1a4f10de29ff 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -943,6 +943,11 @@ static void perf_event__cpu_map_swap(union perf_event *event,
default:
pr_err("cpu_map swap: unsupported long size\n");
}
+ break;
+ case PERF_CPU_MAP__RANGE_CPUS:
+ data->range_cpu_data.start_cpu = bswap_16(data->range_cpu_data.start_cpu);
+ data->range_cpu_data.end_cpu = bswap_16(data->range_cpu_data.end_cpu);
+ break;
default:
break;
}
@@ -1180,7 +1185,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
e->flags.abort ? "A" : " ",
e->flags.in_tx ? "T" : " ",
(unsigned)e->flags.reserved,
- e->flags.type ? branch_type_name(e->flags.type) : "");
+ get_branch_type(e));
} else {
if (i == 0) {
printf("..... %2"PRIu64": %016" PRIx64 "\n"
diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c
index 2b0a36ebf27a..994e9e418227 100644
--- a/tools/perf/util/smt.c
+++ b/tools/perf/util/smt.c
@@ -1,99 +1,37 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <linux/bitops.h>
+// SPDX-License-Identifier: GPL-2.0-only
+#include <string.h>
#include "api/fs/fs.h"
+#include "cputopo.h"
#include "smt.h"
-/**
- * hweight_str - Returns the number of bits set in str. Stops at first non-hex
- * or ',' character.
- */
-static int hweight_str(char *str)
-{
- int result = 0;
-
- while (*str) {
- switch (*str++) {
- case '0':
- case ',':
- break;
- case '1':
- case '2':
- case '4':
- case '8':
- result++;
- break;
- case '3':
- case '5':
- case '6':
- case '9':
- case 'a':
- case 'A':
- case 'c':
- case 'C':
- result += 2;
- break;
- case '7':
- case 'b':
- case 'B':
- case 'd':
- case 'D':
- case 'e':
- case 'E':
- result += 3;
- break;
- case 'f':
- case 'F':
- result += 4;
- break;
- default:
- goto done;
- }
- }
-done:
- return result;
-}
-
-int smt_on(void)
+bool smt_on(const struct cpu_topology *topology)
{
static bool cached;
- static int cached_result;
- int cpu;
- int ncpu;
+ static bool cached_result;
+ int fs_value;
if (cached)
return cached_result;
- if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) {
- cached = true;
- return cached_result;
- }
-
- cached_result = 0;
- ncpu = sysconf(_SC_NPROCESSORS_CONF);
- for (cpu = 0; cpu < ncpu; cpu++) {
- unsigned long long siblings;
- char *str;
- size_t strlen;
- char fn[256];
+ if (sysfs__read_int("devices/system/cpu/smt/active", &fs_value) >= 0)
+ cached_result = (fs_value == 1);
+ else
+ cached_result = cpu_topology__smt_on(topology);
- snprintf(fn, sizeof fn,
- "devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
- if (sysfs__read_str(fn, &str, &strlen) < 0) {
- snprintf(fn, sizeof fn,
- "devices/system/cpu/cpu%d/topology/core_cpus", cpu);
- if (sysfs__read_str(fn, &str, &strlen) < 0)
- continue;
- }
- /* Entry is hex, but does not have 0x, so need custom parser */
- siblings = hweight_str(str);
- free(str);
- if (siblings > 1) {
- cached_result = 1;
- break;
- }
- }
cached = true;
return cached_result;
}
+
+bool core_wide(bool system_wide, const char *user_requested_cpu_list,
+ const struct cpu_topology *topology)
+{
+ /* If not everything running on a core is being recorded then we can't use core_wide. */
+ if (!system_wide)
+ return false;
+
+ /* Cheap case that SMT is disabled and therefore we're inherently core_wide. */
+ if (!smt_on(topology))
+ return true;
+
+ return cpu_topology__core_wide(topology, user_requested_cpu_list);
+}
diff --git a/tools/perf/util/smt.h b/tools/perf/util/smt.h
index b8414b7bcbc8..ae9095f2c38c 100644
--- a/tools/perf/util/smt.h
+++ b/tools/perf/util/smt.h
@@ -1,6 +1,17 @@
-#ifndef SMT_H
-#define SMT_H 1
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SMT_H
+#define __SMT_H 1
-int smt_on(void);
+struct cpu_topology;
-#endif
+/* Returns true if SMT (aka hyperthreading) is enabled. */
+bool smt_on(const struct cpu_topology *topology);
+
+/*
+ * Returns true when system wide and all SMT threads for a core are in the
+ * user_requested_cpus map.
+ */
+bool core_wide(bool system_wide, const char *user_requested_cpu_list,
+ const struct cpu_topology *topology);
+
+#endif /* __SMT_H */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 6d5588e80935..2e7330867e2e 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1948,6 +1948,43 @@ struct sort_entry sort_dso_size = {
.se_width_idx = HISTC_DSO_SIZE,
};
+/* --sort dso_size */
+
+static int64_t
+sort__addr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ u64 left_ip = left->ip;
+ u64 right_ip = right->ip;
+ struct map *left_map = left->ms.map;
+ struct map *right_map = right->ms.map;
+
+ if (left_map)
+ left_ip = left_map->unmap_ip(left_map, left_ip);
+ if (right_map)
+ right_ip = right_map->unmap_ip(right_map, right_ip);
+
+ return _sort__addr_cmp(left_ip, right_ip);
+}
+
+static int hist_entry__addr_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ u64 ip = he->ip;
+ struct map *map = he->ms.map;
+
+ if (map)
+ ip = map->unmap_ip(map, ip);
+
+ return repsep_snprintf(bf, size, "%-#*llx", width, ip);
+}
+
+struct sort_entry sort_addr = {
+ .se_header = "Address",
+ .se_cmp = sort__addr_cmp,
+ .se_snprintf = hist_entry__addr_snprintf,
+ .se_width_idx = HISTC_ADDR,
+};
+
struct sort_dimension {
const char *name;
@@ -1997,6 +2034,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
+ DIM(SORT_ADDR, "addr", sort_addr),
};
#undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 2ddc00d1c464..04ff8b61a2a7 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -34,7 +34,6 @@ extern struct sort_entry sort_dso_to;
extern struct sort_entry sort_sym_from;
extern struct sort_entry sort_sym_to;
extern struct sort_entry sort_srcline;
-extern enum sort_type sort__first_dimension;
extern const char default_mem_sort_order[];
struct res_sample {
@@ -237,6 +236,7 @@ enum sort_type {
SORT_GLOBAL_INS_LAT,
SORT_LOCAL_PIPELINE_STAGE_CYC,
SORT_GLOBAL_PIPELINE_STAGE_CYC,
+ SORT_ADDR,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
@@ -295,7 +295,6 @@ struct block_hist {
};
extern struct sort_entry sort_thread;
-extern struct list_head hist_entry__sort_list;
struct evlist;
struct tep_handle;
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index b82844cb0ce7..ba66bb7fc1ca 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -67,7 +67,7 @@ static void print_noise(struct perf_stat_config *config,
return;
ps = evsel->stats;
- print_noise_pct(config, stddev_stats(&ps->res_stats[0]), avg);
+ print_noise_pct(config, stddev_stats(&ps->res_stats), avg);
}
static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel)
@@ -168,7 +168,7 @@ static void aggr_printout(struct perf_stat_config *config,
id.socket,
id.die,
id.core);
- } else if (id.core > -1) {
+ } else if (id.cpu.cpu > -1) {
fprintf(config->output, "\"cpu\" : \"%d\", ",
id.cpu.cpu);
}
@@ -179,7 +179,7 @@ static void aggr_printout(struct perf_stat_config *config,
id.die,
config->csv_output ? 0 : -3,
id.core, config->csv_sep);
- } else if (id.core > -1) {
+ } else if (id.cpu.cpu > -1) {
fprintf(config->output, "CPU%*d%s",
config->csv_output ? 0 : -7,
id.cpu.cpu, config->csv_sep);
@@ -189,14 +189,14 @@ static void aggr_printout(struct perf_stat_config *config,
case AGGR_THREAD:
if (config->json_output) {
fprintf(config->output, "\"thread\" : \"%s-%d\", ",
- perf_thread_map__comm(evsel->core.threads, id.thread),
- perf_thread_map__pid(evsel->core.threads, id.thread));
+ perf_thread_map__comm(evsel->core.threads, id.thread_idx),
+ perf_thread_map__pid(evsel->core.threads, id.thread_idx));
} else {
fprintf(config->output, "%*s-%*d%s",
config->csv_output ? 0 : 16,
- perf_thread_map__comm(evsel->core.threads, id.thread),
+ perf_thread_map__comm(evsel->core.threads, id.thread_idx),
config->csv_output ? 0 : -8,
- perf_thread_map__pid(evsel->core.threads, id.thread),
+ perf_thread_map__pid(evsel->core.threads, id.thread_idx),
config->csv_sep);
}
break;
@@ -273,7 +273,7 @@ static void new_line_csv(struct perf_stat_config *config, void *ctx)
fputc('\n', os->fh);
if (os->prefix)
- fprintf(os->fh, "%s%s", os->prefix, config->csv_sep);
+ fprintf(os->fh, "%s", os->prefix);
aggr_printout(config, os->evsel, os->id, os->nr);
for (i = 0; i < os->nfields; i++)
fputs(config->csv_sep, os->fh);
@@ -442,7 +442,7 @@ static void print_metric_header(struct perf_stat_config *config,
fprintf(os->fh, "%*s ", config->metric_only_len, unit);
}
-static int first_shadow_cpu_map_idx(struct perf_stat_config *config,
+static int first_shadow_map_idx(struct perf_stat_config *config,
struct evsel *evsel, const struct aggr_cpu_id *id)
{
struct perf_cpu_map *cpus = evsel__cpus(evsel);
@@ -452,6 +452,9 @@ static int first_shadow_cpu_map_idx(struct perf_stat_config *config,
if (config->aggr_mode == AGGR_NONE)
return perf_cpu_map__idx(cpus, id->cpu);
+ if (config->aggr_mode == AGGR_THREAD)
+ return id->thread_idx;
+
if (!config->aggr_get_id)
return 0;
@@ -556,7 +559,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
[AGGR_CORE] = 2,
[AGGR_THREAD] = 1,
[AGGR_UNSET] = 0,
- [AGGR_NODE] = 0,
+ [AGGR_NODE] = 1,
};
pm = config->metric_only ? print_metric_only_csv : print_metric_csv;
@@ -646,7 +649,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
}
perf_stat__print_shadow_stats(config, counter, uval,
- first_shadow_cpu_map_idx(config, counter, &id),
+ first_shadow_map_idx(config, counter, &id),
&out, &config->metric_events, st);
if (!config->csv_output && !config->metric_only && !config->json_output) {
print_noise(config, counter, noise);
@@ -676,7 +679,7 @@ static void aggr_update_shadow(struct perf_stat_config *config,
val += perf_counts(counter->counts, idx, 0)->val;
}
perf_stat__update_shadow_stats(counter, val,
- first_shadow_cpu_map_idx(config, counter, &id),
+ first_shadow_map_idx(config, counter, &id),
&rt_stat);
}
}
@@ -943,7 +946,7 @@ static struct perf_aggr_thread_value *sort_aggr_thread(
buf[i].counter = counter;
buf[i].id = aggr_cpu_id__empty();
- buf[i].id.thread = thread;
+ buf[i].id.thread_idx = thread;
buf[i].uval = uval;
buf[i].val = val;
buf[i].run = run;
@@ -979,14 +982,9 @@ static void print_aggr_thread(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
id = buf[thread].id;
- if (config->stats)
- printout(config, id, 0, buf[thread].counter, buf[thread].uval,
- prefix, buf[thread].run, buf[thread].ena, 1.0,
- &config->stats[id.thread]);
- else
- printout(config, id, 0, buf[thread].counter, buf[thread].uval,
- prefix, buf[thread].run, buf[thread].ena, 1.0,
- &rt_stat);
+ printout(config, id, 0, buf[thread].counter, buf[thread].uval,
+ prefix, buf[thread].run, buf[thread].ena, 1.0,
+ &rt_stat);
fputc('\n', output);
}
@@ -1126,6 +1124,7 @@ static int aggr_header_lens[] = {
[AGGR_SOCKET] = 12,
[AGGR_NONE] = 6,
[AGGR_THREAD] = 24,
+ [AGGR_NODE] = 6,
[AGGR_GLOBAL] = 0,
};
@@ -1135,6 +1134,7 @@ static const char *aggr_header_csv[] = {
[AGGR_SOCKET] = "socket,cpus",
[AGGR_NONE] = "cpu,",
[AGGR_THREAD] = "comm-pid,",
+ [AGGR_NODE] = "node,",
[AGGR_GLOBAL] = ""
};
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 788ce5e46470..07b29fe272c7 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -33,9 +33,8 @@ struct saved_value {
struct evsel *evsel;
enum stat_type type;
int ctx;
- int cpu_map_idx;
+ int map_idx; /* cpu or thread map index */
struct cgroup *cgrp;
- struct runtime_stat *stat;
struct stats stats;
u64 metric_total;
int metric_other;
@@ -48,8 +47,8 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
rb_node);
const struct saved_value *b = entry;
- if (a->cpu_map_idx != b->cpu_map_idx)
- return a->cpu_map_idx - b->cpu_map_idx;
+ if (a->map_idx != b->map_idx)
+ return a->map_idx - b->map_idx;
/*
* Previously the rbtree was used to link generic metrics.
@@ -67,16 +66,6 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
if (a->cgrp != b->cgrp)
return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1;
- if (a->evsel == NULL && b->evsel == NULL) {
- if (a->stat == b->stat)
- return 0;
-
- if ((char *)a->stat < (char *)b->stat)
- return -1;
-
- return 1;
- }
-
if (a->evsel == b->evsel)
return 0;
if ((char *)a->evsel < (char *)b->evsel)
@@ -106,7 +95,7 @@ static void saved_value_delete(struct rblist *rblist __maybe_unused,
}
static struct saved_value *saved_value_lookup(struct evsel *evsel,
- int cpu_map_idx,
+ int map_idx,
bool create,
enum stat_type type,
int ctx,
@@ -116,11 +105,10 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel,
struct rblist *rblist;
struct rb_node *nd;
struct saved_value dm = {
- .cpu_map_idx = cpu_map_idx,
+ .map_idx = map_idx,
.evsel = evsel,
.type = type,
.ctx = ctx,
- .stat = st,
.cgrp = cgrp,
};
@@ -215,10 +203,10 @@ struct runtime_stat_data {
static void update_runtime_stat(struct runtime_stat *st,
enum stat_type type,
- int cpu_map_idx, u64 count,
+ int map_idx, u64 count,
struct runtime_stat_data *rsd)
{
- struct saved_value *v = saved_value_lookup(NULL, cpu_map_idx, true, type,
+ struct saved_value *v = saved_value_lookup(NULL, map_idx, true, type,
rsd->ctx, st, rsd->cgrp);
if (v)
@@ -231,7 +219,7 @@ static void update_runtime_stat(struct runtime_stat *st,
* instruction rates, etc:
*/
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
- int cpu_map_idx, struct runtime_stat *st)
+ int map_idx, struct runtime_stat *st)
{
u64 count_ns = count;
struct saved_value *v;
@@ -243,88 +231,88 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
count *= counter->scale;
if (evsel__is_clock(counter))
- update_runtime_stat(st, STAT_NSECS, cpu_map_idx, count_ns, &rsd);
+ update_runtime_stat(st, STAT_NSECS, map_idx, count_ns, &rsd);
else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
- update_runtime_stat(st, STAT_CYCLES, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_CYCLES, map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
- update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_CYCLES_IN_TX, map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TRANSACTION_START))
- update_runtime_stat(st, STAT_TRANSACTION, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_TRANSACTION, map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, ELISION_START))
- update_runtime_stat(st, STAT_ELISION, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_ELISION, map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_HEAVY_OPS))
update_runtime_stat(st, STAT_TOPDOWN_HEAVY_OPS,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_BR_MISPREDICT))
update_runtime_stat(st, STAT_TOPDOWN_BR_MISPREDICT,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_LAT))
update_runtime_stat(st, STAT_TOPDOWN_FETCH_LAT,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_MEM_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_MEM_BOUND,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
- update_runtime_stat(st, STAT_BRANCHES, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_BRANCHES, map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
- update_runtime_stat(st, STAT_CACHEREFS, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_CACHEREFS, map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
- update_runtime_stat(st, STAT_L1_DCACHE, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_L1_DCACHE, map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
- update_runtime_stat(st, STAT_L1_ICACHE, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_L1_ICACHE, map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
- update_runtime_stat(st, STAT_LL_CACHE, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_LL_CACHE, map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
- update_runtime_stat(st, STAT_DTLB_CACHE, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_DTLB_CACHE, map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
- update_runtime_stat(st, STAT_ITLB_CACHE, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_ITLB_CACHE, map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, SMI_NUM))
- update_runtime_stat(st, STAT_SMI_NUM, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_SMI_NUM, map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, APERF))
- update_runtime_stat(st, STAT_APERF, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_APERF, map_idx, count, &rsd);
if (counter->collect_stat) {
- v = saved_value_lookup(counter, cpu_map_idx, true, STAT_NONE, 0, st,
+ v = saved_value_lookup(counter, map_idx, true, STAT_NONE, 0, st,
rsd.cgrp);
update_stats(&v->stats, count);
if (counter->metric_leader)
v->metric_total += count;
} else if (counter->metric_leader) {
v = saved_value_lookup(counter->metric_leader,
- cpu_map_idx, true, STAT_NONE, 0, st, rsd.cgrp);
+ map_idx, true, STAT_NONE, 0, st, rsd.cgrp);
v->metric_total += count;
v->metric_other++;
}
@@ -466,12 +454,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
}
static double runtime_stat_avg(struct runtime_stat *st,
- enum stat_type type, int cpu_map_idx,
+ enum stat_type type, int map_idx,
struct runtime_stat_data *rsd)
{
struct saved_value *v;
- v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
+ v = saved_value_lookup(NULL, map_idx, false, type, rsd->ctx, st, rsd->cgrp);
if (!v)
return 0.0;
@@ -479,12 +467,12 @@ static double runtime_stat_avg(struct runtime_stat *st,
}
static double runtime_stat_n(struct runtime_stat *st,
- enum stat_type type, int cpu_map_idx,
+ enum stat_type type, int map_idx,
struct runtime_stat_data *rsd)
{
struct saved_value *v;
- v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
+ v = saved_value_lookup(NULL, map_idx, false, type, rsd->ctx, st, rsd->cgrp);
if (!v)
return 0.0;
@@ -492,7 +480,7 @@ static double runtime_stat_n(struct runtime_stat *st,
}
static void print_stalled_cycles_frontend(struct perf_stat_config *config,
- int cpu_map_idx, double avg,
+ int map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -500,7 +488,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -515,7 +503,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config,
}
static void print_stalled_cycles_backend(struct perf_stat_config *config,
- int cpu_map_idx, double avg,
+ int map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -523,7 +511,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -534,7 +522,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config,
}
static void print_branch_misses(struct perf_stat_config *config,
- int cpu_map_idx, double avg,
+ int map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -542,7 +530,7 @@ static void print_branch_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_BRANCHES, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_BRANCHES, map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -553,7 +541,7 @@ static void print_branch_misses(struct perf_stat_config *config,
}
static void print_l1_dcache_misses(struct perf_stat_config *config,
- int cpu_map_idx, double avg,
+ int map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -561,7 +549,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_L1_DCACHE, map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -572,7 +560,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
}
static void print_l1_icache_misses(struct perf_stat_config *config,
- int cpu_map_idx, double avg,
+ int map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -580,7 +568,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_L1_ICACHE, map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -590,7 +578,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
}
static void print_dtlb_cache_misses(struct perf_stat_config *config,
- int cpu_map_idx, double avg,
+ int map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -598,7 +586,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_DTLB_CACHE, map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -608,7 +596,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
}
static void print_itlb_cache_misses(struct perf_stat_config *config,
- int cpu_map_idx, double avg,
+ int map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -616,7 +604,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_ITLB_CACHE, map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -626,7 +614,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
}
static void print_ll_cache_misses(struct perf_stat_config *config,
- int cpu_map_idx, double avg,
+ int map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -634,7 +622,7 @@ static void print_ll_cache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_LL_CACHE, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_LL_CACHE, map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -692,61 +680,61 @@ static double sanitize_val(double x)
return x;
}
-static double td_total_slots(int cpu_map_idx, struct runtime_stat *st,
+static double td_total_slots(int map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
- return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu_map_idx, rsd);
+ return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, map_idx, rsd);
}
-static double td_bad_spec(int cpu_map_idx, struct runtime_stat *st,
+static double td_bad_spec(int map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double bad_spec = 0;
double total_slots;
double total;
- total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu_map_idx, rsd) -
- runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu_map_idx, rsd) +
- runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, map_idx, rsd) -
+ runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, map_idx, rsd) +
+ runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, map_idx, rsd);
- total_slots = td_total_slots(cpu_map_idx, st, rsd);
+ total_slots = td_total_slots(map_idx, st, rsd);
if (total_slots)
bad_spec = total / total_slots;
return sanitize_val(bad_spec);
}
-static double td_retiring(int cpu_map_idx, struct runtime_stat *st,
+static double td_retiring(int map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double retiring = 0;
- double total_slots = td_total_slots(cpu_map_idx, st, rsd);
+ double total_slots = td_total_slots(map_idx, st, rsd);
double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
- cpu_map_idx, rsd);
+ map_idx, rsd);
if (total_slots)
retiring = ret_slots / total_slots;
return retiring;
}
-static double td_fe_bound(int cpu_map_idx, struct runtime_stat *st,
+static double td_fe_bound(int map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double fe_bound = 0;
- double total_slots = td_total_slots(cpu_map_idx, st, rsd);
+ double total_slots = td_total_slots(map_idx, st, rsd);
double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
- cpu_map_idx, rsd);
+ map_idx, rsd);
if (total_slots)
fe_bound = fetch_bub / total_slots;
return fe_bound;
}
-static double td_be_bound(int cpu_map_idx, struct runtime_stat *st,
+static double td_be_bound(int map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
- double sum = (td_fe_bound(cpu_map_idx, st, rsd) +
- td_bad_spec(cpu_map_idx, st, rsd) +
- td_retiring(cpu_map_idx, st, rsd));
+ double sum = (td_fe_bound(map_idx, st, rsd) +
+ td_bad_spec(map_idx, st, rsd) +
+ td_retiring(map_idx, st, rsd));
if (sum == 0)
return 0;
return sanitize_val(1.0 - sum);
@@ -757,15 +745,15 @@ static double td_be_bound(int cpu_map_idx, struct runtime_stat *st,
* the ratios we need to recreate the sum.
*/
-static double td_metric_ratio(int cpu_map_idx, enum stat_type type,
+static double td_metric_ratio(int map_idx, enum stat_type type,
struct runtime_stat *stat,
struct runtime_stat_data *rsd)
{
- double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) +
- runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) +
- runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) +
- runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd);
- double d = runtime_stat_avg(stat, type, cpu_map_idx, rsd);
+ double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, map_idx, rsd) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, map_idx, rsd) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, map_idx, rsd) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, map_idx, rsd);
+ double d = runtime_stat_avg(stat, type, map_idx, rsd);
if (sum)
return d / sum;
@@ -777,23 +765,23 @@ static double td_metric_ratio(int cpu_map_idx, enum stat_type type,
* We allow two missing.
*/
-static bool full_td(int cpu_map_idx, struct runtime_stat *stat,
+static bool full_td(int map_idx, struct runtime_stat *stat,
struct runtime_stat_data *rsd)
{
int c = 0;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, map_idx, rsd) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, map_idx, rsd) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, map_idx, rsd) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, map_idx, rsd) > 0)
c++;
return c >= 2;
}
-static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx,
+static void print_smi_cost(struct perf_stat_config *config, int map_idx,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -801,9 +789,9 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx,
double smi_num, aperf, cycles, cost = 0.0;
const char *color = NULL;
- smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu_map_idx, rsd);
- aperf = runtime_stat_avg(st, STAT_APERF, cpu_map_idx, rsd);
- cycles = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
+ smi_num = runtime_stat_avg(st, STAT_SMI_NUM, map_idx, rsd);
+ aperf = runtime_stat_avg(st, STAT_APERF, map_idx, rsd);
+ cycles = runtime_stat_avg(st, STAT_CYCLES, map_idx, rsd);
if ((cycles == 0) || (aperf == 0))
return;
@@ -820,7 +808,7 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx,
static int prepare_metric(struct evsel **metric_events,
struct metric_ref *metric_refs,
struct expr_parse_ctx *pctx,
- int cpu_map_idx,
+ int map_idx,
struct runtime_stat *st)
{
double scale;
@@ -859,17 +847,22 @@ static int prepare_metric(struct evsel **metric_events,
abort();
}
} else {
- v = saved_value_lookup(metric_events[i], cpu_map_idx, false,
+ v = saved_value_lookup(metric_events[i], map_idx, false,
STAT_NONE, 0, st,
metric_events[i]->cgrp);
if (!v)
break;
stats = &v->stats;
- scale = 1.0;
+ /*
+ * If an event was scaled during stat gathering, reverse
+ * the scale before computing the metric.
+ */
+ scale = 1.0 / metric_events[i]->scale;
+
source_count = evsel__source_count(metric_events[i]);
if (v->metric_other)
- metric_total = v->metric_total;
+ metric_total = v->metric_total * scale;
}
n = strdup(evsel__metric_id(metric_events[i]));
if (!n)
@@ -897,7 +890,7 @@ static void generic_metric(struct perf_stat_config *config,
const char *metric_name,
const char *metric_unit,
int runtime,
- int cpu_map_idx,
+ int map_idx,
struct perf_stat_output_ctx *out,
struct runtime_stat *st)
{
@@ -911,8 +904,11 @@ static void generic_metric(struct perf_stat_config *config,
if (!pctx)
return;
- pctx->runtime = runtime;
- i = prepare_metric(metric_events, metric_refs, pctx, cpu_map_idx, st);
+ if (config->user_requested_cpu_list)
+ pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
+ pctx->sctx.runtime = runtime;
+ pctx->sctx.system_wide = config->system_wide;
+ i = prepare_metric(metric_events, metric_refs, pctx, map_idx, st);
if (i < 0) {
expr__ctx_free(pctx);
return;
@@ -957,7 +953,7 @@ static void generic_metric(struct perf_stat_config *config,
expr__ctx_free(pctx);
}
-double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st)
+double test_generic_metric(struct metric_expr *mexp, int map_idx, struct runtime_stat *st)
{
struct expr_parse_ctx *pctx;
double ratio = 0.0;
@@ -966,7 +962,7 @@ double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct run
if (!pctx)
return NAN;
- if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu_map_idx, st) < 0)
+ if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, map_idx, st) < 0)
goto out;
if (expr__parse(&ratio, pctx, mexp->metric_expr))
@@ -979,7 +975,7 @@ out:
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct evsel *evsel,
- double avg, int cpu_map_idx,
+ double avg, int map_idx,
struct perf_stat_output_ctx *out,
struct rblist *metric_events,
struct runtime_stat *st)
@@ -998,7 +994,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (config->iostat_run) {
iostat_print_metric(config, evsel, out);
} else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
- total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, map_idx, &rsd);
if (total) {
ratio = avg / total;
@@ -1008,11 +1004,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
}
- total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, map_idx, &rsd);
total = max(total, runtime_stat_avg(st,
STAT_STALLED_CYCLES_BACK,
- cpu_map_idx, &rsd));
+ map_idx, &rsd));
if (total && avg) {
out->new_line(config, ctxp);
@@ -1022,8 +1018,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
ratio);
}
} else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
- if (runtime_stat_n(st, STAT_BRANCHES, cpu_map_idx, &rsd) != 0)
- print_branch_misses(config, cpu_map_idx, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_BRANCHES, map_idx, &rsd) != 0)
+ print_branch_misses(config, map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
} else if (
@@ -1032,8 +1028,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_L1_DCACHE, cpu_map_idx, &rsd) != 0)
- print_l1_dcache_misses(config, cpu_map_idx, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_L1_DCACHE, map_idx, &rsd) != 0)
+ print_l1_dcache_misses(config, map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
} else if (
@@ -1042,8 +1038,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_L1_ICACHE, cpu_map_idx, &rsd) != 0)
- print_l1_icache_misses(config, cpu_map_idx, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_L1_ICACHE, map_idx, &rsd) != 0)
+ print_l1_icache_misses(config, map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
} else if (
@@ -1052,8 +1048,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu_map_idx, &rsd) != 0)
- print_dtlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_DTLB_CACHE, map_idx, &rsd) != 0)
+ print_dtlb_cache_misses(config, map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
} else if (
@@ -1062,8 +1058,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu_map_idx, &rsd) != 0)
- print_itlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_ITLB_CACHE, map_idx, &rsd) != 0)
+ print_itlb_cache_misses(config, map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
} else if (
@@ -1072,27 +1068,27 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_LL_CACHE, cpu_map_idx, &rsd) != 0)
- print_ll_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_LL_CACHE, map_idx, &rsd) != 0)
+ print_ll_cache_misses(config, map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
- total = runtime_stat_avg(st, STAT_CACHEREFS, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_CACHEREFS, map_idx, &rsd);
if (total)
ratio = avg * 100 / total;
- if (runtime_stat_n(st, STAT_CACHEREFS, cpu_map_idx, &rsd) != 0)
+ if (runtime_stat_n(st, STAT_CACHEREFS, map_idx, &rsd) != 0)
print_metric(config, ctxp, NULL, "%8.3f %%",
"of all cache refs", ratio);
else
print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
- print_stalled_cycles_frontend(config, cpu_map_idx, avg, out, st, &rsd);
+ print_stalled_cycles_frontend(config, map_idx, avg, out, st, &rsd);
} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
- print_stalled_cycles_backend(config, cpu_map_idx, avg, out, st, &rsd);
+ print_stalled_cycles_backend(config, map_idx, avg, out, st, &rsd);
} else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
- total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_NSECS, map_idx, &rsd);
if (total) {
ratio = avg / total;
@@ -1101,7 +1097,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
}
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
- total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, map_idx, &rsd);
if (total)
print_metric(config, ctxp, NULL,
@@ -1111,8 +1107,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "transactional cycles",
0);
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
- total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
- total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, map_idx, &rsd);
+ total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, map_idx, &rsd);
if (total2 < avg)
total2 = avg;
@@ -1122,19 +1118,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
else
print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
- total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, map_idx, &rsd);
if (avg)
ratio = total / avg;
- if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd) != 0)
+ if (runtime_stat_n(st, STAT_CYCLES_IN_TX, map_idx, &rsd) != 0)
print_metric(config, ctxp, NULL, "%8.0f",
"cycles / transaction", ratio);
else
print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
0);
} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
- total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, map_idx, &rsd);
if (avg)
ratio = total / avg;
@@ -1147,28 +1143,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
else
print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
- double fe_bound = td_fe_bound(cpu_map_idx, st, &rsd);
+ double fe_bound = td_fe_bound(map_idx, st, &rsd);
if (fe_bound > 0.2)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
fe_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
- double retiring = td_retiring(cpu_map_idx, st, &rsd);
+ double retiring = td_retiring(map_idx, st, &rsd);
if (retiring > 0.7)
color = PERF_COLOR_GREEN;
print_metric(config, ctxp, color, "%8.1f%%", "retiring",
retiring * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
- double bad_spec = td_bad_spec(cpu_map_idx, st, &rsd);
+ double bad_spec = td_bad_spec(map_idx, st, &rsd);
if (bad_spec > 0.1)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
bad_spec * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
- double be_bound = td_be_bound(cpu_map_idx, st, &rsd);
+ double be_bound = td_be_bound(map_idx, st, &rsd);
const char *name = "backend bound";
static int have_recovery_bubbles = -1;
@@ -1181,14 +1177,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (be_bound > 0.2)
color = PERF_COLOR_RED;
- if (td_total_slots(cpu_map_idx, st, &rsd) > 0)
+ if (td_total_slots(map_idx, st, &rsd) > 0)
print_metric(config, ctxp, color, "%8.1f%%", name,
be_bound * 100.);
else
print_metric(config, ctxp, NULL, NULL, name, 0);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
- full_td(cpu_map_idx, st, &rsd)) {
- double retiring = td_metric_ratio(cpu_map_idx,
+ full_td(map_idx, st, &rsd)) {
+ double retiring = td_metric_ratio(map_idx,
STAT_TOPDOWN_RETIRING, st,
&rsd);
if (retiring > 0.7)
@@ -1196,8 +1192,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "Retiring",
retiring * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
- full_td(cpu_map_idx, st, &rsd)) {
- double fe_bound = td_metric_ratio(cpu_map_idx,
+ full_td(map_idx, st, &rsd)) {
+ double fe_bound = td_metric_ratio(map_idx,
STAT_TOPDOWN_FE_BOUND, st,
&rsd);
if (fe_bound > 0.2)
@@ -1205,8 +1201,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "Frontend Bound",
fe_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
- full_td(cpu_map_idx, st, &rsd)) {
- double be_bound = td_metric_ratio(cpu_map_idx,
+ full_td(map_idx, st, &rsd)) {
+ double be_bound = td_metric_ratio(map_idx,
STAT_TOPDOWN_BE_BOUND, st,
&rsd);
if (be_bound > 0.2)
@@ -1214,8 +1210,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "Backend Bound",
be_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
- full_td(cpu_map_idx, st, &rsd)) {
- double bad_spec = td_metric_ratio(cpu_map_idx,
+ full_td(map_idx, st, &rsd)) {
+ double bad_spec = td_metric_ratio(map_idx,
STAT_TOPDOWN_BAD_SPEC, st,
&rsd);
if (bad_spec > 0.1)
@@ -1223,11 +1219,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "Bad Speculation",
bad_spec * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) &&
- full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
- double retiring = td_metric_ratio(cpu_map_idx,
+ full_td(map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double retiring = td_metric_ratio(map_idx,
STAT_TOPDOWN_RETIRING, st,
&rsd);
- double heavy_ops = td_metric_ratio(cpu_map_idx,
+ double heavy_ops = td_metric_ratio(map_idx,
STAT_TOPDOWN_HEAVY_OPS, st,
&rsd);
double light_ops = retiring - heavy_ops;
@@ -1243,11 +1239,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "Light Operations",
light_ops * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) &&
- full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
- double bad_spec = td_metric_ratio(cpu_map_idx,
+ full_td(map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double bad_spec = td_metric_ratio(map_idx,
STAT_TOPDOWN_BAD_SPEC, st,
&rsd);
- double br_mis = td_metric_ratio(cpu_map_idx,
+ double br_mis = td_metric_ratio(map_idx,
STAT_TOPDOWN_BR_MISPREDICT, st,
&rsd);
double m_clears = bad_spec - br_mis;
@@ -1263,11 +1259,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "Machine Clears",
m_clears * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) &&
- full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
- double fe_bound = td_metric_ratio(cpu_map_idx,
+ full_td(map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double fe_bound = td_metric_ratio(map_idx,
STAT_TOPDOWN_FE_BOUND, st,
&rsd);
- double fetch_lat = td_metric_ratio(cpu_map_idx,
+ double fetch_lat = td_metric_ratio(map_idx,
STAT_TOPDOWN_FETCH_LAT, st,
&rsd);
double fetch_bw = fe_bound - fetch_lat;
@@ -1283,11 +1279,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "Fetch Bandwidth",
fetch_bw * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) &&
- full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
- double be_bound = td_metric_ratio(cpu_map_idx,
+ full_td(map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double be_bound = td_metric_ratio(map_idx,
STAT_TOPDOWN_BE_BOUND, st,
&rsd);
- double mem_bound = td_metric_ratio(cpu_map_idx,
+ double mem_bound = td_metric_ratio(map_idx,
STAT_TOPDOWN_MEM_BOUND, st,
&rsd);
double core_bound = be_bound - mem_bound;
@@ -1304,12 +1300,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
core_bound * 100.);
} else if (evsel->metric_expr) {
generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
- evsel->name, evsel->metric_name, NULL, 1, cpu_map_idx, out, st);
- } else if (runtime_stat_n(st, STAT_NSECS, cpu_map_idx, &rsd) != 0) {
+ evsel->name, evsel->metric_name, NULL, 1,
+ map_idx, out, st);
+ } else if (runtime_stat_n(st, STAT_NSECS, map_idx, &rsd) != 0) {
char unit = ' ';
char unit_buf[10] = "/sec";
- total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_NSECS, map_idx, &rsd);
if (total)
ratio = convert_unit_double(1000000000.0 * avg / total, &unit);
@@ -1317,7 +1314,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
- print_smi_cost(config, cpu_map_idx, out, st, &rsd);
+ print_smi_cost(config, map_idx, out, st, &rsd);
} else {
num = 0;
}
@@ -1329,8 +1326,9 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (num++ > 0)
out->new_line(config, ctxp);
generic_metric(config, mexp->metric_expr, mexp->metric_events,
- mexp->metric_refs, evsel->name, mexp->metric_name,
- mexp->metric_unit, mexp->runtime, cpu_map_idx, out, st);
+ mexp->metric_refs, evsel->name, mexp->metric_name,
+ mexp->metric_unit, mexp->runtime,
+ map_idx, out, st);
}
}
if (num == 0)
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 0882b4754fcf..8ec8bb4a9912 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -14,7 +14,11 @@
#include "evlist.h"
#include "evsel.h"
#include "thread_map.h"
-#include "hashmap.h"
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/hashmap.h>
+#else
+#include "util/hashmap.h"
+#endif
#include <linux/zalloc.h>
void update_stats(struct stats *stats, u64 val)
@@ -128,13 +132,9 @@ static void perf_stat_evsel_id_init(struct evsel *evsel)
static void evsel__reset_stat_priv(struct evsel *evsel)
{
- int i;
struct perf_stat_evsel *ps = evsel->stats;
- for (i = 0; i < 3; i++)
- init_stats(&ps->res_stats[i]);
-
- perf_stat_evsel_id_init(evsel);
+ init_stats(&ps->res_stats);
}
static int evsel__alloc_stat_priv(struct evsel *evsel)
@@ -142,6 +142,7 @@ static int evsel__alloc_stat_priv(struct evsel *evsel)
evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
if (evsel->stats == NULL)
return -ENOMEM;
+ perf_stat_evsel_id_init(evsel);
evsel__reset_stat_priv(evsel);
return 0;
}
@@ -388,12 +389,8 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
}
if (config->aggr_mode == AGGR_THREAD) {
- if (config->stats)
- perf_stat__update_shadow_stats(evsel,
- count->val, 0, &config->stats[thread]);
- else
- perf_stat__update_shadow_stats(evsel,
- count->val, 0, &rt_stat);
+ perf_stat__update_shadow_stats(evsel, count->val,
+ thread, &rt_stat);
}
break;
case AGGR_GLOBAL:
@@ -416,9 +413,6 @@ static int process_counter_maps(struct perf_stat_config *config,
int ncpus = evsel__nr_cpus(counter);
int idx, thread;
- if (counter->core.system_wide)
- nthreads = 1;
-
for (thread = 0; thread < nthreads; thread++) {
for (idx = 0; idx < ncpus; idx++) {
if (process_counter_values(config, counter, idx, thread,
@@ -436,7 +430,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
struct perf_counts_values *aggr = &counter->counts->aggr;
struct perf_stat_evsel *ps = counter->stats;
u64 *count = counter->counts->aggr.values;
- int i, ret;
+ int ret;
aggr->val = aggr->ena = aggr->run = 0;
@@ -454,8 +448,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
evsel__compute_deltas(counter, -1, -1, aggr);
perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
- for (i = 0; i < 3; i++)
- update_stats(&ps->res_stats[i], count[i]);
+ update_stats(&ps->res_stats, *count);
if (verbose > 0) {
fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 668250022f8c..b0899c6e002f 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -43,7 +43,7 @@ enum perf_stat_evsel_id {
};
struct perf_stat_evsel {
- struct stats res_stats[3];
+ struct stats res_stats;
enum perf_stat_evsel_id id;
u64 *group_data;
};
@@ -141,6 +141,8 @@ struct perf_stat_config {
bool stop_read_counter;
bool quiet;
bool iostat_run;
+ char *user_requested_cpu_list;
+ bool system_wide;
FILE *output;
unsigned int interval;
unsigned int timeout;
@@ -151,8 +153,6 @@ struct perf_stat_config {
int run_count;
int print_free_counters_hint;
int print_mixed_hw_group_error;
- struct runtime_stat *stats;
- int stats_num;
const char *csv_sep;
struct stats *walltime_nsecs_stats;
struct rusage ru_data;
@@ -232,7 +232,7 @@ void perf_stat__init_shadow_stats(void);
void perf_stat__reset_shadow_stats(void);
void perf_stat__reset_shadow_per_stat(struct runtime_stat *st);
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
- int cpu_map_idx, struct runtime_stat *st);
+ int map_idx, struct runtime_stat *st);
struct perf_stat_output_ctx {
void *ctx;
print_metric_t print_metric;
@@ -242,7 +242,7 @@ struct perf_stat_output_ctx {
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct evsel *evsel,
- double avg, int cpu,
+ double avg, int map_idx,
struct perf_stat_output_ctx *out,
struct rblist *metric_events,
struct runtime_stat *st);
@@ -277,5 +277,5 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
struct target *_target, struct timespec *ts, int argc, const char **argv);
struct metric_expr;
-double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st);
+double test_generic_metric(struct metric_expr *mexp, int map_idx, struct runtime_stat *st);
#endif
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index f6d90cdd9225..4f12a96f33cc 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -15,7 +15,6 @@ const char *dots =
"....................................................................."
".....................................................................";
-#define K 1024LL
/*
* perf_atoll()
* Parse (\d+)(b|B|kb|KB|mb|MB|gb|GB|tb|TB) (e.g. "256MB")
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index a4b22caa7c24..a3a165ae933a 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1791,6 +1791,7 @@ int dso__load(struct dso *dso, struct map *map)
char newmapname[PATH_MAX];
const char *map_path = dso->long_name;
+ mutex_lock(&dso->lock);
perfmap = strncmp(dso->name, "/tmp/perf-", 10) == 0;
if (perfmap) {
if (dso->nsinfo && (dso__find_perf_map(newmapname,
@@ -1800,7 +1801,6 @@ int dso__load(struct dso *dso, struct map *map)
}
nsinfo__mountns_enter(dso->nsinfo, &nsc);
- pthread_mutex_lock(&dso->lock);
/* check again under the dso->lock */
if (dso__loaded(dso)) {
@@ -1964,7 +1964,7 @@ out_free:
ret = 0;
out:
dso__set_loaded(dso);
- pthread_mutex_unlock(&dso->lock);
+ mutex_unlock(&dso->lock);
nsinfo__mountns_exit(&nsc);
return ret;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 538790758e24..cccd293b5312 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -364,11 +364,14 @@ static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end,
}
static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event,
+ struct machine *machine,
bool is_kernel)
{
struct build_id bid;
struct nsinfo *nsi;
struct nscookie nc;
+ struct dso *dso = NULL;
+ struct dso_id id;
int rc;
if (is_kernel) {
@@ -376,6 +379,18 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event,
goto out;
}
+ id.maj = event->maj;
+ id.min = event->min;
+ id.ino = event->ino;
+ id.ino_generation = event->ino_generation;
+
+ dso = dsos__findnew_id(&machine->dsos, event->filename, &id);
+ if (dso && dso->has_build_id) {
+ bid = dso->bid;
+ rc = 0;
+ goto out;
+ }
+
nsi = nsinfo__new(event->pid);
nsinfo__mountns_enter(nsi, &nc);
@@ -391,12 +406,16 @@ out:
event->header.misc |= PERF_RECORD_MISC_MMAP_BUILD_ID;
event->__reserved_1 = 0;
event->__reserved_2 = 0;
+
+ if (dso && !dso->has_build_id)
+ dso__set_build_id(dso, &bid);
} else {
if (event->filename[0] == '/') {
pr_debug2("Failed to read build ID for %s\n",
event->filename);
}
}
+ dso__put(dso);
}
int perf_event__synthesize_mmap_events(struct perf_tool *tool,
@@ -507,7 +526,7 @@ out:
event->mmap2.tid = pid;
if (symbol_conf.buildid_mmap2)
- perf_record_mmap2__read_build_id(&event->mmap2, false);
+ perf_record_mmap2__read_build_id(&event->mmap2, machine, false);
if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
rc = -1;
@@ -690,7 +709,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t
memcpy(event->mmap2.filename, pos->dso->long_name,
pos->dso->long_name_len + 1);
- perf_record_mmap2__read_build_id(&event->mmap2, false);
+ perf_record_mmap2__read_build_id(&event->mmap2, machine, false);
} else {
size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
event->mmap.header.type = PERF_RECORD_MMAP;
@@ -1126,7 +1145,7 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
event->mmap2.len = map->end - event->mmap.start;
event->mmap2.pid = machine->pid;
- perf_record_mmap2__read_build_id(&event->mmap2, true);
+ perf_record_mmap2__read_build_id(&event->mmap2, machine, true);
} else {
size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
"%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
@@ -1195,93 +1214,97 @@ int perf_event__synthesize_thread_map2(struct perf_tool *tool,
return err;
}
-static void synthesize_cpus(struct perf_record_cpu_map_data *data,
- const struct perf_cpu_map *map)
-{
- int i, map_nr = perf_cpu_map__nr(map);
-
- data->cpus_data.nr = map_nr;
+struct synthesize_cpu_map_data {
+ const struct perf_cpu_map *map;
+ int nr;
+ int min_cpu;
+ int max_cpu;
+ int has_any_cpu;
+ int type;
+ size_t size;
+ struct perf_record_cpu_map_data *data;
+};
- for (i = 0; i < map_nr; i++)
- data->cpus_data.cpu[i] = perf_cpu_map__cpu(map, i).cpu;
+static void synthesize_cpus(struct synthesize_cpu_map_data *data)
+{
+ data->data->type = PERF_CPU_MAP__CPUS;
+ data->data->cpus_data.nr = data->nr;
+ for (int i = 0; i < data->nr; i++)
+ data->data->cpus_data.cpu[i] = perf_cpu_map__cpu(data->map, i).cpu;
}
-static void synthesize_mask(struct perf_record_cpu_map_data *data,
- const struct perf_cpu_map *map, int max)
+static void synthesize_mask(struct synthesize_cpu_map_data *data)
{
int idx;
struct perf_cpu cpu;
/* Due to padding, the 4bytes per entry mask variant is always smaller. */
- data->mask32_data.nr = BITS_TO_U32(max);
- data->mask32_data.long_size = 4;
+ data->data->type = PERF_CPU_MAP__MASK;
+ data->data->mask32_data.nr = BITS_TO_U32(data->max_cpu);
+ data->data->mask32_data.long_size = 4;
- perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ perf_cpu_map__for_each_cpu(cpu, idx, data->map) {
int bit_word = cpu.cpu / 32;
- __u32 bit_mask = 1U << (cpu.cpu & 31);
+ u32 bit_mask = 1U << (cpu.cpu & 31);
- data->mask32_data.mask[bit_word] |= bit_mask;
+ data->data->mask32_data.mask[bit_word] |= bit_mask;
}
}
-static size_t cpus_size(const struct perf_cpu_map *map)
-{
- return sizeof(struct cpu_map_entries) + perf_cpu_map__nr(map) * sizeof(u16);
-}
-
-static size_t mask_size(const struct perf_cpu_map *map, int *max)
+static void synthesize_range_cpus(struct synthesize_cpu_map_data *data)
{
- *max = perf_cpu_map__max(map).cpu;
- return sizeof(struct perf_record_mask_cpu_map32) + BITS_TO_U32(*max) * sizeof(__u32);
+ data->data->type = PERF_CPU_MAP__RANGE_CPUS;
+ data->data->range_cpu_data.any_cpu = data->has_any_cpu;
+ data->data->range_cpu_data.start_cpu = data->min_cpu;
+ data->data->range_cpu_data.end_cpu = data->max_cpu;
}
-static void *cpu_map_data__alloc(const struct perf_cpu_map *map, size_t *size,
- u16 *type, int *max)
+static void *cpu_map_data__alloc(struct synthesize_cpu_map_data *syn_data,
+ size_t header_size)
{
size_t size_cpus, size_mask;
- bool is_dummy = perf_cpu_map__empty(map);
- /*
- * Both array and mask data have variable size based
- * on the number of cpus and their actual values.
- * The size of the 'struct perf_record_cpu_map_data' is:
- *
- * array = size of 'struct cpu_map_entries' +
- * number of cpus * sizeof(u64)
- *
- * mask = size of 'struct perf_record_record_cpu_map' +
- * maximum cpu bit converted to size of longs
- *
- * and finally + the size of 'struct perf_record_cpu_map_data'.
- */
- size_cpus = cpus_size(map);
- size_mask = mask_size(map, max);
+ syn_data->nr = perf_cpu_map__nr(syn_data->map);
+ syn_data->has_any_cpu = (perf_cpu_map__cpu(syn_data->map, 0).cpu == -1) ? 1 : 0;
- if (is_dummy || (size_cpus < size_mask)) {
- *size += size_cpus;
- *type = PERF_CPU_MAP__CPUS;
- } else {
- *size += size_mask;
- *type = PERF_CPU_MAP__MASK;
+ syn_data->min_cpu = perf_cpu_map__cpu(syn_data->map, syn_data->has_any_cpu).cpu;
+ syn_data->max_cpu = perf_cpu_map__max(syn_data->map).cpu;
+ if (syn_data->max_cpu - syn_data->min_cpu + 1 == syn_data->nr - syn_data->has_any_cpu) {
+ /* A consecutive range of CPUs can be encoded using a range. */
+ assert(sizeof(u16) + sizeof(struct perf_record_range_cpu_map) == sizeof(u64));
+ syn_data->type = PERF_CPU_MAP__RANGE_CPUS;
+ syn_data->size = header_size + sizeof(u64);
+ return zalloc(syn_data->size);
}
- *size += sizeof(__u16); /* For perf_record_cpu_map_data.type. */
- *size = PERF_ALIGN(*size, sizeof(u64));
- return zalloc(*size);
+ size_cpus = sizeof(u16) + sizeof(struct cpu_map_entries) + syn_data->nr * sizeof(u16);
+ /* Due to padding, the 4bytes per entry mask variant is always smaller. */
+ size_mask = sizeof(u16) + sizeof(struct perf_record_mask_cpu_map32) +
+ BITS_TO_U32(syn_data->max_cpu) * sizeof(__u32);
+ if (syn_data->has_any_cpu || size_cpus < size_mask) {
+ /* Follow the CPU map encoding. */
+ syn_data->type = PERF_CPU_MAP__CPUS;
+ syn_data->size = header_size + PERF_ALIGN(size_cpus, sizeof(u64));
+ return zalloc(syn_data->size);
+ }
+ /* Encode using a bitmask. */
+ syn_data->type = PERF_CPU_MAP__MASK;
+ syn_data->size = header_size + PERF_ALIGN(size_mask, sizeof(u64));
+ return zalloc(syn_data->size);
}
-static void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data,
- const struct perf_cpu_map *map,
- u16 type, int max)
+static void cpu_map_data__synthesize(struct synthesize_cpu_map_data *data)
{
- data->type = type;
-
- switch (type) {
+ switch (data->type) {
case PERF_CPU_MAP__CPUS:
- synthesize_cpus(data, map);
+ synthesize_cpus(data);
break;
case PERF_CPU_MAP__MASK:
- synthesize_mask(data, map, max);
+ synthesize_mask(data);
+ break;
+ case PERF_CPU_MAP__RANGE_CPUS:
+ synthesize_range_cpus(data);
+ break;
default:
break;
}
@@ -1289,23 +1312,22 @@ static void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data,
static struct perf_record_cpu_map *cpu_map_event__new(const struct perf_cpu_map *map)
{
- size_t size = sizeof(struct perf_event_header);
+ struct synthesize_cpu_map_data syn_data = { .map = map };
struct perf_record_cpu_map *event;
- int max;
- u16 type;
- event = cpu_map_data__alloc(map, &size, &type, &max);
+
+ event = cpu_map_data__alloc(&syn_data, sizeof(struct perf_event_header));
if (!event)
return NULL;
+ syn_data.data = &event->data;
event->header.type = PERF_RECORD_CPU_MAP;
- event->header.size = size;
- event->data.type = type;
-
- cpu_map_data__synthesize(&event->data, map, type, max);
+ event->header.size = syn_data.size;
+ cpu_map_data__synthesize(&syn_data);
return event;
}
+
int perf_event__synthesize_cpu_map(struct perf_tool *tool,
const struct perf_cpu_map *map,
perf_event__handler_t process,
@@ -1955,7 +1977,7 @@ int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evse
if (ev == NULL)
return -ENOMEM;
- strlcpy(ev->data, evsel->unit, size + 1);
+ strlcpy(ev->unit, evsel->unit, size + 1);
err = process(tool, (union perf_event *)ev, NULL, NULL);
free(ev);
return err;
@@ -1972,8 +1994,7 @@ int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evs
if (ev == NULL)
return -ENOMEM;
- ev_data = (struct perf_record_event_update_scale *)ev->data;
- ev_data->scale = evsel->scale;
+ ev->scale.scale = evsel->scale;
err = process(tool, (union perf_event *)ev, NULL, NULL);
free(ev);
return err;
@@ -1990,7 +2011,7 @@ int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evse
if (ev == NULL)
return -ENOMEM;
- strlcpy(ev->data, evsel->name, len + 1);
+ strlcpy(ev->name, evsel->name, len + 1);
err = process(tool, (union perf_event *)ev, NULL, NULL);
free(ev);
return err;
@@ -1999,25 +2020,20 @@ int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evse
int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel,
perf_event__handler_t process)
{
- size_t size = sizeof(struct perf_record_event_update);
+ struct synthesize_cpu_map_data syn_data = { .map = evsel->core.own_cpus };
struct perf_record_event_update *ev;
- int max, err;
- u16 type;
-
- if (!evsel->core.own_cpus)
- return 0;
+ int err;
- ev = cpu_map_data__alloc(evsel->core.own_cpus, &size, &type, &max);
+ ev = cpu_map_data__alloc(&syn_data, sizeof(struct perf_event_header) + 2 * sizeof(u64));
if (!ev)
return -ENOMEM;
+ syn_data.data = &ev->cpus.cpus;
ev->header.type = PERF_RECORD_EVENT_UPDATE;
- ev->header.size = (u16)size;
+ ev->header.size = (u16)syn_data.size;
ev->type = PERF_EVENT_UPDATE__CPUS;
ev->id = evsel->core.id[0];
-
- cpu_map_data__synthesize((struct perf_record_cpu_map_data *)ev->data,
- evsel->core.own_cpus, type, max);
+ cpu_map_data__synthesize(&syn_data);
err = process(tool, (union perf_event *)ev, NULL, NULL);
free(ev);
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index 1c2c0a838430..a8b0d79bd96c 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -5,6 +5,7 @@
#include "tool.h"
#include "evswitch.h"
#include "annotate.h"
+#include "mutex.h"
#include "ordered-events.h"
#include "record.h"
#include <linux/types.h>
@@ -53,8 +54,8 @@ struct perf_top {
struct ordered_events *in;
struct ordered_events data[2];
bool rotate;
- pthread_mutex_t mutex;
- pthread_cond_t cond;
+ struct mutex mutex;
+ struct cond cond;
} qe;
};