diff options
Diffstat (limited to 'tools/perf/util')
78 files changed, 2087 insertions, 683 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 485e1a343165..e315ecaec323 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -118,6 +118,8 @@ perf-$(CONFIG_AUXTRACE) += intel-pt.o perf-$(CONFIG_AUXTRACE) += intel-bts.o perf-$(CONFIG_AUXTRACE) += arm-spe.o perf-$(CONFIG_AUXTRACE) += arm-spe-decoder/ +perf-$(CONFIG_AUXTRACE) += hisi-ptt.o +perf-$(CONFIG_AUXTRACE) += hisi-ptt-decoder/ perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o ifdef CONFIG_LIBOPENCSD @@ -143,6 +145,7 @@ perf-y += branch.o perf-y += mem2node.o perf-y += clockid.o perf-y += list_sort.o +perf-y += mutex.o perf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf_map.o diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index 0ee5af529238..3cc42821d9b3 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -11,7 +11,8 @@ LF=' ' # -# Always try first to get the version from the kernel Makefile +# Use version from kernel Makefile unless not in a git repository and +# PERF-VERSION-FILE exists # CID= TAG= @@ -19,9 +20,14 @@ if test -d ../../.git -o -f ../../.git then TAG=$(MAKEFLAGS= make -sC ../.. kernelversion) CID=$(git log -1 --abbrev=12 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID" -else +elif test -f ../../PERF-VERSION-FILE +then TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g') fi +if test -z "$TAG" +then + TAG=$(MAKEFLAGS= make -sC ../.. kernelversion) +fi VN="$TAG$CID" if test -n "$CID" diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 2c6a485c3de5..db475e44f42f 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -35,7 +35,6 @@ #include "arch/common.h" #include "namespaces.h" #include <regex.h> -#include <pthread.h> #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/string.h> @@ -821,7 +820,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); - pthread_mutex_lock(¬es->lock); + mutex_lock(¬es->lock); if (notes->src != NULL) { memset(notes->src->histograms, 0, notes->src->nr_histograms * notes->src->sizeof_sym_hist); @@ -829,7 +828,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym) memset(notes->src->cycles_hist, 0, symbol__size(sym) * sizeof(struct cyc_hist)); } - pthread_mutex_unlock(¬es->lock); + mutex_unlock(¬es->lock); } static int __symbol__account_cycles(struct cyc_hist *ch, @@ -1086,7 +1085,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size) notes->hit_insn = 0; notes->cover_insn = 0; - pthread_mutex_lock(¬es->lock); + mutex_lock(¬es->lock); for (offset = size - 1; offset >= 0; --offset) { struct cyc_hist *ch; @@ -1105,7 +1104,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size) notes->have_cycles = true; } } - pthread_mutex_unlock(¬es->lock); + mutex_unlock(¬es->lock); } int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, @@ -1258,13 +1257,13 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r void annotation__init(struct annotation *notes) { - pthread_mutex_init(¬es->lock, NULL); + mutex_init(¬es->lock); } void annotation__exit(struct annotation *notes) { annotated_source__delete(notes->src); - pthread_mutex_destroy(¬es->lock); + mutex_destroy(¬es->lock); } static void annotation_line__add(struct annotation_line *al, struct list_head *head) @@ -1698,6 +1697,7 @@ fallback: */ __symbol__join_symfs(filename, filename_size, dso->long_name); + mutex_lock(&dso->lock); if (access(filename, R_OK) && errno == ENOENT && dso->nsinfo) { char *new_name = filename_with_chroot(dso->nsinfo->pid, filename); @@ -1706,6 +1706,7 @@ fallback: free(new_name); } } + mutex_unlock(&dso->lock); } free(build_id_path); @@ -2238,7 +2239,10 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, } args.ms = *ms; - notes->start = map__rip_2objdump(ms->map, sym->start); + if (notes->options && notes->options->full_addr) + notes->start = map__objdump_2mem(ms->map, ms->sym->start); + else + notes->start = map__rip_2objdump(ms->map, ms->sym->start); return symbol__disassemble(sym, &args); } @@ -2761,6 +2765,8 @@ void annotation__update_column_widths(struct annotation *notes) { if (notes->options->use_offset) notes->widths.target = notes->widths.min_addr; + else if (notes->options->full_addr) + notes->widths.target = BITS_PER_LONG / 4; else notes->widths.target = notes->widths.max_addr; @@ -2770,6 +2776,18 @@ void annotation__update_column_widths(struct annotation *notes) notes->widths.addr += notes->widths.jumps + 1; } +void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms) +{ + notes->options->full_addr = !notes->options->full_addr; + + if (notes->options->full_addr) + notes->start = map__objdump_2mem(ms->map, ms->sym->start); + else + notes->start = map__rip_2objdump(ms->map, ms->sym->start); + + annotation__update_column_widths(notes); +} + static void annotation__calc_lines(struct annotation *notes, struct map *map, struct rb_root *root, struct annotation_options *opts) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 986f2bbe4870..8934072c39e6 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -8,9 +8,9 @@ #include <linux/types.h> #include <linux/list.h> #include <linux/rbtree.h> -#include <pthread.h> #include <asm/bug.h> #include "symbol_conf.h" +#include "mutex.h" #include "spark.h" struct hist_browser_timer; @@ -88,7 +88,8 @@ struct annotation_options { show_nr_jumps, show_minmax_cycle, show_asm_raw, - annotate_src; + annotate_src, + full_addr; u8 offset_level; int min_pcnt; int max_lines; @@ -273,7 +274,7 @@ struct annotated_source { }; struct annotation { - pthread_mutex_t lock; + struct mutex lock; u64 max_coverage; u64 start; u64 hit_cycles; @@ -325,6 +326,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size); void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym); void annotation__update_column_widths(struct annotation *notes); void annotation__init_column_widths(struct annotation *notes, struct symbol *sym); +void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms); static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx) { diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 6edab8a16de6..46ada5ec3f9a 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -26,6 +26,7 @@ #include <linux/list.h> #include <linux/zalloc.h> +#include "config.h" #include "evlist.h" #include "dso.h" #include "map.h" @@ -51,6 +52,7 @@ #include "intel-pt.h" #include "intel-bts.h" #include "arm-spe.h" +#include "hisi-ptt.h" #include "s390-cpumsf.h" #include "util/mmap.h" @@ -1319,6 +1321,9 @@ int perf_event__process_auxtrace_info(struct perf_session *session, case PERF_AUXTRACE_S390_CPUMSF: err = s390_cpumsf_process_auxtrace_info(event, session); break; + case PERF_AUXTRACE_HISI_PTT: + err = hisi_ptt_process_auxtrace_info(event, session); + break; case PERF_AUXTRACE_UNKNOWN: default: return -EINVAL; @@ -1434,6 +1439,16 @@ static int get_flags(const char **ptr, unsigned int *plus_flags, unsigned int *m } } +#define ITRACE_DFLT_LOG_ON_ERROR_SZ 16384 + +static unsigned int itrace_log_on_error_size(void) +{ + unsigned int sz = 0; + + perf_config_scan("itrace.debug-log-buffer-size", "%u", &sz); + return sz ?: ITRACE_DFLT_LOG_ON_ERROR_SZ; +} + /* * Please check tools/perf/Documentation/perf-script.txt for information * about the options parsed here, which is introduced after this cset, @@ -1532,6 +1547,8 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, if (get_flags(&p, &synth_opts->log_plus_flags, &synth_opts->log_minus_flags)) goto out_err; + if (synth_opts->log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR) + synth_opts->log_on_error_size = itrace_log_on_error_size(); break; case 'c': synth_opts->branches = true; @@ -2308,11 +2325,19 @@ struct sym_args { bool near; }; +static bool kern_sym_name_match(const char *kname, const char *name) +{ + size_t n = strlen(name); + + return !strcmp(kname, name) || + (!strncmp(kname, name, n) && kname[n] == '\t'); +} + static bool kern_sym_match(struct sym_args *args, const char *name, char type) { /* A function with the same name, and global or the n'th found or any */ return kallsyms__is_function(type) && - !strcmp(name, args->name) && + kern_sym_name_match(name, args->name) && ((args->global && isupper(type)) || (args->selected && ++(args->cnt) == args->idx) || (!args->global && !args->selected)); diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 6a4fbfd34c6b..6a0f9b98f059 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -48,6 +48,7 @@ enum auxtrace_type { PERF_AUXTRACE_CS_ETM, PERF_AUXTRACE_ARM_SPE, PERF_AUXTRACE_S390_CPUMSF, + PERF_AUXTRACE_HISI_PTT, }; enum itrace_period_type { @@ -60,6 +61,7 @@ enum itrace_period_type { #define AUXTRACE_ERR_FLG_DATA_LOST (1 << ('l' - 'a')) #define AUXTRACE_LOG_FLG_ALL_PERF_EVTS (1 << ('a' - 'a')) +#define AUXTRACE_LOG_FLG_ON_ERROR (1 << ('e' - 'a')) #define AUXTRACE_LOG_FLG_USE_STDOUT (1 << ('o' - 'a')) /** @@ -110,6 +112,7 @@ enum itrace_period_type { * @log_plus_flags: flags to affect what is logged * @log_minus_flags: flags to affect what is logged * @quick: quicker (less detailed) decoding + * @log_on_error_size: size of log to keep for outputting log only on errors */ struct itrace_synth_opts { bool set; @@ -155,6 +158,7 @@ struct itrace_synth_opts { unsigned int log_plus_flags; unsigned int log_minus_flags; unsigned int quick; + unsigned int log_on_error_size; }; /** diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index eee64ddb766d..cc7c1f90cf62 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -36,6 +36,11 @@ struct btf *btf__load_from_kernel_by_id(__u32 id) #endif #ifndef HAVE_LIBBPF_BPF_PROG_LOAD +LIBBPF_API int bpf_load_program(enum bpf_prog_type type, + const struct bpf_insn *insns, size_t insns_cnt, + const char *license, __u32 kern_version, + char *log_buf, size_t log_buf_sz); + int bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name __maybe_unused, const char *license, diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index 144a8a24cc69..1bcbd4fb6c66 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -4,7 +4,6 @@ #include <linux/compiler.h> #include <linux/rbtree.h> -#include <pthread.h> #include <api/fd/array.h> #include <stdio.h> diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index e2052f4fed33..f4adeccdbbcb 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -27,11 +27,33 @@ #include "util.h" #include "llvm-utils.h" #include "c++/clang-c.h" -#include "hashmap.h" +#ifdef HAVE_LIBBPF_SUPPORT +#include <bpf/hashmap.h> +#else +#include "util/hashmap.h" +#endif #include "asm/bug.h" #include <internal/xyarray.h> +#ifndef HAVE_LIBBPF_BPF_PROGRAM__SET_INSNS +int bpf_program__set_insns(struct bpf_program *prog __maybe_unused, + struct bpf_insn *new_insns __maybe_unused, size_t new_insn_cnt __maybe_unused) +{ + pr_err("%s: not support, update libbpf\n", __func__); + return -ENOTSUP; +} + +int libbpf_register_prog_handler(const char *sec __maybe_unused, + enum bpf_prog_type prog_type __maybe_unused, + enum bpf_attach_type exp_attach_type __maybe_unused, + const struct libbpf_prog_handler_opts *opts __maybe_unused) +{ + pr_err("%s: not support, update libbpf\n", __func__); + return -ENOTSUP; +} +#endif + /* temporarily disable libbpf deprecation warnings */ #pragma GCC diagnostic ignored "-Wdeprecated-declarations" diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index c591a66733ef..fc4d613cb979 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -8,17 +8,13 @@ #include "util/thread_map.h" #include "util/lock-contention.h" #include <linux/zalloc.h> +#include <linux/string.h> #include <bpf/bpf.h> #include "bpf_skel/lock_contention.skel.h" static struct lock_contention_bpf *skel; -/* should be same as bpf_skel/lock_contention.bpf.c */ -struct lock_contention_key { - s32 stack_id; -}; - struct lock_contention_data { u64 total_time; u64 min_time; @@ -40,6 +36,7 @@ int lock_contention_prepare(struct lock_contention *con) return -1; } + bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64)); bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries); bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries); @@ -91,6 +88,8 @@ int lock_contention_prepare(struct lock_contention *con) bpf_map_update_elem(fd, &pid, &val, BPF_ANY); } + skel->bss->stack_skip = con->stack_skip; + lock_contention_bpf__attach(skel); return 0; } @@ -114,7 +113,7 @@ int lock_contention_read(struct lock_contention *con) struct lock_contention_data data; struct lock_stat *st; struct machine *machine = con->machine; - u64 stack_trace[CONTENTION_STACK_DEPTH]; + u64 stack_trace[con->max_stack]; fd = bpf_map__fd(skel->maps.lock_stat); stack = bpf_map__fd(skel->maps.stacks); @@ -125,7 +124,7 @@ int lock_contention_read(struct lock_contention *con) while (!bpf_map_get_next_key(fd, &prev_key, &key)) { struct map *kmap; struct symbol *sym; - int idx; + int idx = 0; bpf_map_lookup_elem(fd, &key, &data); st = zalloc(sizeof(*st)); @@ -144,10 +143,9 @@ int lock_contention_read(struct lock_contention *con) bpf_map_lookup_elem(stack, &key, stack_trace); - /* skip BPF + lock internal functions */ - idx = CONTENTION_STACK_SKIP; + /* skip lock internal functions */ while (is_lock_function(machine, stack_trace[idx]) && - idx < CONTENTION_STACK_DEPTH - 1) + idx < con->max_stack - 1) idx++; st->addr = stack_trace[idx]; @@ -171,6 +169,14 @@ int lock_contention_read(struct lock_contention *con) return -1; } + if (verbose) { + st->callstack = memdup(stack_trace, sizeof(stack_trace)); + if (st->callstack == NULL) { + free(st); + return -1; + } + } + hlist_add_head(&st->hash_entry, con->result); prev_key = key; } diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c index 9aa8cdd93de4..6a438e0102c5 100644 --- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c @@ -43,6 +43,18 @@ struct { __uint(value_size, sizeof(struct bpf_perf_event_value)); } cgrp_readings SEC(".maps"); +/* new kernel cgroup definition */ +struct cgroup___new { + int level; + struct cgroup *ancestors[]; +} __attribute__((preserve_access_index)); + +/* old kernel cgroup definition */ +struct cgroup___old { + int level; + u64 ancestor_ids[]; +} __attribute__((preserve_access_index)); + const volatile __u32 num_events = 1; const volatile __u32 num_cpus = 1; @@ -50,6 +62,21 @@ int enabled = 0; int use_cgroup_v2 = 0; int perf_subsys_id = -1; +static inline __u64 get_cgroup_v1_ancestor_id(struct cgroup *cgrp, int level) +{ + /* recast pointer to capture new type for compiler */ + struct cgroup___new *cgrp_new = (void *)cgrp; + + if (bpf_core_field_exists(cgrp_new->ancestors)) { + return BPF_CORE_READ(cgrp_new, ancestors[level], kn, id); + } else { + /* recast pointer to capture old type for compiler */ + struct cgroup___old *cgrp_old = (void *)cgrp; + + return BPF_CORE_READ(cgrp_old, ancestor_ids[level]); + } +} + static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) { struct task_struct *p = (void *)bpf_get_current_task(); @@ -77,7 +104,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) break; // convert cgroup-id to a map index - cgrp_id = BPF_CORE_READ(cgrp, ancestor_ids[i]); + cgrp_id = get_cgroup_v1_ancestor_id(cgrp, i); elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id); if (!elem) continue; diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c index 9e8b94eb6320..1bb8628e7c9f 100644 --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c @@ -72,9 +72,10 @@ struct { int enabled; int has_cpu; int has_task; +int stack_skip; /* error stat */ -unsigned long lost; +int lost; static inline int can_record(void) { @@ -117,7 +118,7 @@ int contention_begin(u64 *ctx) pelem->timestamp = bpf_ktime_get_ns(); pelem->lock = (__u64)ctx[0]; pelem->flags = (__u32)ctx[1]; - pelem->stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_FAST_STACK_CMP); + pelem->stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_FAST_STACK_CMP | stack_skip); if (pelem->stack_id < 0) lost++; diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c index a9a909db8cc7..6d38238481d3 100644 --- a/tools/perf/util/branch.c +++ b/tools/perf/util/branch.c @@ -21,7 +21,10 @@ void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags, if (flags->type == PERF_BR_UNKNOWN || from == 0) return; - st->counts[flags->type]++; + if (flags->type == PERF_BR_EXTEND_ABI) + st->new_counts[flags->new_type]++; + else + st->counts[flags->type]++; if (flags->type == PERF_BR_COND) { if (to > from) @@ -36,6 +39,38 @@ void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags, st->cross_4k++; } +const char *branch_new_type_name(int new_type) +{ + const char *branch_new_names[PERF_BR_NEW_MAX] = { + "FAULT_ALGN", + "FAULT_DATA", + "FAULT_INST", +/* + * TODO: This switch should happen on 'session->header.env.arch' + * instead, because an arm64 platform perf recording could be + * opened for analysis on other platforms as well. + */ +#ifdef __aarch64__ + "ARM64_FIQ", + "ARM64_DEBUG_HALT", + "ARM64_DEBUG_EXIT", + "ARM64_DEBUG_INST", + "ARM64_DEBUG_DATA" +#else + "ARCH_1", + "ARCH_2", + "ARCH_3", + "ARCH_4", + "ARCH_5" +#endif + }; + + if (new_type >= 0 && new_type < PERF_BR_NEW_MAX) + return branch_new_names[new_type]; + + return NULL; +} + const char *branch_type_name(int type) { const char *branch_names[PERF_BR_MAX] = { @@ -51,7 +86,10 @@ const char *branch_type_name(int type) "COND_CALL", "COND_RET", "ERET", - "IRQ" + "IRQ", + "SERROR", + "NO_TX", + "", // Needed for PERF_BR_EXTEND_ABI that ends up triggering some compiler warnings about NULL deref }; if (type >= 0 && type < PERF_BR_MAX) @@ -60,6 +98,17 @@ const char *branch_type_name(int type) return NULL; } +const char *get_branch_type(struct branch_entry *e) +{ + if (e->flags.type == PERF_BR_UNKNOWN) + return ""; + + if (e->flags.type == PERF_BR_EXTEND_ABI) + return branch_new_type_name(e->flags.new_type); + + return branch_type_name(e->flags.type); +} + void branch_type_stat_display(FILE *fp, struct branch_type_stat *st) { u64 total = 0; @@ -106,6 +155,15 @@ void branch_type_stat_display(FILE *fp, struct branch_type_stat *st) 100.0 * (double)st->counts[i] / (double)total); } + + for (i = 0; i < PERF_BR_NEW_MAX; i++) { + if (st->new_counts[i] > 0) + fprintf(fp, "\n%8s: %5.1f%%", + branch_new_type_name(i), + 100.0 * + (double)st->new_counts[i] / (double)total); + } + } static int count_str_scnprintf(int idx, const char *str, char *bf, int size) @@ -121,6 +179,9 @@ int branch_type_str(struct branch_type_stat *st, char *bf, int size) for (i = 0; i < PERF_BR_MAX; i++) total += st->counts[i]; + for (i = 0; i < PERF_BR_NEW_MAX; i++) + total += st->new_counts[i]; + if (total == 0) return 0; @@ -138,6 +199,11 @@ int branch_type_str(struct branch_type_stat *st, char *bf, int size) printed += count_str_scnprintf(j++, branch_type_name(i), bf + printed, size - printed); } + for (i = 0; i < PERF_BR_NEW_MAX; i++) { + if (st->new_counts[i] > 0) + printed += count_str_scnprintf(j++, branch_new_type_name(i), bf + printed, size - printed); + } + if (st->cross_4k > 0) printed += count_str_scnprintf(j++, "CROSS_4K", bf + printed, size - printed); diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 17b2ccc61094..f838b23db180 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -24,7 +24,9 @@ struct branch_flags { u64 abort:1; u64 cycles:16; u64 type:4; - u64 reserved:40; + u64 new_type:4; + u64 priv:3; + u64 reserved:33; }; }; }; @@ -72,6 +74,7 @@ static inline struct branch_entry *perf_sample__branch_entries(struct perf_sampl struct branch_type_stat { bool branch_to; u64 counts[PERF_BR_MAX]; + u64 new_counts[PERF_BR_NEW_MAX]; u64 cond_fwd; u64 cond_bwd; u64 cross_4k; @@ -82,6 +85,8 @@ void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags, u64 from, u64 to); const char *branch_type_name(int type); +const char *branch_new_type_name(int new_type); +const char *get_branch_type(struct branch_entry *e); void branch_type_stat_display(FILE *fp, struct branch_type_stat *st); int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index ec18ed5caf3e..a839b30c981b 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -898,11 +898,15 @@ static int filename__read_build_id_ns(const char *filename, static bool dso__build_id_mismatch(struct dso *dso, const char *name) { struct build_id bid; + bool ret = false; - if (filename__read_build_id_ns(name, &bid, dso->nsinfo) < 0) - return false; + mutex_lock(&dso->lock); + if (filename__read_build_id_ns(name, &bid, dso->nsinfo) >= 0) + ret = !dso__build_id_equal(dso, &bid); - return !dso__build_id_equal(dso, &bid); + mutex_unlock(&dso->lock); + + return ret; } static int dso__cache_build_id(struct dso *dso, struct machine *machine, @@ -941,8 +945,10 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine, if (!is_kallsyms && dso__build_id_mismatch(dso, name)) goto out_free; + mutex_lock(&dso->lock); ret = build_id_cache__add_b(&dso->bid, name, dso->nsinfo, is_kallsyms, is_vdso, proper_name, root_dir); + mutex_unlock(&dso->lock); out_free: free(allocated_name); return ret; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 7e663673f79f..a093a15f048f 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1307,24 +1307,16 @@ int callchain_branch_counts(struct callchain_root *root, static int count_pri64_printf(int idx, const char *str, u64 value, char *bf, int bfsize) { - int printed; - - printed = scnprintf(bf, bfsize, "%s%s:%" PRId64 "", (idx) ? " " : " (", str, value); - - return printed; + return scnprintf(bf, bfsize, "%s%s:%" PRId64 "", (idx) ? " " : " (", str, value); } static int count_float_printf(int idx, const char *str, float value, char *bf, int bfsize, float threshold) { - int printed; - if (threshold != 0.0 && value < threshold) return 0; - printed = scnprintf(bf, bfsize, "%s%s:%.1f%%", (idx) ? " " : " (", str, value); - - return printed; + return scnprintf(bf, bfsize, "%s%s:%.1f%%", (idx) ? " " : " (", str, value); } static int branch_to_str(char *bf, int bfsize, diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 60ce5908c664..3f2ae19a1dd4 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -908,3 +908,34 @@ void set_buildid_dir(const char *dir) /* for communicating with external commands */ setenv("PERF_BUILDID_DIR", buildid_dir, 1); } + +struct perf_config_scan_data { + const char *name; + const char *fmt; + va_list args; + int ret; +}; + +static int perf_config_scan_cb(const char *var, const char *value, void *data) +{ + struct perf_config_scan_data *d = data; + + if (!strcmp(var, d->name)) + d->ret = vsscanf(value, d->fmt, d->args); + + return 0; +} + +int perf_config_scan(const char *name, const char *fmt, ...) +{ + struct perf_config_scan_data d = { + .name = name, + .fmt = fmt, + }; + + va_start(d.args, fmt); + perf_config(perf_config_scan_cb, &d); + va_end(d.args); + + return d.ret; +} diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index 2fd77aaff4d2..2e5e808928a5 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -29,6 +29,7 @@ typedef int (*config_fn_t)(const char *, const char *, void *); int perf_default_config(const char *, const char *, void *); int perf_config(config_fn_t fn, void *); +int perf_config_scan(const char *name, const char *fmt, ...) __scanf(2, 3); int perf_config_set(struct perf_config_set *set, config_fn_t fn, void *data); int perf_config_int(int *dest, const char *, const char *); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index ae43fb88f444..8486ca3bec75 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -112,12 +112,39 @@ static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_ } +static struct perf_cpu_map *cpu_map__from_range(const struct perf_record_cpu_map_data *data) +{ + struct perf_cpu_map *map; + unsigned int i = 0; + + map = perf_cpu_map__empty_new(data->range_cpu_data.end_cpu - + data->range_cpu_data.start_cpu + 1 + data->range_cpu_data.any_cpu); + if (!map) + return NULL; + + if (data->range_cpu_data.any_cpu) + map->map[i++].cpu = -1; + + for (int cpu = data->range_cpu_data.start_cpu; cpu <= data->range_cpu_data.end_cpu; + i++, cpu++) + map->map[i].cpu = cpu; + + return map; +} + struct perf_cpu_map *cpu_map__new_data(const struct perf_record_cpu_map_data *data) { - if (data->type == PERF_CPU_MAP__CPUS) + switch (data->type) { + case PERF_CPU_MAP__CPUS: return cpu_map__from_entries(data); - else + case PERF_CPU_MAP__MASK: return cpu_map__from_mask(data); + case PERF_CPU_MAP__RANGE_CPUS: + return cpu_map__from_range(data); + default: + pr_err("cpu_map__new_data unknown type %d\n", data->type); + return NULL; + } } size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp) @@ -202,7 +229,7 @@ static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer) else if (a->core != b->core) return a->core - b->core; else - return a->thread - b->thread; + return a->thread_idx - b->thread_idx; } struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, @@ -640,7 +667,7 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b) { - return a->thread == b->thread && + return a->thread_idx == b->thread_idx && a->node == b->node && a->socket == b->socket && a->die == b->die && @@ -650,7 +677,7 @@ bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a) { - return a->thread == -1 && + return a->thread_idx == -1 && a->node == -1 && a->socket == -1 && a->die == -1 && @@ -661,7 +688,7 @@ bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a) struct aggr_cpu_id aggr_cpu_id__empty(void) { struct aggr_cpu_id ret = { - .thread = -1, + .thread_idx = -1, .node = -1, .socket = -1, .die = -1, diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index fa8a5acdcae1..4a6d029576ee 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -10,7 +10,7 @@ /** Identify where counts are aggregated, -1 implies not to aggregate. */ struct aggr_cpu_id { /** A value in the range 0 to number of threads. */ - int thread; + int thread_idx; /** The numa node X as read from /sys/devices/system/node/nodeX. */ int node; /** diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index d275d843c155..1a3ff6449158 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -157,6 +157,67 @@ void cpu_topology__delete(struct cpu_topology *tp) free(tp); } +bool cpu_topology__smt_on(const struct cpu_topology *topology) +{ + for (u32 i = 0; i < topology->core_cpus_lists; i++) { + const char *cpu_list = topology->core_cpus_list[i]; + + /* + * If there is a need to separate siblings in a core then SMT is + * enabled. + */ + if (strchr(cpu_list, ',') || strchr(cpu_list, '-')) + return true; + } + return false; +} + +bool cpu_topology__core_wide(const struct cpu_topology *topology, + const char *user_requested_cpu_list) +{ + struct perf_cpu_map *user_requested_cpus; + + /* + * If user_requested_cpu_list is empty then all CPUs are recorded and so + * core_wide is true. + */ + if (!user_requested_cpu_list) + return true; + + user_requested_cpus = perf_cpu_map__new(user_requested_cpu_list); + /* Check that every user requested CPU is the complete set of SMT threads on a core. */ + for (u32 i = 0; i < topology->core_cpus_lists; i++) { + const char *core_cpu_list = topology->core_cpus_list[i]; + struct perf_cpu_map *core_cpus = perf_cpu_map__new(core_cpu_list); + struct perf_cpu cpu; + int idx; + bool has_first, first = true; + + perf_cpu_map__for_each_cpu(cpu, idx, core_cpus) { + if (first) { + has_first = perf_cpu_map__has(user_requested_cpus, cpu); + first = false; + } else { + /* + * If the first core CPU is user requested then + * all subsequent CPUs in the core must be user + * requested too. If the first CPU isn't user + * requested then none of the others must be + * too. + */ + if (perf_cpu_map__has(user_requested_cpus, cpu) != has_first) { + perf_cpu_map__put(core_cpus); + perf_cpu_map__put(user_requested_cpus); + return false; + } + } + } + perf_cpu_map__put(core_cpus); + } + perf_cpu_map__put(user_requested_cpus); + return true; +} + static bool has_die_topology(void) { char filename[MAXPATHLEN]; diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h index 854e18f9041e..969e5920a00e 100644 --- a/tools/perf/util/cputopo.h +++ b/tools/perf/util/cputopo.h @@ -58,6 +58,11 @@ struct hybrid_topology { struct cpu_topology *cpu_topology__new(void); void cpu_topology__delete(struct cpu_topology *tp); +/* Determine from the core list whether SMT was enabled. */ +bool cpu_topology__smt_on(const struct cpu_topology *topology); +/* Are the sets of SMT siblings all enabled or all disabled in user_requested_cpus. */ +bool cpu_topology__core_wide(const struct cpu_topology *topology, + const char *user_requested_cpu_list); struct numa_topology *numa_topology__new(void); void numa_topology__delete(struct numa_topology *tp); diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 5ac13958d1bd..f1a14c0ad26d 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -501,6 +501,7 @@ static int __open_dso(struct dso *dso, struct machine *machine) if (!name) return -ENOMEM; + mutex_lock(&dso->lock); if (machine) root_dir = machine->root_dir; @@ -541,6 +542,7 @@ static int __open_dso(struct dso *dso, struct machine *machine) unlink(name); out: + mutex_unlock(&dso->lock); free(name); return fd; } @@ -559,8 +561,11 @@ static int open_dso(struct dso *dso, struct machine *machine) int fd; struct nscookie nsc; - if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE) + if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE) { + mutex_lock(&dso->lock); nsinfo__mountns_enter(dso->nsinfo, &nsc); + mutex_unlock(&dso->lock); + } fd = __open_dso(dso, machine); if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE) nsinfo__mountns_exit(&nsc); @@ -795,7 +800,7 @@ dso_cache__free(struct dso *dso) struct rb_root *root = &dso->data.cache; struct rb_node *next = rb_first(root); - pthread_mutex_lock(&dso->lock); + mutex_lock(&dso->lock); while (next) { struct dso_cache *cache; @@ -804,7 +809,7 @@ dso_cache__free(struct dso *dso) rb_erase(&cache->rb_node, root); free(cache); } - pthread_mutex_unlock(&dso->lock); + mutex_unlock(&dso->lock); } static struct dso_cache *__dso_cache__find(struct dso *dso, u64 offset) @@ -841,7 +846,7 @@ dso_cache__insert(struct dso *dso, struct dso_cache *new) struct dso_cache *cache; u64 offset = new->offset; - pthread_mutex_lock(&dso->lock); + mutex_lock(&dso->lock); while (*p != NULL) { u64 end; @@ -862,7 +867,7 @@ dso_cache__insert(struct dso *dso, struct dso_cache *new) cache = NULL; out: - pthread_mutex_unlock(&dso->lock); + mutex_unlock(&dso->lock); return cache; } @@ -1297,7 +1302,7 @@ struct dso *dso__new_id(const char *name, struct dso_id *id) dso->root = NULL; INIT_LIST_HEAD(&dso->node); INIT_LIST_HEAD(&dso->data.open_entry); - pthread_mutex_init(&dso->lock, NULL); + mutex_init(&dso->lock); refcount_set(&dso->refcnt, 1); } @@ -1336,7 +1341,7 @@ void dso__delete(struct dso *dso) dso__free_a2l(dso); zfree(&dso->symsrc_filename); nsinfo__zput(dso->nsinfo); - pthread_mutex_destroy(&dso->lock); + mutex_destroy(&dso->lock); free(dso); } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 66981c7a9a18..58d94175e714 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -2,7 +2,6 @@ #ifndef __PERF_DSO #define __PERF_DSO -#include <pthread.h> #include <linux/refcount.h> #include <linux/types.h> #include <linux/rbtree.h> @@ -11,6 +10,7 @@ #include <stdio.h> #include <linux/bitops.h> #include "build-id.h" +#include "mutex.h" struct machine; struct map; @@ -145,7 +145,7 @@ struct dso_cache { struct auxtrace_cache; struct dso { - pthread_mutex_t lock; + struct mutex lock; struct list_head node; struct rb_node rb_node; /* rbtree node sorted by long name */ struct rb_root *root; /* root of rbtree that rb_node is in */ diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h index 040ab9d0a803..8fecc9fbaecc 100644 --- a/tools/perf/util/events_stats.h +++ b/tools/perf/util/events_stats.h @@ -47,6 +47,7 @@ struct hists_stats { u64 total_non_filtered_period; u32 nr_samples; u32 nr_non_filtered_samples; + u32 nr_lost_samples; }; void events_stats__inc(struct events_stats *stats, u32 type); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 48167f3941a6..6612b00949e7 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -15,6 +15,7 @@ #include "target.h" #include "evlist.h" #include "evsel.h" +#include "record.h" #include "debug.h" #include "units.h" #include "bpf_counter.h" @@ -40,12 +41,14 @@ #include <sys/ioctl.h> #include <sys/mman.h> #include <sys/prctl.h> +#include <sys/timerfd.h> #include <linux/bitops.h> #include <linux/hash.h> #include <linux/log2.h> #include <linux/err.h> #include <linux/string.h> +#include <linux/time64.h> #include <linux/zalloc.h> #include <perf/evlist.h> #include <perf/evsel.h> @@ -147,6 +150,7 @@ static void evlist__purge(struct evlist *evlist) void evlist__exit(struct evlist *evlist) { + event_enable_timer__exit(&evlist->eet); zfree(&evlist->mmap); zfree(&evlist->overwrite_mmap); perf_evlist__exit(&evlist->core); @@ -264,28 +268,6 @@ int evlist__add_dummy(struct evlist *evlist) return 0; } -static void evlist__add_on_all_cpus(struct evlist *evlist, struct evsel *evsel) -{ - evsel->core.system_wide = true; - - /* - * All CPUs. - * - * Note perf_event_open() does not accept CPUs that are not online, so - * in fact this CPU list will include only all online CPUs. - */ - perf_cpu_map__put(evsel->core.own_cpus); - evsel->core.own_cpus = perf_cpu_map__new(NULL); - perf_cpu_map__put(evsel->core.cpus); - evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus); - - /* No threads */ - perf_thread_map__put(evsel->core.threads); - evsel->core.threads = perf_thread_map__new_dummy(); - - evlist__add(evlist, evsel); -} - struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide) { struct evsel *evsel = evlist__dummy_event(evlist); @@ -298,17 +280,31 @@ struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide) evsel->core.attr.exclude_hv = 1; evsel->core.attr.freq = 0; evsel->core.attr.sample_period = 1; + evsel->core.system_wide = system_wide; evsel->no_aux_samples = true; evsel->name = strdup("dummy:u"); - if (system_wide) - evlist__add_on_all_cpus(evlist, evsel); - else - evlist__add(evlist, evsel); - + evlist__add(evlist, evsel); return evsel; } +struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide) +{ + struct evsel *evsel = evsel__newtp_idx("sched", "sched_switch", 0); + + if (IS_ERR(evsel)) + return evsel; + + evsel__set_sample_bit(evsel, CPU); + evsel__set_sample_bit(evsel, TIME); + + evsel->core.system_wide = system_wide; + evsel->no_aux_samples = true; + + evlist__add(evlist, evsel); + return evsel; +}; + int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) { struct evsel *evsel, *n; @@ -480,7 +476,7 @@ static int evlist__is_enabled(struct evlist *evlist) return false; } -static void __evlist__disable(struct evlist *evlist, char *evsel_name) +static void __evlist__disable(struct evlist *evlist, char *evsel_name, bool excl_dummy) { struct evsel *pos; struct evlist_cpu_iterator evlist_cpu_itr; @@ -502,6 +498,8 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name) continue; if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd) continue; + if (excl_dummy && evsel__is_dummy_event(pos)) + continue; if (pos->immediate) has_imm = true; if (pos->immediate != imm) @@ -518,6 +516,8 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name) continue; if (!evsel__is_group_leader(pos) || !pos->core.fd) continue; + if (excl_dummy && evsel__is_dummy_event(pos)) + continue; pos->disabled = true; } @@ -533,15 +533,20 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name) void evlist__disable(struct evlist *evlist) { - __evlist__disable(evlist, NULL); + __evlist__disable(evlist, NULL, false); +} + +void evlist__disable_non_dummy(struct evlist *evlist) +{ + __evlist__disable(evlist, NULL, true); } void evlist__disable_evsel(struct evlist *evlist, char *evsel_name) { - __evlist__disable(evlist, evsel_name); + __evlist__disable(evlist, evsel_name, false); } -static void __evlist__enable(struct evlist *evlist, char *evsel_name) +static void __evlist__enable(struct evlist *evlist, char *evsel_name, bool excl_dummy) { struct evsel *pos; struct evlist_cpu_iterator evlist_cpu_itr; @@ -560,6 +565,8 @@ static void __evlist__enable(struct evlist *evlist, char *evsel_name) continue; if (!evsel__is_group_leader(pos) || !pos->core.fd) continue; + if (excl_dummy && evsel__is_dummy_event(pos)) + continue; evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx); } affinity__cleanup(affinity); @@ -568,6 +575,8 @@ static void __evlist__enable(struct evlist *evlist, char *evsel_name) continue; if (!evsel__is_group_leader(pos) || !pos->core.fd) continue; + if (excl_dummy && evsel__is_dummy_event(pos)) + continue; pos->disabled = false; } @@ -581,12 +590,17 @@ static void __evlist__enable(struct evlist *evlist, char *evsel_name) void evlist__enable(struct evlist *evlist) { - __evlist__enable(evlist, NULL); + __evlist__enable(evlist, NULL, false); +} + +void evlist__enable_non_dummy(struct evlist *evlist) +{ + __evlist__enable(evlist, NULL, true); } void evlist__enable_evsel(struct evlist *evlist, char *evsel_name) { - __evlist__enable(evlist, evsel_name); + __evlist__enable(evlist, evsel_name, false); } void evlist__toggle_enable(struct evlist *evlist) @@ -608,7 +622,8 @@ int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask) int evlist__add_wakeup_eventfd(struct evlist *evlist, int fd) { return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, - fdarray_flag__nonfilterable); + fdarray_flag__nonfilterable | + fdarray_flag__non_perf_event); } #endif @@ -1897,7 +1912,8 @@ int evlist__initialize_ctlfd(struct evlist *evlist, int fd, int ack) } evlist->ctl_fd.pos = perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, - fdarray_flag__nonfilterable); + fdarray_flag__nonfilterable | + fdarray_flag__non_perf_event); if (evlist->ctl_fd.pos < 0) { evlist->ctl_fd.pos = -1; pr_err("Failed to add ctl fd entry: %m\n"); @@ -2147,20 +2163,234 @@ int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd) return err; } -int evlist__ctlfd_update(struct evlist *evlist, struct pollfd *update) +/** + * struct event_enable_time - perf record -D/--delay single time range. + * @start: start of time range to enable events in milliseconds + * @end: end of time range to enable events in milliseconds + * + * N.B. this structure is also accessed as an array of int. + */ +struct event_enable_time { + int start; + int end; +}; + +static int parse_event_enable_time(const char *str, struct event_enable_time *range, bool first) { - int ctlfd_pos = evlist->ctl_fd.pos; - struct pollfd *entries = evlist->core.pollfd.entries; + const char *fmt = first ? "%u - %u %n" : " , %u - %u %n"; + int ret, start, end, n; - if (!evlist__ctlfd_initialized(evlist)) + ret = sscanf(str, fmt, &start, &end, &n); + if (ret != 2 || end <= start) + return -EINVAL; + if (range) { + range->start = start; + range->end = end; + } + return n; +} + +static ssize_t parse_event_enable_times(const char *str, struct event_enable_time *range) +{ + int incr = !!range; + bool first = true; + ssize_t ret, cnt; + + for (cnt = 0; *str; cnt++) { + ret = parse_event_enable_time(str, range, first); + if (ret < 0) + return ret; + /* Check no overlap */ + if (!first && range && range->start <= range[-1].end) + return -EINVAL; + str += ret; + range += incr; + first = false; + } + return cnt; +} + +/** + * struct event_enable_timer - control structure for perf record -D/--delay. + * @evlist: event list + * @times: time ranges that events are enabled (N.B. this is also accessed as an + * array of int) + * @times_cnt: number of time ranges + * @timerfd: timer file descriptor + * @pollfd_pos: position in @evlist array of file descriptors to poll (fdarray) + * @times_step: current position in (int *)@times)[], + * refer event_enable_timer__process() + * + * Note, this structure is only used when there are time ranges, not when there + * is only an initial delay. + */ +struct event_enable_timer { + struct evlist *evlist; + struct event_enable_time *times; + size_t times_cnt; + int timerfd; + int pollfd_pos; + size_t times_step; +}; + +static int str_to_delay(const char *str) +{ + char *endptr; + long d; + + d = strtol(str, &endptr, 10); + if (*endptr || d > INT_MAX || d < -1) return 0; + return d; +} - if (entries[ctlfd_pos].fd != update->fd || - entries[ctlfd_pos].events != update->events) - return -1; +int evlist__parse_event_enable_time(struct evlist *evlist, struct record_opts *opts, + const char *str, int unset) +{ + enum fdarray_flags flags = fdarray_flag__nonfilterable | fdarray_flag__non_perf_event; + struct event_enable_timer *eet; + ssize_t times_cnt; + ssize_t ret; + int err; + + if (unset) + return 0; + + opts->initial_delay = str_to_delay(str); + if (opts->initial_delay) + return 0; + + ret = parse_event_enable_times(str, NULL); + if (ret < 0) + return ret; + + times_cnt = ret; + if (times_cnt == 0) + return -EINVAL; + + eet = zalloc(sizeof(*eet)); + if (!eet) + return -ENOMEM; + + eet->times = calloc(times_cnt, sizeof(*eet->times)); + if (!eet->times) { + err = -ENOMEM; + goto free_eet; + } + + if (parse_event_enable_times(str, eet->times) != times_cnt) { + err = -EINVAL; + goto free_eet_times; + } + + eet->times_cnt = times_cnt; + + eet->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC); + if (eet->timerfd == -1) { + err = -errno; + pr_err("timerfd_create failed: %s\n", strerror(errno)); + goto free_eet_times; + } + + eet->pollfd_pos = perf_evlist__add_pollfd(&evlist->core, eet->timerfd, NULL, POLLIN, flags); + if (eet->pollfd_pos < 0) { + err = eet->pollfd_pos; + goto close_timerfd; + } + + eet->evlist = evlist; + evlist->eet = eet; + opts->initial_delay = eet->times[0].start; - entries[ctlfd_pos].revents = update->revents; return 0; + +close_timerfd: + close(eet->timerfd); +free_eet_times: + free(eet->times); +free_eet: + free(eet); + return err; +} + +static int event_enable_timer__set_timer(struct event_enable_timer *eet, int ms) +{ + struct itimerspec its = { + .it_value.tv_sec = ms / MSEC_PER_SEC, + .it_value.tv_nsec = (ms % MSEC_PER_SEC) * NSEC_PER_MSEC, + }; + int err = 0; + + if (timerfd_settime(eet->timerfd, 0, &its, NULL) < 0) { + err = -errno; + pr_err("timerfd_settime failed: %s\n", strerror(errno)); + } + return err; +} + +int event_enable_timer__start(struct event_enable_timer *eet) +{ + int ms; + + if (!eet) + return 0; + + ms = eet->times[0].end - eet->times[0].start; + eet->times_step = 1; + + return event_enable_timer__set_timer(eet, ms); +} + +int event_enable_timer__process(struct event_enable_timer *eet) +{ + struct pollfd *entries; + short revents; + + if (!eet) + return 0; + + entries = eet->evlist->core.pollfd.entries; + revents = entries[eet->pollfd_pos].revents; + entries[eet->pollfd_pos].revents = 0; + + if (revents & POLLIN) { + size_t step = eet->times_step; + size_t pos = step / 2; + + if (step & 1) { + evlist__disable_non_dummy(eet->evlist); + pr_info(EVLIST_DISABLED_MSG); + if (pos >= eet->times_cnt - 1) { + /* Disarm timer */ + event_enable_timer__set_timer(eet, 0); + return 1; /* Stop */ + } + } else { + evlist__enable_non_dummy(eet->evlist); + pr_info(EVLIST_ENABLED_MSG); + } + + step += 1; + pos = step / 2; + + if (pos < eet->times_cnt) { + int *times = (int *)eet->times; /* Accessing 'times' as array of int */ + int ms = times[step] - times[step - 1]; + + eet->times_step = step; + return event_enable_timer__set_timer(eet, ms); + } + } + + return 0; +} + +void event_enable_timer__exit(struct event_enable_timer **ep) +{ + if (!ep || !*ep) + return; + free((*ep)->times); + zfree(ep); } struct evsel *evlist__find_evsel(struct evlist *evlist, int idx) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 351ba2887a79..16734c6756b3 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -48,6 +48,8 @@ enum bkw_mmap_state { BKW_MMAP_EMPTY, }; +struct event_enable_timer; + struct evlist { struct perf_evlist core; bool enabled; @@ -79,6 +81,7 @@ struct evlist { int ack; /* ack file descriptor for control commands */ int pos; /* index at evlist core object to check signals */ } ctl_fd; + struct event_enable_timer *eet; }; struct evsel_str_handler { @@ -124,6 +127,7 @@ static inline struct evsel *evlist__add_dummy_on_all_cpus(struct evlist *evlist) { return evlist__add_aux_dummy(evlist, true); } +struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide); int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, evsel__sb_cb_t cb, void *data); @@ -205,6 +209,8 @@ void evlist__enable(struct evlist *evlist); void evlist__toggle_enable(struct evlist *evlist); void evlist__disable_evsel(struct evlist *evlist, char *evsel_name); void evlist__enable_evsel(struct evlist *evlist, char *evsel_name); +void evlist__disable_non_dummy(struct evlist *evlist); +void evlist__enable_non_dummy(struct evlist *evlist); void evlist__set_selected(struct evlist *evlist, struct evsel *evsel); @@ -418,13 +424,18 @@ void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close); int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack); int evlist__finalize_ctlfd(struct evlist *evlist); bool evlist__ctlfd_initialized(struct evlist *evlist); -int evlist__ctlfd_update(struct evlist *evlist, struct pollfd *update); int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd); int evlist__ctlfd_ack(struct evlist *evlist); #define EVLIST_ENABLED_MSG "Events enabled\n" #define EVLIST_DISABLED_MSG "Events disabled\n" +int evlist__parse_event_enable_time(struct evlist *evlist, struct record_opts *opts, + const char *str, int unset); +int event_enable_timer__start(struct event_enable_timer *eet); +void event_enable_timer__exit(struct event_enable_timer **ep); +int event_enable_timer__process(struct event_enable_timer *eet); + struct evsel *evlist__find_evsel(struct evlist *evlist, int idx); int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 18c3eb864d55..76605fde3507 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -46,7 +46,11 @@ #include "string2.h" #include "memswap.h" #include "util.h" -#include "hashmap.h" +#ifdef HAVE_LIBBPF_SUPPORT +#include <bpf/hashmap.h> +#else +#include "util/hashmap.h" +#endif #include "pmu-hybrid.h" #include "off_cpu.h" #include "../perf-sys.h" @@ -1157,6 +1161,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1; attr->inherit = !opts->no_inherit; attr->write_backward = opts->overwrite ? 1 : 0; + attr->read_format = PERF_FORMAT_LOST; evsel__set_sample_bit(evsel, IP); evsel__set_sample_bit(evsel, TID); @@ -1808,7 +1813,7 @@ static struct perf_thread_map *empty_thread_map; static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { - int nthreads; + int nthreads = perf_thread_map__nr(threads); if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) || (perf_missing_features.aux_output && evsel->core.attr.aux_output)) @@ -1834,11 +1839,6 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, threads = empty_thread_map; } - if (evsel->core.system_wide) - nthreads = 1; - else - nthreads = threads->nr; - if (evsel->core.fd == NULL && perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0) return -ENOMEM; @@ -1852,6 +1852,8 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, static void evsel__disable_missing_features(struct evsel *evsel) { + if (perf_missing_features.read_lost) + evsel->core.attr.read_format &= ~PERF_FORMAT_LOST; if (perf_missing_features.weight_struct) { evsel__set_sample_bit(evsel, WEIGHT); evsel__reset_sample_bit(evsel, WEIGHT_STRUCT); @@ -1903,7 +1905,12 @@ bool evsel__detect_missing_features(struct evsel *evsel) * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.weight_struct && + if (!perf_missing_features.read_lost && + (evsel->core.attr.read_format & PERF_FORMAT_LOST)) { + perf_missing_features.read_lost = true; + pr_debug2("switching off PERF_FORMAT_LOST support\n"); + return true; + } else if (!perf_missing_features.weight_struct && (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) { perf_missing_features.weight_struct = true; pr_debug2("switching off weight struct support\n"); @@ -2049,10 +2056,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, if (threads == NULL) threads = empty_thread_map; - if (evsel->core.system_wide) - nthreads = 1; - else - nthreads = threads->nr; + nthreads = perf_thread_map__nr(threads); if (evsel->cgrp) pid = evsel->cgrp->fd; @@ -2077,6 +2081,7 @@ retry_open: test_attr__ready(); + /* Debug message used by test scripts */ pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags); @@ -2102,6 +2107,7 @@ retry_open: fd, group_fd, evsel->open_flags); } + /* Debug message used by test scripts */ pr_debug2_peo(" = %d\n", fd); if (evsel->bpf_fd >= 0) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index d927713b513e..989865e16aad 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -188,6 +188,7 @@ struct perf_missing_features { bool data_page_size; bool code_page_size; bool weight_struct; + bool read_lost; }; extern struct perf_missing_features perf_missing_features; diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index c15a9852fa41..aaacf514dc09 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -182,7 +182,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) { struct expr_id_data *data_ptr = NULL, *old_data = NULL; char *old_key = NULL; - char *name, *p; + char *name; int ret; data_ptr = zalloc(sizeof(*data_ptr)); @@ -196,15 +196,6 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) } /* - * The jevents tool converts all metric expressions - * to lowercase, including metric references, hence - * we need to add lowercase name for metric, so it's - * properly found. - */ - for (p = name; *p; p++) - *p = tolower(*p); - - /* * Intentionally passing just const char pointers, * originally from 'struct pmu_event' object. * We don't need to change them, so there's no @@ -310,7 +301,9 @@ struct expr_parse_ctx *expr__ctx_new(void) free(ctx); return NULL; } - ctx->runtime = 0; + ctx->sctx.user_requested_cpu_list = NULL; + ctx->sctx.runtime = 0; + ctx->sctx.system_wide = false; return ctx; } @@ -332,6 +325,10 @@ void expr__ctx_free(struct expr_parse_ctx *ctx) struct hashmap_entry *cur; size_t bkt; + if (!ctx) + return; + + free(ctx->sctx.user_requested_cpu_list); hashmap__for_each_entry(ctx->ids, cur, bkt) { free((char *)cur->key); free(cur->value); @@ -344,16 +341,13 @@ static int __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, bool compute_ids) { - struct expr_scanner_ctx scanner_ctx = { - .runtime = ctx->runtime, - }; YY_BUFFER_STATE buffer; void *scanner; int ret; pr_debug2("parsing metric: %s\n", expr); - ret = expr_lex_init_extra(&scanner_ctx, &scanner); + ret = expr_lex_init_extra(&ctx->sctx, &scanner); if (ret) return ret; @@ -410,16 +404,11 @@ double arch_get_tsc_freq(void) } #endif -double expr__get_literal(const char *literal) +double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx) { static struct cpu_topology *topology; double result = NAN; - if (!strcasecmp("#smt_on", literal)) { - result = smt_on() > 0 ? 1.0 : 0.0; - goto out; - } - if (!strcmp("#num_cpus", literal)) { result = cpu__max_present_cpu().cpu; goto out; @@ -443,6 +432,15 @@ double expr__get_literal(const char *literal) goto out; } } + if (!strcasecmp("#smt_on", literal)) { + result = smt_on(topology) ? 1.0 : 0.0; + goto out; + } + if (!strcmp("#core_wide", literal)) { + result = core_wide(ctx->system_wide, ctx->user_requested_cpu_list, topology) + ? 1.0 : 0.0; + goto out; + } if (!strcmp("#num_packages", literal)) { result = topology->package_cpus_lists; goto out; diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index bd2116983bbb..d6c1668dc1a0 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -2,28 +2,27 @@ #ifndef PARSE_CTX_H #define PARSE_CTX_H 1 -// There are fixes that need to land upstream before we can use libbpf's headers, -// for now use our copy unconditionally, since the data structures at this point -// are exactly the same, no problem. -//#ifdef HAVE_LIBBPF_SUPPORT -//#include <bpf/hashmap.h> -//#else +#ifdef HAVE_LIBBPF_SUPPORT +#include <bpf/hashmap.h> +#else #include "util/hashmap.h" -//#endif +#endif struct metric_ref; +struct expr_scanner_ctx { + char *user_requested_cpu_list; + int runtime; + bool system_wide; +}; + struct expr_parse_ctx { struct hashmap *ids; - int runtime; + struct expr_scanner_ctx sctx; }; struct expr_id_data; -struct expr_scanner_ctx { - int runtime; -}; - struct hashmap *ids__new(void); void ids__free(struct hashmap *ids); int ids__insert(struct hashmap *ids, const char *id); @@ -58,6 +57,6 @@ int expr__find_ids(const char *expr, const char *one, double expr_id_data__value(const struct expr_id_data *data); double expr_id_data__source_count(const struct expr_id_data *data); -double expr__get_literal(const char *literal); +double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx); #endif diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index 4dc8edbfd9ce..0168a9637330 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -79,11 +79,11 @@ static int str(yyscan_t scanner, int token, int runtime) return token; } -static int literal(yyscan_t scanner) +static int literal(yyscan_t scanner, const struct expr_scanner_ctx *sctx) { YYSTYPE *yylval = expr_get_lval(scanner); - yylval->num = expr__get_literal(expr_get_text(scanner)); + yylval->num = expr__get_literal(expr_get_text(scanner), sctx); if (isnan(yylval->num)) return EXPR_ERROR; @@ -108,7 +108,7 @@ min { return MIN; } if { return IF; } else { return ELSE; } source_count { return SOURCE_COUNT; } -{literal} { return literal(yyscanner); } +{literal} { return literal(yyscanner, sctx); } {number} { return value(yyscanner); } {symbol} { return str(yyscanner, ID, sctx->runtime); } "|" { return '|'; } diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index a30b825adb7b..635e562350c5 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -156,7 +156,7 @@ start: if_expr } ; -if_expr: expr IF expr ELSE expr +if_expr: expr IF expr ELSE if_expr { if (fpclassify($3.val) == FP_ZERO) { /* diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index d81b54563e96..fefc72066c4e 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -345,6 +345,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, eh_frame_base_offset); if (retval) goto error; + retval = -1; } /* diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index b5c909546e3f..6af062d1c452 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -2,6 +2,8 @@ #ifndef __GENELF_H__ #define __GENELF_H__ +#include <linux/math.h> + /* genelf.c */ int jit_write_elf(int fd, uint64_t code_addr, const char *sym, const void *code, int csize, void *debug, int nr_debug_entries, @@ -76,6 +78,6 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent #endif /* The .text section is directly after the ELF header */ -#define GEN_ELF_TEXT_OFFSET sizeof(Elf_Ehdr) +#define GEN_ELF_TEXT_OFFSET round_up(sizeof(Elf_Ehdr) + sizeof(Elf_Phdr), 16) #endif diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index c30c29c51410..98dfaf84bd13 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -4295,8 +4295,6 @@ out: size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) { struct perf_record_event_update *ev = &event->event_update; - struct perf_record_event_update_scale *ev_scale; - struct perf_record_event_update_cpus *ev_cpus; struct perf_cpu_map *map; size_t ret; @@ -4304,20 +4302,18 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) switch (ev->type) { case PERF_EVENT_UPDATE__SCALE: - ev_scale = (struct perf_record_event_update_scale *)ev->data; - ret += fprintf(fp, "... scale: %f\n", ev_scale->scale); + ret += fprintf(fp, "... scale: %f\n", ev->scale.scale); break; case PERF_EVENT_UPDATE__UNIT: - ret += fprintf(fp, "... unit: %s\n", ev->data); + ret += fprintf(fp, "... unit: %s\n", ev->unit); break; case PERF_EVENT_UPDATE__NAME: - ret += fprintf(fp, "... name: %s\n", ev->data); + ret += fprintf(fp, "... name: %s\n", ev->name); break; case PERF_EVENT_UPDATE__CPUS: - ev_cpus = (struct perf_record_event_update_cpus *)ev->data; ret += fprintf(fp, "... "); - map = cpu_map__new_data(&ev_cpus->cpus); + map = cpu_map__new_data(&ev->cpus.cpus); if (map) ret += cpu_map__fprintf(map, fp); else @@ -4374,8 +4370,6 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, struct evlist **pevlist) { struct perf_record_event_update *ev = &event->event_update; - struct perf_record_event_update_scale *ev_scale; - struct perf_record_event_update_cpus *ev_cpus; struct evlist *evlist; struct evsel *evsel; struct perf_cpu_map *map; @@ -4395,19 +4389,17 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, switch (ev->type) { case PERF_EVENT_UPDATE__UNIT: free((char *)evsel->unit); - evsel->unit = strdup(ev->data); + evsel->unit = strdup(ev->unit); break; case PERF_EVENT_UPDATE__NAME: free(evsel->name); - evsel->name = strdup(ev->data); + evsel->name = strdup(ev->name); break; case PERF_EVENT_UPDATE__SCALE: - ev_scale = (struct perf_record_event_update_scale *)ev->data; - evsel->scale = ev_scale->scale; + evsel->scale = ev->scale.scale; break; case PERF_EVENT_UPDATE__CPUS: - ev_cpus = (struct perf_record_event_update_cpus *)ev->data; - map = cpu_map__new_data(&ev_cpus->cpus); + map = cpu_map__new_data(&ev->cpus.cpus); if (map) { perf_cpu_map__put(evsel->core.own_cpus); evsel->core.own_cpus = map; diff --git a/tools/perf/util/hisi-ptt-decoder/Build b/tools/perf/util/hisi-ptt-decoder/Build new file mode 100644 index 000000000000..db3db8b75033 --- /dev/null +++ b/tools/perf/util/hisi-ptt-decoder/Build @@ -0,0 +1 @@ +perf-$(CONFIG_AUXTRACE) += hisi-ptt-pkt-decoder.o diff --git a/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.c b/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.c new file mode 100644 index 000000000000..a17c423a526d --- /dev/null +++ b/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * HiSilicon PCIe Trace and Tuning (PTT) support + * Copyright (c) 2022 HiSilicon Technologies Co., Ltd. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <endian.h> +#include <byteswap.h> +#include <linux/bitops.h> +#include <stdarg.h> + +#include "../color.h" +#include "hisi-ptt-pkt-decoder.h" + +/* + * For 8DW format, the bit[31:11] of DW0 is always 0x1fffff, which can be + * used to distinguish the data format. + * 8DW format is like: + * bits [ 31:11 ][ 10:0 ] + * |---------------------------------------|-------------------| + * DW0 [ 0x1fffff ][ Reserved (0x7ff) ] + * DW1 [ Prefix ] + * DW2 [ Header DW0 ] + * DW3 [ Header DW1 ] + * DW4 [ Header DW2 ] + * DW5 [ Header DW3 ] + * DW6 [ Reserved (0x0) ] + * DW7 [ Time ] + * + * 4DW format is like: + * bits [31:30] [ 29:25 ][24][23][22][21][ 20:11 ][ 10:0 ] + * |-----|---------|---|---|---|---|-------------|-------------| + * DW0 [ Fmt ][ Type ][T9][T8][TH][SO][ Length ][ Time ] + * DW1 [ Header DW1 ] + * DW2 [ Header DW2 ] + * DW3 [ Header DW3 ] + */ + +enum hisi_ptt_8dw_pkt_field_type { + HISI_PTT_8DW_CHK_AND_RSV0, + HISI_PTT_8DW_PREFIX, + HISI_PTT_8DW_HEAD0, + HISI_PTT_8DW_HEAD1, + HISI_PTT_8DW_HEAD2, + HISI_PTT_8DW_HEAD3, + HISI_PTT_8DW_RSV1, + HISI_PTT_8DW_TIME, + HISI_PTT_8DW_TYPE_MAX +}; + +enum hisi_ptt_4dw_pkt_field_type { + HISI_PTT_4DW_HEAD1, + HISI_PTT_4DW_HEAD2, + HISI_PTT_4DW_HEAD3, + HISI_PTT_4DW_TYPE_MAX +}; + +static const char * const hisi_ptt_8dw_pkt_field_name[] = { + [HISI_PTT_8DW_PREFIX] = "Prefix", + [HISI_PTT_8DW_HEAD0] = "Header DW0", + [HISI_PTT_8DW_HEAD1] = "Header DW1", + [HISI_PTT_8DW_HEAD2] = "Header DW2", + [HISI_PTT_8DW_HEAD3] = "Header DW3", + [HISI_PTT_8DW_TIME] = "Time" +}; + +static const char * const hisi_ptt_4dw_pkt_field_name[] = { + [HISI_PTT_4DW_HEAD1] = "Header DW1", + [HISI_PTT_4DW_HEAD2] = "Header DW2", + [HISI_PTT_4DW_HEAD3] = "Header DW3", +}; + +union hisi_ptt_4dw { + struct { + uint32_t format : 2; + uint32_t type : 5; + uint32_t t9 : 1; + uint32_t t8 : 1; + uint32_t th : 1; + uint32_t so : 1; + uint32_t len : 10; + uint32_t time : 11; + }; + uint32_t value; +}; + +static void hisi_ptt_print_pkt(const unsigned char *buf, int pos, const char *desc) +{ + const char *color = PERF_COLOR_BLUE; + int i; + + printf("."); + color_fprintf(stdout, color, " %08x: ", pos); + for (i = 0; i < HISI_PTT_FIELD_LENTH; i++) + color_fprintf(stdout, color, "%02x ", buf[pos + i]); + for (i = 0; i < HISI_PTT_MAX_SPACE_LEN; i++) + color_fprintf(stdout, color, " "); + color_fprintf(stdout, color, " %s\n", desc); +} + +static int hisi_ptt_8dw_kpt_desc(const unsigned char *buf, int pos) +{ + int i; + + for (i = 0; i < HISI_PTT_8DW_TYPE_MAX; i++) { + /* Do not show 8DW check field and reserved fields */ + if (i == HISI_PTT_8DW_CHK_AND_RSV0 || i == HISI_PTT_8DW_RSV1) { + pos += HISI_PTT_FIELD_LENTH; + continue; + } + + hisi_ptt_print_pkt(buf, pos, hisi_ptt_8dw_pkt_field_name[i]); + pos += HISI_PTT_FIELD_LENTH; + } + + return hisi_ptt_pkt_size[HISI_PTT_8DW_PKT]; +} + +static void hisi_ptt_4dw_print_dw0(const unsigned char *buf, int pos) +{ + const char *color = PERF_COLOR_BLUE; + union hisi_ptt_4dw dw0; + int i; + + dw0.value = *(uint32_t *)(buf + pos); + printf("."); + color_fprintf(stdout, color, " %08x: ", pos); + for (i = 0; i < HISI_PTT_FIELD_LENTH; i++) + color_fprintf(stdout, color, "%02x ", buf[pos + i]); + for (i = 0; i < HISI_PTT_MAX_SPACE_LEN; i++) + color_fprintf(stdout, color, " "); + + color_fprintf(stdout, color, + " %s %x %s %x %s %x %s %x %s %x %s %x %s %x %s %x\n", + "Format", dw0.format, "Type", dw0.type, "T9", dw0.t9, + "T8", dw0.t8, "TH", dw0.th, "SO", dw0.so, "Length", + dw0.len, "Time", dw0.time); +} + +static int hisi_ptt_4dw_kpt_desc(const unsigned char *buf, int pos) +{ + int i; + + hisi_ptt_4dw_print_dw0(buf, pos); + pos += HISI_PTT_FIELD_LENTH; + + for (i = 0; i < HISI_PTT_4DW_TYPE_MAX; i++) { + hisi_ptt_print_pkt(buf, pos, hisi_ptt_4dw_pkt_field_name[i]); + pos += HISI_PTT_FIELD_LENTH; + } + + return hisi_ptt_pkt_size[HISI_PTT_4DW_PKT]; +} + +int hisi_ptt_pkt_desc(const unsigned char *buf, int pos, enum hisi_ptt_pkt_type type) +{ + if (type == HISI_PTT_8DW_PKT) + return hisi_ptt_8dw_kpt_desc(buf, pos); + + return hisi_ptt_4dw_kpt_desc(buf, pos); +} diff --git a/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.h b/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.h new file mode 100644 index 000000000000..e78f1b5bc836 --- /dev/null +++ b/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * HiSilicon PCIe Trace and Tuning (PTT) support + * Copyright (c) 2022 HiSilicon Technologies Co., Ltd. + */ + +#ifndef INCLUDE__HISI_PTT_PKT_DECODER_H__ +#define INCLUDE__HISI_PTT_PKT_DECODER_H__ + +#include <stddef.h> +#include <stdint.h> + +#define HISI_PTT_8DW_CHECK_MASK GENMASK(31, 11) +#define HISI_PTT_IS_8DW_PKT GENMASK(31, 11) +#define HISI_PTT_MAX_SPACE_LEN 10 +#define HISI_PTT_FIELD_LENTH 4 + +enum hisi_ptt_pkt_type { + HISI_PTT_4DW_PKT, + HISI_PTT_8DW_PKT, + HISI_PTT_PKT_MAX +}; + +static int hisi_ptt_pkt_size[] = { + [HISI_PTT_4DW_PKT] = 16, + [HISI_PTT_8DW_PKT] = 32, +}; + +int hisi_ptt_pkt_desc(const unsigned char *buf, int pos, enum hisi_ptt_pkt_type type); + +#endif diff --git a/tools/perf/util/hisi-ptt.c b/tools/perf/util/hisi-ptt.c new file mode 100644 index 000000000000..45b614bb73bf --- /dev/null +++ b/tools/perf/util/hisi-ptt.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * HiSilicon PCIe Trace and Tuning (PTT) support + * Copyright (c) 2022 HiSilicon Technologies Co., Ltd. + */ + +#include <byteswap.h> +#include <endian.h> +#include <errno.h> +#include <inttypes.h> +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/log2.h> +#include <linux/types.h> +#include <linux/zalloc.h> +#include <stdlib.h> +#include <unistd.h> + +#include "auxtrace.h" +#include "color.h" +#include "debug.h" +#include "evsel.h" +#include "hisi-ptt.h" +#include "hisi-ptt-decoder/hisi-ptt-pkt-decoder.h" +#include "machine.h" +#include "session.h" +#include "tool.h" +#include <internal/lib.h> + +struct hisi_ptt { + struct auxtrace auxtrace; + u32 auxtrace_type; + struct perf_session *session; + struct machine *machine; + u32 pmu_type; +}; + +struct hisi_ptt_queue { + struct hisi_ptt *ptt; + struct auxtrace_buffer *buffer; +}; + +static enum hisi_ptt_pkt_type hisi_ptt_check_packet_type(unsigned char *buf) +{ + uint32_t head = *(uint32_t *)buf; + + if ((HISI_PTT_8DW_CHECK_MASK & head) == HISI_PTT_IS_8DW_PKT) + return HISI_PTT_8DW_PKT; + + return HISI_PTT_4DW_PKT; +} + +static void hisi_ptt_dump(struct hisi_ptt *ptt __maybe_unused, + unsigned char *buf, size_t len) +{ + const char *color = PERF_COLOR_BLUE; + enum hisi_ptt_pkt_type type; + size_t pos = 0; + int pkt_len; + + type = hisi_ptt_check_packet_type(buf); + len = round_down(len, hisi_ptt_pkt_size[type]); + color_fprintf(stdout, color, ". ... HISI PTT data: size %zu bytes\n", + len); + + while (len > 0) { + pkt_len = hisi_ptt_pkt_desc(buf, pos, type); + if (!pkt_len) + color_fprintf(stdout, color, " Bad packet!\n"); + + pos += pkt_len; + len -= pkt_len; + } +} + +static void hisi_ptt_dump_event(struct hisi_ptt *ptt, unsigned char *buf, + size_t len) +{ + printf(".\n"); + + hisi_ptt_dump(ptt, buf, len); +} + +static int hisi_ptt_process_event(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct perf_tool *tool __maybe_unused) +{ + return 0; +} + +static int hisi_ptt_process_auxtrace_event(struct perf_session *session, + union perf_event *event, + struct perf_tool *tool __maybe_unused) +{ + struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt, + auxtrace); + int fd = perf_data__fd(session->data); + int size = event->auxtrace.size; + void *data = malloc(size); + off_t data_offset; + int err; + + if (!data) + return -errno; + + if (perf_data__is_pipe(session->data)) { + data_offset = 0; + } else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } + + err = readn(fd, data, size); + if (err != (ssize_t)size) { + free(data); + return -errno; + } + + if (dump_trace) + hisi_ptt_dump_event(ptt, data, size); + + return 0; +} + +static int hisi_ptt_flush(struct perf_session *session __maybe_unused, + struct perf_tool *tool __maybe_unused) +{ + return 0; +} + +static void hisi_ptt_free_events(struct perf_session *session __maybe_unused) +{ +} + +static void hisi_ptt_free(struct perf_session *session) +{ + struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt, + auxtrace); + + session->auxtrace = NULL; + free(ptt); +} + +static bool hisi_ptt_evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt, auxtrace); + + return evsel->core.attr.type == ptt->pmu_type; +} + +static void hisi_ptt_print_info(__u64 type) +{ + if (!dump_trace) + return; + + fprintf(stdout, " PMU Type %" PRId64 "\n", (s64) type); +} + +int hisi_ptt_process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; + struct hisi_ptt *ptt; + + if (auxtrace_info->header.size < HISI_PTT_AUXTRACE_PRIV_SIZE + + sizeof(struct perf_record_auxtrace_info)) + return -EINVAL; + + ptt = zalloc(sizeof(*ptt)); + if (!ptt) + return -ENOMEM; + + ptt->session = session; + ptt->machine = &session->machines.host; /* No kvm support */ + ptt->auxtrace_type = auxtrace_info->type; + ptt->pmu_type = auxtrace_info->priv[0]; + + ptt->auxtrace.process_event = hisi_ptt_process_event; + ptt->auxtrace.process_auxtrace_event = hisi_ptt_process_auxtrace_event; + ptt->auxtrace.flush_events = hisi_ptt_flush; + ptt->auxtrace.free_events = hisi_ptt_free_events; + ptt->auxtrace.free = hisi_ptt_free; + ptt->auxtrace.evsel_is_auxtrace = hisi_ptt_evsel_is_auxtrace; + session->auxtrace = &ptt->auxtrace; + + hisi_ptt_print_info(auxtrace_info->priv[0]); + + return 0; +} diff --git a/tools/perf/util/hisi-ptt.h b/tools/perf/util/hisi-ptt.h new file mode 100644 index 000000000000..2db9b4056214 --- /dev/null +++ b/tools/perf/util/hisi-ptt.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * HiSilicon PCIe Trace and Tuning (PTT) support + * Copyright (c) 2022 HiSilicon Technologies Co., Ltd. + */ + +#ifndef INCLUDE__PERF_HISI_PTT_H__ +#define INCLUDE__PERF_HISI_PTT_H__ + +#define HISI_PTT_PMU_NAME "hisi_ptt" +#define HISI_PTT_AUXTRACE_PRIV_SIZE sizeof(u64) + +struct auxtrace_record *hisi_ptt_recording_init(int *err, + struct perf_pmu *hisi_ptt_pmu); + +int hisi_ptt_process_auxtrace_info(union perf_event *event, + struct perf_session *session); + +#endif diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 1c085ab56534..17a05e943b44 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -215,6 +215,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13); hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13); hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13); + hists__new_col_len(hists, HISTC_ADDR, BITS_PER_LONG / 4 + 2); if (symbol_conf.nanosecs) hists__new_col_len(hists, HISTC_TIME, 16); @@ -1622,13 +1623,13 @@ struct rb_root_cached *hists__get_rotate_entries_in(struct hists *hists) { struct rb_root_cached *root; - pthread_mutex_lock(&hists->lock); + mutex_lock(&hists->lock); root = hists->entries_in; if (++hists->entries_in > &hists->entries_in_array[1]) hists->entries_in = &hists->entries_in_array[0]; - pthread_mutex_unlock(&hists->lock); + mutex_unlock(&hists->lock); return root; } @@ -2335,6 +2336,11 @@ void hists__inc_nr_samples(struct hists *hists, bool filtered) hists->stats.nr_non_filtered_samples++; } +void hists__inc_nr_lost_samples(struct hists *hists, u32 lost) +{ + hists->stats.nr_lost_samples += lost; +} + static struct hist_entry *hists__add_dummy_entry(struct hists *hists, struct hist_entry *pair) { @@ -2678,12 +2684,16 @@ size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp, evlist__for_each_entry(evlist, pos) { struct hists *hists = evsel__hists(pos); - if (skip_empty && !hists->stats.nr_samples) + if (skip_empty && !hists->stats.nr_samples && !hists->stats.nr_lost_samples) continue; ret += fprintf(fp, "%s stats:\n", evsel__name(pos)); - ret += fprintf(fp, "%16s events: %10d\n", - "SAMPLE", hists->stats.nr_samples); + if (hists->stats.nr_samples) + ret += fprintf(fp, "%16s events: %10d\n", + "SAMPLE", hists->stats.nr_samples); + if (hists->stats.nr_lost_samples) + ret += fprintf(fp, "%16s events: %10d\n", + "LOST_SAMPLES", hists->stats.nr_lost_samples); } return ret; @@ -2805,7 +2815,7 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) hists->entries_in = &hists->entries_in_array[0]; hists->entries_collapsed = RB_ROOT_CACHED; hists->entries = RB_ROOT_CACHED; - pthread_mutex_init(&hists->lock, NULL); + mutex_init(&hists->lock); hists->socket_filter = -1; hists->hpp_list = hpp_list; INIT_LIST_HEAD(&hists->hpp_formats); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 7ed4648d2fc2..ebd8a8f783ee 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -4,10 +4,10 @@ #include <linux/rbtree.h> #include <linux/types.h> -#include <pthread.h> #include "evsel.h" #include "color.h" #include "events_stats.h" +#include "mutex.h" struct hist_entry; struct hist_entry_ops; @@ -79,6 +79,7 @@ enum hist_column { HISTC_GLOBAL_P_STAGE_CYC, HISTC_ADDR_FROM, HISTC_ADDR_TO, + HISTC_ADDR, HISTC_NR_COLS, /* Last entry */ }; @@ -98,7 +99,7 @@ struct hists { const struct dso *dso_filter; const char *uid_filter_str; const char *symbol_filter_str; - pthread_mutex_t lock; + struct mutex lock; struct hists_stats stats; u64 event_stream; u16 col_len[HISTC_NR_COLS]; @@ -201,6 +202,7 @@ void hists__reset_stats(struct hists *hists); void hists__inc_stats(struct hists *hists, struct hist_entry *h); void hists__inc_nr_events(struct hists *hists); void hists__inc_nr_samples(struct hists *hists, bool filtered); +void hists__inc_nr_lost_samples(struct hists *hists, u32 lost); size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, int max_cols, float min_pcnt, FILE *fp, diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index aa0c5179836d..75e2248416f5 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -115,4 +115,17 @@ SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK) #endif +// In the kernel sources (include/linux/cfi_types.h), this has a different +// definition when CONFIG_CFI_CLANG is used, for tools/ just use the !clang +// definition: +#ifndef SYM_TYPED_START +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_START(name, linkage, align) +#endif + +#ifndef SYM_TYPED_FUNC_START +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + #endif /* PERF_LINUX_LINKAGE_H_ */ diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c index 5f5dfc8753f3..ef55d6232cf0 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -5,12 +5,16 @@ */ #include <stdio.h> +#include <stdlib.h> #include <stdint.h> #include <inttypes.h> #include <stdarg.h> #include <stdbool.h> #include <string.h> +#include <linux/zalloc.h> +#include <linux/kernel.h> + #include "intel-pt-log.h" #include "intel-pt-insn-decoder.h" @@ -18,18 +22,33 @@ #define MAX_LOG_NAME 256 +#define DFLT_BUF_SZ (16 * 1024) + +struct log_buf { + char *buf; + size_t buf_sz; + size_t head; + bool wrapped; + FILE *backend; +}; + static FILE *f; static char log_name[MAX_LOG_NAME]; bool intel_pt_enable_logging; +static bool intel_pt_dump_log_on_error; +static unsigned int intel_pt_log_on_error_size; +static struct log_buf log_buf; void *intel_pt_log_fp(void) { return f; } -void intel_pt_log_enable(void) +void intel_pt_log_enable(bool dump_log_on_error, unsigned int log_on_error_size) { intel_pt_enable_logging = true; + intel_pt_dump_log_on_error = dump_log_on_error; + intel_pt_log_on_error_size = log_on_error_size; } void intel_pt_log_disable(void) @@ -74,6 +93,100 @@ static void intel_pt_print_no_data(uint64_t pos, int indent) fprintf(f, " "); } +static ssize_t log_buf__write(void *cookie, const char *buf, size_t size) +{ + struct log_buf *b = cookie; + size_t sz = size; + + if (!b->buf) + return size; + + while (sz) { + size_t space = b->buf_sz - b->head; + size_t n = min(space, sz); + + memcpy(b->buf + b->head, buf, n); + sz -= n; + buf += n; + b->head += n; + if (sz && b->head >= b->buf_sz) { + b->head = 0; + b->wrapped = true; + } + } + return size; +} + +static int log_buf__close(void *cookie) +{ + struct log_buf *b = cookie; + + zfree(&b->buf); + return 0; +} + +static FILE *log_buf__open(struct log_buf *b, FILE *backend, unsigned int sz) +{ + cookie_io_functions_t fns = { + .write = log_buf__write, + .close = log_buf__close, + }; + FILE *file; + + memset(b, 0, sizeof(*b)); + b->buf_sz = sz; + b->buf = malloc(b->buf_sz); + b->backend = backend; + file = fopencookie(b, "a", fns); + if (!file) + zfree(&b->buf); + return file; +} + +static bool remove_first_line(const char **p, size_t *n) +{ + for (; *n && **p != '\n'; ++*p, --*n) + ; + if (*n) { + *p += 1; + *n -= 1; + return true; + } + return false; +} + +static void write_lines(const char *p, size_t n, FILE *fp, bool *remove_first) +{ + if (*remove_first) + *remove_first = !remove_first_line(&p, &n); + fwrite(p, n, 1, fp); +} + +static void log_buf__dump(struct log_buf *b) +{ + bool remove_first = false; + + if (!b->buf) + return; + + fflush(f); /* Could update b->head and b->wrapped */ + fprintf(b->backend, "Dumping debug log buffer\n"); + if (b->wrapped) { + remove_first = true; + write_lines(b->buf + b->head, b->buf_sz - b->head, b->backend, &remove_first); + } + write_lines(b->buf, b->head, b->backend, &remove_first); + fprintf(b->backend, "End of debug log buffer dump\n"); + + b->head = 0; + b->wrapped = false; +} + +void intel_pt_log_dump_buf(void) +{ + log_buf__dump(&log_buf); +} + static int intel_pt_log_open(void) { if (!intel_pt_enable_logging) @@ -86,6 +199,8 @@ static int intel_pt_log_open(void) f = fopen(log_name, "w+"); else f = stdout; + if (f && intel_pt_dump_log_on_error) + f = log_buf__open(&log_buf, f, intel_pt_log_on_error_size); if (!f) { intel_pt_enable_logging = false; return -1; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h index d900aab24b21..354d7d23fc81 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h @@ -14,9 +14,10 @@ struct intel_pt_pkt; void *intel_pt_log_fp(void); -void intel_pt_log_enable(void); +void intel_pt_log_enable(bool dump_log_on_error, unsigned int log_on_error_size); void intel_pt_log_disable(void); void intel_pt_log_set_name(const char *name); +void intel_pt_log_dump_buf(void); void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, uint64_t pos, const unsigned char *buf); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index d5e9fc8106dd..e3548ddef254 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -842,7 +842,8 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, offset, buf, INTEL_PT_INSN_BUF_SZ); if (len <= 0) { - intel_pt_log("ERROR: failed to read at %" PRIu64 " ", offset); + intel_pt_log("ERROR: failed to read at offset %#" PRIx64 " ", + offset); if (intel_pt_enable_logging) dso__fprintf(al.map->dso, intel_pt_log_fp()); return -EINVAL; @@ -2418,6 +2419,8 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, pid_t pid, pid_t tid, u64 ip, u64 timestamp, pid_t machine_pid, int vcpu) { + bool dump_log_on_error = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR; + bool log_on_stdout = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT; union perf_event event; char msg[MAX_AUXTRACE_ERROR_MSG]; int err; @@ -2437,6 +2440,16 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, code, cpu, pid, tid, ip, msg, timestamp, machine_pid, vcpu); + if (intel_pt_enable_logging && !log_on_stdout) { + FILE *fp = intel_pt_log_fp(); + + if (fp) + perf_event__fprintf_auxtrace_error(&event, fp); + } + + if (code != INTEL_PT_ERR_LOST && dump_log_on_error) + intel_pt_log_dump_buf(); + err = perf_session__deliver_synth_event(pt->session, &event, NULL); if (err) pr_err("Intel Processor Trace: failed to deliver error event, error %d\n", @@ -4033,6 +4046,7 @@ static const char * const intel_pt_info_fmts[] = { [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n", + [INTEL_PT_MTC_FREQ_BITS] = " MTC freq bits %#"PRIx64"\n", [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n", [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n", [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n", @@ -4047,8 +4061,12 @@ static void intel_pt_print_info(__u64 *arr, int start, int finish) if (!dump_trace) return; - for (i = start; i <= finish; i++) - fprintf(stdout, intel_pt_info_fmts[i], arr[i]); + for (i = start; i <= finish; i++) { + const char *fmt = intel_pt_info_fmts[i]; + + if (fmt) + fprintf(stdout, fmt, arr[i]); + } } static void intel_pt_print_info_str(const char *name, const char *str) @@ -4271,8 +4289,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event, goto err_delete_thread; } - if (pt->synth_opts.log) - intel_pt_log_enable(); + if (pt->synth_opts.log) { + bool log_on_error = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR; + unsigned int log_on_error_size = pt->synth_opts.log_on_error_size; + + intel_pt_log_enable(log_on_error, log_on_error_size); + } /* Maximum non-turbo ratio is TSC freq / 100 MHz */ if (pt->tc.time_mult) { diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 4e6632203704..0e033278fa12 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -56,13 +56,6 @@ struct jit_buf_desc { char dir[PATH_MAX]; }; -struct debug_line_info { - unsigned long vma; - unsigned int lineno; - /* The filename format is unspecified, absolute path, relative etc. */ - char const filename[]; -}; - struct jit_tool { struct perf_tool tool; struct perf_data output; diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h index 2146efc33396..b8cb8830b7bc 100644 --- a/tools/perf/util/lock-contention.h +++ b/tools/perf/util/lock-contention.h @@ -11,6 +11,7 @@ struct lock_stat { u64 addr; /* address of lockdep_map, used as ID */ char *name; /* for strcpy(), we cannot use const */ + u64 *callstack; unsigned int nr_acquire; unsigned int nr_acquired; @@ -113,7 +114,9 @@ struct lock_contention { struct machine *machine; struct hlist_head *result; unsigned long map_nr_entries; - unsigned long lost; + int lost; + int max_stack; + int stack_skip; }; #ifdef HAVE_BPF_SKEL diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 2a16cae28407..76316e459c3d 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1128,10 +1128,6 @@ static struct dso *machine__get_kernel(struct machine *machine) return kernel; } -struct process_args { - u64 start; -}; - void machine__get_kallsyms_filename(struct machine *machine, char *buf, size_t bufsz) { diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index e0aa4a254583..f3a3d9b3a40d 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -181,7 +181,10 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, if (!(prot & PROT_EXEC)) dso__set_loaded(dso); } + mutex_lock(&dso->lock); + nsinfo__put(dso->nsinfo); dso->nsinfo = nsi; + mutex_unlock(&dso->lock); if (build_id__is_defined(bid)) { dso__set_build_id(dso, bid); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 764883183519..b3a91093069a 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -156,11 +156,12 @@ void perf_mem_events__list(void) for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { struct perf_mem_event *e = perf_mem_events__ptr(j); - fprintf(stderr, "%-13s%-*s%s\n", - e->tag ?: "", - verbose > 0 ? 25 : 0, - verbose > 0 ? perf_mem_events__name(j, NULL) : "", - e->supported ? ": available" : ""); + fprintf(stderr, "%-*s%-*s%s", + e->tag ? 13 : 0, + e->tag ? : "", + e->tag && verbose > 0 ? 25 : 0, + e->tag && verbose > 0 ? perf_mem_events__name(j, NULL) : "", + e->supported ? ": available\n" : ""); } } @@ -281,7 +282,7 @@ static const char * const mem_lvl[] = { "HIT", "MISS", "L1", - "LFB", + "LFB/MAB", "L2", "L3", "Local RAM", @@ -294,8 +295,10 @@ static const char * const mem_lvl[] = { }; static const char * const mem_lvlnum[] = { + [PERF_MEM_LVLNUM_CXL] = "CXL", + [PERF_MEM_LVLNUM_IO] = "I/O", [PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache", - [PERF_MEM_LVLNUM_LFB] = "LFB", + [PERF_MEM_LVLNUM_LFB] = "LFB/MAB", [PERF_MEM_LVLNUM_RAM] = "RAM", [PERF_MEM_LVLNUM_PMEM] = "PMEM", [PERF_MEM_LVLNUM_NA] = "N/A", diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index c93bcaf6d55d..4c98ac29ee13 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -22,6 +22,7 @@ #include <linux/list_sort.h> #include <linux/string.h> #include <linux/zalloc.h> +#include <perf/cpumap.h> #include <subcmd/parse-options.h> #include <api/fs/fs.h> #include "util.h" @@ -108,17 +109,6 @@ void metricgroup__rblist_exit(struct rblist *metric_events) rblist__exit(metric_events); } -/* - * A node in the list of referenced metrics. metric_expr - * is held as a convenience to avoid a search through the - * metric list. - */ -struct metric_ref_node { - const char *metric_name; - const char *metric_expr; - struct list_head list; -}; - /** * The metric under construction. The data held here will be placed in a * metric_expr. @@ -189,10 +179,24 @@ static bool metricgroup__has_constraint(const struct pmu_event *pe) return false; } +static void metric__free(struct metric *m) +{ + if (!m) + return; + + free(m->metric_refs); + expr__ctx_free(m->pctx); + free((char *)m->modifier); + evlist__delete(m->evlist); + free(m); +} + static struct metric *metric__new(const struct pmu_event *pe, const char *modifier, bool metric_no_group, - int runtime) + int runtime, + const char *user_requested_cpu_list, + bool system_wide) { struct metric *m; @@ -201,35 +205,34 @@ static struct metric *metric__new(const struct pmu_event *pe, return NULL; m->pctx = expr__ctx_new(); - if (!m->pctx) { - free(m); - return NULL; - } + if (!m->pctx) + goto out_err; m->metric_name = pe->metric_name; - m->modifier = modifier ? strdup(modifier) : NULL; - if (modifier && !m->modifier) { - expr__ctx_free(m->pctx); - free(m); - return NULL; + m->modifier = NULL; + if (modifier) { + m->modifier = strdup(modifier); + if (!m->modifier) + goto out_err; } m->metric_expr = pe->metric_expr; m->metric_unit = pe->unit; - m->pctx->runtime = runtime; + m->pctx->sctx.user_requested_cpu_list = NULL; + if (user_requested_cpu_list) { + m->pctx->sctx.user_requested_cpu_list = strdup(user_requested_cpu_list); + if (!m->pctx->sctx.user_requested_cpu_list) + goto out_err; + } + m->pctx->sctx.runtime = runtime; + m->pctx->sctx.system_wide = system_wide; m->has_constraint = metric_no_group || metricgroup__has_constraint(pe); m->metric_refs = NULL; m->evlist = NULL; return m; -} - -static void metric__free(struct metric *m) -{ - free(m->metric_refs); - expr__ctx_free(m->pctx); - free((char *)m->modifier); - evlist__delete(m->evlist); - free(m); +out_err: + metric__free(m); + return NULL; } static bool contains_metric_id(struct evsel **metric_events, int num_events, @@ -874,6 +877,8 @@ struct metricgroup_add_iter_data { int *ret; bool *has_match; bool metric_no_group; + const char *user_requested_cpu_list; + bool system_wide; struct metric *root_metric; const struct visited_metric *visited; const struct pmu_events_table *table; @@ -887,6 +892,8 @@ static int add_metric(struct list_head *metric_list, const struct pmu_event *pe, const char *modifier, bool metric_no_group, + const char *user_requested_cpu_list, + bool system_wide, struct metric *root_metric, const struct visited_metric *visited, const struct pmu_events_table *table); @@ -899,6 +906,8 @@ static int add_metric(struct list_head *metric_list, * @metric_no_group: Should events written to events be grouped "{}" or * global. Grouping is the default but due to multiplexing the * user may override. + * @user_requested_cpu_list: Command line specified CPUs to record on. + * @system_wide: Are events for all processes recorded. * @root_metric: Metrics may reference other metrics to form a tree. In this * case the root_metric holds all the IDs and a list of referenced * metrics. When adding a root this argument is NULL. @@ -910,6 +919,8 @@ static int add_metric(struct list_head *metric_list, static int resolve_metric(struct list_head *metric_list, const char *modifier, bool metric_no_group, + const char *user_requested_cpu_list, + bool system_wide, struct metric *root_metric, const struct visited_metric *visited, const struct pmu_events_table *table) @@ -956,7 +967,8 @@ static int resolve_metric(struct list_head *metric_list, */ for (i = 0; i < pending_cnt; i++) { ret = add_metric(metric_list, &pending[i].pe, modifier, metric_no_group, - root_metric, visited, table); + user_requested_cpu_list, system_wide, root_metric, visited, + table); if (ret) break; } @@ -974,6 +986,8 @@ static int resolve_metric(struct list_head *metric_list, * global. Grouping is the default but due to multiplexing the * user may override. * @runtime: A special argument for the parser only known at runtime. + * @user_requested_cpu_list: Command line specified CPUs to record on. + * @system_wide: Are events for all processes recorded. * @root_metric: Metrics may reference other metrics to form a tree. In this * case the root_metric holds all the IDs and a list of referenced * metrics. When adding a root this argument is NULL. @@ -987,6 +1001,8 @@ static int __add_metric(struct list_head *metric_list, const char *modifier, bool metric_no_group, int runtime, + const char *user_requested_cpu_list, + bool system_wide, struct metric *root_metric, const struct visited_metric *visited, const struct pmu_events_table *table) @@ -1011,7 +1027,8 @@ static int __add_metric(struct list_head *metric_list, * This metric is the root of a tree and may reference other * metrics that are added recursively. */ - root_metric = metric__new(pe, modifier, metric_no_group, runtime); + root_metric = metric__new(pe, modifier, metric_no_group, runtime, + user_requested_cpu_list, system_wide); if (!root_metric) return -ENOMEM; @@ -1060,8 +1077,9 @@ static int __add_metric(struct list_head *metric_list, ret = -EINVAL; } else { /* Resolve referenced metrics. */ - ret = resolve_metric(metric_list, modifier, metric_no_group, root_metric, - &visited_node, table); + ret = resolve_metric(metric_list, modifier, metric_no_group, + user_requested_cpu_list, system_wide, + root_metric, &visited_node, table); } if (ret) { @@ -1109,6 +1127,8 @@ static int add_metric(struct list_head *metric_list, const struct pmu_event *pe, const char *modifier, bool metric_no_group, + const char *user_requested_cpu_list, + bool system_wide, struct metric *root_metric, const struct visited_metric *visited, const struct pmu_events_table *table) @@ -1119,7 +1139,8 @@ static int add_metric(struct list_head *metric_list, if (!strstr(pe->metric_expr, "?")) { ret = __add_metric(metric_list, pe, modifier, metric_no_group, 0, - root_metric, visited, table); + user_requested_cpu_list, system_wide, root_metric, + visited, table); } else { int j, count; @@ -1132,7 +1153,8 @@ static int add_metric(struct list_head *metric_list, for (j = 0; j < count && !ret; j++) ret = __add_metric(metric_list, pe, modifier, metric_no_group, j, - root_metric, visited, table); + user_requested_cpu_list, system_wide, + root_metric, visited, table); } return ret; @@ -1149,6 +1171,7 @@ static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe, return 0; ret = add_metric(d->metric_list, pe, d->modifier, d->metric_no_group, + d->user_requested_cpu_list, d->system_wide, d->root_metric, d->visited, d->table); if (ret) goto out; @@ -1191,7 +1214,9 @@ struct metricgroup__add_metric_data { struct list_head *list; const char *metric_name; const char *modifier; + const char *user_requested_cpu_list; bool metric_no_group; + bool system_wide; bool has_match; }; @@ -1208,8 +1233,8 @@ static int metricgroup__add_metric_callback(const struct pmu_event *pe, data->has_match = true; ret = add_metric(data->list, pe, data->modifier, data->metric_no_group, - /*root_metric=*/NULL, - /*visited_metrics=*/NULL, table); + data->user_requested_cpu_list, data->system_wide, + /*root_metric=*/NULL, /*visited_metrics=*/NULL, table); } return ret; } @@ -1223,12 +1248,16 @@ static int metricgroup__add_metric_callback(const struct pmu_event *pe, * @metric_no_group: Should events written to events be grouped "{}" or * global. Grouping is the default but due to multiplexing the * user may override. + * @user_requested_cpu_list: Command line specified CPUs to record on. + * @system_wide: Are events for all processes recorded. * @metric_list: The list that the metric or metric group are added to. * @table: The table that is searched for metrics, most commonly the table for the * architecture perf is running upon. */ static int metricgroup__add_metric(const char *metric_name, const char *modifier, bool metric_no_group, + const char *user_requested_cpu_list, + bool system_wide, struct list_head *metric_list, const struct pmu_events_table *table) { @@ -1242,6 +1271,8 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier .metric_name = metric_name, .modifier = modifier, .metric_no_group = metric_no_group, + .user_requested_cpu_list = user_requested_cpu_list, + .system_wide = system_wide, .has_match = false, }; /* @@ -1263,6 +1294,8 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier .metric_name = metric_name, .modifier = modifier, .metric_no_group = metric_no_group, + .user_requested_cpu_list = user_requested_cpu_list, + .system_wide = system_wide, .has_match = &has_match, .ret = &ret, .table = table, @@ -1293,12 +1326,15 @@ out: * @metric_no_group: Should events written to events be grouped "{}" or * global. Grouping is the default but due to multiplexing the * user may override. + * @user_requested_cpu_list: Command line specified CPUs to record on. + * @system_wide: Are events for all processes recorded. * @metric_list: The list that metrics are added to. * @table: The table that is searched for metrics, most commonly the table for the * architecture perf is running upon. */ static int metricgroup__add_metric_list(const char *list, bool metric_no_group, - struct list_head *metric_list, + const char *user_requested_cpu_list, + bool system_wide, struct list_head *metric_list, const struct pmu_events_table *table) { char *list_itr, *list_copy, *metric_name, *modifier; @@ -1315,8 +1351,8 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group, *modifier++ = '\0'; ret = metricgroup__add_metric(metric_name, modifier, - metric_no_group, metric_list, - table); + metric_no_group, user_requested_cpu_list, + system_wide, metric_list, table); if (ret == -EINVAL) pr_err("Cannot find metric or group `%s'\n", metric_name); @@ -1505,6 +1541,8 @@ err_out: static int parse_groups(struct evlist *perf_evlist, const char *str, bool metric_no_group, bool metric_no_merge, + const char *user_requested_cpu_list, + bool system_wide, struct perf_pmu *fake_pmu, struct rblist *metric_events_list, const struct pmu_events_table *table) @@ -1518,7 +1556,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, if (metric_events_list->nr_entries == 0) metricgroup__rblist_init(metric_events_list); ret = metricgroup__add_metric_list(str, metric_no_group, - &metric_list, table); + user_requested_cpu_list, + system_wide, &metric_list, table); if (ret) goto out; @@ -1626,7 +1665,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, } expr->metric_unit = m->metric_unit; expr->metric_events = metric_events; - expr->runtime = m->pctx->runtime; + expr->runtime = m->pctx->sctx.runtime; list_add(&expr->nd, &me->head); } @@ -1646,20 +1685,22 @@ out: return ret; } -int metricgroup__parse_groups(const struct option *opt, +int metricgroup__parse_groups(struct evlist *perf_evlist, const char *str, bool metric_no_group, bool metric_no_merge, + const char *user_requested_cpu_list, + bool system_wide, struct rblist *metric_events) { - struct evlist *perf_evlist = *(struct evlist **)opt->value; const struct pmu_events_table *table = pmu_events_table__find(); if (!table) return -EINVAL; - return parse_groups(perf_evlist, str, metric_no_group, - metric_no_merge, NULL, metric_events, table); + return parse_groups(perf_evlist, str, metric_no_group, metric_no_merge, + user_requested_cpu_list, system_wide, + /*fake_pmu=*/NULL, metric_events, table); } int metricgroup__parse_groups_test(struct evlist *evlist, @@ -1669,8 +1710,10 @@ int metricgroup__parse_groups_test(struct evlist *evlist, bool metric_no_merge, struct rblist *metric_events) { - return parse_groups(evlist, str, metric_no_group, - metric_no_merge, &perf_pmu__fake, metric_events, table); + return parse_groups(evlist, str, metric_no_group, metric_no_merge, + /*user_requested_cpu_list=*/NULL, + /*system_wide=*/false, + &perf_pmu__fake, metric_events, table); } static int metricgroup__has_metric_callback(const struct pmu_event *pe, @@ -1703,7 +1746,7 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, struct rblist *new_metric_events, struct rblist *old_metric_events) { - unsigned i; + unsigned int i; for (i = 0; i < rblist__nr_entries(old_metric_events); i++) { struct rb_node *nd; diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 016b3b1a289a..732d3a0d3334 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -64,10 +64,12 @@ struct metric_expr { struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct evsel *evsel, bool create); -int metricgroup__parse_groups(const struct option *opt, +int metricgroup__parse_groups(struct evlist *perf_evlist, const char *str, bool metric_no_group, bool metric_no_merge, + const char *user_requested_cpu_list, + bool system_wide, struct rblist *metric_events); int metricgroup__parse_groups_test(struct evlist *evlist, const struct pmu_events_table *table, diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index cd8b0777473b..cd4ccec7f361 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -9,7 +9,6 @@ #include <linux/bitops.h> #include <perf/cpumap.h> #include <stdbool.h> -#include <pthread.h> // for cpu_set_t #ifdef HAVE_AIO_SUPPORT #include <aio.h> #endif diff --git a/tools/perf/util/mutex.c b/tools/perf/util/mutex.c new file mode 100644 index 000000000000..bca7f0717f35 --- /dev/null +++ b/tools/perf/util/mutex.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "mutex.h" + +#include "debug.h" +#include <linux/string.h> +#include <errno.h> + +static void check_err(const char *fn, int err) +{ + char sbuf[STRERR_BUFSIZE]; + + if (err == 0) + return; + + pr_err("%s error: '%s'\n", fn, str_error_r(err, sbuf, sizeof(sbuf))); +} + +#define CHECK_ERR(err) check_err(__func__, err) + +static void __mutex_init(struct mutex *mtx, bool pshared) +{ + pthread_mutexattr_t attr; + + CHECK_ERR(pthread_mutexattr_init(&attr)); + +#ifndef NDEBUG + /* In normal builds enable error checking, such as recursive usage. */ + CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK)); +#endif + if (pshared) + CHECK_ERR(pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)); + + CHECK_ERR(pthread_mutex_init(&mtx->lock, &attr)); + CHECK_ERR(pthread_mutexattr_destroy(&attr)); +} + +void mutex_init(struct mutex *mtx) +{ + __mutex_init(mtx, /*pshared=*/false); +} + +void mutex_init_pshared(struct mutex *mtx) +{ + __mutex_init(mtx, /*pshared=*/true); +} + +void mutex_destroy(struct mutex *mtx) +{ + CHECK_ERR(pthread_mutex_destroy(&mtx->lock)); +} + +void mutex_lock(struct mutex *mtx) + NO_THREAD_SAFETY_ANALYSIS +{ + CHECK_ERR(pthread_mutex_lock(&mtx->lock)); +} + +void mutex_unlock(struct mutex *mtx) + NO_THREAD_SAFETY_ANALYSIS +{ + CHECK_ERR(pthread_mutex_unlock(&mtx->lock)); +} + +bool mutex_trylock(struct mutex *mtx) +{ + int ret = pthread_mutex_trylock(&mtx->lock); + + if (ret == 0) + return true; /* Lock acquired. */ + + if (ret == EBUSY) + return false; /* Lock busy. */ + + /* Print error. */ + CHECK_ERR(ret); + return false; +} + +static void __cond_init(struct cond *cnd, bool pshared) +{ + pthread_condattr_t attr; + + CHECK_ERR(pthread_condattr_init(&attr)); + if (pshared) + CHECK_ERR(pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)); + + CHECK_ERR(pthread_cond_init(&cnd->cond, &attr)); + CHECK_ERR(pthread_condattr_destroy(&attr)); +} + +void cond_init(struct cond *cnd) +{ + __cond_init(cnd, /*pshared=*/false); +} + +void cond_init_pshared(struct cond *cnd) +{ + __cond_init(cnd, /*pshared=*/true); +} + +void cond_destroy(struct cond *cnd) +{ + CHECK_ERR(pthread_cond_destroy(&cnd->cond)); +} + +void cond_wait(struct cond *cnd, struct mutex *mtx) +{ + CHECK_ERR(pthread_cond_wait(&cnd->cond, &mtx->lock)); +} + +void cond_signal(struct cond *cnd) +{ + CHECK_ERR(pthread_cond_signal(&cnd->cond)); +} + +void cond_broadcast(struct cond *cnd) +{ + CHECK_ERR(pthread_cond_broadcast(&cnd->cond)); +} diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h new file mode 100644 index 000000000000..40661120cacc --- /dev/null +++ b/tools/perf/util/mutex.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_MUTEX_H +#define __PERF_MUTEX_H + +#include <pthread.h> +#include <stdbool.h> + +/* + * A function-like feature checking macro that is a wrapper around + * `__has_attribute`, which is defined by GCC 5+ and Clang and evaluates to a + * nonzero constant integer if the attribute is supported or 0 if not. + */ +#ifdef __has_attribute +#define HAVE_ATTRIBUTE(x) __has_attribute(x) +#else +#define HAVE_ATTRIBUTE(x) 0 +#endif + +#if HAVE_ATTRIBUTE(guarded_by) && HAVE_ATTRIBUTE(pt_guarded_by) && \ + HAVE_ATTRIBUTE(lockable) && HAVE_ATTRIBUTE(exclusive_lock_function) && \ + HAVE_ATTRIBUTE(exclusive_trylock_function) && HAVE_ATTRIBUTE(exclusive_locks_required) && \ + HAVE_ATTRIBUTE(no_thread_safety_analysis) + +/* Documents if a shared field or global variable needs to be protected by a mutex. */ +#define GUARDED_BY(x) __attribute__((guarded_by(x))) + +/* + * Documents if the memory location pointed to by a pointer should be guarded by + * a mutex when dereferencing the pointer. + */ +#define PT_GUARDED_BY(x) __attribute__((pt_guarded_by(x))) + +/* Documents if a type is a lockable type. */ +#define LOCKABLE __attribute__((lockable)) + +/* Documents functions that acquire a lock in the body of a function, and do not release it. */ +#define EXCLUSIVE_LOCK_FUNCTION(...) __attribute__((exclusive_lock_function(__VA_ARGS__))) + +/* + * Documents functions that expect a lock to be held on entry to the function, + * and release it in the body of the function. + */ +#define UNLOCK_FUNCTION(...) __attribute__((unlock_function(__VA_ARGS__))) + +/* Documents functions that try to acquire a lock, and return success or failure. */ +#define EXCLUSIVE_TRYLOCK_FUNCTION(...) \ + __attribute__((exclusive_trylock_function(__VA_ARGS__))) + +/* Documents a function that expects a mutex to be held prior to entry. */ +#define EXCLUSIVE_LOCKS_REQUIRED(...) __attribute__((exclusive_locks_required(__VA_ARGS__))) + +/* Turns off thread safety checking within the body of a particular function. */ +#define NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) + +#else + +#define GUARDED_BY(x) +#define PT_GUARDED_BY(x) +#define LOCKABLE +#define EXCLUSIVE_LOCK_FUNCTION(...) +#define UNLOCK_FUNCTION(...) +#define EXCLUSIVE_TRYLOCK_FUNCTION(...) +#define EXCLUSIVE_LOCKS_REQUIRED(...) +#define NO_THREAD_SAFETY_ANALYSIS + +#endif + +/* + * A wrapper around the mutex implementation that allows perf to error check + * usage, etc. + */ +struct LOCKABLE mutex { + pthread_mutex_t lock; +}; + +/* A wrapper around the condition variable implementation. */ +struct cond { + pthread_cond_t cond; +}; + +/* Default initialize the mtx struct. */ +void mutex_init(struct mutex *mtx); +/* + * Initialize the mtx struct and set the process-shared rather than default + * process-private attribute. + */ +void mutex_init_pshared(struct mutex *mtx); +void mutex_destroy(struct mutex *mtx); + +void mutex_lock(struct mutex *mtx) EXCLUSIVE_LOCK_FUNCTION(*mtx); +void mutex_unlock(struct mutex *mtx) UNLOCK_FUNCTION(*mtx); +/* Tries to acquire the lock and returns true on success. */ +bool mutex_trylock(struct mutex *mtx) EXCLUSIVE_TRYLOCK_FUNCTION(true, *mtx); + +/* Default initialize the cond struct. */ +void cond_init(struct cond *cnd); +/* + * Initialize the cond struct and specify the process-shared rather than default + * process-private attribute. + */ +void cond_init_pshared(struct cond *cnd); +void cond_destroy(struct cond *cnd); + +void cond_wait(struct cond *cnd, struct mutex *mtx) EXCLUSIVE_LOCKS_REQUIRED(mtx); +void cond_signal(struct cond *cnd); +void cond_broadcast(struct cond *cnd); + +#endif /* __PERF_MUTEX_H */ diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index bb4aa88c50a8..31faf2bb49ff 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -32,6 +32,7 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT("call", PERF_SAMPLE_BRANCH_CALL), BRANCH_OPT("save_type", PERF_SAMPLE_BRANCH_TYPE_SAVE), BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK), + BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE), BRANCH_END }; @@ -101,8 +102,10 @@ parse_branch_stack(const struct option *opt, const char *str, int unset) /* * cannot set it twice, -b + --branch-filter for instance */ - if (*mode) + if (*mode) { + pr_err("Error: Can't use --branch-any (-b) with --branch-filter (-j).\n"); return -1; + } return parse_branch_str(str, mode); } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f3b2c2a87456..5973f46c2375 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -150,14 +150,6 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { }, }; -#define __PERF_EVENT_FIELD(config, name) \ - ((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT) - -#define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW) -#define PERF_EVENT_CONFIG(config) __PERF_EVENT_FIELD(config, CONFIG) -#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) -#define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) - bool is_event_supported(u8 type, u64 config) { bool ret = true; @@ -254,6 +246,9 @@ __add_event(struct list_head *list, int *idx, struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) : cpu_list ? perf_cpu_map__new(cpu_list) : NULL; + if (pmu) + perf_pmu__warn_invalid_formats(pmu); + if (pmu && attr->type == PERF_TYPE_RAW) perf_pmu__warn_invalid_config(pmu, attr->config, name); diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 98af3fa4ea35..7e5e7b30510d 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -52,7 +52,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value) bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX), bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), - bit_name(TYPE_SAVE), bit_name(HW_INDEX), + bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE), { .name = NULL, } }; #undef bit_name @@ -64,7 +64,7 @@ static void __p_read_format(char *buf, size_t size, u64 value) #define bit_name(n) { PERF_FORMAT_##n, #n } struct bit_names bits[] = { bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING), - bit_name(ID), bit_name(GROUP), + bit_name(ID), bit_name(GROUP), bit_name(LOST), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 89655d53117a..03284059175f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1005,6 +1005,23 @@ err: return NULL; } +void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu) +{ + struct perf_pmu_format *format; + + /* fake pmu doesn't have format list */ + if (pmu == &perf_pmu__fake) + return; + + list_for_each_entry(format, &pmu->format, list) + if (format->value >= PERF_PMU_FORMAT_VALUE_CONFIG_END) { + pr_warning("WARNING: '%s' format '%s' requires 'perf_event_attr::config%d'" + "which is not supported by this version of perf!\n", + pmu->name, format->name, format->value); + return; + } +} + static struct perf_pmu *pmu_find(const char *name) { struct perf_pmu *pmu; @@ -1182,7 +1199,7 @@ static char *pmu_formats_string(struct list_head *formats) struct perf_pmu_format *format; char *str = NULL; struct strbuf buf = STRBUF_INIT; - unsigned i = 0; + unsigned int i = 0; if (!formats) return NULL; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index a7b0f9507510..68e15c38ae71 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -17,6 +17,7 @@ enum { PERF_PMU_FORMAT_VALUE_CONFIG, PERF_PMU_FORMAT_VALUE_CONFIG1, PERF_PMU_FORMAT_VALUE_CONFIG2, + PERF_PMU_FORMAT_VALUE_CONFIG_END, }; #define PERF_PMU_FORMAT_BITS 64 @@ -139,6 +140,7 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu); void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, const char *name); +void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu); bool perf_pmu__has_hybrid(void); int perf_pmu__match(char *pattern, char *name, char *tok); diff --git a/tools/perf/util/pmu.l b/tools/perf/util/pmu.l index a15d9fbd7c0e..58b4926cfaca 100644 --- a/tools/perf/util/pmu.l +++ b/tools/perf/util/pmu.l @@ -27,8 +27,6 @@ num_dec [0-9]+ {num_dec} { return value(10); } config { return PP_CONFIG; } -config1 { return PP_CONFIG1; } -config2 { return PP_CONFIG2; } - { return '-'; } : { return ':'; } , { return ','; } diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y index bfd7e8509869..e675d79a0274 100644 --- a/tools/perf/util/pmu.y +++ b/tools/perf/util/pmu.y @@ -10,8 +10,6 @@ #include <string.h> #include "pmu.h" -extern int perf_pmu_lex (void); - #define ABORT_ON(val) \ do { \ if (val) \ @@ -20,7 +18,7 @@ do { \ %} -%token PP_CONFIG PP_CONFIG1 PP_CONFIG2 +%token PP_CONFIG %token PP_VALUE PP_ERROR %type <num> PP_VALUE %type <bits> bit_term @@ -47,18 +45,11 @@ PP_CONFIG ':' bits $3)); } | -PP_CONFIG1 ':' bits +PP_CONFIG PP_VALUE ':' bits { ABORT_ON(perf_pmu__new_format(format, name, - PERF_PMU_FORMAT_VALUE_CONFIG1, - $3)); -} -| -PP_CONFIG2 ':' bits -{ - ABORT_ON(perf_pmu__new_format(format, name, - PERF_PMU_FORMAT_VALUE_CONFIG2, - $3)); + $2, + $4)); } bits: diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 785246ff4179..0c24bc7afbca 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -29,6 +29,7 @@ #include "color.h" #include "map.h" #include "maps.h" +#include "mutex.h" #include "symbol.h" #include <api/fs/fs.h> #include "trace-event.h" /* For __maybe_unused */ @@ -180,8 +181,10 @@ struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user) map = dso__new_map(target); if (map && map->dso) { + mutex_lock(&map->dso->lock); nsinfo__put(map->dso->nsinfo); map->dso->nsinfo = nsinfo__get(nsi); + mutex_unlock(&map->dso->lock); } return map; } else { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 192c9274f7ad..1a4f10de29ff 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -943,6 +943,11 @@ static void perf_event__cpu_map_swap(union perf_event *event, default: pr_err("cpu_map swap: unsupported long size\n"); } + break; + case PERF_CPU_MAP__RANGE_CPUS: + data->range_cpu_data.start_cpu = bswap_16(data->range_cpu_data.start_cpu); + data->range_cpu_data.end_cpu = bswap_16(data->range_cpu_data.end_cpu); + break; default: break; } @@ -1180,7 +1185,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) e->flags.abort ? "A" : " ", e->flags.in_tx ? "T" : " ", (unsigned)e->flags.reserved, - e->flags.type ? branch_type_name(e->flags.type) : ""); + get_branch_type(e)); } else { if (i == 0) { printf("..... %2"PRIu64": %016" PRIx64 "\n" diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c index 2b0a36ebf27a..994e9e418227 100644 --- a/tools/perf/util/smt.c +++ b/tools/perf/util/smt.c @@ -1,99 +1,37 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <linux/bitops.h> +// SPDX-License-Identifier: GPL-2.0-only +#include <string.h> #include "api/fs/fs.h" +#include "cputopo.h" #include "smt.h" -/** - * hweight_str - Returns the number of bits set in str. Stops at first non-hex - * or ',' character. - */ -static int hweight_str(char *str) -{ - int result = 0; - - while (*str) { - switch (*str++) { - case '0': - case ',': - break; - case '1': - case '2': - case '4': - case '8': - result++; - break; - case '3': - case '5': - case '6': - case '9': - case 'a': - case 'A': - case 'c': - case 'C': - result += 2; - break; - case '7': - case 'b': - case 'B': - case 'd': - case 'D': - case 'e': - case 'E': - result += 3; - break; - case 'f': - case 'F': - result += 4; - break; - default: - goto done; - } - } -done: - return result; -} - -int smt_on(void) +bool smt_on(const struct cpu_topology *topology) { static bool cached; - static int cached_result; - int cpu; - int ncpu; + static bool cached_result; + int fs_value; if (cached) return cached_result; - if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) { - cached = true; - return cached_result; - } - - cached_result = 0; - ncpu = sysconf(_SC_NPROCESSORS_CONF); - for (cpu = 0; cpu < ncpu; cpu++) { - unsigned long long siblings; - char *str; - size_t strlen; - char fn[256]; + if (sysfs__read_int("devices/system/cpu/smt/active", &fs_value) >= 0) + cached_result = (fs_value == 1); + else + cached_result = cpu_topology__smt_on(topology); - snprintf(fn, sizeof fn, - "devices/system/cpu/cpu%d/topology/thread_siblings", cpu); - if (sysfs__read_str(fn, &str, &strlen) < 0) { - snprintf(fn, sizeof fn, - "devices/system/cpu/cpu%d/topology/core_cpus", cpu); - if (sysfs__read_str(fn, &str, &strlen) < 0) - continue; - } - /* Entry is hex, but does not have 0x, so need custom parser */ - siblings = hweight_str(str); - free(str); - if (siblings > 1) { - cached_result = 1; - break; - } - } cached = true; return cached_result; } + +bool core_wide(bool system_wide, const char *user_requested_cpu_list, + const struct cpu_topology *topology) +{ + /* If not everything running on a core is being recorded then we can't use core_wide. */ + if (!system_wide) + return false; + + /* Cheap case that SMT is disabled and therefore we're inherently core_wide. */ + if (!smt_on(topology)) + return true; + + return cpu_topology__core_wide(topology, user_requested_cpu_list); +} diff --git a/tools/perf/util/smt.h b/tools/perf/util/smt.h index b8414b7bcbc8..ae9095f2c38c 100644 --- a/tools/perf/util/smt.h +++ b/tools/perf/util/smt.h @@ -1,6 +1,17 @@ -#ifndef SMT_H -#define SMT_H 1 +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SMT_H +#define __SMT_H 1 -int smt_on(void); +struct cpu_topology; -#endif +/* Returns true if SMT (aka hyperthreading) is enabled. */ +bool smt_on(const struct cpu_topology *topology); + +/* + * Returns true when system wide and all SMT threads for a core are in the + * user_requested_cpus map. + */ +bool core_wide(bool system_wide, const char *user_requested_cpu_list, + const struct cpu_topology *topology); + +#endif /* __SMT_H */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 6d5588e80935..2e7330867e2e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1948,6 +1948,43 @@ struct sort_entry sort_dso_size = { .se_width_idx = HISTC_DSO_SIZE, }; +/* --sort dso_size */ + +static int64_t +sort__addr_cmp(struct hist_entry *left, struct hist_entry *right) +{ + u64 left_ip = left->ip; + u64 right_ip = right->ip; + struct map *left_map = left->ms.map; + struct map *right_map = right->ms.map; + + if (left_map) + left_ip = left_map->unmap_ip(left_map, left_ip); + if (right_map) + right_ip = right_map->unmap_ip(right_map, right_ip); + + return _sort__addr_cmp(left_ip, right_ip); +} + +static int hist_entry__addr_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + u64 ip = he->ip; + struct map *map = he->ms.map; + + if (map) + ip = map->unmap_ip(map, ip); + + return repsep_snprintf(bf, size, "%-#*llx", width, ip); +} + +struct sort_entry sort_addr = { + .se_header = "Address", + .se_cmp = sort__addr_cmp, + .se_snprintf = hist_entry__addr_snprintf, + .se_width_idx = HISTC_ADDR, +}; + struct sort_dimension { const char *name; @@ -1997,6 +2034,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat), DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc), DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc), + DIM(SORT_ADDR, "addr", sort_addr), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 2ddc00d1c464..04ff8b61a2a7 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -34,7 +34,6 @@ extern struct sort_entry sort_dso_to; extern struct sort_entry sort_sym_from; extern struct sort_entry sort_sym_to; extern struct sort_entry sort_srcline; -extern enum sort_type sort__first_dimension; extern const char default_mem_sort_order[]; struct res_sample { @@ -237,6 +236,7 @@ enum sort_type { SORT_GLOBAL_INS_LAT, SORT_LOCAL_PIPELINE_STAGE_CYC, SORT_GLOBAL_PIPELINE_STAGE_CYC, + SORT_ADDR, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, @@ -295,7 +295,6 @@ struct block_hist { }; extern struct sort_entry sort_thread; -extern struct list_head hist_entry__sort_list; struct evlist; struct tep_handle; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index b82844cb0ce7..ba66bb7fc1ca 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -67,7 +67,7 @@ static void print_noise(struct perf_stat_config *config, return; ps = evsel->stats; - print_noise_pct(config, stddev_stats(&ps->res_stats[0]), avg); + print_noise_pct(config, stddev_stats(&ps->res_stats), avg); } static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel) @@ -168,7 +168,7 @@ static void aggr_printout(struct perf_stat_config *config, id.socket, id.die, id.core); - } else if (id.core > -1) { + } else if (id.cpu.cpu > -1) { fprintf(config->output, "\"cpu\" : \"%d\", ", id.cpu.cpu); } @@ -179,7 +179,7 @@ static void aggr_printout(struct perf_stat_config *config, id.die, config->csv_output ? 0 : -3, id.core, config->csv_sep); - } else if (id.core > -1) { + } else if (id.cpu.cpu > -1) { fprintf(config->output, "CPU%*d%s", config->csv_output ? 0 : -7, id.cpu.cpu, config->csv_sep); @@ -189,14 +189,14 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_THREAD: if (config->json_output) { fprintf(config->output, "\"thread\" : \"%s-%d\", ", - perf_thread_map__comm(evsel->core.threads, id.thread), - perf_thread_map__pid(evsel->core.threads, id.thread)); + perf_thread_map__comm(evsel->core.threads, id.thread_idx), + perf_thread_map__pid(evsel->core.threads, id.thread_idx)); } else { fprintf(config->output, "%*s-%*d%s", config->csv_output ? 0 : 16, - perf_thread_map__comm(evsel->core.threads, id.thread), + perf_thread_map__comm(evsel->core.threads, id.thread_idx), config->csv_output ? 0 : -8, - perf_thread_map__pid(evsel->core.threads, id.thread), + perf_thread_map__pid(evsel->core.threads, id.thread_idx), config->csv_sep); } break; @@ -273,7 +273,7 @@ static void new_line_csv(struct perf_stat_config *config, void *ctx) fputc('\n', os->fh); if (os->prefix) - fprintf(os->fh, "%s%s", os->prefix, config->csv_sep); + fprintf(os->fh, "%s", os->prefix); aggr_printout(config, os->evsel, os->id, os->nr); for (i = 0; i < os->nfields; i++) fputs(config->csv_sep, os->fh); @@ -442,7 +442,7 @@ static void print_metric_header(struct perf_stat_config *config, fprintf(os->fh, "%*s ", config->metric_only_len, unit); } -static int first_shadow_cpu_map_idx(struct perf_stat_config *config, +static int first_shadow_map_idx(struct perf_stat_config *config, struct evsel *evsel, const struct aggr_cpu_id *id) { struct perf_cpu_map *cpus = evsel__cpus(evsel); @@ -452,6 +452,9 @@ static int first_shadow_cpu_map_idx(struct perf_stat_config *config, if (config->aggr_mode == AGGR_NONE) return perf_cpu_map__idx(cpus, id->cpu); + if (config->aggr_mode == AGGR_THREAD) + return id->thread_idx; + if (!config->aggr_get_id) return 0; @@ -556,7 +559,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int [AGGR_CORE] = 2, [AGGR_THREAD] = 1, [AGGR_UNSET] = 0, - [AGGR_NODE] = 0, + [AGGR_NODE] = 1, }; pm = config->metric_only ? print_metric_only_csv : print_metric_csv; @@ -646,7 +649,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int } perf_stat__print_shadow_stats(config, counter, uval, - first_shadow_cpu_map_idx(config, counter, &id), + first_shadow_map_idx(config, counter, &id), &out, &config->metric_events, st); if (!config->csv_output && !config->metric_only && !config->json_output) { print_noise(config, counter, noise); @@ -676,7 +679,7 @@ static void aggr_update_shadow(struct perf_stat_config *config, val += perf_counts(counter->counts, idx, 0)->val; } perf_stat__update_shadow_stats(counter, val, - first_shadow_cpu_map_idx(config, counter, &id), + first_shadow_map_idx(config, counter, &id), &rt_stat); } } @@ -943,7 +946,7 @@ static struct perf_aggr_thread_value *sort_aggr_thread( buf[i].counter = counter; buf[i].id = aggr_cpu_id__empty(); - buf[i].id.thread = thread; + buf[i].id.thread_idx = thread; buf[i].uval = uval; buf[i].val = val; buf[i].run = run; @@ -979,14 +982,9 @@ static void print_aggr_thread(struct perf_stat_config *config, fprintf(output, "%s", prefix); id = buf[thread].id; - if (config->stats) - printout(config, id, 0, buf[thread].counter, buf[thread].uval, - prefix, buf[thread].run, buf[thread].ena, 1.0, - &config->stats[id.thread]); - else - printout(config, id, 0, buf[thread].counter, buf[thread].uval, - prefix, buf[thread].run, buf[thread].ena, 1.0, - &rt_stat); + printout(config, id, 0, buf[thread].counter, buf[thread].uval, + prefix, buf[thread].run, buf[thread].ena, 1.0, + &rt_stat); fputc('\n', output); } @@ -1126,6 +1124,7 @@ static int aggr_header_lens[] = { [AGGR_SOCKET] = 12, [AGGR_NONE] = 6, [AGGR_THREAD] = 24, + [AGGR_NODE] = 6, [AGGR_GLOBAL] = 0, }; @@ -1135,6 +1134,7 @@ static const char *aggr_header_csv[] = { [AGGR_SOCKET] = "socket,cpus", [AGGR_NONE] = "cpu,", [AGGR_THREAD] = "comm-pid,", + [AGGR_NODE] = "node,", [AGGR_GLOBAL] = "" }; diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 788ce5e46470..07b29fe272c7 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -33,9 +33,8 @@ struct saved_value { struct evsel *evsel; enum stat_type type; int ctx; - int cpu_map_idx; + int map_idx; /* cpu or thread map index */ struct cgroup *cgrp; - struct runtime_stat *stat; struct stats stats; u64 metric_total; int metric_other; @@ -48,8 +47,8 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) rb_node); const struct saved_value *b = entry; - if (a->cpu_map_idx != b->cpu_map_idx) - return a->cpu_map_idx - b->cpu_map_idx; + if (a->map_idx != b->map_idx) + return a->map_idx - b->map_idx; /* * Previously the rbtree was used to link generic metrics. @@ -67,16 +66,6 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) if (a->cgrp != b->cgrp) return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1; - if (a->evsel == NULL && b->evsel == NULL) { - if (a->stat == b->stat) - return 0; - - if ((char *)a->stat < (char *)b->stat) - return -1; - - return 1; - } - if (a->evsel == b->evsel) return 0; if ((char *)a->evsel < (char *)b->evsel) @@ -106,7 +95,7 @@ static void saved_value_delete(struct rblist *rblist __maybe_unused, } static struct saved_value *saved_value_lookup(struct evsel *evsel, - int cpu_map_idx, + int map_idx, bool create, enum stat_type type, int ctx, @@ -116,11 +105,10 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, struct rblist *rblist; struct rb_node *nd; struct saved_value dm = { - .cpu_map_idx = cpu_map_idx, + .map_idx = map_idx, .evsel = evsel, .type = type, .ctx = ctx, - .stat = st, .cgrp = cgrp, }; @@ -215,10 +203,10 @@ struct runtime_stat_data { static void update_runtime_stat(struct runtime_stat *st, enum stat_type type, - int cpu_map_idx, u64 count, + int map_idx, u64 count, struct runtime_stat_data *rsd) { - struct saved_value *v = saved_value_lookup(NULL, cpu_map_idx, true, type, + struct saved_value *v = saved_value_lookup(NULL, map_idx, true, type, rsd->ctx, st, rsd->cgrp); if (v) @@ -231,7 +219,7 @@ static void update_runtime_stat(struct runtime_stat *st, * instruction rates, etc: */ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, - int cpu_map_idx, struct runtime_stat *st) + int map_idx, struct runtime_stat *st) { u64 count_ns = count; struct saved_value *v; @@ -243,88 +231,88 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, count *= counter->scale; if (evsel__is_clock(counter)) - update_runtime_stat(st, STAT_NSECS, cpu_map_idx, count_ns, &rsd); + update_runtime_stat(st, STAT_NSECS, map_idx, count_ns, &rsd); else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_runtime_stat(st, STAT_CYCLES, cpu_map_idx, count, &rsd); + update_runtime_stat(st, STAT_CYCLES, map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu_map_idx, count, &rsd); + update_runtime_stat(st, STAT_CYCLES_IN_TX, map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) - update_runtime_stat(st, STAT_TRANSACTION, cpu_map_idx, count, &rsd); + update_runtime_stat(st, STAT_TRANSACTION, map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, ELISION_START)) - update_runtime_stat(st, STAT_ELISION, cpu_map_idx, count, &rsd); + update_runtime_stat(st, STAT_ELISION, map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, - cpu_map_idx, count, &rsd); + map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, - cpu_map_idx, count, &rsd); + map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, - cpu_map_idx, count, &rsd); + map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, - cpu_map_idx, count, &rsd); + map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, - cpu_map_idx, count, &rsd); + map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING)) update_runtime_stat(st, STAT_TOPDOWN_RETIRING, - cpu_map_idx, count, &rsd); + map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC)) update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC, - cpu_map_idx, count, &rsd); + map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND, - cpu_map_idx, count, &rsd); + map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND, - cpu_map_idx, count, &rsd); + map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_HEAVY_OPS)) update_runtime_stat(st, STAT_TOPDOWN_HEAVY_OPS, - cpu_map_idx, count, &rsd); + map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BR_MISPREDICT)) update_runtime_stat(st, STAT_TOPDOWN_BR_MISPREDICT, - cpu_map_idx, count, &rsd); + map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_LAT)) update_runtime_stat(st, STAT_TOPDOWN_FETCH_LAT, - cpu_map_idx, count, &rsd); + map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_MEM_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_MEM_BOUND, - cpu_map_idx, count, &rsd); + map_idx, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, - cpu_map_idx, count, &rsd); + map_idx, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, - cpu_map_idx, count, &rsd); + map_idx, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_runtime_stat(st, STAT_BRANCHES, cpu_map_idx, count, &rsd); + update_runtime_stat(st, STAT_BRANCHES, map_idx, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_runtime_stat(st, STAT_CACHEREFS, cpu_map_idx, count, &rsd); + update_runtime_stat(st, STAT_CACHEREFS, map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_runtime_stat(st, STAT_L1_DCACHE, cpu_map_idx, count, &rsd); + update_runtime_stat(st, STAT_L1_DCACHE, map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_runtime_stat(st, STAT_L1_ICACHE, cpu_map_idx, count, &rsd); + update_runtime_stat(st, STAT_L1_ICACHE, map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_runtime_stat(st, STAT_LL_CACHE, cpu_map_idx, count, &rsd); + update_runtime_stat(st, STAT_LL_CACHE, map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_runtime_stat(st, STAT_DTLB_CACHE, cpu_map_idx, count, &rsd); + update_runtime_stat(st, STAT_DTLB_CACHE, map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_runtime_stat(st, STAT_ITLB_CACHE, cpu_map_idx, count, &rsd); + update_runtime_stat(st, STAT_ITLB_CACHE, map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, SMI_NUM)) - update_runtime_stat(st, STAT_SMI_NUM, cpu_map_idx, count, &rsd); + update_runtime_stat(st, STAT_SMI_NUM, map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, APERF)) - update_runtime_stat(st, STAT_APERF, cpu_map_idx, count, &rsd); + update_runtime_stat(st, STAT_APERF, map_idx, count, &rsd); if (counter->collect_stat) { - v = saved_value_lookup(counter, cpu_map_idx, true, STAT_NONE, 0, st, + v = saved_value_lookup(counter, map_idx, true, STAT_NONE, 0, st, rsd.cgrp); update_stats(&v->stats, count); if (counter->metric_leader) v->metric_total += count; } else if (counter->metric_leader) { v = saved_value_lookup(counter->metric_leader, - cpu_map_idx, true, STAT_NONE, 0, st, rsd.cgrp); + map_idx, true, STAT_NONE, 0, st, rsd.cgrp); v->metric_total += count; v->metric_other++; } @@ -466,12 +454,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) } static double runtime_stat_avg(struct runtime_stat *st, - enum stat_type type, int cpu_map_idx, + enum stat_type type, int map_idx, struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp); + v = saved_value_lookup(NULL, map_idx, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -479,12 +467,12 @@ static double runtime_stat_avg(struct runtime_stat *st, } static double runtime_stat_n(struct runtime_stat *st, - enum stat_type type, int cpu_map_idx, + enum stat_type type, int map_idx, struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp); + v = saved_value_lookup(NULL, map_idx, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -492,7 +480,7 @@ static double runtime_stat_n(struct runtime_stat *st, } static void print_stalled_cycles_frontend(struct perf_stat_config *config, - int cpu_map_idx, double avg, + int map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -500,7 +488,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd); + total = runtime_stat_avg(st, STAT_CYCLES, map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -515,7 +503,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config, } static void print_stalled_cycles_backend(struct perf_stat_config *config, - int cpu_map_idx, double avg, + int map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -523,7 +511,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd); + total = runtime_stat_avg(st, STAT_CYCLES, map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -534,7 +522,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config, } static void print_branch_misses(struct perf_stat_config *config, - int cpu_map_idx, double avg, + int map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -542,7 +530,7 @@ static void print_branch_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_BRANCHES, cpu_map_idx, rsd); + total = runtime_stat_avg(st, STAT_BRANCHES, map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -553,7 +541,7 @@ static void print_branch_misses(struct perf_stat_config *config, } static void print_l1_dcache_misses(struct perf_stat_config *config, - int cpu_map_idx, double avg, + int map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -561,7 +549,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu_map_idx, rsd); + total = runtime_stat_avg(st, STAT_L1_DCACHE, map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -572,7 +560,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config, } static void print_l1_icache_misses(struct perf_stat_config *config, - int cpu_map_idx, double avg, + int map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -580,7 +568,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu_map_idx, rsd); + total = runtime_stat_avg(st, STAT_L1_ICACHE, map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -590,7 +578,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config, } static void print_dtlb_cache_misses(struct perf_stat_config *config, - int cpu_map_idx, double avg, + int map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -598,7 +586,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu_map_idx, rsd); + total = runtime_stat_avg(st, STAT_DTLB_CACHE, map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -608,7 +596,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config, } static void print_itlb_cache_misses(struct perf_stat_config *config, - int cpu_map_idx, double avg, + int map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -616,7 +604,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu_map_idx, rsd); + total = runtime_stat_avg(st, STAT_ITLB_CACHE, map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -626,7 +614,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config, } static void print_ll_cache_misses(struct perf_stat_config *config, - int cpu_map_idx, double avg, + int map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -634,7 +622,7 @@ static void print_ll_cache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_LL_CACHE, cpu_map_idx, rsd); + total = runtime_stat_avg(st, STAT_LL_CACHE, map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -692,61 +680,61 @@ static double sanitize_val(double x) return x; } -static double td_total_slots(int cpu_map_idx, struct runtime_stat *st, +static double td_total_slots(int map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { - return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu_map_idx, rsd); + return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, map_idx, rsd); } -static double td_bad_spec(int cpu_map_idx, struct runtime_stat *st, +static double td_bad_spec(int map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { double bad_spec = 0; double total_slots; double total; - total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu_map_idx, rsd) - - runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu_map_idx, rsd) + - runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu_map_idx, rsd); + total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, map_idx, rsd) - + runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, map_idx, rsd) + + runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, map_idx, rsd); - total_slots = td_total_slots(cpu_map_idx, st, rsd); + total_slots = td_total_slots(map_idx, st, rsd); if (total_slots) bad_spec = total / total_slots; return sanitize_val(bad_spec); } -static double td_retiring(int cpu_map_idx, struct runtime_stat *st, +static double td_retiring(int map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { double retiring = 0; - double total_slots = td_total_slots(cpu_map_idx, st, rsd); + double total_slots = td_total_slots(map_idx, st, rsd); double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, - cpu_map_idx, rsd); + map_idx, rsd); if (total_slots) retiring = ret_slots / total_slots; return retiring; } -static double td_fe_bound(int cpu_map_idx, struct runtime_stat *st, +static double td_fe_bound(int map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { double fe_bound = 0; - double total_slots = td_total_slots(cpu_map_idx, st, rsd); + double total_slots = td_total_slots(map_idx, st, rsd); double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, - cpu_map_idx, rsd); + map_idx, rsd); if (total_slots) fe_bound = fetch_bub / total_slots; return fe_bound; } -static double td_be_bound(int cpu_map_idx, struct runtime_stat *st, +static double td_be_bound(int map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { - double sum = (td_fe_bound(cpu_map_idx, st, rsd) + - td_bad_spec(cpu_map_idx, st, rsd) + - td_retiring(cpu_map_idx, st, rsd)); + double sum = (td_fe_bound(map_idx, st, rsd) + + td_bad_spec(map_idx, st, rsd) + + td_retiring(map_idx, st, rsd)); if (sum == 0) return 0; return sanitize_val(1.0 - sum); @@ -757,15 +745,15 @@ static double td_be_bound(int cpu_map_idx, struct runtime_stat *st, * the ratios we need to recreate the sum. */ -static double td_metric_ratio(int cpu_map_idx, enum stat_type type, +static double td_metric_ratio(int map_idx, enum stat_type type, struct runtime_stat *stat, struct runtime_stat_data *rsd) { - double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) + - runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) + - runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) + - runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd); - double d = runtime_stat_avg(stat, type, cpu_map_idx, rsd); + double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, map_idx, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, map_idx, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, map_idx, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, map_idx, rsd); + double d = runtime_stat_avg(stat, type, map_idx, rsd); if (sum) return d / sum; @@ -777,23 +765,23 @@ static double td_metric_ratio(int cpu_map_idx, enum stat_type type, * We allow two missing. */ -static bool full_td(int cpu_map_idx, struct runtime_stat *stat, +static bool full_td(int map_idx, struct runtime_stat *stat, struct runtime_stat_data *rsd) { int c = 0; - if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, map_idx, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, map_idx, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, map_idx, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, map_idx, rsd) > 0) c++; return c >= 2; } -static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx, +static void print_smi_cost(struct perf_stat_config *config, int map_idx, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -801,9 +789,9 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx, double smi_num, aperf, cycles, cost = 0.0; const char *color = NULL; - smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu_map_idx, rsd); - aperf = runtime_stat_avg(st, STAT_APERF, cpu_map_idx, rsd); - cycles = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd); + smi_num = runtime_stat_avg(st, STAT_SMI_NUM, map_idx, rsd); + aperf = runtime_stat_avg(st, STAT_APERF, map_idx, rsd); + cycles = runtime_stat_avg(st, STAT_CYCLES, map_idx, rsd); if ((cycles == 0) || (aperf == 0)) return; @@ -820,7 +808,7 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx, static int prepare_metric(struct evsel **metric_events, struct metric_ref *metric_refs, struct expr_parse_ctx *pctx, - int cpu_map_idx, + int map_idx, struct runtime_stat *st) { double scale; @@ -859,17 +847,22 @@ static int prepare_metric(struct evsel **metric_events, abort(); } } else { - v = saved_value_lookup(metric_events[i], cpu_map_idx, false, + v = saved_value_lookup(metric_events[i], map_idx, false, STAT_NONE, 0, st, metric_events[i]->cgrp); if (!v) break; stats = &v->stats; - scale = 1.0; + /* + * If an event was scaled during stat gathering, reverse + * the scale before computing the metric. + */ + scale = 1.0 / metric_events[i]->scale; + source_count = evsel__source_count(metric_events[i]); if (v->metric_other) - metric_total = v->metric_total; + metric_total = v->metric_total * scale; } n = strdup(evsel__metric_id(metric_events[i])); if (!n) @@ -897,7 +890,7 @@ static void generic_metric(struct perf_stat_config *config, const char *metric_name, const char *metric_unit, int runtime, - int cpu_map_idx, + int map_idx, struct perf_stat_output_ctx *out, struct runtime_stat *st) { @@ -911,8 +904,11 @@ static void generic_metric(struct perf_stat_config *config, if (!pctx) return; - pctx->runtime = runtime; - i = prepare_metric(metric_events, metric_refs, pctx, cpu_map_idx, st); + if (config->user_requested_cpu_list) + pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list); + pctx->sctx.runtime = runtime; + pctx->sctx.system_wide = config->system_wide; + i = prepare_metric(metric_events, metric_refs, pctx, map_idx, st); if (i < 0) { expr__ctx_free(pctx); return; @@ -957,7 +953,7 @@ static void generic_metric(struct perf_stat_config *config, expr__ctx_free(pctx); } -double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st) +double test_generic_metric(struct metric_expr *mexp, int map_idx, struct runtime_stat *st) { struct expr_parse_ctx *pctx; double ratio = 0.0; @@ -966,7 +962,7 @@ double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct run if (!pctx) return NAN; - if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu_map_idx, st) < 0) + if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, map_idx, st) < 0) goto out; if (expr__parse(&ratio, pctx, mexp->metric_expr)) @@ -979,7 +975,7 @@ out: void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct evsel *evsel, - double avg, int cpu_map_idx, + double avg, int map_idx, struct perf_stat_output_ctx *out, struct rblist *metric_events, struct runtime_stat *st) @@ -998,7 +994,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (config->iostat_run) { iostat_print_metric(config, evsel, out); } else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { - total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES, map_idx, &rsd); if (total) { ratio = avg / total; @@ -1008,11 +1004,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); } - total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu_map_idx, &rsd); + total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, map_idx, &rsd); total = max(total, runtime_stat_avg(st, STAT_STALLED_CYCLES_BACK, - cpu_map_idx, &rsd)); + map_idx, &rsd)); if (total && avg) { out->new_line(config, ctxp); @@ -1022,8 +1018,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ratio); } } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { - if (runtime_stat_n(st, STAT_BRANCHES, cpu_map_idx, &rsd) != 0) - print_branch_misses(config, cpu_map_idx, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_BRANCHES, map_idx, &rsd) != 0) + print_branch_misses(config, map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all branches", 0); } else if ( @@ -1032,8 +1028,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_DCACHE, cpu_map_idx, &rsd) != 0) - print_l1_dcache_misses(config, cpu_map_idx, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_L1_DCACHE, map_idx, &rsd) != 0) + print_l1_dcache_misses(config, map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0); } else if ( @@ -1042,8 +1038,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_ICACHE, cpu_map_idx, &rsd) != 0) - print_l1_icache_misses(config, cpu_map_idx, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_L1_ICACHE, map_idx, &rsd) != 0) + print_l1_icache_misses(config, map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0); } else if ( @@ -1052,8 +1048,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu_map_idx, &rsd) != 0) - print_dtlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_DTLB_CACHE, map_idx, &rsd) != 0) + print_dtlb_cache_misses(config, map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0); } else if ( @@ -1062,8 +1058,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu_map_idx, &rsd) != 0) - print_itlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_ITLB_CACHE, map_idx, &rsd) != 0) + print_itlb_cache_misses(config, map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0); } else if ( @@ -1072,27 +1068,27 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_LL_CACHE, cpu_map_idx, &rsd) != 0) - print_ll_cache_misses(config, cpu_map_idx, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_LL_CACHE, map_idx, &rsd) != 0) + print_ll_cache_misses(config, map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0); } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { - total = runtime_stat_avg(st, STAT_CACHEREFS, cpu_map_idx, &rsd); + total = runtime_stat_avg(st, STAT_CACHEREFS, map_idx, &rsd); if (total) ratio = avg * 100 / total; - if (runtime_stat_n(st, STAT_CACHEREFS, cpu_map_idx, &rsd) != 0) + if (runtime_stat_n(st, STAT_CACHEREFS, map_idx, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(config, cpu_map_idx, avg, out, st, &rsd); + print_stalled_cycles_frontend(config, map_idx, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(config, cpu_map_idx, avg, out, st, &rsd); + print_stalled_cycles_backend(config, map_idx, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { - total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd); + total = runtime_stat_avg(st, STAT_NSECS, map_idx, &rsd); if (total) { ratio = avg / total; @@ -1101,7 +1097,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { - total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES, map_idx, &rsd); if (total) print_metric(config, ctxp, NULL, @@ -1111,8 +1107,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { - total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd); - total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES, map_idx, &rsd); + total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, map_idx, &rsd); if (total2 < avg) total2 = avg; @@ -1122,19 +1118,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, map_idx, &rsd); if (avg) ratio = total / avg; - if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd) != 0) + if (runtime_stat_n(st, STAT_CYCLES_IN_TX, map_idx, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else print_metric(config, ctxp, NULL, NULL, "cycles / transaction", 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, map_idx, &rsd); if (avg) ratio = total / avg; @@ -1147,28 +1143,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { - double fe_bound = td_fe_bound(cpu_map_idx, st, &rsd); + double fe_bound = td_fe_bound(map_idx, st, &rsd); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { - double retiring = td_retiring(cpu_map_idx, st, &rsd); + double retiring = td_retiring(map_idx, st, &rsd); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { - double bad_spec = td_bad_spec(cpu_map_idx, st, &rsd); + double bad_spec = td_bad_spec(map_idx, st, &rsd); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { - double be_bound = td_be_bound(cpu_map_idx, st, &rsd); + double be_bound = td_be_bound(map_idx, st, &rsd); const char *name = "backend bound"; static int have_recovery_bubbles = -1; @@ -1181,14 +1177,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (be_bound > 0.2) color = PERF_COLOR_RED; - if (td_total_slots(cpu_map_idx, st, &rsd) > 0) + if (td_total_slots(map_idx, st, &rsd) > 0) print_metric(config, ctxp, color, "%8.1f%%", name, be_bound * 100.); else print_metric(config, ctxp, NULL, NULL, name, 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) && - full_td(cpu_map_idx, st, &rsd)) { - double retiring = td_metric_ratio(cpu_map_idx, + full_td(map_idx, st, &rsd)) { + double retiring = td_metric_ratio(map_idx, STAT_TOPDOWN_RETIRING, st, &rsd); if (retiring > 0.7) @@ -1196,8 +1192,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "Retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) && - full_td(cpu_map_idx, st, &rsd)) { - double fe_bound = td_metric_ratio(cpu_map_idx, + full_td(map_idx, st, &rsd)) { + double fe_bound = td_metric_ratio(map_idx, STAT_TOPDOWN_FE_BOUND, st, &rsd); if (fe_bound > 0.2) @@ -1205,8 +1201,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "Frontend Bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) && - full_td(cpu_map_idx, st, &rsd)) { - double be_bound = td_metric_ratio(cpu_map_idx, + full_td(map_idx, st, &rsd)) { + double be_bound = td_metric_ratio(map_idx, STAT_TOPDOWN_BE_BOUND, st, &rsd); if (be_bound > 0.2) @@ -1214,8 +1210,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "Backend Bound", be_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) && - full_td(cpu_map_idx, st, &rsd)) { - double bad_spec = td_metric_ratio(cpu_map_idx, + full_td(map_idx, st, &rsd)) { + double bad_spec = td_metric_ratio(map_idx, STAT_TOPDOWN_BAD_SPEC, st, &rsd); if (bad_spec > 0.1) @@ -1223,11 +1219,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "Bad Speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) && - full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { - double retiring = td_metric_ratio(cpu_map_idx, + full_td(map_idx, st, &rsd) && (config->topdown_level > 1)) { + double retiring = td_metric_ratio(map_idx, STAT_TOPDOWN_RETIRING, st, &rsd); - double heavy_ops = td_metric_ratio(cpu_map_idx, + double heavy_ops = td_metric_ratio(map_idx, STAT_TOPDOWN_HEAVY_OPS, st, &rsd); double light_ops = retiring - heavy_ops; @@ -1243,11 +1239,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "Light Operations", light_ops * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) && - full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { - double bad_spec = td_metric_ratio(cpu_map_idx, + full_td(map_idx, st, &rsd) && (config->topdown_level > 1)) { + double bad_spec = td_metric_ratio(map_idx, STAT_TOPDOWN_BAD_SPEC, st, &rsd); - double br_mis = td_metric_ratio(cpu_map_idx, + double br_mis = td_metric_ratio(map_idx, STAT_TOPDOWN_BR_MISPREDICT, st, &rsd); double m_clears = bad_spec - br_mis; @@ -1263,11 +1259,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "Machine Clears", m_clears * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) && - full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { - double fe_bound = td_metric_ratio(cpu_map_idx, + full_td(map_idx, st, &rsd) && (config->topdown_level > 1)) { + double fe_bound = td_metric_ratio(map_idx, STAT_TOPDOWN_FE_BOUND, st, &rsd); - double fetch_lat = td_metric_ratio(cpu_map_idx, + double fetch_lat = td_metric_ratio(map_idx, STAT_TOPDOWN_FETCH_LAT, st, &rsd); double fetch_bw = fe_bound - fetch_lat; @@ -1283,11 +1279,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "Fetch Bandwidth", fetch_bw * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) && - full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { - double be_bound = td_metric_ratio(cpu_map_idx, + full_td(map_idx, st, &rsd) && (config->topdown_level > 1)) { + double be_bound = td_metric_ratio(map_idx, STAT_TOPDOWN_BE_BOUND, st, &rsd); - double mem_bound = td_metric_ratio(cpu_map_idx, + double mem_bound = td_metric_ratio(map_idx, STAT_TOPDOWN_MEM_BOUND, st, &rsd); double core_bound = be_bound - mem_bound; @@ -1304,12 +1300,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, core_bound * 100.); } else if (evsel->metric_expr) { generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, - evsel->name, evsel->metric_name, NULL, 1, cpu_map_idx, out, st); - } else if (runtime_stat_n(st, STAT_NSECS, cpu_map_idx, &rsd) != 0) { + evsel->name, evsel->metric_name, NULL, 1, + map_idx, out, st); + } else if (runtime_stat_n(st, STAT_NSECS, map_idx, &rsd) != 0) { char unit = ' '; char unit_buf[10] = "/sec"; - total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd); + total = runtime_stat_avg(st, STAT_NSECS, map_idx, &rsd); if (total) ratio = convert_unit_double(1000000000.0 * avg / total, &unit); @@ -1317,7 +1314,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(config, cpu_map_idx, out, st, &rsd); + print_smi_cost(config, map_idx, out, st, &rsd); } else { num = 0; } @@ -1329,8 +1326,9 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (num++ > 0) out->new_line(config, ctxp); generic_metric(config, mexp->metric_expr, mexp->metric_events, - mexp->metric_refs, evsel->name, mexp->metric_name, - mexp->metric_unit, mexp->runtime, cpu_map_idx, out, st); + mexp->metric_refs, evsel->name, mexp->metric_name, + mexp->metric_unit, mexp->runtime, + map_idx, out, st); } } if (num == 0) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 0882b4754fcf..8ec8bb4a9912 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -14,7 +14,11 @@ #include "evlist.h" #include "evsel.h" #include "thread_map.h" -#include "hashmap.h" +#ifdef HAVE_LIBBPF_SUPPORT +#include <bpf/hashmap.h> +#else +#include "util/hashmap.h" +#endif #include <linux/zalloc.h> void update_stats(struct stats *stats, u64 val) @@ -128,13 +132,9 @@ static void perf_stat_evsel_id_init(struct evsel *evsel) static void evsel__reset_stat_priv(struct evsel *evsel) { - int i; struct perf_stat_evsel *ps = evsel->stats; - for (i = 0; i < 3; i++) - init_stats(&ps->res_stats[i]); - - perf_stat_evsel_id_init(evsel); + init_stats(&ps->res_stats); } static int evsel__alloc_stat_priv(struct evsel *evsel) @@ -142,6 +142,7 @@ static int evsel__alloc_stat_priv(struct evsel *evsel) evsel->stats = zalloc(sizeof(struct perf_stat_evsel)); if (evsel->stats == NULL) return -ENOMEM; + perf_stat_evsel_id_init(evsel); evsel__reset_stat_priv(evsel); return 0; } @@ -388,12 +389,8 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, } if (config->aggr_mode == AGGR_THREAD) { - if (config->stats) - perf_stat__update_shadow_stats(evsel, - count->val, 0, &config->stats[thread]); - else - perf_stat__update_shadow_stats(evsel, - count->val, 0, &rt_stat); + perf_stat__update_shadow_stats(evsel, count->val, + thread, &rt_stat); } break; case AGGR_GLOBAL: @@ -416,9 +413,6 @@ static int process_counter_maps(struct perf_stat_config *config, int ncpus = evsel__nr_cpus(counter); int idx, thread; - if (counter->core.system_wide) - nthreads = 1; - for (thread = 0; thread < nthreads; thread++) { for (idx = 0; idx < ncpus; idx++) { if (process_counter_values(config, counter, idx, thread, @@ -436,7 +430,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, struct perf_counts_values *aggr = &counter->counts->aggr; struct perf_stat_evsel *ps = counter->stats; u64 *count = counter->counts->aggr.values; - int i, ret; + int ret; aggr->val = aggr->ena = aggr->run = 0; @@ -454,8 +448,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, evsel__compute_deltas(counter, -1, -1, aggr); perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); - for (i = 0; i < 3; i++) - update_stats(&ps->res_stats[i], count[i]); + update_stats(&ps->res_stats, *count); if (verbose > 0) { fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 668250022f8c..b0899c6e002f 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -43,7 +43,7 @@ enum perf_stat_evsel_id { }; struct perf_stat_evsel { - struct stats res_stats[3]; + struct stats res_stats; enum perf_stat_evsel_id id; u64 *group_data; }; @@ -141,6 +141,8 @@ struct perf_stat_config { bool stop_read_counter; bool quiet; bool iostat_run; + char *user_requested_cpu_list; + bool system_wide; FILE *output; unsigned int interval; unsigned int timeout; @@ -151,8 +153,6 @@ struct perf_stat_config { int run_count; int print_free_counters_hint; int print_mixed_hw_group_error; - struct runtime_stat *stats; - int stats_num; const char *csv_sep; struct stats *walltime_nsecs_stats; struct rusage ru_data; @@ -232,7 +232,7 @@ void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); void perf_stat__reset_shadow_per_stat(struct runtime_stat *st); void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, - int cpu_map_idx, struct runtime_stat *st); + int map_idx, struct runtime_stat *st); struct perf_stat_output_ctx { void *ctx; print_metric_t print_metric; @@ -242,7 +242,7 @@ struct perf_stat_output_ctx { void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct evsel *evsel, - double avg, int cpu, + double avg, int map_idx, struct perf_stat_output_ctx *out, struct rblist *metric_events, struct runtime_stat *st); @@ -277,5 +277,5 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf struct target *_target, struct timespec *ts, int argc, const char **argv); struct metric_expr; -double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st); +double test_generic_metric(struct metric_expr *mexp, int map_idx, struct runtime_stat *st); #endif diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index f6d90cdd9225..4f12a96f33cc 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -15,7 +15,6 @@ const char *dots = "....................................................................." "....................................................................."; -#define K 1024LL /* * perf_atoll() * Parse (\d+)(b|B|kb|KB|mb|MB|gb|GB|tb|TB) (e.g. "256MB") diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a4b22caa7c24..a3a165ae933a 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1791,6 +1791,7 @@ int dso__load(struct dso *dso, struct map *map) char newmapname[PATH_MAX]; const char *map_path = dso->long_name; + mutex_lock(&dso->lock); perfmap = strncmp(dso->name, "/tmp/perf-", 10) == 0; if (perfmap) { if (dso->nsinfo && (dso__find_perf_map(newmapname, @@ -1800,7 +1801,6 @@ int dso__load(struct dso *dso, struct map *map) } nsinfo__mountns_enter(dso->nsinfo, &nsc); - pthread_mutex_lock(&dso->lock); /* check again under the dso->lock */ if (dso__loaded(dso)) { @@ -1964,7 +1964,7 @@ out_free: ret = 0; out: dso__set_loaded(dso); - pthread_mutex_unlock(&dso->lock); + mutex_unlock(&dso->lock); nsinfo__mountns_exit(&nsc); return ret; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 538790758e24..cccd293b5312 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -364,11 +364,14 @@ static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end, } static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event, + struct machine *machine, bool is_kernel) { struct build_id bid; struct nsinfo *nsi; struct nscookie nc; + struct dso *dso = NULL; + struct dso_id id; int rc; if (is_kernel) { @@ -376,6 +379,18 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event, goto out; } + id.maj = event->maj; + id.min = event->min; + id.ino = event->ino; + id.ino_generation = event->ino_generation; + + dso = dsos__findnew_id(&machine->dsos, event->filename, &id); + if (dso && dso->has_build_id) { + bid = dso->bid; + rc = 0; + goto out; + } + nsi = nsinfo__new(event->pid); nsinfo__mountns_enter(nsi, &nc); @@ -391,12 +406,16 @@ out: event->header.misc |= PERF_RECORD_MISC_MMAP_BUILD_ID; event->__reserved_1 = 0; event->__reserved_2 = 0; + + if (dso && !dso->has_build_id) + dso__set_build_id(dso, &bid); } else { if (event->filename[0] == '/') { pr_debug2("Failed to read build ID for %s\n", event->filename); } } + dso__put(dso); } int perf_event__synthesize_mmap_events(struct perf_tool *tool, @@ -507,7 +526,7 @@ out: event->mmap2.tid = pid; if (symbol_conf.buildid_mmap2) - perf_record_mmap2__read_build_id(&event->mmap2, false); + perf_record_mmap2__read_build_id(&event->mmap2, machine, false); if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { rc = -1; @@ -690,7 +709,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t memcpy(event->mmap2.filename, pos->dso->long_name, pos->dso->long_name_len + 1); - perf_record_mmap2__read_build_id(&event->mmap2, false); + perf_record_mmap2__read_build_id(&event->mmap2, machine, false); } else { size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); event->mmap.header.type = PERF_RECORD_MMAP; @@ -1126,7 +1145,7 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, event->mmap2.len = map->end - event->mmap.start; event->mmap2.pid = machine->pid; - perf_record_mmap2__read_build_id(&event->mmap2, true); + perf_record_mmap2__read_build_id(&event->mmap2, machine, true); } else { size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; @@ -1195,93 +1214,97 @@ int perf_event__synthesize_thread_map2(struct perf_tool *tool, return err; } -static void synthesize_cpus(struct perf_record_cpu_map_data *data, - const struct perf_cpu_map *map) -{ - int i, map_nr = perf_cpu_map__nr(map); - - data->cpus_data.nr = map_nr; +struct synthesize_cpu_map_data { + const struct perf_cpu_map *map; + int nr; + int min_cpu; + int max_cpu; + int has_any_cpu; + int type; + size_t size; + struct perf_record_cpu_map_data *data; +}; - for (i = 0; i < map_nr; i++) - data->cpus_data.cpu[i] = perf_cpu_map__cpu(map, i).cpu; +static void synthesize_cpus(struct synthesize_cpu_map_data *data) +{ + data->data->type = PERF_CPU_MAP__CPUS; + data->data->cpus_data.nr = data->nr; + for (int i = 0; i < data->nr; i++) + data->data->cpus_data.cpu[i] = perf_cpu_map__cpu(data->map, i).cpu; } -static void synthesize_mask(struct perf_record_cpu_map_data *data, - const struct perf_cpu_map *map, int max) +static void synthesize_mask(struct synthesize_cpu_map_data *data) { int idx; struct perf_cpu cpu; /* Due to padding, the 4bytes per entry mask variant is always smaller. */ - data->mask32_data.nr = BITS_TO_U32(max); - data->mask32_data.long_size = 4; + data->data->type = PERF_CPU_MAP__MASK; + data->data->mask32_data.nr = BITS_TO_U32(data->max_cpu); + data->data->mask32_data.long_size = 4; - perf_cpu_map__for_each_cpu(cpu, idx, map) { + perf_cpu_map__for_each_cpu(cpu, idx, data->map) { int bit_word = cpu.cpu / 32; - __u32 bit_mask = 1U << (cpu.cpu & 31); + u32 bit_mask = 1U << (cpu.cpu & 31); - data->mask32_data.mask[bit_word] |= bit_mask; + data->data->mask32_data.mask[bit_word] |= bit_mask; } } -static size_t cpus_size(const struct perf_cpu_map *map) -{ - return sizeof(struct cpu_map_entries) + perf_cpu_map__nr(map) * sizeof(u16); -} - -static size_t mask_size(const struct perf_cpu_map *map, int *max) +static void synthesize_range_cpus(struct synthesize_cpu_map_data *data) { - *max = perf_cpu_map__max(map).cpu; - return sizeof(struct perf_record_mask_cpu_map32) + BITS_TO_U32(*max) * sizeof(__u32); + data->data->type = PERF_CPU_MAP__RANGE_CPUS; + data->data->range_cpu_data.any_cpu = data->has_any_cpu; + data->data->range_cpu_data.start_cpu = data->min_cpu; + data->data->range_cpu_data.end_cpu = data->max_cpu; } -static void *cpu_map_data__alloc(const struct perf_cpu_map *map, size_t *size, - u16 *type, int *max) +static void *cpu_map_data__alloc(struct synthesize_cpu_map_data *syn_data, + size_t header_size) { size_t size_cpus, size_mask; - bool is_dummy = perf_cpu_map__empty(map); - /* - * Both array and mask data have variable size based - * on the number of cpus and their actual values. - * The size of the 'struct perf_record_cpu_map_data' is: - * - * array = size of 'struct cpu_map_entries' + - * number of cpus * sizeof(u64) - * - * mask = size of 'struct perf_record_record_cpu_map' + - * maximum cpu bit converted to size of longs - * - * and finally + the size of 'struct perf_record_cpu_map_data'. - */ - size_cpus = cpus_size(map); - size_mask = mask_size(map, max); + syn_data->nr = perf_cpu_map__nr(syn_data->map); + syn_data->has_any_cpu = (perf_cpu_map__cpu(syn_data->map, 0).cpu == -1) ? 1 : 0; - if (is_dummy || (size_cpus < size_mask)) { - *size += size_cpus; - *type = PERF_CPU_MAP__CPUS; - } else { - *size += size_mask; - *type = PERF_CPU_MAP__MASK; + syn_data->min_cpu = perf_cpu_map__cpu(syn_data->map, syn_data->has_any_cpu).cpu; + syn_data->max_cpu = perf_cpu_map__max(syn_data->map).cpu; + if (syn_data->max_cpu - syn_data->min_cpu + 1 == syn_data->nr - syn_data->has_any_cpu) { + /* A consecutive range of CPUs can be encoded using a range. */ + assert(sizeof(u16) + sizeof(struct perf_record_range_cpu_map) == sizeof(u64)); + syn_data->type = PERF_CPU_MAP__RANGE_CPUS; + syn_data->size = header_size + sizeof(u64); + return zalloc(syn_data->size); } - *size += sizeof(__u16); /* For perf_record_cpu_map_data.type. */ - *size = PERF_ALIGN(*size, sizeof(u64)); - return zalloc(*size); + size_cpus = sizeof(u16) + sizeof(struct cpu_map_entries) + syn_data->nr * sizeof(u16); + /* Due to padding, the 4bytes per entry mask variant is always smaller. */ + size_mask = sizeof(u16) + sizeof(struct perf_record_mask_cpu_map32) + + BITS_TO_U32(syn_data->max_cpu) * sizeof(__u32); + if (syn_data->has_any_cpu || size_cpus < size_mask) { + /* Follow the CPU map encoding. */ + syn_data->type = PERF_CPU_MAP__CPUS; + syn_data->size = header_size + PERF_ALIGN(size_cpus, sizeof(u64)); + return zalloc(syn_data->size); + } + /* Encode using a bitmask. */ + syn_data->type = PERF_CPU_MAP__MASK; + syn_data->size = header_size + PERF_ALIGN(size_mask, sizeof(u64)); + return zalloc(syn_data->size); } -static void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, - const struct perf_cpu_map *map, - u16 type, int max) +static void cpu_map_data__synthesize(struct synthesize_cpu_map_data *data) { - data->type = type; - - switch (type) { + switch (data->type) { case PERF_CPU_MAP__CPUS: - synthesize_cpus(data, map); + synthesize_cpus(data); break; case PERF_CPU_MAP__MASK: - synthesize_mask(data, map, max); + synthesize_mask(data); + break; + case PERF_CPU_MAP__RANGE_CPUS: + synthesize_range_cpus(data); + break; default: break; } @@ -1289,23 +1312,22 @@ static void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, static struct perf_record_cpu_map *cpu_map_event__new(const struct perf_cpu_map *map) { - size_t size = sizeof(struct perf_event_header); + struct synthesize_cpu_map_data syn_data = { .map = map }; struct perf_record_cpu_map *event; - int max; - u16 type; - event = cpu_map_data__alloc(map, &size, &type, &max); + + event = cpu_map_data__alloc(&syn_data, sizeof(struct perf_event_header)); if (!event) return NULL; + syn_data.data = &event->data; event->header.type = PERF_RECORD_CPU_MAP; - event->header.size = size; - event->data.type = type; - - cpu_map_data__synthesize(&event->data, map, type, max); + event->header.size = syn_data.size; + cpu_map_data__synthesize(&syn_data); return event; } + int perf_event__synthesize_cpu_map(struct perf_tool *tool, const struct perf_cpu_map *map, perf_event__handler_t process, @@ -1955,7 +1977,7 @@ int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evse if (ev == NULL) return -ENOMEM; - strlcpy(ev->data, evsel->unit, size + 1); + strlcpy(ev->unit, evsel->unit, size + 1); err = process(tool, (union perf_event *)ev, NULL, NULL); free(ev); return err; @@ -1972,8 +1994,7 @@ int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evs if (ev == NULL) return -ENOMEM; - ev_data = (struct perf_record_event_update_scale *)ev->data; - ev_data->scale = evsel->scale; + ev->scale.scale = evsel->scale; err = process(tool, (union perf_event *)ev, NULL, NULL); free(ev); return err; @@ -1990,7 +2011,7 @@ int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evse if (ev == NULL) return -ENOMEM; - strlcpy(ev->data, evsel->name, len + 1); + strlcpy(ev->name, evsel->name, len + 1); err = process(tool, (union perf_event *)ev, NULL, NULL); free(ev); return err; @@ -1999,25 +2020,20 @@ int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evse int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process) { - size_t size = sizeof(struct perf_record_event_update); + struct synthesize_cpu_map_data syn_data = { .map = evsel->core.own_cpus }; struct perf_record_event_update *ev; - int max, err; - u16 type; - - if (!evsel->core.own_cpus) - return 0; + int err; - ev = cpu_map_data__alloc(evsel->core.own_cpus, &size, &type, &max); + ev = cpu_map_data__alloc(&syn_data, sizeof(struct perf_event_header) + 2 * sizeof(u64)); if (!ev) return -ENOMEM; + syn_data.data = &ev->cpus.cpus; ev->header.type = PERF_RECORD_EVENT_UPDATE; - ev->header.size = (u16)size; + ev->header.size = (u16)syn_data.size; ev->type = PERF_EVENT_UPDATE__CPUS; ev->id = evsel->core.id[0]; - - cpu_map_data__synthesize((struct perf_record_cpu_map_data *)ev->data, - evsel->core.own_cpus, type, max); + cpu_map_data__synthesize(&syn_data); err = process(tool, (union perf_event *)ev, NULL, NULL); free(ev); diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 1c2c0a838430..a8b0d79bd96c 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -5,6 +5,7 @@ #include "tool.h" #include "evswitch.h" #include "annotate.h" +#include "mutex.h" #include "ordered-events.h" #include "record.h" #include <linux/types.h> @@ -53,8 +54,8 @@ struct perf_top { struct ordered_events *in; struct ordered_events data[2]; bool rotate; - pthread_mutex_t mutex; - pthread_cond_t cond; + struct mutex mutex; + struct cond cond; } qe; }; |