Diffstat (limited to 'tools/perf'): 50 files changed, 916 insertions, 363 deletions
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index d7e4869905f1..b6866a05edd2 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -170,7 +170,7 @@ Probe points are defined by following syntax. or, sdt_PROVIDER:SDTEVENT -'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function. You can also specify a group name by 'GROUP', if omitted, set 'probe' is used for kprobe and 'probe_<bin>' is used for uprobe. +'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function, and for return probes, a "\_\_return" suffix is automatically added to the function name. You can also specify a group name by 'GROUP', if omitted, set 'probe' is used for kprobe and 'probe_<bin>' is used for uprobe. Note that using existing group name can conflict with other events. Especially, using the group name reserved for kernel modules can hide embedded events in the modules. 'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). Note that ';PTN' must be the end of the probe point definition. In addition, '@SRC' specifies a source file which has that function. @@ -182,6 +182,14 @@ Note that before using the SDT event, the target binary (on which SDT events are For details of the SDT, see below. https://sourceware.org/gdb/onlinedocs/gdb/Static-Probe-Points.html +ESCAPED CHARACTER +----------------- + +In the probe syntax, '=', '@', '+', ':' and ';' are treated as a special character. You can use a backslash ('\') to escape the special characters. +This is useful if you need to probe on a specific versioned symbols, like @GLIBC_... suffixes, or also you need to specify a source file which includes the special characters. +Note that usually single backslash is consumed by shell, so you might need to pass double backslash (\\) or wrapping with single quotes (\'AAA\@BBB'). +See EXAMPLES how it is used. + PROBE ARGUMENT -------------- Each probe argument follows below syntax. 
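[Editorial illustration of the quoting note in the ESCAPED CHARACTER section above — a minimal sketch only, reusing the same library path and versioned symbol that appear in the EXAMPLES hunk below; both invocations are meant to deliver the escaped '@' to perf itself rather than to the shell:

	# unquoted form: the shell consumes one backslash, perf receives 'malloc_get_state\@GLIBC_2.2.5'
	./perf probe -x /lib64/libc-2.25.so malloc_get_state\\@GLIBC_2.2.5

	# single-quoted form: the backslash passes through the shell unchanged
	./perf probe -x /lib64/libc-2.25.so 'malloc_get_state\@GLIBC_2.2.5'
]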
@@ -277,6 +285,14 @@ Add a USDT probe to a target process running in a different mount namespace ./perf probe --target-ns <target pid> -x /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.121-0.b13.el7_3.x86_64/jre/lib/amd64/server/libjvm.so %sdt_hotspot:thread__sleep__end +Add a probe on specific versioned symbol by backslash escape + + ./perf probe -x /lib64/libc-2.25.so 'malloc_get_state\@GLIBC_2.2.5' + +Add a probe in a source file using special characters by backslash escape + + ./perf probe -x /opt/test/a.out 'foo\+bar.c:4' + SEE ALSO -------- diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 79b117a03fd7..f050f38d8fa3 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -22,6 +22,7 @@ include $(srctree)/tools/scripts/Makefile.arch $(call detected_var,SRCARCH) NO_PERF_REGS := 1 +NO_SYSCALL_TABLE := 1 # Additional ARCH settings for ppc ifeq ($(SRCARCH),powerpc) @@ -33,7 +34,8 @@ endif ifeq ($(SRCARCH),x86) $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) - CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated + NO_SYSCALL_TABLE := 0 + CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -I$(OUTPUT)arch/x86/include/generated ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma $(call detected,CONFIG_X86_64) @@ -41,7 +43,6 @@ ifeq ($(SRCARCH),x86) LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind endif NO_PERF_REGS := 0 - CFLAGS += -fPIC endif ifeq ($(SRCARCH),arm) @@ -56,12 +57,18 @@ endif ifeq ($(ARCH),s390) NO_PERF_REGS := 0 + NO_SYSCALL_TABLE := 0 + CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated endif ifeq ($(NO_PERF_REGS),0) $(call detected,CONFIG_PERF_REGS) endif +ifneq ($(NO_SYSCALL_TABLE),1) + CFLAGS += -DHAVE_SYSCALL_TABLE +endif + # So far there's only x86 and arm libdw unwind support merged in perf. # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index b1ab72d2a42e..e04f6cdd6f32 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,4 +1,5 @@ libperf-y += header.o +libperf-y += sym-handling.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm64/util/sym-handling.c b/tools/perf/arch/arm64/util/sym-handling.c new file mode 100644 index 000000000000..0051b1ee8450 --- /dev/null +++ b/tools/perf/arch/arm64/util/sym-handling.c @@ -0,0 +1,22 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * Copyright (C) 2015 Naveen N. 
Rao, IBM Corporation + */ + +#include "debug.h" +#include "symbol.h" +#include "map.h" +#include "probe-event.h" +#include "probe-file.h" + +#ifdef HAVE_LIBELF_SUPPORT +bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) +{ + return ehdr.e_type == ET_EXEC || + ehdr.e_type == ET_REL || + ehdr.e_type == ET_DYN; +} +#endif diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index 8c0cfeb55f8e..c6f373508a4f 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -1,12 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include <stdio.h> -#include <sys/utsname.h> #include "common.h" +#include "../util/env.h" #include "../util/util.h" #include "../util/debug.h" -#include "sane_ctype.h" - const char *const arm_triplets[] = { "arm-eabi-", "arm-linux-androideabi-", @@ -120,55 +118,19 @@ static int lookup_triplets(const char *const *triplets, const char *name) return -1; } -/* - * Return architecture name in a normalized form. - * The conversion logic comes from the Makefile. - */ -const char *normalize_arch(char *arch) -{ - if (!strcmp(arch, "x86_64")) - return "x86"; - if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6') - return "x86"; - if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5)) - return "sparc"; - if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64")) - return "arm64"; - if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110")) - return "arm"; - if (!strncmp(arch, "s390", 4)) - return "s390"; - if (!strncmp(arch, "parisc", 6)) - return "parisc"; - if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3)) - return "powerpc"; - if (!strncmp(arch, "mips", 4)) - return "mips"; - if (!strncmp(arch, "sh", 2) && isdigit(arch[2])) - return "sh"; - - return arch; -} - static int perf_env__lookup_binutils_path(struct perf_env *env, const char *name, const char **path) { int idx; - const char *arch, *cross_env; - struct utsname uts; + const char *arch = perf_env__arch(env), *cross_env; const char *const *path_list; char *buf = NULL; - arch = normalize_arch(env->arch); - - if (uname(&uts) < 0) - goto out; - /* * We don't need to try to find objdump path for native system. * Just use default binutils path (e.g.: "objdump"). 
*/ - if (!strcmp(normalize_arch(uts.machine), arch)) + if (!strcmp(perf_env__arch(NULL), arch)) goto out; cross_env = getenv("CROSS_COMPILE"); diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h index a1546509ad24..2d875baa92e6 100644 --- a/tools/perf/arch/common.h +++ b/tools/perf/arch/common.h @@ -7,6 +7,5 @@ extern const char *objdump_path; int perf_env__lookup_objdump(struct perf_env *env); -const char *normalize_arch(char *arch); #endif /* ARCH_PERF_COMMON_H */ diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 9c4e23d8c8ce..53d83d7e6a09 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -64,6 +64,14 @@ int arch__compare_symbol_names_n(const char *namea, const char *nameb, return strncmp(namea, nameb, n); } + +const char *arch__normalize_symbol_name(const char *name) +{ + /* Skip over initial dot */ + if (name && *name == '.') + name++; + return name; +} #endif #if defined(_CALL_ELF) && _CALL_ELF == 2 diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index 09ba923debe8..48228de415d0 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -3,3 +3,24 @@ PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 + +# +# Syscall table generation for perf +# + +out := $(OUTPUT)arch/s390/include/generated/asm +header := $(out)/syscalls_64.c +sysdef := $(srctree)/tools/arch/s390/include/uapi/asm/unistd.h +sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls/ +systbl := $(sysprf)/mksyscalltbl + +# Create output directory if not already present +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' '$(CC)' $(sysdef) > $@ + +clean:: + $(call QUIET_CLEAN, s390) $(RM) $(header) + +archheaders: $(header) diff --git a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl new file mode 100755 index 000000000000..7fa0d0abd419 --- /dev/null +++ b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl @@ -0,0 +1,36 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table for perf +# +# +# Copyright IBM Corp. 2017 +# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> +# + +gcc=$1 +input=$2 + +if ! 
test -r $input; then + echo "Could not read input file" >&2 + exit 1 +fi + +create_table() +{ + local max_nr + + echo 'static const char *syscalltbl_s390_64[] = {' + while read sc nr; do + printf '\t[%d] = "%s",\n' $nr $sc + max_nr=$nr + done + echo '};' + echo "#define SYSCALLTBL_S390_64_MAX_ID $max_nr" +} + + +$gcc -m64 -E -dM -x c $input \ + |sed -ne 's/^#define __NR_//p' \ + |sort -t' ' -k2 -nu \ + |create_table diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 2defb6df7fd0..9aa3a674829b 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -27,7 +27,6 @@ #include "cpumap.h" #include <err.h> -#include <sys/time.h> static unsigned int nthreads = 0; static unsigned int nsecs = 10; diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index f1da9b0833c0..c0debc3f79b6 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -27,13 +27,10 @@ #include "sort.h" #include "tool.h" #include "data.h" -#include "sort.h" #include "event.h" #include "evlist.h" #include "evsel.h" -#include <asm/bug.h> #include "ui/browsers/hists.h" -#include "evlist.h" #include "thread.h" struct c2c_hists { diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0a5749ef8b94..50385d89c497 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -51,7 +51,6 @@ #include <signal.h> #include <sys/mman.h> #include <sys/wait.h> -#include <asm/bug.h> #include <linux/time64.h> struct switch_output { @@ -1805,8 +1804,8 @@ int cmd_record(int argc, const char **argv) goto out; } - /* Enable ignoring missing threads when -u option is defined. */ - rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX; + /* Enable ignoring missing threads when -u/-p option is defined. 
*/ + rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; err = -ENOMEM; if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 39d8b55f0db3..77e47cf39f2c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -26,6 +26,7 @@ #include "util/string2.h" #include "util/thread-stack.h" #include "util/time-utils.h" +#include "util/path.h" #include "print_binary.h" #include <linux/bitmap.h> #include <linux/kernel.h> @@ -1548,7 +1549,8 @@ static void perf_sample__fprint_metric(struct perf_script *script, val = sample->period * evsel->scale; perf_stat__update_shadow_stats(evsel, val, - sample->cpu); + sample->cpu, + &rt_stat); evsel_script(evsel)->val = val; if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) { for_each_group_member (ev2, evsel->leader) { @@ -1556,7 +1558,8 @@ static void perf_sample__fprint_metric(struct perf_script *script, evsel_script(ev2)->val, sample->cpu, &ctx, - NULL); + NULL, + &rt_stat); } evsel_script(evsel->leader)->gnum = 0; } @@ -2399,19 +2402,6 @@ out: return rc; } -/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */ -static int is_directory(const char *base_path, const struct dirent *dent) -{ - char path[PATH_MAX]; - struct stat st; - - sprintf(path, "%s/%s", base_path, dent->d_name); - if (stat(path, &st)) - return 0; - - return S_ISDIR(st.st_mode); -} - #define for_each_lang(scripts_path, scripts_dir, lang_dirent) \ while ((lang_dirent = readdir(scripts_dir)) != NULL) \ if ((lang_dirent->d_type == DT_DIR || \ diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a027b4712e48..98bf9d32f222 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -63,7 +63,6 @@ #include "util/group.h" #include "util/session.h" #include "util/tool.h" -#include "util/group.h" #include "util/string2.h" #include "util/metricgroup.h" #include "asm/bug.h" @@ -214,8 +213,13 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a, static void perf_stat__reset_stats(void) { + int i; + perf_evlist__reset_stats(evsel_list); perf_stat__reset_shadow_stats(); + + for (i = 0; i < stat_config.stats_num; i++) + perf_stat__reset_shadow_per_stat(&stat_config.stats[i]); } static int create_perf_stat_counter(struct perf_evsel *evsel) @@ -272,7 +276,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) attr->enable_on_exec = 1; } - if (target__has_cpu(&target)) + if (target__has_cpu(&target) && !target__has_per_thread(&target)) return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); return perf_evsel__open_per_thread(evsel, evsel_list->threads); @@ -335,7 +339,7 @@ static int read_counter(struct perf_evsel *counter) int nthreads = thread_map__nr(evsel_list->threads); int ncpus, cpu, thread; - if (target__has_cpu(&target)) + if (target__has_cpu(&target) && !target__has_per_thread(&target)) ncpus = perf_evsel__nr_cpus(counter); else ncpus = 1; @@ -1097,7 +1101,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) } static void printout(int id, int nr, struct perf_evsel *counter, double uval, - char *prefix, u64 run, u64 ena, double noise) + char *prefix, u64 run, u64 ena, double noise, + struct runtime_stat *st) { struct perf_stat_output_ctx out; struct outstate os = { @@ -1190,7 +1195,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, perf_stat__print_shadow_stats(counter, uval, 
first_shadow_cpu(counter, id), - &out, &metric_events); + &out, &metric_events, st); if (!csv_output && !metric_only) { print_noise(counter, noise); print_running(run, ena); @@ -1214,7 +1219,8 @@ static void aggr_update_shadow(void) val += perf_counts(counter->counts, cpu, 0)->val; } perf_stat__update_shadow_stats(counter, val, - first_shadow_cpu(counter, id)); + first_shadow_cpu(counter, id), + &rt_stat); } } } @@ -1334,7 +1340,8 @@ static void print_aggr(char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(id, nr, counter, uval, prefix, run, ena, 1.0); + printout(id, nr, counter, uval, prefix, run, ena, 1.0, + &rt_stat); if (!metric_only) fputc('\n', output); } @@ -1343,13 +1350,24 @@ static void print_aggr(char *prefix) } } -static void print_aggr_thread(struct perf_evsel *counter, char *prefix) +static int cmp_val(const void *a, const void *b) { - FILE *output = stat_config.output; - int nthreads = thread_map__nr(counter->threads); - int ncpus = cpu_map__nr(counter->cpus); - int cpu, thread; + return ((struct perf_aggr_thread_value *)b)->val - + ((struct perf_aggr_thread_value *)a)->val; +} + +static struct perf_aggr_thread_value *sort_aggr_thread( + struct perf_evsel *counter, + int nthreads, int ncpus, + int *ret) +{ + int cpu, thread, i = 0; double uval; + struct perf_aggr_thread_value *buf; + + buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value)); + if (!buf) + return NULL; for (thread = 0; thread < nthreads; thread++) { u64 ena = 0, run = 0, val = 0; @@ -1360,13 +1378,63 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) run += perf_counts(counter->counts, cpu, thread)->run; } + uval = val * counter->scale; + + /* + * Skip value 0 when enabling --per-thread globally, + * otherwise too many 0 output. 
+ */ + if (uval == 0.0 && target__has_per_thread(&target)) + continue; + + buf[i].counter = counter; + buf[i].id = thread; + buf[i].uval = uval; + buf[i].val = val; + buf[i].run = run; + buf[i].ena = ena; + i++; + } + + qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); + + if (ret) + *ret = i; + + return buf; +} + +static void print_aggr_thread(struct perf_evsel *counter, char *prefix) +{ + FILE *output = stat_config.output; + int nthreads = thread_map__nr(counter->threads); + int ncpus = cpu_map__nr(counter->cpus); + int thread, sorted_threads, id; + struct perf_aggr_thread_value *buf; + + buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads); + if (!buf) { + perror("cannot sort aggr thread"); + return; + } + + for (thread = 0; thread < sorted_threads; thread++) { if (prefix) fprintf(output, "%s", prefix); - uval = val * counter->scale; - printout(thread, 0, counter, uval, prefix, run, ena, 1.0); + id = buf[thread].id; + if (stat_config.stats) + printout(id, 0, buf[thread].counter, buf[thread].uval, + prefix, buf[thread].run, buf[thread].ena, 1.0, + &stat_config.stats[id]); + else + printout(id, 0, buf[thread].counter, buf[thread].uval, + prefix, buf[thread].run, buf[thread].ena, 1.0, + &rt_stat); fputc('\n', output); } + + free(buf); } struct caggr_data { @@ -1401,7 +1469,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = cd.avg * counter->scale; - printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg); + printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, + cd.avg, &rt_stat); if (!metric_only) fprintf(output, "\n"); } @@ -1440,7 +1509,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); fputc('\n', output); } @@ -1472,7 +1542,8 @@ static void print_no_aggr_metric(char *prefix) run = perf_counts(counter->counts, cpu, 0)->run; uval = val * counter->scale; - printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); } fputc('\n', stat_config.output); } @@ -1528,7 +1599,8 @@ static void print_metric_headers(const char *prefix, bool no_indent) perf_stat__print_shadow_stats(counter, 0, 0, &out, - &metric_events); + &metric_events, + &rt_stat); } fputc('\n', stat_config.output); } @@ -2487,6 +2559,35 @@ int process_cpu_map_event(struct perf_tool *tool, return set_maps(st); } +static int runtime_stat_new(struct perf_stat_config *config, int nthreads) +{ + int i; + + config->stats = calloc(nthreads, sizeof(struct runtime_stat)); + if (!config->stats) + return -1; + + config->stats_num = nthreads; + + for (i = 0; i < nthreads; i++) + runtime_stat__init(&config->stats[i]); + + return 0; +} + +static void runtime_stat_delete(struct perf_stat_config *config) +{ + int i; + + if (!config->stats) + return; + + for (i = 0; i < config->stats_num; i++) + runtime_stat__exit(&config->stats[i]); + + free(config->stats); +} + static const char * const stat_report_usage[] = { "perf stat report [<options>]", NULL, @@ -2696,12 +2797,16 @@ int cmd_stat(int argc, const char **argv) run_count = 1; } - if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { - fprintf(stderr, "The --per-thread option is only available " - "when monitoring via -p -t options.\n"); - parse_options_usage(NULL, 
stat_options, "p", 1); - parse_options_usage(NULL, stat_options, "t", 1); - goto out; + if ((stat_config.aggr_mode == AGGR_THREAD) && + !target__has_task(&target)) { + if (!target.system_wide || target.cpu_list) { + fprintf(stderr, "The --per-thread option is only " + "available when monitoring via -p -t -a " + "options or only --per-thread.\n"); + parse_options_usage(NULL, stat_options, "p", 1); + parse_options_usage(NULL, stat_options, "t", 1); + goto out; + } } /* @@ -2725,6 +2830,9 @@ int cmd_stat(int argc, const char **argv) target__validate(&target); + if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) + target.per_thread = true; + if (perf_evlist__create_maps(evsel_list, &target) < 0) { if (target__has_task(&target)) { pr_err("Problems finding threads of monitor\n"); @@ -2742,8 +2850,15 @@ int cmd_stat(int argc, const char **argv) * Initialize thread_map with comm names, * so we could print it out on output. */ - if (stat_config.aggr_mode == AGGR_THREAD) + if (stat_config.aggr_mode == AGGR_THREAD) { thread_map__read_comms(evsel_list->threads); + if (target.system_wide) { + if (runtime_stat_new(&stat_config, + thread_map__nr(evsel_list->threads))) { + goto out; + } + } + } if (interval && interval < 100) { if (interval < 10) { @@ -2833,5 +2948,8 @@ out: sysfs__write_int(FREEZE_ON_SMI_PATH, 0); perf_evlist__delete(evsel_list); + + runtime_stat_delete(&stat_config); + return status; } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 540461f5e345..c6ccda52117d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -138,7 +138,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__annotate(sym, map, evsel, 0, NULL, NULL); + err = symbol__annotate(sym, map, evsel, 0, NULL); if (err == 0) { out_assign: top->sym_filter_entry = he; diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index ea602cd1b43a..f81ca508700c 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -33,6 +33,7 @@ arch/s390/include/uapi/asm/kvm.h arch/s390/include/uapi/asm/kvm_perf.h arch/s390/include/uapi/asm/ptrace.h arch/s390/include/uapi/asm/sie.h +arch/s390/include/uapi/asm/unistd.h arch/arm/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h include/asm-generic/bitops/arch_hweight.h diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 345f5d6e9ed5..fdf75d45efff 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -162,8 +162,37 @@ __perf_main () # List possible events for -e option elif [[ $prev == @("-e"|"--event") && $prev_skip_opts == @(record|stat|top) ]]; then - evts=$($cmd list --raw-dump) - __perfcomp_colon "$evts" "$cur" + + local cur1=${COMP_WORDS[COMP_CWORD]} + local raw_evts=$($cmd list --raw-dump) + local arr s tmp result + + if [[ "$cur1" == */* && ${cur1#*/} =~ ^[A-Z] ]]; then + OLD_IFS="$IFS" + IFS=" " + arr=($raw_evts) + IFS="$OLD_IFS" + + for s in ${arr[@]} + do + if [[ "$s" == *cpu/* ]]; then + tmp=${s#*cpu/} + result=$result" ""cpu/"${tmp^^} + else + result=$result" "$s + fi + done + + evts=${result}" "$(ls /sys/bus/event_source/devices/cpu/events) + else + evts=${raw_evts}" "$(ls /sys/bus/event_source/devices/cpu/events) + fi + + if [[ "$cur1" == , ]]; then + __perfcomp_colon "$evts" "" + else + __perfcomp_colon "$evts" "$cur1" + fi else # List subcommands for perf commands if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched| @@ -246,11 +275,21 @@ fi type perf &>/dev/null && _perf() 
{ + if [[ "$COMP_WORDBREAKS" != *,* ]]; then + COMP_WORDBREAKS="${COMP_WORDBREAKS}," + export COMP_WORDBREAKS + fi + + if [[ "$COMP_WORDBREAKS" == *:* ]]; then + COMP_WORDBREAKS="${COMP_WORDBREAKS/:/}" + export COMP_WORDBREAKS + fi + local cur words cword prev if [ $preload_get_comp_words_by_ref = "true" ]; then - _get_comp_words_by_ref -n =: cur words cword prev + _get_comp_words_by_ref -n =:, cur words cword prev else - __perf_get_comp_words_by_ref -n =: cur words cword prev + __perf_get_comp_words_by_ref -n =:, cur words cword prev fi __perf_main } && diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 766573e236e4..fafa014240cd 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -411,9 +411,9 @@ static const char *shell_test__description(char *description, size_t size, return description ? trim(description + 1) : NULL; } -#define for_each_shell_test(dir, ent) \ +#define for_each_shell_test(dir, base, ent) \ while ((ent = readdir(dir)) != NULL) \ - if (ent->d_type == DT_REG && ent->d_name[0] != '.') + if (!is_directory(base, ent)) static const char *shell_tests__dir(char *path, size_t size) { @@ -452,7 +452,7 @@ static int shell_tests__max_desc_width(void) if (!dir) return -1; - for_each_shell_test(dir, ent) { + for_each_shell_test(dir, path, ent) { char bf[256]; const char *desc = shell_test__description(bf, sizeof(bf), path, ent->d_name); @@ -504,7 +504,7 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width) if (!dir) return -1; - for_each_shell_test(dir, ent) { + for_each_shell_test(dir, st.dir, ent) { int curr = i++; char desc[256]; struct test test = { @@ -614,7 +614,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i) if (!dir) return -1; - for_each_shell_test(dir, ent) { + for_each_shell_test(dir, path, ent) { int curr = i++; char bf[256]; struct test t = { diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index f0679613bd18..18b06444f230 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -13,7 +13,6 @@ #include <unistd.h> #include <linux/kernel.h> #include <linux/hw_breakpoint.h> -#include <api/fs/fs.h> #include <api/fs/tracing_path.h> #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \ diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 2a9ef080efd0..55ad9793d544 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -17,10 +17,9 @@ skip_if_no_perf_probe || exit 2 file=$(mktemp /tmp/temporary_file.XXXXX) trace_open_vfs_getname() { - test "$(uname -m)" = s390x && { svc="openat"; txt="dfd: +CWD, +"; } - - perf trace -e ${svc:-open} touch $file 2>&1 | \ - egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ ${svc:-open}\(${txt}filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" + evts=$(echo $(perf list syscalls:sys_enter_open* |& egrep 'open(at)? 
' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') + perf trace -e $evts touch $file 2>&1 | \ + egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index dbcb6a19b375..4de1939b58ba 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -105,7 +105,7 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb TEST_ASSERT_VAL("failed to allocate map string", asprintf(&str, "%d,%d", getpid(), getppid()) >= 0); - threads = thread_map__new_str(str, NULL, 0); + threads = thread_map__new_str(str, NULL, 0, false); TEST_ASSERT_VAL("failed to allocate thread_map", threads); diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 03b7363a49c9..286427975112 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1116,9 +1116,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, if (perf_evsel__is_group_event(evsel)) nr_pcnt = evsel->nr_members; - err = symbol__annotate(sym, map, evsel, - sizeof(struct browser_line), &browser.arch, - perf_evsel__env_cpuid(evsel)); + err = symbol__annotate(sym, map, evsel, sizeof(struct browser_line), &browser.arch); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index cdb5ecf91666..aeeaf15029f0 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -169,7 +169,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - err = symbol__annotate(sym, map, evsel, 0, NULL, NULL); + err = symbol__annotate(sym, map, evsel, 0, NULL); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index facad1e279a8..68e687d1bf99 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -26,7 +26,6 @@ #include <pthread.h> #include <linux/bitops.h> #include <linux/kernel.h> -#include <sys/utsname.h> #include "sane_ctype.h" @@ -1420,18 +1419,6 @@ fallback: return 0; } -static const char *annotate__norm_arch(const char *arch_name) -{ - struct utsname uts; - - if (!arch_name) { /* Assume we are annotating locally. 
*/ - if (uname(&uts) < 0) - return NULL; - arch_name = uts.machine; - } - return normalize_arch((char *)arch_name); -} - static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { struct map *map = args->map; @@ -1622,21 +1609,18 @@ void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) int symbol__annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, size_t privsize, - struct arch **parch, char *cpuid) + struct arch **parch) { struct annotate_args args = { .privsize = privsize, .map = map, .evsel = evsel, }; - const char *arch_name = NULL; + struct perf_env *env = perf_evsel__env(evsel); + const char *arch_name = perf_env__arch(env); struct arch *arch; int err; - if (evsel) - arch_name = perf_evsel__env_arch(evsel); - - arch_name = annotate__norm_arch(arch_name); if (!arch_name) return -1; @@ -1648,7 +1632,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, *parch = arch; if (arch->init) { - err = arch->init(arch, cpuid); + err = arch->init(arch, env ? env->cpuid : NULL); if (err) { pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); return err; @@ -1999,7 +1983,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, struct dso *dso = map->dso; struct rb_root source_line = RB_ROOT; - if (symbol__annotate(sym, map, evsel, 0, NULL, NULL) < 0) + if (symbol__annotate(sym, map, evsel, 0, NULL) < 0) return -1; symbol__calc_percent(sym, evsel); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 6d7289e88fa3..ce427445671f 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -179,7 +179,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, size_t privsize, - struct arch **parch, char *cpuid); + struct arch **parch); enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a33491416400..c76687e42344 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -31,9 +31,6 @@ #include <sys/param.h> #include <stdlib.h> #include <stdio.h> -#include <string.h> -#include <limits.h> -#include <errno.h> #include <linux/list.h> #include "../perf.h" diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6276b340f893..6d311868d850 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include "cpumap.h" #include "env.h" +#include "sane_ctype.h" #include "util.h" #include <errno.h> +#include <sys/utsname.h> struct perf_env perf_env; @@ -93,3 +95,48 @@ void cpu_cache_level__free(struct cpu_cache_level *cache) free(cache->map); free(cache->size); } + +/* + * Return architecture name in a normalized form. + * The conversion logic comes from the Makefile. 
+ */ +static const char *normalize_arch(char *arch) +{ + if (!strcmp(arch, "x86_64")) + return "x86"; + if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6') + return "x86"; + if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5)) + return "sparc"; + if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64")) + return "arm64"; + if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110")) + return "arm"; + if (!strncmp(arch, "s390", 4)) + return "s390"; + if (!strncmp(arch, "parisc", 6)) + return "parisc"; + if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3)) + return "powerpc"; + if (!strncmp(arch, "mips", 4)) + return "mips"; + if (!strncmp(arch, "sh", 2) && isdigit(arch[2])) + return "sh"; + + return arch; +} + +const char *perf_env__arch(struct perf_env *env) +{ + struct utsname uts; + char *arch_name; + + if (!env) { /* Assume local operation */ + if (uname(&uts) < 0) + return NULL; + arch_name = uts.machine; + } else + arch_name = env->arch; + + return normalize_arch(arch_name); +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 1eb35b190b34..bf970f57dce0 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -65,4 +65,6 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); + +const char *perf_env__arch(struct perf_env *env); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3570355bcf39..f0a5e09c4071 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1105,7 +1105,8 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) struct cpu_map *cpus; struct thread_map *threads; - threads = thread_map__new_str(target->pid, target->tid, target->uid); + threads = thread_map__new_str(target->pid, target->tid, target->uid, + target->per_thread); if (!threads) return -1; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 95853c51c0ca..a4d256ea0dc4 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -36,6 +36,7 @@ #include "debug.h" #include "trace-event.h" #include "stat.h" +#include "memswap.h" #include "util/parse-branch-options.h" #include "sane_ctype.h" @@ -1598,10 +1599,46 @@ static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, return fprintf(fp, " %-32s %s\n", name, val); } +static void perf_evsel__remove_fd(struct perf_evsel *pos, + int nr_cpus, int nr_threads, + int thread_idx) +{ + for (int cpu = 0; cpu < nr_cpus; cpu++) + for (int thread = thread_idx; thread < nr_threads - 1; thread++) + FD(pos, cpu, thread) = FD(pos, cpu, thread + 1); +} + +static int update_fds(struct perf_evsel *evsel, + int nr_cpus, int cpu_idx, + int nr_threads, int thread_idx) +{ + struct perf_evsel *pos; + + if (cpu_idx >= nr_cpus || thread_idx >= nr_threads) + return -EINVAL; + + evlist__for_each_entry(evsel->evlist, pos) { + nr_cpus = pos != evsel ? nr_cpus : cpu_idx; + + perf_evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx); + + /* + * Since fds for next evsel has not been created, + * there is no need to iterate whole event list. 
+ */ + if (pos == evsel) + break; + } + return 0; +} + static bool ignore_missing_thread(struct perf_evsel *evsel, + int nr_cpus, int cpu, struct thread_map *threads, int thread, int err) { + pid_t ignore_pid = thread_map__pid(threads, thread); + if (!evsel->ignore_missing_thread) return false; @@ -1617,11 +1654,18 @@ static bool ignore_missing_thread(struct perf_evsel *evsel, if (threads->nr == 1) return false; + /* + * We should remove fd for missing_thread first + * because thread_map__remove() will decrease threads->nr. + */ + if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread)) + return false; + if (thread_map__remove(threads, thread)) return false; pr_warning("WARNING: Ignored open failure for pid %d\n", - thread_map__pid(threads, thread)); + ignore_pid); return true; } @@ -1726,7 +1770,7 @@ retry_open: if (fd < 0) { err = -errno; - if (ignore_missing_thread(evsel, threads, thread, err)) { + if (ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) { /* * We just removed 1 thread, so take a step * back on thread index and lower the upper @@ -2131,14 +2175,27 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (type & PERF_SAMPLE_RAW) { OVERFLOW_CHECK_u64(array); u.val64 = *array; - if (WARN_ONCE(swapped, - "Endianness of raw data not corrected!\n")) { - /* undo swap of u64, then swap on individual u32s */ + + /* + * Undo swap of u64, then swap on individual u32s, + * get the size of the raw area and undo all of the + * swap. The pevent interface handles endianity by + * itself. + */ + if (swapped) { u.val64 = bswap_64(u.val64); u.val32[0] = bswap_32(u.val32[0]); u.val32[1] = bswap_32(u.val32[1]); } data->raw_size = u.val32[0]; + + /* + * The raw data is aligned on 64bits including the + * u32 size, so it's safe to use mem_bswap_64. 
+ */ + if (swapped) + mem_bswap_64((void *) array, data->raw_size); + array = (void *)array + sizeof(u32); OVERFLOW_CHECK(array, data->raw_size, max_size); @@ -2835,16 +2892,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, perf_evsel__name(evsel)); } -char *perf_evsel__env_arch(struct perf_evsel *evsel) -{ - if (evsel && evsel->evlist && evsel->evlist->env) - return evsel->evlist->env->arch; - return NULL; -} - -char *perf_evsel__env_cpuid(struct perf_evsel *evsel) +struct perf_env *perf_evsel__env(struct perf_evsel *evsel) { - if (evsel && evsel->evlist && evsel->evlist->env) - return evsel->evlist->env->cpuid; + if (evsel && evsel->evlist) + return evsel->evlist->env; return NULL; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index c3663a70c9b9..846e41644525 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -446,7 +446,6 @@ typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, attr__fprintf_f attr__fprintf, void *priv); -char *perf_evsel__env_arch(struct perf_evsel *evsel); -char *perf_evsel__env_cpuid(struct perf_evsel *evsel); +struct perf_env *perf_evsel__env(struct perf_evsel *evsel); #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 5890e08e0754..ca73aa7be708 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -15,9 +15,7 @@ #include <linux/bitops.h> #include <linux/stringify.h> #include <sys/stat.h> -#include <sys/types.h> #include <sys/utsname.h> -#include <unistd.h> #include "evlist.h" #include "evsel.h" diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index e48410c99b39..1ddc3d1d0147 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -20,12 +20,10 @@ #include "pmu.h" #include "expr.h" #include "rblist.h" -#include "pmu.h" #include <string.h> #include <stdbool.h> #include <errno.h> #include "pmu-events/pmu-events.h" -#include "strbuf.h" #include "strlist.h" #include <assert.h> #include <ctype.h> diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 933f5c6bffb4..ca56ba2dd3da 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -18,6 +18,7 @@ #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> +#include <dirent.h> #include <unistd.h> static char bad_path[] = "/bad-path/"; @@ -77,3 +78,16 @@ bool is_regular_file(const char *file) return S_ISREG(st.st_mode); } + +/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */ +bool is_directory(const char *base_path, const struct dirent *dent) +{ + char path[PATH_MAX]; + struct stat st; + + sprintf(path, "%s/%s", base_path, dent->d_name); + if (stat(path, &st)) + return false; + + return S_ISDIR(st.st_mode); +} diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h index 14a254ada7eb..f014f905df50 100644 --- a/tools/perf/util/path.h +++ b/tools/perf/util/path.h @@ -2,9 +2,12 @@ #ifndef _PERF_PATH_H #define _PERF_PATH_H +struct dirent; + int path__join(char *bf, size_t size, const char *path1, const char *path2); int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3); bool is_regular_file(const char *file); +bool is_directory(const char *base_path, const struct dirent *dent); #endif /* _PERF_PATH_H */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index b7aaf9b2294d..e1dbc9821617 100644 --- a/tools/perf/util/probe-event.c 
+++ b/tools/perf/util/probe-event.c @@ -1325,27 +1325,30 @@ static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev) { char *ptr; - ptr = strchr(*arg, ':'); + ptr = strpbrk_esc(*arg, ":"); if (ptr) { *ptr = '\0'; if (!pev->sdt && !is_c_func_name(*arg)) goto ng_name; - pev->group = strdup(*arg); + pev->group = strdup_esc(*arg); if (!pev->group) return -ENOMEM; *arg = ptr + 1; } else pev->group = NULL; - if (!pev->sdt && !is_c_func_name(*arg)) { + + pev->event = strdup_esc(*arg); + if (pev->event == NULL) + return -ENOMEM; + + if (!pev->sdt && !is_c_func_name(pev->event)) { + zfree(&pev->event); ng_name: + zfree(&pev->group); semantic_error("%s is bad for event name -it must " "follow C symbol-naming rule.\n", *arg); return -EINVAL; } - pev->event = strdup(*arg); - if (pev->event == NULL) - return -ENOMEM; - return 0; } @@ -1373,7 +1376,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg++; } - ptr = strpbrk(arg, ";=@+%"); + ptr = strpbrk_esc(arg, ";=@+%"); if (pev->sdt) { if (ptr) { if (*ptr != '@') { @@ -1387,7 +1390,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) pev->target = build_id_cache__origname(tmp); free(tmp); } else - pev->target = strdup(ptr + 1); + pev->target = strdup_esc(ptr + 1); if (!pev->target) return -ENOMEM; *ptr = '\0'; @@ -1421,13 +1424,14 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) * * Otherwise, we consider arg to be a function specification. */ - if (!strpbrk(arg, "+@%") && (ptr = strpbrk(arg, ";:")) != NULL) { + if (!strpbrk_esc(arg, "+@%")) { + ptr = strpbrk_esc(arg, ";:"); /* This is a file spec if it includes a '.' before ; or : */ - if (memchr(arg, '.', ptr - arg)) + if (ptr && memchr(arg, '.', ptr - arg)) file_spec = true; } - ptr = strpbrk(arg, ";:+@%"); + ptr = strpbrk_esc(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1436,7 +1440,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) if (arg[0] == '\0') tmp = NULL; else { - tmp = strdup(arg); + tmp = strdup_esc(arg); if (tmp == NULL) return -ENOMEM; } @@ -1469,12 +1473,12 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg = ptr; c = nc; if (c == ';') { /* Lazy pattern must be the last part */ - pp->lazy_line = strdup(arg); + pp->lazy_line = strdup(arg); /* let leave escapes */ if (pp->lazy_line == NULL) return -ENOMEM; break; } - ptr = strpbrk(arg, ";:+@%"); + ptr = strpbrk_esc(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1501,7 +1505,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) semantic_error("SRC@SRC is not allowed.\n"); return -EINVAL; } - pp->file = strdup(arg); + pp->file = strdup_esc(arg); if (pp->file == NULL) return -ENOMEM; break; @@ -2573,7 +2577,8 @@ int show_perf_probe_events(struct strfilter *filter) } static int get_new_event_name(char *buf, size_t len, const char *base, - struct strlist *namelist, bool allow_suffix) + struct strlist *namelist, bool ret_event, + bool allow_suffix) { int i, ret; char *p, *nbase; @@ -2584,13 +2589,13 @@ static int get_new_event_name(char *buf, size_t len, const char *base, if (!nbase) return -ENOMEM; - /* Cut off the dot suffixes (e.g. .const, .isra)*/ - p = strchr(nbase, '.'); + /* Cut off the dot suffixes (e.g. 
.const, .isra) and version suffixes */ + p = strpbrk(nbase, ".@"); if (p && p != nbase) *p = '\0'; /* Try no suffix number */ - ret = e_snprintf(buf, len, "%s", nbase); + ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? "__return" : ""); if (ret < 0) { pr_debug("snprintf() failed: %d\n", ret); goto out; @@ -2625,6 +2630,14 @@ static int get_new_event_name(char *buf, size_t len, const char *base, out: free(nbase); + + /* Final validation */ + if (ret >= 0 && !is_c_func_name(buf)) { + pr_warning("Internal error: \"%s\" is an invalid event name.\n", + buf); + ret = -EINVAL; + } + return ret; } @@ -2681,8 +2694,8 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, group = PERFPROBE_GROUP; /* Get an unused new event name */ - ret = get_new_event_name(buf, 64, event, - namelist, allow_suffix); + ret = get_new_event_name(buf, 64, event, namelist, + tev->point.retprobe, allow_suffix); if (ret < 0) return ret; @@ -2792,16 +2805,40 @@ static int find_probe_functions(struct map *map, char *name, int found = 0; struct symbol *sym; struct rb_node *tmp; + const char *norm, *ver; + char *buf = NULL; + bool cut_version = true; if (map__load(map) < 0) return 0; + /* If user gives a version, don't cut off the version from symbols */ + if (strchr(name, '@')) + cut_version = false; + map__for_each_symbol(map, sym, tmp) { - if (strglobmatch(sym->name, name)) { + norm = arch__normalize_symbol_name(sym->name); + if (!norm) + continue; + + if (cut_version) { + /* We don't care about default symbol or not */ + ver = strchr(norm, '@'); + if (ver) { + buf = strndup(norm, ver - norm); + if (!buf) + return -ENOMEM; + norm = buf; + } + } + + if (strglobmatch(norm, name)) { found++; if (syms && found < probe_conf.max_probes) syms[found - 1] = sym; } + if (buf) + zfree(&buf); } return found; @@ -2847,7 +2884,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, * same name but different addresses, this lists all the symbols. */ num_matched_functions = find_probe_functions(map, pp->function, syms); - if (num_matched_functions == 0) { + if (num_matched_functions <= 0) { pr_err("Failed to find symbol %s in %s\n", pp->function, pev->target ? 
: "kernel"); ret = -ENOENT; diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index b4f2f06722a7..7aa0ea64544e 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,6 +10,7 @@ util/ctype.c util/evlist.c util/evsel.c util/cpumap.c +util/memswap.c util/mmap.c util/namespaces.c ../lib/bitmap.c diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c7187f067d31..c1848b543f27 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -43,7 +43,6 @@ #include "../db-export.h" #include "../thread-stack.h" #include "../trace-event.h" -#include "../machine.h" #include "../call-path.h" #include "thread_map.h" #include "cpumap.h" diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 57ec22513971..594d14a02b67 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -9,17 +9,6 @@ #include "expr.h" #include "metricgroup.h" -enum { - CTX_BIT_USER = 1 << 0, - CTX_BIT_KERNEL = 1 << 1, - CTX_BIT_HV = 1 << 2, - CTX_BIT_HOST = 1 << 3, - CTX_BIT_IDLE = 1 << 4, - CTX_BIT_MAX = 1 << 5, -}; - -#define NUM_CTX CTX_BIT_MAX - /* * AGGR_GLOBAL: Use CPU 0 * AGGR_SOCKET: Use first CPU of socket @@ -27,36 +16,18 @@ enum { * AGGR_NONE: Use matching CPU * AGGR_THREAD: Not supported? */ -static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; -static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS]; -static struct rblist runtime_saved_values; static bool have_frontend_stalled; +struct runtime_stat rt_stat; struct stats walltime_nsecs_stats; struct saved_value { struct rb_node rb_node; struct perf_evsel *evsel; + enum stat_type type; + int ctx; int cpu; + struct runtime_stat *stat; struct stats stats; }; @@ -69,6 +40,30 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) if (a->cpu != b->cpu) return a->cpu - b->cpu; + + /* + * Previously the rbtree was used to link generic metrics. + * The keys were evsel/cpu. Now the rbtree is extended to support + * per-thread shadow stats. 
For shadow stats case, the keys + * are cpu/type/ctx/stat (evsel is NULL). For generic metrics + * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL). + */ + if (a->type != b->type) + return a->type - b->type; + + if (a->ctx != b->ctx) + return a->ctx - b->ctx; + + if (a->evsel == NULL && b->evsel == NULL) { + if (a->stat == b->stat) + return 0; + + if ((char *)a->stat < (char *)b->stat) + return -1; + + return 1; + } + if (a->evsel == b->evsel) return 0; if ((char *)a->evsel < (char *)b->evsel) @@ -99,32 +94,54 @@ static void saved_value_delete(struct rblist *rblist __maybe_unused, static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, int cpu, - bool create) + bool create, + enum stat_type type, + int ctx, + struct runtime_stat *st) { + struct rblist *rblist; struct rb_node *nd; struct saved_value dm = { .cpu = cpu, .evsel = evsel, + .type = type, + .ctx = ctx, + .stat = st, }; - nd = rblist__find(&runtime_saved_values, &dm); + + rblist = &st->value_list; + + nd = rblist__find(rblist, &dm); if (nd) return container_of(nd, struct saved_value, rb_node); if (create) { - rblist__add_node(&runtime_saved_values, &dm); - nd = rblist__find(&runtime_saved_values, &dm); + rblist__add_node(rblist, &dm); + nd = rblist__find(rblist, &dm); if (nd) return container_of(nd, struct saved_value, rb_node); } return NULL; } +void runtime_stat__init(struct runtime_stat *st) +{ + struct rblist *rblist = &st->value_list; + + rblist__init(rblist); + rblist->node_cmp = saved_value_cmp; + rblist->node_new = saved_value_new; + rblist->node_delete = saved_value_delete; +} + +void runtime_stat__exit(struct runtime_stat *st) +{ + rblist__exit(&st->value_list); +} + void perf_stat__init_shadow_stats(void) { have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); - rblist__init(&runtime_saved_values); - runtime_saved_values.node_cmp = saved_value_cmp; - runtime_saved_values.node_new = saved_value_new; - runtime_saved_values.node_delete = saved_value_delete; + runtime_stat__init(&rt_stat); } static int evsel_context(struct perf_evsel *evsel) @@ -145,36 +162,13 @@ static int evsel_context(struct perf_evsel *evsel) return ctx; } -void perf_stat__reset_shadow_stats(void) +static void reset_stat(struct runtime_stat *st) { + struct rblist *rblist; struct rb_node *pos, *next; - memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); - memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); - memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); - memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); - memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); - memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); - memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); - memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); - memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); - memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); - memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); - memset(runtime_cycles_in_tx_stats, 0, - sizeof(runtime_cycles_in_tx_stats)); - memset(runtime_transaction_stats, 0, - sizeof(runtime_transaction_stats)); - memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); - memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); - memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots)); - memset(runtime_topdown_slots_retired, 0, 
sizeof(runtime_topdown_slots_retired)); - memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); - memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); - memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); - memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats)); - memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats)); - - next = rb_first(&runtime_saved_values.entries); + rblist = &st->value_list; + next = rb_first(&rblist->entries); while (next) { pos = next; next = rb_next(pos); @@ -184,13 +178,35 @@ void perf_stat__reset_shadow_stats(void) } } +void perf_stat__reset_shadow_stats(void) +{ + reset_stat(&rt_stat); + memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); +} + +void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) +{ + reset_stat(st); +} + +static void update_runtime_stat(struct runtime_stat *st, + enum stat_type type, + int ctx, int cpu, u64 count) +{ + struct saved_value *v = saved_value_lookup(NULL, cpu, true, + type, ctx, st); + + if (v) + update_stats(&v->stats, count); +} + /* * Update various tracking values we maintain to print * more semantic information such as miss/hit ratios, * instruction rates, etc: */ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, - int cpu) + int cpu, struct runtime_stat *st) { int ctx = evsel_context(counter); @@ -198,50 +214,58 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) || perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) - update_stats(&runtime_nsecs_stats[cpu], count); + update_runtime_stat(st, STAT_NSECS, 0, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_stats(&runtime_cycles_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) - update_stats(&runtime_transaction_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count); else if (perf_stat_evsel__is(counter, ELISION_START)) - update_stats(&runtime_elision_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_ELISION, ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) - update_stats(&runtime_topdown_total_slots[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) - update_stats(&runtime_topdown_slots_issued[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) - update_stats(&runtime_topdown_slots_retired[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) - update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) - update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, + ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) - 
update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, + ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) - update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, + ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_stats(&runtime_branches_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_stats(&runtime_cacherefs_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_stats(&runtime_l1_dcache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_stats(&runtime_ll_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_stats(&runtime_ll_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_stats(&runtime_itlb_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); else if (perf_stat_evsel__is(counter, SMI_NUM)) - update_stats(&runtime_smi_num_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); else if (perf_stat_evsel__is(counter, APERF)) - update_stats(&runtime_aperf_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_APERF, ctx, cpu, count); if (counter->collect_stat) { - struct saved_value *v = saved_value_lookup(counter, cpu, true); + struct saved_value *v = saved_value_lookup(counter, cpu, true, + STAT_NONE, 0, st); update_stats(&v->stats, count); } } @@ -362,15 +386,40 @@ void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list) } } +static double runtime_stat_avg(struct runtime_stat *st, + enum stat_type type, int ctx, int cpu) +{ + struct saved_value *v; + + v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + if (!v) + return 0.0; + + return avg_stats(&v->stats); +} + +static double runtime_stat_n(struct runtime_stat *st, + enum stat_type type, int ctx, int cpu) +{ + struct saved_value *v; + + v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + if (!v) + return 0.0; + + return v->stats.n; +} + static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -386,13 +435,14 @@ static void print_stalled_cycles_frontend(int cpu, static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = 
avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -405,13 +455,14 @@ static void print_stalled_cycles_backend(int cpu, static void print_branch_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_branches_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -424,13 +475,15 @@ static void print_branch_misses(int cpu, static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) + { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -443,13 +496,15 @@ static void print_l1_dcache_misses(int cpu, static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) + { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -461,13 +516,14 @@ static void print_l1_icache_misses(int cpu, static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -479,13 +535,14 @@ static void print_dtlb_cache_misses(int cpu, static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -497,13 +554,14 @@ static void print_itlb_cache_misses(int cpu, static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -561,68 +619,72 @@ static double sanitize_val(double x) return x; } -static double td_total_slots(int ctx, int cpu) +static double td_total_slots(int ctx, int cpu, struct runtime_stat *st) { - return avg_stats(&runtime_topdown_total_slots[ctx][cpu]); + return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu); } -static double td_bad_spec(int ctx, int cpu) +static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st) { double bad_spec = 0; double total_slots; double total; - total = 
avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) - - avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) + - avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]); - total_slots = td_total_slots(ctx, cpu); + total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) - + runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) + + runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu); + + total_slots = td_total_slots(ctx, cpu, st); if (total_slots) bad_spec = total / total_slots; return sanitize_val(bad_spec); } -static double td_retiring(int ctx, int cpu) +static double td_retiring(int ctx, int cpu, struct runtime_stat *st) { double retiring = 0; - double total_slots = td_total_slots(ctx, cpu); - double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]); + double total_slots = td_total_slots(ctx, cpu, st); + double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, + ctx, cpu); if (total_slots) retiring = ret_slots / total_slots; return retiring; } -static double td_fe_bound(int ctx, int cpu) +static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st) { double fe_bound = 0; - double total_slots = td_total_slots(ctx, cpu); - double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]); + double total_slots = td_total_slots(ctx, cpu, st); + double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, + ctx, cpu); if (total_slots) fe_bound = fetch_bub / total_slots; return fe_bound; } -static double td_be_bound(int ctx, int cpu) +static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) { - double sum = (td_fe_bound(ctx, cpu) + - td_bad_spec(ctx, cpu) + - td_retiring(ctx, cpu)); + double sum = (td_fe_bound(ctx, cpu, st) + + td_bad_spec(ctx, cpu, st) + + td_retiring(ctx, cpu, st)); if (sum == 0) return 0; return sanitize_val(1.0 - sum); } static void print_smi_cost(int cpu, struct perf_evsel *evsel, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double smi_num, aperf, cycles, cost = 0.0; int ctx = evsel_context(evsel); const char *color = NULL; - smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]); - aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]); - cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]); + smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu); + aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu); + cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if ((cycles == 0) || (aperf == 0)) return; @@ -642,7 +704,8 @@ static void generic_metric(const char *metric_expr, const char *metric_name, double avg, int cpu, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { print_metric_t print_metric = out->print_metric; struct parse_ctx pctx; @@ -661,7 +724,8 @@ static void generic_metric(const char *metric_expr, stats = &walltime_nsecs_stats; scale = 1e-9; } else { - v = saved_value_lookup(metric_events[i], cpu, false); + v = saved_value_lookup(metric_events[i], cpu, false, + STAT_NONE, 0, st); if (!v) break; stats = &v->stats; @@ -689,7 +753,8 @@ static void generic_metric(const char *metric_expr, void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, - struct rblist *metric_events) + struct rblist *metric_events, + struct runtime_stat *st) { void *ctxp = out->ctx; print_metric_t print_metric = out->print_metric; @@ -700,7 +765,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, int num = 1; if (perf_evsel__match(evsel, HARDWARE, 
HW_INSTRUCTIONS)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + if (total) { ratio = avg / total; print_metric(ctxp, NULL, "%7.2f ", @@ -708,8 +774,13 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, } else { print_metric(ctxp, NULL, NULL, "insn per cycle", 0); } - total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); - total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); + + total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, + ctx, cpu); + + total = max(total, runtime_stat_avg(st, + STAT_STALLED_CYCLES_BACK, + ctx, cpu)); if (total && avg) { out->new_line(ctxp); @@ -722,8 +793,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, "stalled cycles per insn", 0); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { - if (runtime_branches_stats[ctx][cpu].n != 0) - print_branch_misses(cpu, evsel, avg, out); + if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) + print_branch_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all branches", 0); } else if ( @@ -731,8 +802,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_l1_dcache_stats[ctx][cpu].n != 0) - print_l1_dcache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) + print_l1_dcache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0); } else if ( @@ -740,8 +812,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_l1_icache_stats[ctx][cpu].n != 0) - print_l1_icache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) + print_l1_icache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0); } else if ( @@ -749,8 +822,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_dtlb_cache_stats[ctx][cpu].n != 0) - print_dtlb_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) + print_dtlb_cache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0); } else if ( @@ -758,8 +832,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_itlb_cache_stats[ctx][cpu].n != 0) - print_itlb_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) + print_itlb_cache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0); } else if ( @@ -767,27 +842,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_ll_cache_stats[ctx][cpu].n != 0) - print_ll_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) + print_ll_cache_misses(cpu, 
evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { - total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); if (total) ratio = avg * 100 / total; - if (runtime_cacherefs_stats[ctx][cpu].n != 0) + if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) print_metric(ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else print_metric(ctxp, NULL, NULL, "of all cache refs", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(cpu, evsel, avg, out); + print_stalled_cycles_frontend(cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(cpu, evsel, avg, out); + print_stalled_cycles_backend(cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { - total = avg_stats(&runtime_nsecs_stats[cpu]); + total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) { ratio = avg / total; @@ -796,7 +872,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, print_metric(ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + if (total) print_metric(ctxp, NULL, "%7.2f%%", "transactional cycles", @@ -805,8 +882,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, print_metric(ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); - total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); + if (total2 < avg) total2 = avg; if (total) @@ -815,19 +893,21 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, else print_metric(ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { - total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, + ctx, cpu); if (avg) ratio = total / avg; - if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0) + if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) print_metric(ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else print_metric(ctxp, NULL, NULL, "cycles / transaction", - 0); + 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { - total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, + ctx, cpu); if (avg) ratio = total / avg; @@ -841,28 +921,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, else print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { - double fe_bound = td_fe_bound(ctx, cpu); + double fe_bound = td_fe_bound(ctx, cpu, st); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { - double retiring = td_retiring(ctx, cpu); + double retiring = td_retiring(ctx, cpu, st); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { - double bad_spec = td_bad_spec(ctx, cpu); + 
double bad_spec = td_bad_spec(ctx, cpu, st); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { - double be_bound = td_be_bound(ctx, cpu); + double be_bound = td_be_bound(ctx, cpu, st); const char *name = "backend bound"; static int have_recovery_bubbles = -1; @@ -875,19 +955,19 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (be_bound > 0.2) color = PERF_COLOR_RED; - if (td_total_slots(ctx, cpu) > 0) + if (td_total_slots(ctx, cpu, st) > 0) print_metric(ctxp, color, "%8.1f%%", name, be_bound * 100.); else print_metric(ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name, - evsel->metric_name, avg, cpu, out); - } else if (runtime_nsecs_stats[cpu].n != 0) { + evsel->metric_name, avg, cpu, out, st); + } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { char unit = 'M'; char unit_buf[10]; - total = avg_stats(&runtime_nsecs_stats[cpu]); + total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) ratio = 1000.0 * avg / total; @@ -898,7 +978,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(cpu, evsel, out); + print_smi_cost(cpu, evsel, out, st); } else { num = 0; } @@ -911,7 +991,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, out->new_line(ctxp); generic_metric(mexp->metric_expr, mexp->metric_events, evsel->name, mexp->metric_name, - avg, cpu, out); + avg, cpu, out, st); } } if (num == 0) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 151e9efd7286..32235657c1ac 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -278,9 +278,16 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel perf_evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, config->scale, NULL); if (config->aggr_mode == AGGR_NONE) - perf_stat__update_shadow_stats(evsel, count->val, cpu); - if (config->aggr_mode == AGGR_THREAD) - perf_stat__update_shadow_stats(evsel, count->val, 0); + perf_stat__update_shadow_stats(evsel, count->val, cpu, + &rt_stat); + if (config->aggr_mode == AGGR_THREAD) { + if (config->stats) + perf_stat__update_shadow_stats(evsel, + count->val, 0, &config->stats[thread]); + else + perf_stat__update_shadow_stats(evsel, + count->val, 0, &rt_stat); + } break; case AGGR_GLOBAL: aggr->val += count->val; @@ -362,7 +369,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, /* * Save the full runtime - to allow normalization during printout: */ - perf_stat__update_shadow_stats(counter, *count, 0); + perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat); return 0; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index eefca5c981fd..dbc6f7134f61 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -5,6 +5,7 @@ #include <linux/types.h> #include <stdio.h> #include "xyarray.h" +#include "rblist.h" struct stats { @@ -43,11 +44,54 @@ enum aggr_mode { AGGR_UNSET, }; +enum { + CTX_BIT_USER = 1 << 0, + CTX_BIT_KERNEL = 1 << 1, + CTX_BIT_HV = 1 << 2, + CTX_BIT_HOST = 1 << 3, + CTX_BIT_IDLE = 1 << 4, + CTX_BIT_MAX = 1 << 5, +}; + +#define NUM_CTX CTX_BIT_MAX + +enum stat_type { + STAT_NONE = 0, + STAT_NSECS, + STAT_CYCLES, + STAT_STALLED_CYCLES_FRONT, + 
STAT_STALLED_CYCLES_BACK, + STAT_BRANCHES, + STAT_CACHEREFS, + STAT_L1_DCACHE, + STAT_L1_ICACHE, + STAT_LL_CACHE, + STAT_ITLB_CACHE, + STAT_DTLB_CACHE, + STAT_CYCLES_IN_TX, + STAT_TRANSACTION, + STAT_ELISION, + STAT_TOPDOWN_TOTAL_SLOTS, + STAT_TOPDOWN_SLOTS_ISSUED, + STAT_TOPDOWN_SLOTS_RETIRED, + STAT_TOPDOWN_FETCH_BUBBLES, + STAT_TOPDOWN_RECOVERY_BUBBLES, + STAT_SMI_NUM, + STAT_APERF, + STAT_MAX +}; + +struct runtime_stat { + struct rblist value_list; +}; + struct perf_stat_config { enum aggr_mode aggr_mode; bool scale; FILE *output; unsigned int interval; + struct runtime_stat *stats; + int stats_num; }; void update_stats(struct stats *stats, u64 val); @@ -67,6 +111,15 @@ static inline void init_stats(struct stats *stats) struct perf_evsel; struct perf_evlist; +struct perf_aggr_thread_value { + struct perf_evsel *counter; + int id; + double uval; + u64 val; + u64 run; + u64 ena; +}; + bool __perf_evsel_stat__is(struct perf_evsel *evsel, enum perf_stat_evsel_id id); @@ -75,16 +128,20 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel, void perf_stat_evsel_id_init(struct perf_evsel *evsel); +extern struct runtime_stat rt_stat; extern struct stats walltime_nsecs_stats; typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit, const char *fmt, double val); typedef void (*new_line_t )(void *ctx); +void runtime_stat__init(struct runtime_stat *st); +void runtime_stat__exit(struct runtime_stat *st); void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); +void perf_stat__reset_shadow_per_stat(struct runtime_stat *st); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, - int cpu); + int cpu, struct runtime_stat *st); struct perf_stat_output_ctx { void *ctx; print_metric_t print_metric; @@ -92,11 +149,11 @@ struct perf_stat_output_ctx { bool force_header; }; -struct rblist; void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, - struct rblist *metric_events); + struct rblist *metric_events, + struct runtime_stat *st); void perf_stat__collect_metric_expr(struct perf_evlist *); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index aaa08ee8c717..d8bfd0c4d2cb 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -396,3 +396,49 @@ out_err_overflow: free(expr); return NULL; } + +/* Like strpbrk(), but not break if it is right after a backslash (escaped) */ +char *strpbrk_esc(char *str, const char *stopset) +{ + char *ptr; + + do { + ptr = strpbrk(str, stopset); + if (ptr == str || + (ptr == str + 1 && *(ptr - 1) != '\\')) + break; + str = ptr + 1; + } while (ptr && *(ptr - 1) == '\\' && *(ptr - 2) != '\\'); + + return ptr; +} + +/* Like strdup, but do not copy a single backslash */ +char *strdup_esc(const char *str) +{ + char *s, *d, *p, *ret = strdup(str); + + if (!ret) + return NULL; + + d = strchr(ret, '\\'); + if (!d) + return ret; + + s = d + 1; + do { + if (*s == '\0') { + *d = '\0'; + break; + } + p = strchr(s + 1, '\\'); + if (p) { + memmove(d, s, p - s); + d += p - s; + s = p + 1; + } else + memmove(d, s, strlen(s) + 1); + } while (p); + + return ret; +} diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h index ee14ca5451ab..4c68a09b97e8 100644 --- a/tools/perf/util/string2.h +++ b/tools/perf/util/string2.h @@ -39,5 +39,7 @@ static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int return 
asprintf_expr_inout_ints(var, false, nints, ints); } +char *strpbrk_esc(char *str, const char *stopset); +char *strdup_esc(const char *str); #endif /* PERF_STRING_H */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1b67a8639dfe..cc065d4bfafc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -94,6 +94,11 @@ static int prefix_underscores_count(const char *str) return tail - str; } +const char * __weak arch__normalize_symbol_name(const char *name) +{ + return name; +} + int __weak arch__compare_symbol_names(const char *namea, const char *nameb) { return strcmp(namea, nameb); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index a4f0075b4e5c..0563f33c1eb3 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -349,6 +349,7 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr); void arch__sym_update(struct symbol *s, GElf_Sym *sym); #endif +const char *arch__normalize_symbol_name(const char *name); #define SYMBOL_A 0 #define SYMBOL_B 1 diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 6eea7cff3d4e..303bdb84ab5a 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -26,6 +26,10 @@ #include <asm/syscalls_64.c> const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID; static const char **syscalltbl_native = syscalltbl_x86_64; +#elif defined(__s390x__) +#include <asm/syscalls_64.c> +const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID; +static const char **syscalltbl_native = syscalltbl_s390_64; #endif struct syscall { diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index 446aa7a56f25..6ef01a83b24e 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -64,6 +64,11 @@ static inline bool target__none(struct target *target) return !target__has_task(target) && !target__has_cpu(target); } +static inline bool target__has_per_thread(struct target *target) +{ + return target->system_wide && target->per_thread; +} + static inline bool target__uses_dummy_map(struct target *target) { bool use_dummy = false; @@ -73,6 +78,8 @@ static inline bool target__uses_dummy_map(struct target *target) else if (target__has_task(target) || (!target__has_cpu(target) && !target->uses_mmap)) use_dummy = true; + else if (target__has_per_thread(target)) + use_dummy = true; return use_dummy; } diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 2b653853eec2..3e1038f6491c 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -323,7 +323,7 @@ out_free_threads: } struct thread_map *thread_map__new_str(const char *pid, const char *tid, - uid_t uid) + uid_t uid, bool per_thread) { if (pid) return thread_map__new_by_pid_str(pid); @@ -331,6 +331,9 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid, if (!tid && uid != UINT_MAX) return thread_map__new_by_uid(uid); + if (per_thread) + return thread_map__new_all_cpus(); + return thread_map__new_by_tid_str(tid); } diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 07a765fb22bb..0a806b99e73c 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -31,7 +31,7 @@ struct thread_map *thread_map__get(struct thread_map *map); void thread_map__put(struct thread_map *map); struct thread_map *thread_map__new_str(const char *pid, - const char *tid, uid_t uid); + const char *tid, uid_t uid, bool per_thread); struct thread_map *thread_map__new_by_tid_str(const char *tid_str); diff --git 
a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 647a1e6b4c7b..b029a5e9ae49 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -3,7 +3,7 @@ #include "thread.h" #include "session.h" #include "debug.h" -#include "arch/common.h" +#include "env.h" struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops; struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops; @@ -39,7 +39,7 @@ int unwind__prepare_access(struct thread *thread, struct map *map, if (dso_type == DSO__TYPE_UNKNOWN) return 0; - arch = normalize_arch(thread->mg->machine->env->arch); + arch = perf_env__arch(thread->mg->machine->env); if (!strcmp(arch, "x86")) { if (dso_type != DSO__TYPE_64BIT)
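
The stat-shadow.c hunks above replace the per-context/per-cpu global arrays (runtime_cycles_stats and friends) with a single rblist of saved_value nodes keyed by evsel/cpu/type/ctx/stat inside a struct runtime_stat. A minimal sketch of the new lifecycle follows; the function names and signatures are the ones added in the patch, but shadow_stats_roundtrip() itself is a hypothetical caller and assumes it is compiled inside tools/perf so the util headers resolve.

/*
 * Sketch only: exercises the runtime_stat API introduced above.
 */
#include "util/stat.h"
#include "util/evsel.h"

static void shadow_stats_roundtrip(struct perf_evsel *counter, u64 count, int cpu)
{
	struct runtime_stat st;

	/* rblist gets the saved_value new/cmp/delete callbacks installed */
	runtime_stat__init(&st);

	/*
	 * One counter value lands in a saved_value node keyed by the
	 * counter's STAT_* type, ctx and cpu (the update_runtime_stat() path).
	 */
	perf_stat__update_shadow_stats(counter, count, cpu, &st);

	/* Drop all accumulated nodes, e.g. between report intervals ... */
	perf_stat__reset_shadow_per_stat(&st);

	/* ... and tear the tree down when the stat session ends. */
	runtime_stat__exit(&st);
}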
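The td_*() helpers above only change where the averages come from (runtime_stat_avg() instead of the old arrays); the top-down arithmetic itself is unchanged. As a self-contained restatement of those formulas on plain doubles (the sanitize_val() clamping is omitted here):

static double topdown_be_bound(double total_slots, double slots_issued,
			       double slots_retired, double fetch_bubbles,
			       double recovery_bubbles)
{
	double retiring = total_slots ? slots_retired / total_slots : 0;
	double fe_bound = total_slots ? fetch_bubbles / total_slots : 0;
	double bad_spec = total_slots ?
		(slots_issued - slots_retired + recovery_bubbles) / total_slots : 0;
	double sum = fe_bound + bad_spec + retiring;

	/* whatever fraction of slots is left over is backend bound */
	return sum ? 1.0 - sum : 0;
}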
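The stat.c hunk routes AGGR_THREAD updates into config->stats[thread] when that array exists, falling back to the global rt_stat otherwise. The allocation of that array is not part of this excerpt (it presumably happens in builtin-stat.c), so the helper below is only a hypothetical sketch of how the new perf_stat_config fields could be populated:

#include <stdlib.h>
#include "util/stat.h"

/* Hypothetical setup for the per-thread shadow stats consumed above. */
static int alloc_per_thread_stats(struct perf_stat_config *config, int nthreads)
{
	int i;

	config->stats = calloc(nthreads, sizeof(struct runtime_stat));
	if (!config->stats)
		return -1;

	config->stats_num = nthreads;
	for (i = 0; i < nthreads; i++)
		runtime_stat__init(&config->stats[i]);

	return 0;
}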
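perf_stat__print_shadow_stats() now also takes the runtime_stat to read from, but its output still goes through the caller-supplied perf_stat_output_ctx callbacks. The struct layout and callback signatures below come from the updated util/stat.h; the fprintf-based callbacks and show_shadow() wrapper are only illustrative:

#include <stdio.h>
#include "util/stat.h"

static void simple_print_metric(void *ctx, const char *color,
				const char *unit, const char *fmt, double val)
{
	(void)color;			/* colouring skipped in this sketch */
	if (fmt)
		fprintf(ctx, fmt, val);
	if (unit)
		fprintf(ctx, " %s\n", unit);
}

static void simple_new_line(void *ctx)
{
	fputc('\n', ctx);
}

static void show_shadow(struct perf_evsel *evsel, double avg, int cpu,
			struct rblist *metric_events, struct runtime_stat *st)
{
	struct perf_stat_output_ctx out = {
		.ctx		= stdout,
		.print_metric	= simple_print_metric,
		.new_line	= simple_new_line,
	};

	/* Same call the stat printers make, now with an explicit runtime_stat. */
	perf_stat__print_shadow_stats(evsel, avg, cpu, &out, metric_events, st);
}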
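The strpbrk_esc()/strdup_esc() helpers added in util/string.c are what let the probe parser honour backslash escapes such as 'foo\+bar.c:4'. A small usage sketch, assuming it is built inside the perf tree so util/string2.h is available:

#include <stdio.h>
#include <stdlib.h>
#include "util/string2.h"

int main(void)
{
	char spec[] = "foo\\+bar.c:4";	/* the user typed foo\+bar.c:4 */
	char *sep, *plain;

	/* skips the escaped '+' and stops at the real ':' separator */
	sep = strpbrk_esc(spec, "+:");
	if (sep)
		printf("separator: %s\n", sep);		/* prints ":4" */

	/* drops the single backslash: "foo+bar.c:4" */
	plain = strdup_esc(spec);
	printf("unescaped: %s\n", plain);
	free(plain);
	return 0;
}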
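Finally, thread_map__new_str() grows a per_thread flag and target.h gains target__has_per_thread(), so a system-wide per-thread session can get a map of all threads on the system instead of a dummy single-entry map. The caller below is hypothetical wiring, but the two functions and their signatures are taken from the hunks above:

#include "util/target.h"
#include "util/thread_map.h"

static struct thread_map *make_threads(struct target *target)
{
	/* true only for system_wide && per_thread targets */
	bool per_thread = target__has_per_thread(target);

	return thread_map__new_str(target->pid, target->tid, target->uid,
				   per_thread);
}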