diff options
Diffstat (limited to 'tools/perf')
90 files changed, 1791 insertions, 1447 deletions
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index fd77d81ea748..0294c57b1f5e 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -38,7 +38,7 @@ OPTIONS --remove=:: Remove specified file from the cache. -M:: ---missing=:: +--missing=:: List missing build ids in the cache for the specified file. -u:: --update:: diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index cbb4f743d921..3e2aec94f806 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -89,6 +89,19 @@ raw encoding of 0x1A8 can be used: You should refer to the processor specific documentation for getting these details. Some of them are referenced in the SEE ALSO section below. +PARAMETERIZED EVENTS +-------------------- + +Some pmu events listed by 'perf-list' will be displayed with '?' in them. For +example: + + hv_gpci/dtbp_ptitc,phys_processor_idx=?/ + +This means that when provided as an event, a value for '?' must +also be supplied. For example: + + perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ... + OPTIONS ------- diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 1d78a4064da4..43310d8661fe 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -12,11 +12,12 @@ SYNOPSIS DESCRIPTION ----------- -"perf mem -t <TYPE> record" runs a command and gathers memory operation data +"perf mem record" runs a command and gathers memory operation data from it, into perf.data. Perf record options are accepted and are passed through. -"perf mem -t <TYPE> report" displays the result. It invokes perf report with the -right set of options to display a memory access profile. +"perf mem report" displays the result. It invokes perf report with the +right set of options to display a memory access profile. By default, loads +and stores are sampled. Use the -t option to limit to loads or stores. Note that on Intel systems the memory latency reported is the use-latency, not the pure load (or store latency). Use latency includes any pipeline @@ -29,7 +30,7 @@ OPTIONS -t:: --type=:: - Select the memory operation type: load or store (default: load) + Select the memory operation type: load or store (default: load,store) -D:: --dump-raw-samples=:: diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 81a20f21a3e6..31e977459c51 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -33,6 +33,18 @@ OPTIONS - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a hexadecimal event descriptor. + - a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where + 'param1', 'param2', etc are defined as formats for the PMU in + /sys/bus/event_sources/devices/<pmu>/format/*. + + - a symbolically formed event like 'pmu/config=M,config1=N,config3=K/' + + where M, N, K are numbers (in decimal, hex, octal format). Acceptable + values for each of 'config', 'config1' and 'config2' are defined by + corresponding entries in /sys/bus/event_sources/devices/<pmu>/format/* + param1 and param2 are defined as formats for the PMU in: + /sys/bus/event_sources/devices/<pmu>/format/* + - a hardware breakpoint event in the form of '\mem:addr[/len][:access]' where addr is the address in memory you want to break in. Access is the memory access type (read, write, execute) it can diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 0927bf4e6c2a..dd7cccdde498 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -159,7 +159,7 @@ OPTIONS --dump-raw-trace:: Dump raw trace in ASCII. --g [type,min[,limit],order[,key]]:: +-g [type,min[,limit],order[,key][,branch]]:: --call-graph:: Display call chains using type, min percent threshold, optional print limit and order. @@ -177,6 +177,11 @@ OPTIONS - function: compare on functions - address: compare on individual code addresses + branch can be: + - branch: include last branch information in callgraph + when available. Usually more convenient to use --branch-history + for this. + Default: fractal,0.5,callee,function. --children:: @@ -266,6 +271,11 @@ OPTIONS branch stacks and it will automatically switch to the branch view mode, unless --no-branch-stack is used. +--branch-history:: + Add the addresses of sampled taken branches to the callstack. + This allows to examine the path the program took to each sample. + The data collection must have used -b (or -j) and -g. + --objdump=<path>:: Path to objdump binary. diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 21494806c0ab..a21eec05bc42 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -125,46 +125,46 @@ OPTIONS is equivalent to: perf script -f trace:<fields> -f sw:<fields> -f hw:<fields> - + i.e., the specified fields apply to all event types if the type string is not given. - + The arguments are processed in the order received. A later usage can reset a prior request. e.g.: - + -f trace: -f comm,tid,time,ip,sym - + The first -f suppresses trace events (field list is ""), but then the second invocation sets the fields to comm,tid,time,ip,sym. In this case a warning is given to the user: - + "Overriding previous field request for all events." - + Alternatively, consider the order: - + -f comm,tid,time,ip,sym -f trace: - + The first -f sets the fields for all events and the second -f suppresses trace events. The user is given a warning message about the override, and the result of the above is that only S/W and H/W events are displayed with the given fields. - + For the 'wildcard' option if a user selected field is invalid for an event type, a message is displayed to the user that the option is ignored for that type. For example: - + $ perf script -f comm,tid,trace 'trace' not valid for hardware events. Ignoring. 'trace' not valid for software events. Ignoring. - + Alternatively, if the type is given an invalid field is specified it is an error. For example: - + perf script -v -f sw:comm,tid,trace 'trace' not valid for software events. - + At this point usage is displayed, and perf-script exits. - + Finally, a user may not set fields to none for all event types. i.e., -f "" is not allowed. diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 29ee857c09c6..04e150d83e7d 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -25,10 +25,22 @@ OPTIONS -e:: --event=:: - Select the PMU event. Selection can be a symbolic event name - (use 'perf list' to list all events) or a raw PMU - event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + Select the PMU event. Selection can be: + + - a symbolic event name (use 'perf list' to list all events) + + - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a + hexadecimal event descriptor. + + - a symbolically formed event like 'pmu/param1=0x3,param2/' where + param1 and param2 are defined as formats for the PMU in + /sys/bus/event_sources/devices/<pmu>/format/* + + - a symbolically formed event like 'pmu/config=M,config1=N,config2=K/' + where M, N, K are numbers (in decimal, hex, octal format). + Acceptable values for each of 'config', 'config1' and 'config2' + parameters are defined by corresponding entries in + /sys/bus/event_sources/devices/<pmu>/format/* -i:: --no-inherit:: diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index d240bb2e5b22..1e8e400b4493 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -18,6 +18,10 @@ OPTIONS --debug verbose # sets verbose = 1 --debug verbose=2 # sets verbose = 2 +--buildid-dir:: + Setup buildid cache directory. It has higher priority than + buildid.dir config file option. + DESCRIPTION ----------- Performance counters for Linux are a new kernel-based subsystem diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 344c4d3d0a4a..fbbfdc39271d 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -4,17 +4,37 @@ tools/lib/traceevent tools/lib/api tools/lib/symbol/kallsyms.c tools/lib/symbol/kallsyms.h +tools/lib/util/find_next_bit.c tools/include/asm/bug.h +tools/include/asm-generic/bitops/arch_hweight.h +tools/include/asm-generic/bitops/atomic.h +tools/include/asm-generic/bitops/const_hweight.h +tools/include/asm-generic/bitops/__ffs.h +tools/include/asm-generic/bitops/__fls.h +tools/include/asm-generic/bitops/find.h +tools/include/asm-generic/bitops/fls64.h +tools/include/asm-generic/bitops/fls.h +tools/include/asm-generic/bitops/hweight.h +tools/include/asm-generic/bitops.h +tools/include/linux/bitops.h tools/include/linux/compiler.h -tools/include/linux/hash.h tools/include/linux/export.h +tools/include/linux/hash.h +tools/include/linux/log2.h tools/include/linux/types.h +include/asm-generic/bitops/arch_hweight.h +include/asm-generic/bitops/const_hweight.h +include/asm-generic/bitops/fls64.h +include/asm-generic/bitops/__fls.h +include/asm-generic/bitops/fls.h include/linux/const.h include/linux/perf_event.h include/linux/rbtree.h include/linux/list.h include/linux/hash.h include/linux/stringify.h +lib/find_next_bit.c +lib/hweight.c lib/rbtree.c include/linux/swab.h arch/*/include/asm/unistd*.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 478efa9b2364..aa6a50447c32 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -231,8 +231,19 @@ LIB_H += ../../include/uapi/linux/const.h LIB_H += ../include/linux/hash.h LIB_H += ../../include/linux/stringify.h LIB_H += util/include/linux/bitmap.h -LIB_H += util/include/linux/bitops.h +LIB_H += ../include/linux/bitops.h +LIB_H += ../include/asm-generic/bitops/arch_hweight.h +LIB_H += ../include/asm-generic/bitops/atomic.h +LIB_H += ../include/asm-generic/bitops/const_hweight.h +LIB_H += ../include/asm-generic/bitops/find.h +LIB_H += ../include/asm-generic/bitops/fls64.h +LIB_H += ../include/asm-generic/bitops/fls.h +LIB_H += ../include/asm-generic/bitops/__ffs.h +LIB_H += ../include/asm-generic/bitops/__fls.h +LIB_H += ../include/asm-generic/bitops/hweight.h +LIB_H += ../include/asm-generic/bitops.h LIB_H += ../include/linux/compiler.h +LIB_H += ../include/linux/log2.h LIB_H += util/include/linux/const.h LIB_H += util/include/linux/ctype.h LIB_H += util/include/linux/kernel.h @@ -247,7 +258,6 @@ LIB_H += util/include/linux/linkage.h LIB_H += util/include/asm/asm-offsets.h LIB_H += ../include/asm/bug.h LIB_H += util/include/asm/byteorder.h -LIB_H += util/include/asm/hweight.h LIB_H += util/include/asm/swab.h LIB_H += util/include/asm/system.h LIB_H += util/include/asm/uaccess.h @@ -335,6 +345,7 @@ LIB_OBJS += $(OUTPUT)util/event.o LIB_OBJS += $(OUTPUT)util/evlist.o LIB_OBJS += $(OUTPUT)util/evsel.o LIB_OBJS += $(OUTPUT)util/exec_cmd.o +LIB_OBJS += $(OUTPUT)util/find_next_bit.o LIB_OBJS += $(OUTPUT)util/help.o LIB_OBJS += $(OUTPUT)util/kallsyms.o LIB_OBJS += $(OUTPUT)util/levenshtein.o @@ -453,12 +464,13 @@ BUILTIN_OBJS += $(OUTPUT)builtin-bench.o # Benchmark modules BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o -ifeq ($(RAW_ARCH),x86_64) +ifeq ($(ARCH), x86) +ifeq ($(IS_64_BIT), 1) BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o endif +endif BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o -BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o @@ -735,6 +747,12 @@ $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c $(OUTPUT)PERF-CFLAGS $(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< +$(OUTPUT)util/hweight.o: ../../lib/hweight.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< + +$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< + $(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $< diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 3bb50eac5542..0c370f81e002 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -103,7 +103,7 @@ static Dwarf_Frame *get_eh_frame(Dwfl_Module *mod, Dwarf_Addr pc) return NULL; } - result = dwarf_cfi_addrframe(cfi, pc, &frame); + result = dwarf_cfi_addrframe(cfi, pc-bias, &frame); if (result) { pr_debug("%s(): %s\n", __func__, dwfl_errmsg(-1)); return NULL; @@ -128,7 +128,7 @@ static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc) return NULL; } - result = dwarf_cfi_addrframe(cfi, pc, &frame); + result = dwarf_cfi_addrframe(cfi, pc-bias, &frame); if (result) { pr_debug("%s(): %s\n", __func__, dwfl_errmsg(-1)); return NULL; @@ -145,7 +145,7 @@ static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc) * yet used) * -1 in case of errors */ -static int check_return_addr(struct dso *dso, Dwarf_Addr pc) +static int check_return_addr(struct dso *dso, u64 map_start, Dwarf_Addr pc) { int rc = -1; Dwfl *dwfl; @@ -155,6 +155,7 @@ static int check_return_addr(struct dso *dso, Dwarf_Addr pc) Dwarf_Addr start = pc; Dwarf_Addr end = pc; bool signalp; + const char *exec_file = dso->long_name; dwfl = dso->dwfl; @@ -165,8 +166,10 @@ static int check_return_addr(struct dso *dso, Dwarf_Addr pc) return -1; } - if (dwfl_report_offline(dwfl, "", dso->long_name, -1) == NULL) { - pr_debug("dwfl_report_offline() failed %s\n", + mod = dwfl_report_elf(dwfl, exec_file, exec_file, -1, + map_start, false); + if (!mod) { + pr_debug("dwfl_report_elf() failed %s\n", dwarf_errmsg(-1)); /* * We normally cache the DWARF debug info and never @@ -256,10 +259,10 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain) return skip_slot; } - rc = check_return_addr(dso, ip); + rc = check_return_addr(dso, al.map->start, ip); - pr_debug("DSO %s, nr %" PRIx64 ", ip 0x%" PRIx64 "rc %d\n", - dso->long_name, chain->nr, ip, rc); + pr_debug("[DSO %s, sym %s, ip 0x%" PRIx64 "] rc %d\n", + dso->long_name, al.sym->name, ip, rc); if (rc == 0) { /* diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 2465141b554b..6c14afe8c1b1 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -13,6 +13,7 @@ #include "../util/cloexec.h" #include "bench.h" #include "mem-memcpy-arch.h" +#include "mem-memset-arch.h" #include <stdio.h> #include <stdlib.h> @@ -48,20 +49,24 @@ static const struct option options[] = { }; typedef void *(*memcpy_t)(void *, const void *, size_t); +typedef void *(*memset_t)(void *, int, size_t); struct routine { const char *name; const char *desc; - memcpy_t fn; + union { + memcpy_t memcpy; + memset_t memset; + } fn; }; -struct routine routines[] = { - { "default", - "Default memcpy() provided by glibc", - memcpy }, +struct routine memcpy_routines[] = { + { .name = "default", + .desc = "Default memcpy() provided by glibc", + .fn.memcpy = memcpy }, #ifdef HAVE_ARCH_X86_64_SUPPORT -#define MEMCPY_FN(fn, name, desc) { name, desc, fn }, +#define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, #include "mem-memcpy-x86-64-asm-def.h" #undef MEMCPY_FN @@ -69,7 +74,7 @@ struct routine routines[] = { { NULL, NULL, - NULL } + {NULL} } }; static const char * const bench_mem_memcpy_usage[] = { @@ -110,63 +115,6 @@ static double timeval2double(struct timeval *ts) (double)ts->tv_usec / (double)1000000; } -static void alloc_mem(void **dst, void **src, size_t length) -{ - *dst = zalloc(length); - if (!*dst) - die("memory allocation failed - maybe length is too large?\n"); - - *src = zalloc(length); - if (!*src) - die("memory allocation failed - maybe length is too large?\n"); - /* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */ - memset(*src, 0, length); -} - -static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault) -{ - u64 cycle_start = 0ULL, cycle_end = 0ULL; - void *src = NULL, *dst = NULL; - int i; - - alloc_mem(&src, &dst, len); - - if (prefault) - fn(dst, src, len); - - cycle_start = get_cycle(); - for (i = 0; i < iterations; ++i) - fn(dst, src, len); - cycle_end = get_cycle(); - - free(src); - free(dst); - return cycle_end - cycle_start; -} - -static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) -{ - struct timeval tv_start, tv_end, tv_diff; - void *src = NULL, *dst = NULL; - int i; - - alloc_mem(&src, &dst, len); - - if (prefault) - fn(dst, src, len); - - BUG_ON(gettimeofday(&tv_start, NULL)); - for (i = 0; i < iterations; ++i) - fn(dst, src, len); - BUG_ON(gettimeofday(&tv_end, NULL)); - - timersub(&tv_end, &tv_start, &tv_diff); - - free(src); - free(dst); - return (double)((double)len / timeval2double(&tv_diff)); -} - #define pf (no_prefault ? 0 : 1) #define print_bps(x) do { \ @@ -180,16 +128,25 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) printf(" %14lf GB/Sec", x / K / K / K); \ } while (0) -int bench_mem_memcpy(int argc, const char **argv, - const char *prefix __maybe_unused) +struct bench_mem_info { + const struct routine *routines; + u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault); + double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault); + const char *const *usage; +}; + +static int bench_mem_common(int argc, const char **argv, + const char *prefix __maybe_unused, + struct bench_mem_info *info) { int i; size_t len; + double totallen; double result_bps[2]; u64 result_cycle[2]; argc = parse_options(argc, argv, options, - bench_mem_memcpy_usage, 0); + info->usage, 0); if (no_prefault && only_prefault) { fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); @@ -200,6 +157,7 @@ int bench_mem_memcpy(int argc, const char **argv, init_cycle(); len = (size_t)perf_atoll((char *)length_str); + totallen = (double)len * iterations; result_cycle[0] = result_cycle[1] = 0ULL; result_bps[0] = result_bps[1] = 0.0; @@ -213,16 +171,16 @@ int bench_mem_memcpy(int argc, const char **argv, if (only_prefault && no_prefault) only_prefault = no_prefault = false; - for (i = 0; routines[i].name; i++) { - if (!strcmp(routines[i].name, routine)) + for (i = 0; info->routines[i].name; i++) { + if (!strcmp(info->routines[i].name, routine)) break; } - if (!routines[i].name) { + if (!info->routines[i].name) { printf("Unknown routine:%s\n", routine); printf("Available routines...\n"); - for (i = 0; routines[i].name; i++) { + for (i = 0; info->routines[i].name; i++) { printf("\t%s ... %s\n", - routines[i].name, routines[i].desc); + info->routines[i].name, info->routines[i].desc); } return 1; } @@ -234,25 +192,25 @@ int bench_mem_memcpy(int argc, const char **argv, /* show both of results */ if (use_cycle) { result_cycle[0] = - do_memcpy_cycle(routines[i].fn, len, false); + info->do_cycle(&info->routines[i], len, false); result_cycle[1] = - do_memcpy_cycle(routines[i].fn, len, true); + info->do_cycle(&info->routines[i], len, true); } else { result_bps[0] = - do_memcpy_gettimeofday(routines[i].fn, + info->do_gettimeofday(&info->routines[i], len, false); result_bps[1] = - do_memcpy_gettimeofday(routines[i].fn, + info->do_gettimeofday(&info->routines[i], len, true); } } else { if (use_cycle) { result_cycle[pf] = - do_memcpy_cycle(routines[i].fn, + info->do_cycle(&info->routines[i], len, only_prefault); } else { result_bps[pf] = - do_memcpy_gettimeofday(routines[i].fn, + info->do_gettimeofday(&info->routines[i], len, only_prefault); } } @@ -263,10 +221,10 @@ int bench_mem_memcpy(int argc, const char **argv, if (use_cycle) { printf(" %14lf Cycle/Byte\n", (double)result_cycle[0] - / (double)len); + / totallen); printf(" %14lf Cycle/Byte (with prefault)\n", (double)result_cycle[1] - / (double)len); + / totallen); } else { print_bps(result_bps[0]); printf("\n"); @@ -277,7 +235,7 @@ int bench_mem_memcpy(int argc, const char **argv, if (use_cycle) { printf(" %14lf Cycle/Byte", (double)result_cycle[pf] - / (double)len); + / totallen); } else print_bps(result_bps[pf]); @@ -288,8 +246,8 @@ int bench_mem_memcpy(int argc, const char **argv, if (!only_prefault && !no_prefault) { if (use_cycle) { printf("%lf %lf\n", - (double)result_cycle[0] / (double)len, - (double)result_cycle[1] / (double)len); + (double)result_cycle[0] / totallen, + (double)result_cycle[1] / totallen); } else { printf("%lf %lf\n", result_bps[0], result_bps[1]); @@ -297,7 +255,7 @@ int bench_mem_memcpy(int argc, const char **argv, } else { if (use_cycle) { printf("%lf\n", (double)result_cycle[pf] - / (double)len); + / totallen); } else printf("%lf\n", result_bps[pf]); } @@ -310,3 +268,163 @@ int bench_mem_memcpy(int argc, const char **argv, return 0; } + +static void memcpy_alloc_mem(void **dst, void **src, size_t length) +{ + *dst = zalloc(length); + if (!*dst) + die("memory allocation failed - maybe length is too large?\n"); + + *src = zalloc(length); + if (!*src) + die("memory allocation failed - maybe length is too large?\n"); + /* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */ + memset(*src, 0, length); +} + +static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) +{ + u64 cycle_start = 0ULL, cycle_end = 0ULL; + void *src = NULL, *dst = NULL; + memcpy_t fn = r->fn.memcpy; + int i; + + memcpy_alloc_mem(&src, &dst, len); + + if (prefault) + fn(dst, src, len); + + cycle_start = get_cycle(); + for (i = 0; i < iterations; ++i) + fn(dst, src, len); + cycle_end = get_cycle(); + + free(src); + free(dst); + return cycle_end - cycle_start; +} + +static double do_memcpy_gettimeofday(const struct routine *r, size_t len, + bool prefault) +{ + struct timeval tv_start, tv_end, tv_diff; + memcpy_t fn = r->fn.memcpy; + void *src = NULL, *dst = NULL; + int i; + + memcpy_alloc_mem(&src, &dst, len); + + if (prefault) + fn(dst, src, len); + + BUG_ON(gettimeofday(&tv_start, NULL)); + for (i = 0; i < iterations; ++i) + fn(dst, src, len); + BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(src); + free(dst); + return (double)(((double)len * iterations) / timeval2double(&tv_diff)); +} + +int bench_mem_memcpy(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + struct bench_mem_info info = { + .routines = memcpy_routines, + .do_cycle = do_memcpy_cycle, + .do_gettimeofday = do_memcpy_gettimeofday, + .usage = bench_mem_memcpy_usage, + }; + + return bench_mem_common(argc, argv, prefix, &info); +} + +static void memset_alloc_mem(void **dst, size_t length) +{ + *dst = zalloc(length); + if (!*dst) + die("memory allocation failed - maybe length is too large?\n"); +} + +static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault) +{ + u64 cycle_start = 0ULL, cycle_end = 0ULL; + memset_t fn = r->fn.memset; + void *dst = NULL; + int i; + + memset_alloc_mem(&dst, len); + + if (prefault) + fn(dst, -1, len); + + cycle_start = get_cycle(); + for (i = 0; i < iterations; ++i) + fn(dst, i, len); + cycle_end = get_cycle(); + + free(dst); + return cycle_end - cycle_start; +} + +static double do_memset_gettimeofday(const struct routine *r, size_t len, + bool prefault) +{ + struct timeval tv_start, tv_end, tv_diff; + memset_t fn = r->fn.memset; + void *dst = NULL; + int i; + + memset_alloc_mem(&dst, len); + + if (prefault) + fn(dst, -1, len); + + BUG_ON(gettimeofday(&tv_start, NULL)); + for (i = 0; i < iterations; ++i) + fn(dst, i, len); + BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(dst); + return (double)(((double)len * iterations) / timeval2double(&tv_diff)); +} + +static const char * const bench_mem_memset_usage[] = { + "perf bench mem memset <options>", + NULL +}; + +static const struct routine memset_routines[] = { + { .name ="default", + .desc = "Default memset() provided by glibc", + .fn.memset = memset }, +#ifdef HAVE_ARCH_X86_64_SUPPORT + +#define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, +#include "mem-memset-x86-64-asm-def.h" +#undef MEMSET_FN + +#endif + + { .name = NULL, + .desc = NULL, + .fn.memset = NULL } +}; + +int bench_mem_memset(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + struct bench_mem_info info = { + .routines = memset_routines, + .do_cycle = do_memset_cycle, + .do_gettimeofday = do_memset_gettimeofday, + .usage = bench_mem_memset_usage, + }; + + return bench_mem_common(argc, argv, prefix, &info); +} diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c deleted file mode 100644 index 75fc3e65fb2a..000000000000 --- a/tools/perf/bench/mem-memset.c +++ /dev/null @@ -1,304 +0,0 @@ -/* - * mem-memset.c - * - * memset: Simple memory set in various ways - * - * Trivial clone of mem-memcpy.c. - */ - -#include "../perf.h" -#include "../util/util.h" -#include "../util/parse-options.h" -#include "../util/header.h" -#include "../util/cloexec.h" -#include "bench.h" -#include "mem-memset-arch.h" - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/time.h> -#include <errno.h> - -#define K 1024 - -static const char *length_str = "1MB"; -static const char *routine = "default"; -static int iterations = 1; -static bool use_cycle; -static int cycle_fd; -static bool only_prefault; -static bool no_prefault; - -static const struct option options[] = { - OPT_STRING('l', "length", &length_str, "1MB", - "Specify length of memory to set. " - "Available units: B, KB, MB, GB and TB (upper and lower)"), - OPT_STRING('r', "routine", &routine, "default", - "Specify routine to set"), - OPT_INTEGER('i', "iterations", &iterations, - "repeat memset() invocation this number of times"), - OPT_BOOLEAN('c', "cycle", &use_cycle, - "Use cycles event instead of gettimeofday() for measuring"), - OPT_BOOLEAN('o', "only-prefault", &only_prefault, - "Show only the result with page faults before memset()"), - OPT_BOOLEAN('n', "no-prefault", &no_prefault, - "Show only the result without page faults before memset()"), - OPT_END() -}; - -typedef void *(*memset_t)(void *, int, size_t); - -struct routine { - const char *name; - const char *desc; - memset_t fn; -}; - -static const struct routine routines[] = { - { "default", - "Default memset() provided by glibc", - memset }, -#ifdef HAVE_ARCH_X86_64_SUPPORT - -#define MEMSET_FN(fn, name, desc) { name, desc, fn }, -#include "mem-memset-x86-64-asm-def.h" -#undef MEMSET_FN - -#endif - - { NULL, - NULL, - NULL } -}; - -static const char * const bench_mem_memset_usage[] = { - "perf bench mem memset <options>", - NULL -}; - -static struct perf_event_attr cycle_attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES -}; - -static void init_cycle(void) -{ - cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, - perf_event_open_cloexec_flag()); - - if (cycle_fd < 0 && errno == ENOSYS) - die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); - else - BUG_ON(cycle_fd < 0); -} - -static u64 get_cycle(void) -{ - int ret; - u64 clk; - - ret = read(cycle_fd, &clk, sizeof(u64)); - BUG_ON(ret != sizeof(u64)); - - return clk; -} - -static double timeval2double(struct timeval *ts) -{ - return (double)ts->tv_sec + - (double)ts->tv_usec / (double)1000000; -} - -static void alloc_mem(void **dst, size_t length) -{ - *dst = zalloc(length); - if (!*dst) - die("memory allocation failed - maybe length is too large?\n"); -} - -static u64 do_memset_cycle(memset_t fn, size_t len, bool prefault) -{ - u64 cycle_start = 0ULL, cycle_end = 0ULL; - void *dst = NULL; - int i; - - alloc_mem(&dst, len); - - if (prefault) - fn(dst, -1, len); - - cycle_start = get_cycle(); - for (i = 0; i < iterations; ++i) - fn(dst, i, len); - cycle_end = get_cycle(); - - free(dst); - return cycle_end - cycle_start; -} - -static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) -{ - struct timeval tv_start, tv_end, tv_diff; - void *dst = NULL; - int i; - - alloc_mem(&dst, len); - - if (prefault) - fn(dst, -1, len); - - BUG_ON(gettimeofday(&tv_start, NULL)); - for (i = 0; i < iterations; ++i) - fn(dst, i, len); - BUG_ON(gettimeofday(&tv_end, NULL)); - - timersub(&tv_end, &tv_start, &tv_diff); - - free(dst); - return (double)((double)len / timeval2double(&tv_diff)); -} - -#define pf (no_prefault ? 0 : 1) - -#define print_bps(x) do { \ - if (x < K) \ - printf(" %14lf B/Sec", x); \ - else if (x < K * K) \ - printf(" %14lfd KB/Sec", x / K); \ - else if (x < K * K * K) \ - printf(" %14lf MB/Sec", x / K / K); \ - else \ - printf(" %14lf GB/Sec", x / K / K / K); \ - } while (0) - -int bench_mem_memset(int argc, const char **argv, - const char *prefix __maybe_unused) -{ - int i; - size_t len; - double result_bps[2]; - u64 result_cycle[2]; - - argc = parse_options(argc, argv, options, - bench_mem_memset_usage, 0); - - if (no_prefault && only_prefault) { - fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); - return 1; - } - - if (use_cycle) - init_cycle(); - - len = (size_t)perf_atoll((char *)length_str); - - result_cycle[0] = result_cycle[1] = 0ULL; - result_bps[0] = result_bps[1] = 0.0; - - if ((s64)len <= 0) { - fprintf(stderr, "Invalid length:%s\n", length_str); - return 1; - } - - /* same to without specifying either of prefault and no-prefault */ - if (only_prefault && no_prefault) - only_prefault = no_prefault = false; - - for (i = 0; routines[i].name; i++) { - if (!strcmp(routines[i].name, routine)) - break; - } - if (!routines[i].name) { - printf("Unknown routine:%s\n", routine); - printf("Available routines...\n"); - for (i = 0; routines[i].name; i++) { - printf("\t%s ... %s\n", - routines[i].name, routines[i].desc); - } - return 1; - } - - if (bench_format == BENCH_FORMAT_DEFAULT) - printf("# Copying %s Bytes ...\n\n", length_str); - - if (!only_prefault && !no_prefault) { - /* show both of results */ - if (use_cycle) { - result_cycle[0] = - do_memset_cycle(routines[i].fn, len, false); - result_cycle[1] = - do_memset_cycle(routines[i].fn, len, true); - } else { - result_bps[0] = - do_memset_gettimeofday(routines[i].fn, - len, false); - result_bps[1] = - do_memset_gettimeofday(routines[i].fn, - len, true); - } - } else { - if (use_cycle) { - result_cycle[pf] = - do_memset_cycle(routines[i].fn, - len, only_prefault); - } else { - result_bps[pf] = - do_memset_gettimeofday(routines[i].fn, - len, only_prefault); - } - } - - switch (bench_format) { - case BENCH_FORMAT_DEFAULT: - if (!only_prefault && !no_prefault) { - if (use_cycle) { - printf(" %14lf Cycle/Byte\n", - (double)result_cycle[0] - / (double)len); - printf(" %14lf Cycle/Byte (with prefault)\n ", - (double)result_cycle[1] - / (double)len); - } else { - print_bps(result_bps[0]); - printf("\n"); - print_bps(result_bps[1]); - printf(" (with prefault)\n"); - } - } else { - if (use_cycle) { - printf(" %14lf Cycle/Byte", - (double)result_cycle[pf] - / (double)len); - } else - print_bps(result_bps[pf]); - - printf("%s\n", only_prefault ? " (with prefault)" : ""); - } - break; - case BENCH_FORMAT_SIMPLE: - if (!only_prefault && !no_prefault) { - if (use_cycle) { - printf("%lf %lf\n", - (double)result_cycle[0] / (double)len, - (double)result_cycle[1] / (double)len); - } else { - printf("%lf %lf\n", - result_bps[0], result_bps[1]); - } - } else { - if (use_cycle) { - printf("%lf\n", (double)result_cycle[pf] - / (double)len); - } else - printf("%lf\n", result_bps[pf]); - } - break; - default: - /* reaching this means there's some disaster: */ - die("unknown format: %d\n", bench_format); - break; - } - - return 0; -} diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 07a8d7646a15..005cc283790c 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -19,12 +19,12 @@ #include <stdlib.h> #include <signal.h> #include <sys/wait.h> -#include <linux/unistd.h> #include <string.h> #include <errno.h> #include <assert.h> #include <sys/time.h> #include <sys/types.h> +#include <sys/syscall.h> #include <pthread.h> diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index e7417fe97a97..747f86103599 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -232,7 +232,7 @@ static int __cmd_annotate(struct perf_annotate *ann) if (nr_samples > 0) { total_nr_samples += nr_samples; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (symbol_conf.event_group && !perf_evsel__is_group_leader(pos)) diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 70385756da63..50e6b66aea1f 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -236,10 +236,10 @@ static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) if (errno == ENOENT) return false; - pr_warning("Problems with %s file, consider removing it from the cache\n", + pr_warning("Problems with %s file, consider removing it from the cache\n", filename); } else if (memcmp(dso->build_id, build_id, sizeof(dso->build_id))) { - pr_warning("Problems with %s file, consider removing it from the cache\n", + pr_warning("Problems with %s file, consider removing it from the cache\n", filename); } @@ -285,12 +285,11 @@ int cmd_buildid_cache(int argc, const char **argv, struct str_node *pos; int ret = 0; bool force = false; - char debugdir[PATH_MAX]; char const *add_name_list_str = NULL, *remove_name_list_str = NULL, *missing_filename = NULL, *update_name_list_str = NULL, - *kcore_filename; + *kcore_filename = NULL; char sbuf[STRERR_BUFSIZE]; struct perf_data_file file = { @@ -335,13 +334,11 @@ int cmd_buildid_cache(int argc, const char **argv, setup_pager(); - snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir); - if (add_name_list_str) { list = strlist__new(true, add_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__add_file(pos->s, debugdir)) { + if (build_id_cache__add_file(pos->s, buildid_dir)) { if (errno == EEXIST) { pr_debug("%s already in the cache\n", pos->s); @@ -359,7 +356,7 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, remove_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__remove_file(pos->s, debugdir)) { + if (build_id_cache__remove_file(pos->s, buildid_dir)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", pos->s); @@ -380,7 +377,7 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, update_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__update_file(pos->s, debugdir)) { + if (build_id_cache__update_file(pos->s, buildid_dir)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", pos->s); @@ -395,7 +392,7 @@ int cmd_buildid_cache(int argc, const char **argv, } if (kcore_filename && - build_id_cache__add_kcore(kcore_filename, debugdir, force)) + build_id_cache__add_kcore(kcore_filename, buildid_dir, force)) pr_warning("Couldn't add %s\n", kcore_filename); out: diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 1ce425d101a9..74aada554b12 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -390,6 +390,15 @@ static void perf_evlist__collapse_resort(struct perf_evlist *evlist) } } +static struct data__file *fmt_to_data_file(struct perf_hpp_fmt *fmt) +{ + struct diff_hpp_fmt *dfmt = container_of(fmt, struct diff_hpp_fmt, fmt); + void *ptr = dfmt - dfmt->idx; + struct data__file *d = container_of(ptr, struct data__file, fmt); + + return d; +} + static struct hist_entry* get_pair_data(struct hist_entry *he, struct data__file *d) { @@ -407,8 +416,7 @@ get_pair_data(struct hist_entry *he, struct data__file *d) static struct hist_entry* get_pair_fmt(struct hist_entry *he, struct diff_hpp_fmt *dfmt) { - void *ptr = dfmt - dfmt->idx; - struct data__file *d = container_of(ptr, struct data__file, fmt); + struct data__file *d = fmt_to_data_file(&dfmt->fmt); return get_pair_data(he, d); } @@ -430,7 +438,7 @@ static void hists__baseline_only(struct hists *hists) next = rb_next(&he->rb_node_in); if (!hist_entry__next_pair(he)) { rb_erase(&he->rb_node_in, root); - hist_entry__free(he); + hist_entry__delete(he); } } } @@ -448,26 +456,30 @@ static void hists__precompute(struct hists *hists) next = rb_first(root); while (next != NULL) { struct hist_entry *he, *pair; + struct data__file *d; + int i; he = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&he->rb_node_in); - pair = get_pair_data(he, &data__files[sort_compute]); - if (!pair) - continue; + data__for_each_file_new(i, d) { + pair = get_pair_data(he, d); + if (!pair) + continue; - switch (compute) { - case COMPUTE_DELTA: - compute_delta(he, pair); - break; - case COMPUTE_RATIO: - compute_ratio(he, pair); - break; - case COMPUTE_WEIGHTED_DIFF: - compute_wdiff(he, pair); - break; - default: - BUG_ON(1); + switch (compute) { + case COMPUTE_DELTA: + compute_delta(he, pair); + break; + case COMPUTE_RATIO: + compute_ratio(he, pair); + break; + case COMPUTE_WEIGHTED_DIFF: + compute_wdiff(he, pair); + break; + default: + BUG_ON(1); + } } } } @@ -517,7 +529,7 @@ __hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, static int64_t hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, - int c) + int c, int sort_idx) { bool pairs_left = hist_entry__has_pairs(left); bool pairs_right = hist_entry__has_pairs(right); @@ -529,8 +541,8 @@ hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, if (!pairs_left || !pairs_right) return pairs_left ? -1 : 1; - p_left = get_pair_data(left, &data__files[sort_compute]); - p_right = get_pair_data(right, &data__files[sort_compute]); + p_left = get_pair_data(left, &data__files[sort_idx]); + p_right = get_pair_data(right, &data__files[sort_idx]); if (!p_left && !p_right) return 0; @@ -545,55 +557,103 @@ hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, return __hist_entry__cmp_compute(p_left, p_right, c); } -static void insert_hist_entry_by_compute(struct rb_root *root, - struct hist_entry *he, - int c) +static int64_t +hist_entry__cmp_compute_idx(struct hist_entry *left, struct hist_entry *right, + int c, int sort_idx) { - struct rb_node **p = &root->rb_node; - struct rb_node *parent = NULL; - struct hist_entry *iter; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); - if (hist_entry__cmp_compute(he, iter, c) < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; + struct hist_entry *p_right, *p_left; + + p_left = get_pair_data(left, &data__files[sort_idx]); + p_right = get_pair_data(right, &data__files[sort_idx]); + + if (!p_left && !p_right) + return 0; + + if (!p_left || !p_right) + return p_left ? -1 : 1; + + if (c != COMPUTE_DELTA) { + /* + * The delta can be computed without the baseline, but + * others are not. Put those entries which have no + * values below. + */ + if (left->dummy && right->dummy) + return 0; + + if (left->dummy || right->dummy) + return left->dummy ? 1 : -1; } - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, root); + return __hist_entry__cmp_compute(p_left, p_right, c); } -static void hists__compute_resort(struct hists *hists) +static int64_t +hist_entry__cmp_nop(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) { - struct rb_root *root; - struct rb_node *next; + return 0; +} - if (sort__need_collapse) - root = &hists->entries_collapsed; - else - root = hists->entries_in; +static int64_t +hist_entry__cmp_baseline(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + if (left->stat.period == right->stat.period) + return 0; + return left->stat.period > right->stat.period ? 1 : -1; +} - hists->entries = RB_ROOT; - next = rb_first(root); +static int64_t +hist_entry__cmp_delta(struct perf_hpp_fmt *fmt, + struct hist_entry *left, struct hist_entry *right) +{ + struct data__file *d = fmt_to_data_file(fmt); - hists__reset_stats(hists); - hists__reset_col_len(hists); + return hist_entry__cmp_compute(right, left, COMPUTE_DELTA, d->idx); +} - while (next != NULL) { - struct hist_entry *he; +static int64_t +hist_entry__cmp_ratio(struct perf_hpp_fmt *fmt, + struct hist_entry *left, struct hist_entry *right) +{ + struct data__file *d = fmt_to_data_file(fmt); - he = rb_entry(next, struct hist_entry, rb_node_in); - next = rb_next(&he->rb_node_in); + return hist_entry__cmp_compute(right, left, COMPUTE_RATIO, d->idx); +} + +static int64_t +hist_entry__cmp_wdiff(struct perf_hpp_fmt *fmt, + struct hist_entry *left, struct hist_entry *right) +{ + struct data__file *d = fmt_to_data_file(fmt); - insert_hist_entry_by_compute(&hists->entries, he, compute); - hists__inc_stats(hists, he); + return hist_entry__cmp_compute(right, left, COMPUTE_WEIGHTED_DIFF, d->idx); +} - if (!he->filtered) - hists__calc_col_len(hists, he); - } +static int64_t +hist_entry__cmp_delta_idx(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_compute_idx(right, left, COMPUTE_DELTA, + sort_compute); +} + +static int64_t +hist_entry__cmp_ratio_idx(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_compute_idx(right, left, COMPUTE_RATIO, + sort_compute); +} + +static int64_t +hist_entry__cmp_wdiff_idx(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_compute_idx(right, left, COMPUTE_WEIGHTED_DIFF, + sort_compute); } static void hists__process(struct hists *hists) @@ -601,12 +661,8 @@ static void hists__process(struct hists *hists) if (show_baseline_only) hists__baseline_only(hists); - if (sort_compute) { - hists__precompute(hists); - hists__compute_resort(hists); - } else { - hists__output_resort(hists); - } + hists__precompute(hists); + hists__output_resort(hists, NULL); hists__fprintf(hists, true, 0, 0, 0, stdout); } @@ -805,7 +861,7 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, char pfmt[20] = " "; if (!pair) - goto dummy_print; + goto no_print; switch (comparison_method) { case COMPUTE_DELTA: @@ -814,8 +870,6 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, else diff = compute_delta(he, pair); - if (fabs(diff) < 0.01) - goto dummy_print; scnprintf(pfmt, 20, "%%%+d.2f%%%%", dfmt->header_width - 1); return percent_color_snprintf(hpp->buf, hpp->size, pfmt, diff); @@ -847,6 +901,9 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, } dummy_print: return scnprintf(hpp->buf, hpp->size, "%*s", + dfmt->header_width, "N/A"); +no_print: + return scnprintf(hpp->buf, hpp->size, "%*s", dfmt->header_width, pfmt); } @@ -896,14 +953,15 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair, else diff = compute_delta(he, pair); - if (fabs(diff) >= 0.01) - scnprintf(buf, size, "%+4.2F%%", diff); + scnprintf(buf, size, "%+4.2F%%", diff); break; case PERF_HPP_DIFF__RATIO: /* No point for ratio number if we are dummy.. */ - if (he->dummy) + if (he->dummy) { + scnprintf(buf, size, "N/A"); break; + } if (pair->diff.computed) ratio = pair->diff.period_ratio; @@ -916,8 +974,10 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair, case PERF_HPP_DIFF__WEIGHTED_DIFF: /* No point for wdiff number if we are dummy.. */ - if (he->dummy) + if (he->dummy) { + scnprintf(buf, size, "N/A"); break; + } if (pair->diff.computed) wdiff = pair->diff.wdiff; @@ -1038,32 +1098,41 @@ static void data__hpp_register(struct data__file *d, int idx) fmt->header = hpp__header; fmt->width = hpp__width; fmt->entry = hpp__entry_global; + fmt->cmp = hist_entry__cmp_nop; + fmt->collapse = hist_entry__cmp_nop; /* TODO more colors */ switch (idx) { case PERF_HPP_DIFF__BASELINE: fmt->color = hpp__color_baseline; + fmt->sort = hist_entry__cmp_baseline; break; case PERF_HPP_DIFF__DELTA: fmt->color = hpp__color_delta; + fmt->sort = hist_entry__cmp_delta; break; case PERF_HPP_DIFF__RATIO: fmt->color = hpp__color_ratio; + fmt->sort = hist_entry__cmp_ratio; break; case PERF_HPP_DIFF__WEIGHTED_DIFF: fmt->color = hpp__color_wdiff; + fmt->sort = hist_entry__cmp_wdiff; break; default: + fmt->sort = hist_entry__cmp_nop; break; } init_header(d, dfmt); perf_hpp__column_register(fmt); + perf_hpp__register_sort_field(fmt); } -static void ui_init(void) +static int ui_init(void) { struct data__file *d; + struct perf_hpp_fmt *fmt; int i; data__for_each_file(i, d) { @@ -1093,6 +1162,46 @@ static void ui_init(void) data__hpp_register(d, i ? PERF_HPP_DIFF__PERIOD : PERF_HPP_DIFF__PERIOD_BASELINE); } + + if (!sort_compute) + return 0; + + /* + * Prepend an fmt to sort on columns at 'sort_compute' first. + * This fmt is added only to the sort list but not to the + * output fields list. + * + * Note that this column (data) can be compared twice - one + * for this 'sort_compute' fmt and another for the normal + * diff_hpp_fmt. But it shouldn't a problem as most entries + * will be sorted out by first try or baseline and comparing + * is not a costly operation. + */ + fmt = zalloc(sizeof(*fmt)); + if (fmt == NULL) { + pr_err("Memory allocation failed\n"); + return -1; + } + + fmt->cmp = hist_entry__cmp_nop; + fmt->collapse = hist_entry__cmp_nop; + + switch (compute) { + case COMPUTE_DELTA: + fmt->sort = hist_entry__cmp_delta_idx; + break; + case COMPUTE_RATIO: + fmt->sort = hist_entry__cmp_ratio_idx; + break; + case COMPUTE_WEIGHTED_DIFF: + fmt->sort = hist_entry__cmp_wdiff_idx; + break; + default: + BUG_ON(1); + } + + list_add(&fmt->sort_list, &perf_hpp__sort_list); + return 0; } static int data_init(int argc, const char **argv) @@ -1158,7 +1267,8 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) if (data_init(argc, argv) < 0) return -1; - ui_init(); + if (ui_init() < 0) + return -1; sort__mode = SORT_MODE__DIFF; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 3c0f3d4fb021..0894a817f67e 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1293,7 +1293,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, OPT_UINTEGER('d', "display", &kvm->display_time, "time in seconds between display updates"), OPT_STRING(0, "event", &kvm->report_event, "report event", - "event for reporting: vmexit, mmio, ioport"), + "event for reporting: " + "vmexit, mmio (x86 only), ioport (x86 only)"), OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu, "vcpu id to report"), OPT_STRING('k', "key", &kvm->sort_key, "sort-key", diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 011195e38f21..198f3c3aff95 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -19,7 +19,9 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { int i; - const struct option list_options[] = { + bool raw_dump = false; + struct option list_options[] = { + OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"), OPT_END() }; const char * const list_usage[] = { @@ -27,11 +29,18 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; + set_option_flag(list_options, 0, "raw-dump", PARSE_OPT_HIDDEN); + argc = parse_options(argc, argv, list_options, list_usage, PARSE_OPT_STOP_AT_NON_OPTION); setup_pager(); + if (raw_dump) { + print_events(NULL, true); + return 0; + } + if (argc == 0) { print_events(NULL, false); return 0; @@ -53,8 +62,6 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) print_hwcache_events(NULL, false); else if (strcmp(argv[i], "pmu") == 0) print_pmu_events(NULL, false); - else if (strcmp(argv[i], "--raw-dump") == 0) - print_events(NULL, true); else { char *sep = strchr(argv[i], ':'), *s; int sep_idx; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 24db6ffe2957..9b5663950a4d 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -7,44 +7,47 @@ #include "util/session.h" #include "util/data.h" -#define MEM_OPERATION_LOAD "load" -#define MEM_OPERATION_STORE "store" - -static const char *mem_operation = MEM_OPERATION_LOAD; +#define MEM_OPERATION_LOAD 0x1 +#define MEM_OPERATION_STORE 0x2 struct perf_mem { struct perf_tool tool; char const *input_name; bool hide_unresolved; bool dump_raw; + int operation; const char *cpu_list; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; -static int __cmd_record(int argc, const char **argv) +static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) { int rec_argc, i = 0, j; const char **rec_argv; - char event[64]; int ret; - rec_argc = argc + 4; + rec_argc = argc + 7; /* max number of arguments */ rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (!rec_argv) return -1; - rec_argv[i++] = strdup("record"); - if (!strcmp(mem_operation, MEM_OPERATION_LOAD)) - rec_argv[i++] = strdup("-W"); - rec_argv[i++] = strdup("-d"); - rec_argv[i++] = strdup("-e"); + rec_argv[i++] = "record"; - if (strcmp(mem_operation, MEM_OPERATION_LOAD)) - sprintf(event, "cpu/mem-stores/pp"); - else - sprintf(event, "cpu/mem-loads/pp"); + if (mem->operation & MEM_OPERATION_LOAD) + rec_argv[i++] = "-W"; + + rec_argv[i++] = "-d"; + + if (mem->operation & MEM_OPERATION_LOAD) { + rec_argv[i++] = "-e"; + rec_argv[i++] = "cpu/mem-loads/pp"; + } + + if (mem->operation & MEM_OPERATION_STORE) { + rec_argv[i++] = "-e"; + rec_argv[i++] = "cpu/mem-stores/pp"; + } - rec_argv[i++] = strdup(event); for (j = 1; j < argc; j++, i++) rec_argv[i] = argv[j]; @@ -162,17 +165,17 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) if (!rep_argv) return -1; - rep_argv[i++] = strdup("report"); - rep_argv[i++] = strdup("--mem-mode"); - rep_argv[i++] = strdup("-n"); /* display number of samples */ + rep_argv[i++] = "report"; + rep_argv[i++] = "--mem-mode"; + rep_argv[i++] = "-n"; /* display number of samples */ /* * there is no weight (cost) associated with stores, so don't print * the column */ - if (strcmp(mem_operation, MEM_OPERATION_LOAD)) - rep_argv[i++] = strdup("--sort=mem,sym,dso,symbol_daddr," - "dso_daddr,tlb,locked"); + if (!(mem->operation & MEM_OPERATION_LOAD)) + rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," + "dso_daddr,tlb,locked"; for (j = 1; j < argc; j++, i++) rep_argv[i] = argv[j]; @@ -182,6 +185,75 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) return ret; } +struct mem_mode { + const char *name; + int mode; +}; + +#define MEM_OPT(n, m) \ + { .name = n, .mode = (m) } + +#define MEM_END { .name = NULL } + +static const struct mem_mode mem_modes[]={ + MEM_OPT("load", MEM_OPERATION_LOAD), + MEM_OPT("store", MEM_OPERATION_STORE), + MEM_END +}; + +static int +parse_mem_ops(const struct option *opt, const char *str, int unset) +{ + int *mode = (int *)opt->value; + const struct mem_mode *m; + char *s, *os = NULL, *p; + int ret = -1; + + if (unset) + return 0; + + /* str may be NULL in case no arg is passed to -t */ + if (str) { + /* because str is read-only */ + s = os = strdup(str); + if (!s) + return -1; + + /* reset mode */ + *mode = 0; + + for (;;) { + p = strchr(s, ','); + if (p) + *p = '\0'; + + for (m = mem_modes; m->name; m++) { + if (!strcasecmp(s, m->name)) + break; + } + if (!m->name) { + fprintf(stderr, "unknown sampling op %s," + " check man page\n", s); + goto error; + } + + *mode |= m->mode; + + if (!p) + break; + + s = p + 1; + } + } + ret = 0; + + if (*mode == 0) + *mode = MEM_OPERATION_LOAD; +error: + free(os); + return ret; +} + int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) { struct stat st; @@ -197,10 +269,15 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) .ordered_events = true, }, .input_name = "perf.data", + /* + * default to both load an store sampling + */ + .operation = MEM_OPERATION_LOAD | MEM_OPERATION_STORE, }; const struct option mem_options[] = { - OPT_STRING('t', "type", &mem_operation, - "type", "memory operations(load/store)"), + OPT_CALLBACK('t', "type", &mem.operation, + "type", "memory operations(load,store) Default load,store", + parse_mem_ops), OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw, "dump raw samples in ASCII"), OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved, @@ -225,7 +302,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands, mem_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation)) + if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation)) usage_with_options(mem_usage, mem_options); if (!mem.input_name || !strlen(mem.input_name)) { @@ -236,7 +313,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) } if (!strncmp(argv[0], "rec", 3)) - return __cmd_record(argc, argv); + return __cmd_record(argc, argv, &mem); else if (!strncmp(argv[0], "rep", 3)) return report_events(argc, argv, &mem); else diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 140a6cd88351..2f91094e228b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -86,17 +86,6 @@ static int report__config(const char *var, const char *value, void *cb) return perf_default_config(var, value, cb); } -static void report__inc_stats(struct report *rep, struct hist_entry *he) -{ - /* - * The @he is either of a newly created one or an existing one - * merging current sample. We only want to count a new one so - * checking ->nr_events being 1. - */ - if (he->stat.nr_events == 1) - rep->nr_entries++; -} - static int hist_iter__report_callback(struct hist_entry_iter *iter, struct addr_location *al, bool single, void *arg) @@ -108,8 +97,6 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, struct mem_info *mi; struct branch_info *bi; - report__inc_stats(rep, he); - if (!ui__has_annotation()) return 0; @@ -226,8 +213,9 @@ static int report__setup_sample_type(struct report *rep) return -EINVAL; } if (symbol_conf.use_callchain) { - ui__error("Selected -g but no callchain data. Did " - "you call 'perf record' without -g?\n"); + ui__error("Selected -g or --branch-history but no " + "callchain data. Did\n" + "you call 'perf record' without -g?\n"); return -1; } } else if (!rep->dont_use_callchains && @@ -456,6 +444,19 @@ static void report__collapse_hists(struct report *rep) ui_progress__finish(); } +static void report__output_resort(struct report *rep) +{ + struct ui_progress prog; + struct perf_evsel *pos; + + ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); + + evlist__for_each(rep->session->evlist, pos) + hists__output_resort(evsel__hists(pos), &prog); + + ui_progress__finish(); +} + static int __cmd_report(struct report *rep) { int ret; @@ -485,6 +486,9 @@ static int __cmd_report(struct report *rep) report__warn_kptr_restrict(rep); + evlist__for_each(session->evlist, pos) + rep->nr_entries += evsel__hists(pos)->nr_entries; + if (use_browser == 0) { if (verbose > 3) perf_session__fprintf(session, stdout); @@ -504,13 +508,20 @@ static int __cmd_report(struct report *rep) if (session_done()) return 0; + /* + * recalculate number of entries after collapsing since it + * might be changed during the collapse phase. + */ + rep->nr_entries = 0; + evlist__for_each(session->evlist, pos) + rep->nr_entries += evsel__hists(pos)->nr_entries; + if (rep->nr_entries == 0) { ui__error("The %s file has no samples!\n", file->path); return 0; } - evlist__for_each(session->evlist, pos) - hists__output_resort(evsel__hists(pos)); + report__output_resort(rep); return report__browse_hists(rep); } @@ -575,6 +586,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) struct stat st; bool has_br_stack = false; int branch_mode = -1; + bool branch_call_mode = false; char callchain_default_opt[] = "fractal,0.5,callee"; const char * const report_usage[] = { "perf report [<options>]", @@ -637,8 +649,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, "Only display entries with parent-match"), - OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", - "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " + OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order[,branch]", + "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), add branches. " "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, "Accumulate callchains of children and show total overhead as well"), @@ -684,7 +696,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN(0, "group", &symbol_conf.event_group, "Show event group information together"), OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "", - "use branch records for histogram filling", parse_branch_mode), + "use branch records for per branch histogram filling", + parse_branch_mode), + OPT_BOOLEAN(0, "branch-history", &branch_call_mode, + "add last branch records to call history"), OPT_STRING(0, "objdump", &objdump_path, "path", "objdump binary to use for disassembly and annotations"), OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, @@ -745,10 +760,24 @@ repeat: has_br_stack = perf_header__has_feat(&session->header, HEADER_BRANCH_STACK); - if ((branch_mode == -1 && has_br_stack) || branch_mode == 1) { + /* + * Branch mode is a tristate: + * -1 means default, so decide based on the file having branch data. + * 0/1 means the user chose a mode. + */ + if (((branch_mode == -1 && has_br_stack) || branch_mode == 1) && + branch_call_mode == -1) { sort__mode = SORT_MODE__BRANCH; symbol_conf.cumulate_callchain = false; } + if (branch_call_mode) { + callchain_param.key = CCKEY_ADDRESS; + callchain_param.branch_callstack = 1; + symbol_conf.use_callchain = true; + callchain_register_param(&callchain_param); + if (sort_order == NULL) + sort_order = "srcline,symbol,dso"; + } if (report.mem_mode) { if (sort__mode == SORT_MODE__BRANCH) { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 055ce9232c9e..e598e4e98170 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -388,20 +388,102 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count) update_stats(&runtime_itlb_cache_stats[0], count[0]); } +static void zero_per_pkg(struct perf_evsel *counter) +{ + if (counter->per_pkg_mask) + memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); +} + +static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) +{ + unsigned long *mask = counter->per_pkg_mask; + struct cpu_map *cpus = perf_evsel__cpus(counter); + int s; + + *skip = false; + + if (!counter->per_pkg) + return 0; + + if (cpu_map__empty(cpus)) + return 0; + + if (!mask) { + mask = zalloc(MAX_NR_CPUS); + if (!mask) + return -ENOMEM; + + counter->per_pkg_mask = mask; + } + + s = cpu_map__get_socket(cpus, cpu); + if (s < 0) + return -1; + + *skip = test_and_set_bit(s, mask) == 1; + return 0; +} + +static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, + struct perf_counts_values *count) +{ + struct perf_counts_values *aggr = &evsel->counts->aggr; + static struct perf_counts_values zero; + bool skip = false; + + if (check_per_pkg(evsel, cpu, &skip)) { + pr_err("failed to read per-pkg counter\n"); + return -1; + } + + if (skip) + count = &zero; + + switch (aggr_mode) { + case AGGR_CORE: + case AGGR_SOCKET: + case AGGR_NONE: + if (!evsel->snapshot) + perf_evsel__compute_deltas(evsel, cpu, count); + perf_counts_values__scale(count, scale, NULL); + evsel->counts->cpu[cpu] = *count; + update_shadow_stats(evsel, count->values); + break; + case AGGR_GLOBAL: + aggr->val += count->val; + if (scale) { + aggr->ena += count->ena; + aggr->run += count->run; + } + default: + break; + } + + return 0; +} + +static int read_counter(struct perf_evsel *counter); + /* * Read out the results of a single counter: * aggregate counts across CPUs in system-wide mode */ static int read_counter_aggr(struct perf_evsel *counter) { + struct perf_counts_values *aggr = &counter->counts->aggr; struct perf_stat *ps = counter->priv; u64 *count = counter->counts->aggr.values; int i; - if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter), - thread_map__nr(evsel_list->threads), scale) < 0) + aggr->val = aggr->ena = aggr->run = 0; + + if (read_counter(counter)) return -1; + if (!counter->snapshot) + perf_evsel__compute_deltas(counter, -1, aggr); + perf_counts_values__scale(aggr, scale, &counter->counts->scaled); + for (i = 0; i < 3; i++) update_stats(&ps->res_stats[i], count[i]); @@ -424,16 +506,21 @@ static int read_counter_aggr(struct perf_evsel *counter) */ static int read_counter(struct perf_evsel *counter) { - u64 *count; - int cpu; + int nthreads = thread_map__nr(evsel_list->threads); + int ncpus = perf_evsel__nr_cpus(counter); + int cpu, thread; - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0) - return -1; + if (counter->system_wide) + nthreads = 1; - count = counter->counts->cpu[cpu].values; + if (counter->per_pkg) + zero_per_pkg(counter); - update_shadow_stats(counter, count); + for (thread = 0; thread < nthreads; thread++) { + for (cpu = 0; cpu < ncpus; cpu++) { + if (perf_evsel__read_cb(counter, cpu, thread, read_cb)) + return -1; + } } return 0; @@ -1643,7 +1730,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) "detailed run - start a lot of events"), OPT_BOOLEAN('S', "sync", &sync_run, "call sync() before starting a run"), - OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, + OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, "print large numbers with thousands\' separators", stat__set_big_num), OPT_STRING('C', "cpu", &target.cpu_list, "cpu", diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0aa7747ff139..c4c7eac69de4 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -66,7 +66,6 @@ #include <sys/utsname.h> #include <sys/mman.h> -#include <linux/unistd.h> #include <linux/types.h> static volatile int done; @@ -166,7 +165,7 @@ static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip) err ? "[unknown]" : uts.release, perf_version_string); if (use_browser <= 0) sleep(5); - + map->erange_warned = true; } @@ -285,7 +284,7 @@ static void perf_top__print_sym_table(struct perf_top *top) } hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); hists__output_recalc_col_len(hists, top->print_entries - printed); putchar('\n'); @@ -554,7 +553,7 @@ static void perf_top__sort_new_samples(void *arg) } hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); } static void *display_thread_tui(void *arg) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 83a4835c8118..7e935f1083ec 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -929,66 +929,66 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, { .name = "close", .errmsg = true, - .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, { .name = "connect", .errmsg = true, }, { .name = "dup", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "dup2", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "dup3", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, { .name = "eventfd2", .errmsg = true, .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, { .name = "faccessat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "fadvise64", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fallocate", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchdir", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchmod", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchmodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "fchown", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchownat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "fcntl", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ [1] = SCA_STRARRAY, /* cmd */ }, .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, { .name = "fdatasync", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "flock", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ [1] = SCA_FLOCK, /* cmd */ }, }, { .name = "fsetxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fstat", .errmsg = true, .alias = "newfstat", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fstatat", .errmsg = true, .alias = "newfstatat", - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "fstatfs", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fsync", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "ftruncate", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "futex", .errmsg = true, .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, { .name = "futimesat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "getdents", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "getdents64", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, { .name = "ioctl", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ + .arg_scnprintf = { [0] = SCA_FD, /* fd */ #if defined(__i386__) || defined(__x86_64__) /* * FIXME: Make this available to all arches. @@ -1002,7 +1002,7 @@ static struct syscall_fmt { { .name = "kill", .errmsg = true, .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, { .name = "linkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "lseek", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ [2] = SCA_STRARRAY, /* whence */ }, @@ -1012,9 +1012,9 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_HEX, /* start */ [2] = SCA_MADV_BHV, /* behavior */ }, }, { .name = "mkdirat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "mknodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "mlock", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "mlockall", .errmsg = true, @@ -1036,9 +1036,9 @@ static struct syscall_fmt { { .name = "munmap", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "name_to_handle_at", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "newfstatat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "open", .errmsg = true, .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "open_by_handle_at", .errmsg = true, @@ -1052,20 +1052,20 @@ static struct syscall_fmt { { .name = "poll", .errmsg = true, .timeout = true, }, { .name = "ppoll", .errmsg = true, .timeout = true, }, { .name = "pread", .errmsg = true, .alias = "pread64", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "preadv", .errmsg = true, .alias = "pread", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), }, { .name = "pwrite", .errmsg = true, .alias = "pwrite64", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "pwritev", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "read", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "readlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "readv", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "recvfrom", .errmsg = true, .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "recvmmsg", .errmsg = true, @@ -1073,7 +1073,7 @@ static struct syscall_fmt { { .name = "recvmsg", .errmsg = true, .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "renameat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "rt_sigaction", .errmsg = true, .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, @@ -1091,7 +1091,7 @@ static struct syscall_fmt { { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, { .name = "shutdown", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "socket", .errmsg = true, .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ [1] = SCA_SK_TYPE, /* type */ }, @@ -1102,7 +1102,7 @@ static struct syscall_fmt { .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, { .name = "stat", .errmsg = true, .alias = "newstat", }, { .name = "symlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "tgkill", .errmsg = true, .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, { .name = "tkill", .errmsg = true, @@ -1113,9 +1113,9 @@ static struct syscall_fmt { { .name = "utimensat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, { .name = "write", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "writev", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, }; static int syscall_fmt__cmp(const void *name, const void *fmtp) @@ -1191,7 +1191,7 @@ static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) if (thread__priv(thread) == NULL) thread__set_priv(thread, thread_trace__new()); - + if (thread__priv(thread) == NULL) goto fail; @@ -2045,7 +2045,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) unsigned long before; const bool forks = argc > 0; bool draining = false; - char sbuf[STRERR_BUFSIZE]; trace->live = true; @@ -2057,23 +2056,24 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (trace->trace_syscalls && perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit)) - goto out_error_tp; + goto out_error_raw_syscalls; if (trace->trace_syscalls) perf_evlist__add_vfs_getname(evlist); if ((trace->trace_pgfaults & TRACE_PFMAJ) && - perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) - goto out_error_tp; + perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) { + goto out_error_mem; + } if ((trace->trace_pgfaults & TRACE_PFMIN) && perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN)) - goto out_error_tp; + goto out_error_mem; if (trace->sched && - perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", - trace__sched_stat_runtime)) - goto out_error_tp; + perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", + trace__sched_stat_runtime)) + goto out_error_sched_stat_runtime; err = perf_evlist__create_maps(evlist, &trace->opts.target); if (err < 0) { @@ -2106,11 +2106,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv) goto out_error_open; err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); - if (err < 0) { - fprintf(trace->output, "Couldn't mmap the events: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); - goto out_delete_evlist; - } + if (err < 0) + goto out_error_mmap; perf_evlist__enable(evlist); @@ -2206,8 +2203,16 @@ out: { char errbuf[BUFSIZ]; -out_error_tp: - perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf)); +out_error_sched_stat_runtime: + debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime"); + goto out_error; + +out_error_raw_syscalls: + debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)"); + goto out_error; + +out_error_mmap: + perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf)); goto out_error; out_error_open: @@ -2217,6 +2222,9 @@ out_error: fprintf(trace->output, "%s\n", errbuf); goto out_delete_evlist; } +out_error_mem: + fprintf(trace->output, "Not enough memory to run!\n"); + goto out_delete_evlist; } static int trace__replay(struct trace *trace) @@ -2485,7 +2493,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) .user_freq = UINT_MAX, .user_interval = ULLONG_MAX, .no_buffering = true, - .mmap_pages = 1024, + .mmap_pages = UINT_MAX, }, .output = stdout, .show_comm = true, diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 5d4b039fe1ed..648e31ff4021 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -20,7 +20,7 @@ NO_PERF_REGS := 1 # Additional ARCH settings for x86 ifeq ($(ARCH),x86) - ifeq (${IS_X86_64}, 1) + ifeq (${IS_64_BIT}, 1) CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S LIBUNWIND_LIBS = -lunwind -lunwind-x86_64 diff --git a/tools/perf/config/Makefile.arch b/tools/perf/config/Makefile.arch index 851cd0172a76..ff95a68741d1 100644 --- a/tools/perf/config/Makefile.arch +++ b/tools/perf/config/Makefile.arch @@ -1,7 +1,7 @@ uname_M := $(shell uname -m 2>/dev/null || echo not) -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ +RAW_ARCH := $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ -e s/arm.*/arm/ -e s/sa110/arm/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ @@ -9,23 +9,23 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ -e s/tile.*/tile/ ) # Additional ARCH settings for x86 -ifeq ($(ARCH),i386) - override ARCH := x86 +ifeq ($(RAW_ARCH),i386) + ARCH ?= x86 endif -ifeq ($(ARCH),x86_64) - override ARCH := x86 - IS_X86_64 := 0 - ifeq (, $(findstring m32,$(CFLAGS))) - IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -x c - | tail -n 1) - RAW_ARCH := x86_64 +ifeq ($(RAW_ARCH),x86_64) + ARCH ?= x86 + + ifneq (, $(findstring m32,$(CFLAGS))) + RAW_ARCH := x86_32 endif endif -ifeq (${IS_X86_64}, 1) +ARCH ?= $(RAW_ARCH) + +LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) +ifeq ($(LP64), 1) IS_64_BIT := 1 -else ifeq ($(ARCH),x86) - IS_64_BIT := 0 else - IS_64_BIT := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) + IS_64_BIT := 0 endif diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h index a3b13d7dc1d4..6ef68165c9db 100644 --- a/tools/perf/perf-sys.h +++ b/tools/perf/perf-sys.h @@ -6,7 +6,6 @@ #include <sys/syscall.h> #include <linux/types.h> #include <linux/perf_event.h> -#include <asm/unistd.h> #if defined(__i386__) #define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 452a8474d29d..3700a7faca6c 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -200,6 +200,16 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) *envchanged = 1; (*argv)++; (*argc)--; + } else if (!strcmp(cmd, "--buildid-dir")) { + if (*argc < 2) { + fprintf(stderr, "No directory given for --buildid-dir.\n"); + usage(perf_usage_string); + } + set_buildid_dir((*argv)[1]); + if (envchanged) + *envchanged = 1; + (*argv)++; + (*argc)--; } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) { perf_debugfs_set_path(cmd + strlen(CMD_DEBUGFS_DIR)); fprintf(stderr, "dir: %s\n", debugfs_mountpoint); @@ -499,7 +509,7 @@ int main(int argc, const char **argv) } if (!prefixcmp(cmd, "trace")) { #ifdef HAVE_LIBAUDIT_SUPPORT - set_buildid_dir(); + set_buildid_dir(NULL); setup_path(); argv[0] = "trace"; return cmd_trace(argc, argv, NULL); @@ -514,7 +524,7 @@ int main(int argc, const char **argv) argc--; handle_options(&argv, &argc, NULL); commit_pager_choice(); - set_buildid_dir(); + set_buildid_dir(NULL); if (argc > 0) { if (!prefixcmp(argv[0], "--")) diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c index 790ceba6ad3f..28431d1bbcf5 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c @@ -5,7 +5,10 @@ * ANY CHANGES MADE HERE WILL BE LOST! * */ - +#include <stdbool.h> +#ifndef HAS_BOOL +# define HAS_BOOL 1 +#endif #line 1 "Context.xs" /* * Context.xs. XS interfaces for perf script. diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index c9b4b6269b51..1091bd47adfd 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -104,7 +104,6 @@ class Event(dict): continue if not self.compare_data(self[t], other[t]): log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) - # Test file description needs to have following sections: # [config] diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index f710b92ccff6..d3095dafed36 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0|1 -size=96 +size=104 config=0 sample_period=4000 sample_type=263 diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat index dc3ada2470c0..872ed7e24c7c 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/attr/base-stat @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0 -size=96 +size=104 config=0 sample_period=0 sample_type=0 diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index ab28cca2cb97..0bf06bec68c7 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -11,6 +11,9 @@ #include "thread.h" #include "callchain.h" +/* For bsearch. We try to unwind functions in shared object. */ +#include <stdlib.h> + static int mmap_handler(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, @@ -28,7 +31,7 @@ static int init_live_machine(struct machine *machine) mmap_handler, machine, true); } -#define MAX_STACK 6 +#define MAX_STACK 8 static int unwind_entry(struct unwind_entry *entry, void *arg) { @@ -37,6 +40,8 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) static const char *funcs[MAX_STACK] = { "test__arch_unwind_sample", "unwind_thread", + "compare", + "bsearch", "krava_3", "krava_2", "krava_1", @@ -88,10 +93,37 @@ static int unwind_thread(struct thread *thread) return err; } +static int global_unwind_retval = -INT_MAX; + +__attribute__ ((noinline)) +static int compare(void *p1, void *p2) +{ + /* Any possible value should be 'thread' */ + struct thread *thread = *(struct thread **)p1; + + if (global_unwind_retval == -INT_MAX) + global_unwind_retval = unwind_thread(thread); + + return p1 - p2; +} + __attribute__ ((noinline)) static int krava_3(struct thread *thread) { - return unwind_thread(thread); + struct thread *array[2] = {thread, thread}; + void *fp = &bsearch; + /* + * make _bsearch a volatile function pointer to + * prevent potential optimization, which may expand + * bsearch and call compare directly from this function, + * instead of libc shared object. + */ + void *(*volatile _bsearch)(void *, void *, size_t, + size_t, int (*)(void *, void *)); + + _bsearch = fp; + _bsearch(array, &thread, 2, sizeof(struct thread **), compare); + return global_unwind_retval; } __attribute__ ((noinline)) diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 614d5c4978ab..18619966454c 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -140,7 +140,7 @@ static void del_hist_entries(struct hists *hists) he = rb_entry(node, struct hist_entry, rb_node); rb_erase(node, root_out); rb_erase(&he->rb_node_in, root_in); - hist_entry__free(he); + hist_entry__delete(he); } } @@ -187,7 +187,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec * function since TEST_ASSERT_VAL() returns in case of failure. */ hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("use callchain: %d, cumulate callchain: %d\n", @@ -454,12 +454,12 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) * 30.00% 10.00% perf perf [.] cmd_record * 20.00% 0.00% bash libc [.] malloc * 10.00% 10.00% bash [kernel] [k] page_fault - * 10.00% 10.00% perf [kernel] [k] schedule - * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open + * 10.00% 10.00% bash bash [.] xmalloc * 10.00% 10.00% perf [kernel] [k] page_fault - * 10.00% 10.00% perf libc [.] free * 10.00% 10.00% perf libc [.] malloc - * 10.00% 10.00% bash bash [.] xmalloc + * 10.00% 10.00% perf [kernel] [k] schedule + * 10.00% 10.00% perf libc [.] free + * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open */ struct result expected[] = { { 7000, 2000, "perf", "perf", "main" }, @@ -468,12 +468,12 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) { 3000, 1000, "perf", "perf", "cmd_record" }, { 2000, 0, "bash", "libc", "malloc" }, { 1000, 1000, "bash", "[kernel]", "page_fault" }, - { 1000, 1000, "perf", "[kernel]", "schedule" }, - { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, + { 1000, 1000, "bash", "bash", "xmalloc" }, { 1000, 1000, "perf", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "[kernel]", "schedule" }, { 1000, 1000, "perf", "libc", "free" }, { 1000, 1000, "perf", "libc", "malloc" }, - { 1000, 1000, "bash", "bash", "xmalloc" }, + { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, }; symbol_conf.use_callchain = false; @@ -537,10 +537,13 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) * malloc * main * - * 10.00% 10.00% perf [kernel] [k] schedule + * 10.00% 10.00% bash bash [.] xmalloc * | - * --- schedule - * run_command + * --- xmalloc + * malloc + * xmalloc <--- NOTE: there's a cycle + * malloc + * xmalloc * main * * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open @@ -556,6 +559,12 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) * run_command * main * + * 10.00% 10.00% perf [kernel] [k] schedule + * | + * --- schedule + * run_command + * main + * * 10.00% 10.00% perf libc [.] free * | * --- free @@ -570,15 +579,6 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) * run_command * main * - * 10.00% 10.00% bash bash [.] xmalloc - * | - * --- xmalloc - * malloc - * xmalloc <--- NOTE: there's a cycle - * malloc - * xmalloc - * main - * */ struct result expected[] = { { 7000, 2000, "perf", "perf", "main" }, @@ -587,12 +587,12 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) { 3000, 1000, "perf", "perf", "cmd_record" }, { 2000, 0, "bash", "libc", "malloc" }, { 1000, 1000, "bash", "[kernel]", "page_fault" }, - { 1000, 1000, "perf", "[kernel]", "schedule" }, + { 1000, 1000, "bash", "bash", "xmalloc" }, { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, { 1000, 1000, "perf", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "[kernel]", "schedule" }, { 1000, 1000, "perf", "libc", "free" }, { 1000, 1000, "perf", "libc", "malloc" }, - { 1000, 1000, "bash", "bash", "xmalloc" }, }; struct callchain_result expected_callchain[] = { { @@ -622,9 +622,12 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) { "bash", "main" }, }, }, { - 3, { { "[kernel]", "schedule" }, - { "perf", "run_command" }, - { "perf", "main" }, }, + 6, { { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "bash", "main" }, }, }, { 3, { { "[kernel]", "sys_perf_event_open" }, @@ -638,6 +641,11 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) { "perf", "main" }, }, }, { + 3, { { "[kernel]", "schedule" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { 4, { { "libc", "free" }, { "perf", "cmd_record" }, { "perf", "run_command" }, @@ -649,14 +657,6 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) { "perf", "run_command" }, { "perf", "main" }, }, }, - { - 6, { { "bash", "xmalloc" }, - { "libc", "malloc" }, - { "bash", "xmalloc" }, - { "libc", "malloc" }, - { "bash", "xmalloc" }, - { "bash", "main" }, }, - }, }; symbol_conf.use_callchain = true; diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 74f257a81265..59e53db7914c 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -138,7 +138,7 @@ int test__hists_filter(void) struct hists *hists = evsel__hists(evsel); hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("Normal histogram\n"); diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index a748f2be1222..b52c9faea224 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -106,7 +106,7 @@ static void del_hist_entries(struct hists *hists) he = rb_entry(node, struct hist_entry, rb_node); rb_erase(node, root_out); rb_erase(&he->rb_node_in, root_in); - hist_entry__free(he); + hist_entry__delete(he); } } @@ -152,7 +152,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -252,7 +252,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -306,7 +306,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -384,7 +384,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -487,7 +487,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 69a71ff84e01..75709d2b17b4 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -222,7 +222,6 @@ tarpkg: @cmd="$(PERF)/tests/perf-targz-src-pkg $(PERF)"; \ echo "- $@: $$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1 - all: $(run) $(run_O) tarpkg @echo OK diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 4169f460efa1..1cdab0ce00e2 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1529,7 +1529,7 @@ static int test_event(struct evlist_test *e) } else { ret = e->check(evlist); } - + perf_evlist__delete(evlist); return ret; diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 4908c648a597..30c02181e78b 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -110,7 +110,7 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_STACK_USER) { COMP(user_stack.size); - if (memcmp(s1->user_stack.data, s1->user_stack.data, + if (memcmp(s1->user_stack.data, s2->user_stack.data, s1->user_stack.size)) { pr_debug("Samples differ at 'user_stack'\n"); return false; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 1e0a2fd80115..9d32e3c0cfee 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -517,7 +517,7 @@ static bool annotate_browser__jump(struct annotate_browser *browser) } annotate_browser__set_top(browser, dl, idx); - + return true; } @@ -867,7 +867,6 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser ++browser->nr_jumps; } - } static inline int width_jumps(int n) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 12c17c5a3d68..788506eef567 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -227,10 +227,14 @@ static void callchain_node__init_have_children_rb_tree(struct callchain_node *no } } -static void callchain_node__init_have_children(struct callchain_node *node) +static void callchain_node__init_have_children(struct callchain_node *node, + bool has_sibling) { struct callchain_list *chain; + chain = list_entry(node->val.next, struct callchain_list, list); + chain->ms.has_children = has_sibling; + if (!list_empty(&node->val)) { chain = list_entry(node->val.prev, struct callchain_list, list); chain->ms.has_children = !RB_EMPTY_ROOT(&node->rb_root); @@ -241,11 +245,12 @@ static void callchain_node__init_have_children(struct callchain_node *node) static void callchain__init_have_children(struct rb_root *root) { - struct rb_node *nd; + struct rb_node *nd = rb_first(root); + bool has_sibling = nd && rb_next(nd); for (nd = rb_first(root); nd; nd = rb_next(nd)) { struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node); - callchain_node__init_have_children(node); + callchain_node__init_have_children(node, has_sibling); } } @@ -542,8 +547,11 @@ static int hist_browser__show_callchain(struct hist_browser *browser, struct rb_node *node; int first_row = row, offset = level * LEVEL_OFFSET_STEP; u64 new_total; + bool need_percent; node = rb_first(root); + need_percent = node && rb_next(node); + while (node) { struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); struct rb_node *next = rb_next(node); @@ -560,7 +568,7 @@ static int hist_browser__show_callchain(struct hist_browser *browser, if (first) first = false; - else if (level > 1) + else if (need_percent) extra_offset = LEVEL_OFFSET_STEP; folded_sign = callchain_list__folded(chain); @@ -573,7 +581,7 @@ static int hist_browser__show_callchain(struct hist_browser *browser, str = callchain_list__sym_name(chain, bf, sizeof(bf), browser->show_dso); - if (was_first && level > 1) { + if (was_first && need_percent) { double percent = cumul * 100.0 / total; if (asprintf(&alloc_str, "%2.2f%% %s", percent, str) < 0) @@ -790,6 +798,13 @@ static int hist_browser__show_entry(struct hist_browser *browser, .is_current_entry = current_entry, }; + if (callchain_param.mode == CHAIN_GRAPH_REL) { + if (symbol_conf.cumulate_callchain) + total = entry->stat_acc->period; + else + total = entry->stat.period; + } + printed += hist_browser__show_callchain(browser, &entry->sorted_chain, 1, row, total, hist_browser__show_callchain_entry, &arg, @@ -1237,7 +1252,7 @@ static int hists__browser_title(struct hists *hists, nr_samples = convert_unit(nr_samples, &unit); printed = scnprintf(bf, size, - "Samples: %lu%c of event '%s', Event count (approx.): %lu", + "Samples: %lu%c of event '%s', Event count (approx.): %" PRIu64, nr_samples, unit, ev_name, nr_events); diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 2af18376b077..25d608394d74 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -162,8 +162,8 @@ static int __hpp__sort(struct hist_entry *a, struct hist_entry *b, return ret; nr_members = evsel->nr_members; - fields_a = calloc(sizeof(*fields_a), nr_members); - fields_b = calloc(sizeof(*fields_b), nr_members); + fields_a = calloc(nr_members, sizeof(*fields_a)); + fields_b = calloc(nr_members, sizeof(*fields_b)); if (!fields_a || !fields_b) goto out; @@ -204,6 +204,9 @@ static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b, if (ret) return ret; + if (a->thread != b->thread || !symbol_conf.use_callchain) + return 0; + ret = b->callchain->max_depth - a->callchain->max_depth; } return ret; @@ -282,7 +285,8 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \ } #define __HPP_SORT_FN(_type, _field) \ -static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ +static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct hist_entry *a, struct hist_entry *b) \ { \ return __hpp__sort(a, b, he_get_##_field); \ } @@ -309,7 +313,8 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \ } #define __HPP_SORT_ACC_FN(_type, _field) \ -static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ +static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct hist_entry *a, struct hist_entry *b) \ { \ return __hpp__sort_acc(a, b, he_get_acc_##_field); \ } @@ -328,7 +333,8 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \ } #define __HPP_SORT_RAW_FN(_type, _field) \ -static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ +static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct hist_entry *a, struct hist_entry *b) \ { \ return __hpp__sort(a, b, he_get_raw_##_field); \ } @@ -358,7 +364,8 @@ HPP_PERCENT_ACC_FNS(overhead_acc, period) HPP_RAW_FNS(samples, nr_events) HPP_RAW_FNS(period, period) -static int64_t hpp__nop_cmp(struct hist_entry *a __maybe_unused, +static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *a __maybe_unused, struct hist_entry *b __maybe_unused) { return 0; diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h index f34f89eb607c..717d39d3052b 100644 --- a/tools/perf/ui/progress.h +++ b/tools/perf/ui/progress.h @@ -4,12 +4,12 @@ #include <linux/types.h> void ui_progress__finish(void); - + struct ui_progress { const char *title; u64 curr, next, step, total; }; - + void ui_progress__init(struct ui_progress *p, u64 total, const char *title); void ui_progress__update(struct ui_progress *p, u64 adv); diff --git a/tools/perf/ui/tui/helpline.c b/tools/perf/ui/tui/helpline.c index 1c8b9afd5d6e..88f5143a5981 100644 --- a/tools/perf/ui/tui/helpline.c +++ b/tools/perf/ui/tui/helpline.c @@ -9,6 +9,7 @@ #include "../libslang.h" char ui_helpline__last_msg[1024]; +bool tui_helpline__set; static void tui_helpline__pop(void) { @@ -35,6 +36,8 @@ static int tui_helpline__show(const char *format, va_list ap) sizeof(ui_helpline__last_msg) - backlog, format, ap); backlog += ret; + tui_helpline__set = true; + if (ui_helpline__last_msg[backlog - 1] == '\n') { ui_helpline__puts(ui_helpline__last_msg); SLsmg_refresh(); diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 2f612562978c..b77e1d771363 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -1,5 +1,8 @@ #include <signal.h> #include <stdbool.h> +#ifdef HAVE_BACKTRACE_SUPPORT +#include <execinfo.h> +#endif #include "../../util/cache.h" #include "../../util/debug.h" @@ -14,6 +17,7 @@ static volatile int ui__need_resize; extern struct perf_error_ops perf_tui_eops; +extern bool tui_helpline__set; extern void hist_browser__init_hpp(void); @@ -88,6 +92,25 @@ int ui__getch(int delay_secs) return SLkp_getkey(); } +#ifdef HAVE_BACKTRACE_SUPPORT +static void ui__signal_backtrace(int sig) +{ + void *stackdump[32]; + size_t size; + + ui__exit(false); + psignal(sig, "perf"); + + printf("-------- backtrace --------\n"); + size = backtrace(stackdump, ARRAY_SIZE(stackdump)); + backtrace_symbols_fd(stackdump, size, STDOUT_FILENO); + + exit(0); +} +#else +# define ui__signal_backtrace ui__signal +#endif + static void ui__signal(int sig) { ui__exit(false); @@ -122,8 +145,8 @@ int ui__init(void) ui_browser__init(); tui_progress__init(); - signal(SIGSEGV, ui__signal); - signal(SIGFPE, ui__signal); + signal(SIGSEGV, ui__signal_backtrace); + signal(SIGFPE, ui__signal_backtrace); signal(SIGINT, ui__signal); signal(SIGQUIT, ui__signal); signal(SIGTERM, ui__signal); @@ -137,7 +160,7 @@ out: void ui__exit(bool wait_for_ok) { - if (wait_for_ok) + if (wait_for_ok && tui_helpline__set) ui__question_window("Fatal Error", ui_helpline__last_msg, "Press any key...", 0); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e5670f1af737..61bf9128e1f2 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -177,14 +177,17 @@ static int lock__parse(struct ins_operands *ops) goto out_free_ops; ops->locked.ins = ins__find(name); + free(name); + if (ops->locked.ins == NULL) goto out_free_ops; if (!ops->locked.ins->ops) return 0; - if (ops->locked.ins->ops->parse) - ops->locked.ins->ops->parse(ops->locked.ops); + if (ops->locked.ins->ops->parse && + ops->locked.ins->ops->parse(ops->locked.ops) < 0) + goto out_free_ops; return 0; @@ -208,6 +211,13 @@ static int lock__scnprintf(struct ins *ins, char *bf, size_t size, static void lock__delete(struct ins_operands *ops) { + struct ins *ins = ops->locked.ins; + + if (ins && ins->ops->free) + ins->ops->free(ops->locked.ops); + else + ins__delete(ops->locked.ops); + zfree(&ops->locked.ops); zfree(&ops->target.raw); zfree(&ops->target.name); @@ -229,7 +239,7 @@ static int mov__parse(struct ins_operands *ops) *s = '\0'; ops->source.raw = strdup(ops->raw); *s = ','; - + if (ops->source.raw == NULL) return -1; @@ -531,8 +541,8 @@ static void disasm_line__init_ins(struct disasm_line *dl) if (!dl->ins->ops) return; - if (dl->ins->ops->parse) - dl->ins->ops->parse(&dl->ops); + if (dl->ins->ops->parse && dl->ins->ops->parse(&dl->ops) < 0) + dl->ins = NULL; } static int disasm_line__parse(char *line, char **namep, char **rawp) @@ -1192,7 +1202,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, goto next; offset = start + i; - src_line->path = get_srcline(map->dso, offset); + src_line->path = get_srcline(map->dso, offset, NULL, false); insert_source_line(&tmp_root, src_line); next: diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 0784a9420528..cadbdc90a5cb 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -116,11 +116,6 @@ struct annotation { struct annotated_source *src; }; -struct sannotation { - struct annotation annotation; - struct symbol symbol; -}; - static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx) { return (((void *)¬es->src->histograms) + @@ -129,8 +124,7 @@ static inline struct sym_hist *annotation__histogram(struct annotation *notes, i static inline struct annotation *symbol__annotation(struct symbol *sym) { - struct sannotation *a = container_of(sym, struct sannotation, symbol); - return &a->annotation; + return (void *)sym - symbol_conf.priv_size; } int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index e8d79e5bfaf7..0c72680a977f 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -410,21 +410,18 @@ int perf_session__cache_build_ids(struct perf_session *session) { struct rb_node *nd; int ret; - char debugdir[PATH_MAX]; if (no_buildid_cache) return 0; - snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir); - - if (mkdir(debugdir, 0755) != 0 && errno != EEXIST) + if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST) return -1; - ret = machine__cache_build_ids(&session->machines.host, debugdir); + ret = machine__cache_build_ids(&session->machines.host, buildid_dir); for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); - ret |= machine__cache_build_ids(pos, debugdir); + ret |= machine__cache_build_ids(pos, buildid_dir); } return ret ? -1 : 0; } diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 5cf9e1b5989d..d04d770d90f6 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -71,7 +71,9 @@ extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2 extern char *perf_pathdup(const char *fmt, ...) __attribute__((format (printf, 1, 2))); +#ifndef __UCLIBC__ /* Matches the libc/libbsd function attribute so we declare this unconditionally: */ extern size_t strlcpy(char *dest, const char *src, size_t size); +#endif #endif /* __PERF_CACHE_H */ diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 38da69c8c1ff..14e7a123d43b 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -77,7 +77,7 @@ int parse_callchain_record_opt(const char *arg) ret = 0; } else pr_err("callchain: No more arguments " - "needed for -g fp\n"); + "needed for --call-graph fp\n"); break; #ifdef HAVE_DWARF_UNWIND_SUPPORT @@ -149,6 +149,10 @@ static int parse_callchain_sort_key(const char *value) callchain_param.key = CCKEY_ADDRESS; return 0; } + if (!strncmp(value, "branch", strlen(value))) { + callchain_param.branch_callstack = 1; + return 0; + } return -1; } @@ -815,7 +819,17 @@ char *callchain_list__sym_name(struct callchain_list *cl, int printed; if (cl->ms.sym) { - printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name); + if (callchain_param.key == CCKEY_ADDRESS && + cl->ms.map && !cl->srcline) + cl->srcline = get_srcline(cl->ms.map->dso, + map__rip_2objdump(cl->ms.map, + cl->ip), + cl->ms.sym, false); + if (cl->srcline) + printed = scnprintf(bf, bfsize, "%s %s", + cl->ms.sym->name, cl->srcline); + else + printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name); } else printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip); @@ -827,3 +841,33 @@ char *callchain_list__sym_name(struct callchain_list *cl, return bf; } + +static void free_callchain_node(struct callchain_node *node) +{ + struct callchain_list *list, *tmp; + struct callchain_node *child; + struct rb_node *n; + + list_for_each_entry_safe(list, tmp, &node->val, list) { + list_del(&list->list); + free(list); + } + + n = rb_first(&node->rb_root_in); + while (n) { + child = container_of(n, struct callchain_node, rb_node_in); + n = rb_next(n); + rb_erase(&child->rb_node_in, &node->rb_root_in); + + free_callchain_node(child); + free(child); + } +} + +void free_callchain(struct callchain_root *root) +{ + if (!symbol_conf.use_callchain) + return; + + free_callchain_node(&root->node); +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 3e1ed15d11f1..c0ec1acc38e4 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -63,6 +63,7 @@ struct callchain_param { sort_chain_func_t sort; enum chain_order order; enum chain_key key; + bool branch_callstack; }; extern struct callchain_param callchain_param; @@ -70,6 +71,7 @@ extern struct callchain_param callchain_param; struct callchain_list { u64 ip; struct map_symbol ms; + char *srcline; struct list_head list; }; @@ -196,4 +198,6 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused, char *callchain_list__sym_name(struct callchain_list *cl, char *bf, size_t bfsize, bool show_dso); +void free_callchain(struct callchain_root *root); + #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index f4654183d391..55355b3d4f85 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -5,132 +5,6 @@ int perf_use_color_default = -1; -static int parse_color(const char *name, int len) -{ - static const char * const color_names[] = { - "normal", "black", "red", "green", "yellow", - "blue", "magenta", "cyan", "white" - }; - char *end; - int i; - - for (i = 0; i < (int)ARRAY_SIZE(color_names); i++) { - const char *str = color_names[i]; - if (!strncasecmp(name, str, len) && !str[len]) - return i - 1; - } - i = strtol(name, &end, 10); - if (end - name == len && i >= -1 && i <= 255) - return i; - return -2; -} - -static int parse_attr(const char *name, int len) -{ - static const int attr_values[] = { 1, 2, 4, 5, 7 }; - static const char * const attr_names[] = { - "bold", "dim", "ul", "blink", "reverse" - }; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(attr_names); i++) { - const char *str = attr_names[i]; - if (!strncasecmp(name, str, len) && !str[len]) - return attr_values[i]; - } - return -1; -} - -void color_parse(const char *value, const char *var, char *dst) -{ - color_parse_mem(value, strlen(value), var, dst); -} - -void color_parse_mem(const char *value, int value_len, const char *var, - char *dst) -{ - const char *ptr = value; - int len = value_len; - int attr = -1; - int fg = -2; - int bg = -2; - - if (!strncasecmp(value, "reset", len)) { - strcpy(dst, PERF_COLOR_RESET); - return; - } - - /* [fg [bg]] [attr] */ - while (len > 0) { - const char *word = ptr; - int val, wordlen = 0; - - while (len > 0 && !isspace(word[wordlen])) { - wordlen++; - len--; - } - - ptr = word + wordlen; - while (len > 0 && isspace(*ptr)) { - ptr++; - len--; - } - - val = parse_color(word, wordlen); - if (val >= -1) { - if (fg == -2) { - fg = val; - continue; - } - if (bg == -2) { - bg = val; - continue; - } - goto bad; - } - val = parse_attr(word, wordlen); - if (val < 0 || attr != -1) - goto bad; - attr = val; - } - - if (attr >= 0 || fg >= 0 || bg >= 0) { - int sep = 0; - - *dst++ = '\033'; - *dst++ = '['; - if (attr >= 0) { - *dst++ = '0' + attr; - sep++; - } - if (fg >= 0) { - if (sep++) - *dst++ = ';'; - if (fg < 8) { - *dst++ = '3'; - *dst++ = '0' + fg; - } else { - dst += sprintf(dst, "38;5;%d", fg); - } - } - if (bg >= 0) { - if (sep++) - *dst++ = ';'; - if (bg < 8) { - *dst++ = '4'; - *dst++ = '0' + bg; - } else { - dst += sprintf(dst, "48;5;%d", bg); - } - } - *dst++ = 'm'; - } - *dst = 0; - return; -bad: - die("bad color value '%.*s' for variable '%s'", value_len, value, var); -} - int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) { if (value) { diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 0a594b8a0c26..38146f922c54 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -30,8 +30,6 @@ extern int perf_use_color_default; int perf_color_default_config(const char *var, const char *value, void *cb); int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty); -void color_parse(const char *value, const char *var, char *dst); -void color_parse_mem(const char *value, int len, const char *var, char *dst); int color_vsnprintf(char *bf, size_t size, const char *color, const char *fmt, va_list args); int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args); diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 57ff826f150b..e18f653cd7db 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -522,7 +522,7 @@ static int buildid_dir_command_config(const char *var, const char *value, const char *v; /* same dir for all commands */ - if (!prefixcmp(var, "buildid.") && !strcmp(var + 8, "dir")) { + if (!strcmp(var, "buildid.dir")) { v = perf_config_dirname(var, value); if (!v) return -1; @@ -539,12 +539,14 @@ static void check_buildid_dir_config(void) perf_config(buildid_dir_command_config, &c); } -void set_buildid_dir(void) +void set_buildid_dir(const char *dir) { - buildid_dir[0] = '\0'; + if (dir) + scnprintf(buildid_dir, MAXPATHLEN-1, "%s", dir); /* try config file */ - check_buildid_dir_config(); + if (buildid_dir[0] == '\0') + check_buildid_dir_config(); /* default to $HOME/.debug */ if (buildid_dir[0] == '\0') { diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index ba357f3226c6..ad60b2f20258 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -19,13 +19,14 @@ int verbose; bool dump_trace = false, quiet = false; int debug_ordered_events; +static int redirect_to_stderr; static int _eprintf(int level, int var, const char *fmt, va_list args) { int ret = 0; if (var >= level) { - if (use_browser >= 1) + if (use_browser >= 1 && !redirect_to_stderr) ui_helpline__vshow(fmt, args); else ret = vfprintf(stderr, fmt, args); @@ -145,6 +146,7 @@ static struct debug_variable { } debug_variables[] = { { .name = "verbose", .ptr = &verbose }, { .name = "ordered-events", .ptr = &debug_ordered_events}, + { .name = "stderr", .ptr = &redirect_to_stderr}, { .name = NULL, } }; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 09b9e8d3fcf7..c4ffe2bd0738 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -242,6 +242,7 @@ struct events_stats { u32 nr_invalid_chains; u32 nr_unknown_id; u32 nr_unprocessable_samples; + u32 nr_unordered_events; }; struct attr_event { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index cfbe2b99b9aa..28b8ce86bf12 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -8,6 +8,7 @@ */ #include "util.h" #include <api/fs/debugfs.h> +#include <api/fs/fs.h> #include <poll.h> #include "cpumap.h" #include "thread_map.h" @@ -24,6 +25,7 @@ #include <linux/bitops.h> #include <linux/hash.h> +#include <linux/log2.h> static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx); static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx); @@ -892,10 +894,24 @@ out_unmap: static size_t perf_evlist__mmap_size(unsigned long pages) { - /* 512 kiB: default amount of unprivileged mlocked memory */ - if (pages == UINT_MAX) - pages = (512 * 1024) / page_size; - else if (!is_power_of_2(pages)) + if (pages == UINT_MAX) { + int max; + + if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) { + /* + * Pick a once upon a time good value, i.e. things look + * strange since we can't read a sysctl value, but lets not + * die yet... + */ + max = 512; + } else { + max -= (page_size / 1024); + } + + pages = (max * 1024) / page_size; + if (!is_power_of_2(pages)) + pages = rounddown_pow_of_two(pages); + } else if (!is_power_of_2(pages)) return 0; return (pages + 1) * page_size; @@ -932,7 +948,7 @@ static long parse_pages_arg(const char *str, unsigned long min, /* leave number of pages at 0 */ } else if (!is_power_of_2(pages)) { /* round pages up to next power of 2 */ - pages = next_pow2_l(pages); + pages = roundup_pow_of_two(pages); if (!pages) return -EINVAL; pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n", @@ -1420,33 +1436,6 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) return printed + fprintf(fp, "\n"); } -int perf_evlist__strerror_tp(struct perf_evlist *evlist __maybe_unused, - int err, char *buf, size_t size) -{ - char sbuf[128]; - - switch (err) { - case ENOENT: - scnprintf(buf, size, "%s", - "Error:\tUnable to find debugfs\n" - "Hint:\tWas your kernel was compiled with debugfs support?\n" - "Hint:\tIs the debugfs filesystem mounted?\n" - "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'"); - break; - case EACCES: - scnprintf(buf, size, - "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n" - "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", - debugfs_mountpoint, debugfs_mountpoint); - break; - default: - scnprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf))); - break; - } - - return 0; -} - int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused, int err, char *buf, size_t size) { @@ -1483,6 +1472,37 @@ int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused, return 0; } +int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size) +{ + char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf)); + int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0; + + switch (err) { + case EPERM: + sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user); + printed += scnprintf(buf + printed, size - printed, + "Error:\t%s.\n" + "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n" + "Hint:\tTried using %zd kB.\n", + emsg, pages_max_per_user, pages_attempted); + + if (pages_attempted >= pages_max_per_user) { + printed += scnprintf(buf + printed, size - printed, + "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n", + pages_max_per_user + pages_attempted); + } + + printed += scnprintf(buf + printed, size - printed, + "Hint:\tTry using a smaller -m/--mmap-pages value."); + break; + default: + scnprintf(buf, size, "%s", emsg); + break; + } + + return 0; +} + void perf_evlist__to_front(struct perf_evlist *evlist, struct perf_evsel *move_evsel) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 649b0c597283..c94a9e03ecf1 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -183,8 +183,8 @@ static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); -int perf_evlist__strerror_tp(struct perf_evlist *evlist, int err, char *buf, size_t size); int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size); +int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size); static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm) { diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f2dc91fb87fa..1e90c8557ede 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -876,9 +876,8 @@ void perf_evsel__delete(struct perf_evsel *evsel) free(evsel); } -static inline void compute_deltas(struct perf_evsel *evsel, - int cpu, - struct perf_counts_values *count) +void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, + struct perf_counts_values *count) { struct perf_counts_values tmp; @@ -898,81 +897,60 @@ static inline void compute_deltas(struct perf_evsel *evsel, count->run = count->run - tmp.run; } -int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, - int cpu, int thread, bool scale) +void perf_counts_values__scale(struct perf_counts_values *count, + bool scale, s8 *pscaled) { - struct perf_counts_values count; - size_t nv = scale ? 3 : 1; - - if (FD(evsel, cpu, thread) < 0) - return -EINVAL; - - if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0) - return -ENOMEM; - - if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) - return -errno; - - compute_deltas(evsel, cpu, &count); + s8 scaled = 0; if (scale) { - if (count.run == 0) - count.val = 0; - else if (count.run < count.ena) - count.val = (u64)((double)count.val * count.ena / count.run + 0.5); + if (count->run == 0) { + scaled = -1; + count->val = 0; + } else if (count->run < count->ena) { + scaled = 1; + count->val = (u64)((double) count->val * count->ena / count->run + 0.5); + } } else - count.ena = count.run = 0; + count->ena = count->run = 0; - evsel->counts->cpu[cpu] = count; - return 0; + if (pscaled) + *pscaled = scaled; } -int __perf_evsel__read(struct perf_evsel *evsel, - int ncpus, int nthreads, bool scale) +int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread, + perf_evsel__read_cb_t cb) { - size_t nv = scale ? 3 : 1; - int cpu, thread; - struct perf_counts_values *aggr = &evsel->counts->aggr, count; + struct perf_counts_values count; - if (evsel->system_wide) - nthreads = 1; + memset(&count, 0, sizeof(count)); - aggr->val = aggr->ena = aggr->run = 0; + if (FD(evsel, cpu, thread) < 0) + return -EINVAL; - for (cpu = 0; cpu < ncpus; cpu++) { - for (thread = 0; thread < nthreads; thread++) { - if (FD(evsel, cpu, thread) < 0) - continue; + if (readn(FD(evsel, cpu, thread), &count, sizeof(count)) < 0) + return -errno; - if (readn(FD(evsel, cpu, thread), - &count, nv * sizeof(u64)) < 0) - return -errno; + return cb(evsel, cpu, thread, &count); +} - aggr->val += count.val; - if (scale) { - aggr->ena += count.ena; - aggr->run += count.run; - } - } - } +int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, + int cpu, int thread, bool scale) +{ + struct perf_counts_values count; + size_t nv = scale ? 3 : 1; - compute_deltas(evsel, -1, aggr); + if (FD(evsel, cpu, thread) < 0) + return -EINVAL; - evsel->counts->scaled = 0; - if (scale) { - if (aggr->run == 0) { - evsel->counts->scaled = -1; - aggr->val = 0; - return 0; - } + if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0) + return -ENOMEM; - if (aggr->run < aggr->ena) { - evsel->counts->scaled = 1; - aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5); - } - } else - aggr->ena = aggr->run = 0; + if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) + return -errno; + perf_evsel__compute_deltas(evsel, cpu, &count); + perf_counts_values__scale(&count, scale, NULL); + evsel->counts->cpu[cpu] = count; return 0; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 979790951bfb..38622747d130 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -73,6 +73,7 @@ struct perf_evsel { char *name; double scale; const char *unit; + bool snapshot; struct event_format *tp_format; union { void *priv; @@ -91,6 +92,8 @@ struct perf_evsel { bool immediate; bool system_wide; bool tracking; + bool per_pkg; + unsigned long *per_pkg_mask; /* parse modifier helper */ int exclude_GH; int nr_members; @@ -110,6 +113,12 @@ struct thread_map; struct perf_evlist; struct record_opts; +void perf_counts_values__scale(struct perf_counts_values *count, + bool scale, s8 *pscaled); + +void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, + struct perf_counts_values *count); + int perf_evsel__object_config(size_t object_size, int (*init)(struct perf_evsel *evsel), void (*fini)(struct perf_evsel *evsel)); @@ -227,6 +236,13 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1, (a)->attr.type == (b)->attr.type && \ (a)->attr.config == (b)->attr.config) +typedef int (perf_evsel__read_cb_t)(struct perf_evsel *evsel, + int cpu, int thread, + struct perf_counts_values *count); + +int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread, + perf_evsel__read_cb_t cb); + int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, int cpu, int thread, bool scale); @@ -256,35 +272,6 @@ static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel, return __perf_evsel__read_on_cpu(evsel, cpu, thread, true); } -int __perf_evsel__read(struct perf_evsel *evsel, int ncpus, int nthreads, - bool scale); - -/** - * perf_evsel__read - Read the aggregate results on all CPUs - * - * @evsel - event selector to read value - * @ncpus - Number of cpus affected, from zero - * @nthreads - Number of threads affected, from zero - */ -static inline int perf_evsel__read(struct perf_evsel *evsel, - int ncpus, int nthreads) -{ - return __perf_evsel__read(evsel, ncpus, nthreads, false); -} - -/** - * perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled - * - * @evsel - event selector to read value - * @ncpus - Number of cpus affected, from zero - * @nthreads - Number of threads affected, from zero - */ -static inline int perf_evsel__read_scaled(struct perf_evsel *evsel, - int ncpus, int nthreads) -{ - return __perf_evsel__read(evsel, ncpus, nthreads, true); -} - int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *sample); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6e88b9e395df..70b48a65064c 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -6,6 +6,7 @@ #include "evlist.h" #include "evsel.h" #include "annotate.h" +#include "ui/progress.h" #include <math.h> static bool hists__filter_entry_by_dso(struct hists *hists, @@ -240,6 +241,20 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) return he->stat.period == 0; } +static void hists__delete_entry(struct hists *hists, struct hist_entry *he) +{ + rb_erase(&he->rb_node, &hists->entries); + + if (sort__need_collapse) + rb_erase(&he->rb_node_in, &hists->entries_collapsed); + + --hists->nr_entries; + if (!he->filtered) + --hists->nr_non_filtered_entries; + + hist_entry__delete(he); +} + void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) { struct rb_node *next = rb_first(&hists->entries); @@ -257,16 +272,7 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) (zap_kernel && n->level != '.') || hists__decay_entry(hists, n)) && !n->used) { - rb_erase(&n->rb_node, &hists->entries); - - if (sort__need_collapse) - rb_erase(&n->rb_node_in, &hists->entries_collapsed); - - --hists->nr_entries; - if (!n->filtered) - --hists->nr_non_filtered_entries; - - hist_entry__free(n); + hists__delete_entry(hists, n); } } } @@ -280,16 +286,7 @@ void hists__delete_entries(struct hists *hists) n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - rb_erase(&n->rb_node, &hists->entries); - - if (sort__need_collapse) - rb_erase(&n->rb_node_in, &hists->entries_collapsed); - - --hists->nr_entries; - if (!n->filtered) - --hists->nr_non_filtered_entries; - - hist_entry__free(n); + hists__delete_entry(hists, n); } } @@ -303,7 +300,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, size_t callchain_size = 0; struct hist_entry *he; - if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) + if (symbol_conf.use_callchain) callchain_size = sizeof(struct callchain_root); he = zalloc(sizeof(*he) + callchain_size); @@ -432,6 +429,8 @@ static struct hist_entry *add_hist_entry(struct hists *hists, if (!he) return NULL; + hists->nr_entries++; + rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, hists->entries_in); out: @@ -736,7 +735,7 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter, iter->he = he; he_cache[iter->curr++] = he; - callchain_append(he->callchain, &callchain_cursor, sample->period); + hist_entry__append_callchain(he, sample); /* * We need to re-initialize the cursor since callchain_append() @@ -809,7 +808,8 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, iter->he = he; he_cache[iter->curr++] = he; - callchain_append(he->callchain, &cursor, sample->period); + if (symbol_conf.use_callchain) + callchain_append(he->callchain, &cursor, sample->period); return 0; } @@ -913,7 +913,7 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) if (perf_hpp__should_skip(fmt)) continue; - cmp = fmt->cmp(left, right); + cmp = fmt->cmp(fmt, left, right); if (cmp) break; } @@ -931,7 +931,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) if (perf_hpp__should_skip(fmt)) continue; - cmp = fmt->collapse(left, right); + cmp = fmt->collapse(fmt, left, right); if (cmp) break; } @@ -939,12 +939,13 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) return cmp; } -void hist_entry__free(struct hist_entry *he) +void hist_entry__delete(struct hist_entry *he) { zfree(&he->branch_info); zfree(&he->mem_info); zfree(&he->stat_acc); free_srcline(he->srcline); + free_callchain(he->callchain); free(he); } @@ -978,7 +979,7 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, iter->callchain, he->callchain); } - hist_entry__free(he); + hist_entry__delete(he); return false; } @@ -987,6 +988,7 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, else p = &(*p)->rb_right; } + hists->nr_entries++; rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, root); @@ -1024,7 +1026,10 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) if (!sort__need_collapse) return; + hists->nr_entries = 0; + root = hists__get_rotate_entries_in(hists); + next = rb_first(root); while (next) { @@ -1056,7 +1061,7 @@ static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b) if (perf_hpp__should_skip(fmt)) continue; - cmp = fmt->sort(a, b); + cmp = fmt->sort(fmt, a, b); if (cmp) break; } @@ -1119,7 +1124,7 @@ static void __hists__insert_output_entry(struct rb_root *entries, rb_insert_color(&he->rb_node, entries); } -void hists__output_resort(struct hists *hists) +void hists__output_resort(struct hists *hists, struct ui_progress *prog) { struct rb_root *root; struct rb_node *next; @@ -1148,6 +1153,9 @@ void hists__output_resort(struct hists *hists) if (!n->filtered) hists__calc_col_len(hists, n); + + if (prog) + ui_progress__update(prog, 1); } } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index d0ef9a19a744..2b690d028907 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -119,9 +119,9 @@ int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int hist_entry__transaction_len(void); int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, struct hists *hists); -void hist_entry__free(struct hist_entry *); +void hist_entry__delete(struct hist_entry *he); -void hists__output_resort(struct hists *hists); +void hists__output_resort(struct hists *hists, struct ui_progress *prog); void hists__collapse_resort(struct hists *hists, struct ui_progress *prog); void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel); @@ -195,9 +195,12 @@ struct perf_hpp_fmt { struct hist_entry *he); int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he); - int64_t (*cmp)(struct hist_entry *a, struct hist_entry *b); - int64_t (*collapse)(struct hist_entry *a, struct hist_entry *b); - int64_t (*sort)(struct hist_entry *a, struct hist_entry *b); + int64_t (*cmp)(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b); + int64_t (*collapse)(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b); + int64_t (*sort)(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b); struct list_head list; struct list_head sort_list; diff --git a/tools/perf/util/hweight.c b/tools/perf/util/hweight.c deleted file mode 100644 index 5c1d0d099f0d..000000000000 --- a/tools/perf/util/hweight.c +++ /dev/null @@ -1,31 +0,0 @@ -#include <linux/bitops.h> - -/** - * hweightN - returns the hamming weight of a N-bit word - * @x: the word to weigh - * - * The Hamming Weight of a number is the total number of bits set in it. - */ - -unsigned int hweight32(unsigned int w) -{ - unsigned int res = w - ((w >> 1) & 0x55555555); - res = (res & 0x33333333) + ((res >> 2) & 0x33333333); - res = (res + (res >> 4)) & 0x0F0F0F0F; - res = res + (res >> 8); - return (res + (res >> 16)) & 0x000000FF; -} - -unsigned long hweight64(__u64 w) -{ -#if BITS_PER_LONG == 32 - return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); -#elif BITS_PER_LONG == 64 - __u64 res = w - ((w >> 1) & 0x5555555555555555ul); - res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul); - res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful; - res = res + (res >> 8); - res = res + (res >> 16); - return (res + (res >> 32)) & 0x00000000000000FFul; -#endif -} diff --git a/tools/perf/util/include/asm/hash.h b/tools/perf/util/include/asm/hash.h deleted file mode 100644 index d82b170bb216..000000000000 --- a/tools/perf/util/include/asm/hash.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_GENERIC_HASH_H -#define __ASM_GENERIC_HASH_H - -/* Stub */ - -#endif /* __ASM_GENERIC_HASH_H */ diff --git a/tools/perf/util/include/asm/hweight.h b/tools/perf/util/include/asm/hweight.h deleted file mode 100644 index 36cf26d434a5..000000000000 --- a/tools/perf/util/include/asm/hweight.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef PERF_HWEIGHT_H -#define PERF_HWEIGHT_H - -#include <linux/types.h> -unsigned int hweight32(unsigned int w); -unsigned long hweight64(__u64 w); - -#endif /* PERF_HWEIGHT_H */ diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h deleted file mode 100644 index c3294163de17..000000000000 --- a/tools/perf/util/include/linux/bitops.h +++ /dev/null @@ -1,162 +0,0 @@ -#ifndef _PERF_LINUX_BITOPS_H_ -#define _PERF_LINUX_BITOPS_H_ - -#include <linux/kernel.h> -#include <linux/compiler.h> -#include <asm/hweight.h> - -#ifndef __WORDSIZE -#define __WORDSIZE (__SIZEOF_LONG__ * 8) -#endif - -#define BITS_PER_LONG __WORDSIZE -#define BITS_PER_BYTE 8 -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) -#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) -#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) -#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE) -#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) -#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) - -#define for_each_set_bit(bit, addr, size) \ - for ((bit) = find_first_bit((addr), (size)); \ - (bit) < (size); \ - (bit) = find_next_bit((addr), (size), (bit) + 1)) - -/* same as for_each_set_bit() but use bit as value to start with */ -#define for_each_set_bit_from(bit, addr, size) \ - for ((bit) = find_next_bit((addr), (size), (bit)); \ - (bit) < (size); \ - (bit) = find_next_bit((addr), (size), (bit) + 1)) - -static inline void set_bit(int nr, unsigned long *addr) -{ - addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); -} - -static inline void clear_bit(int nr, unsigned long *addr) -{ - addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG)); -} - -static __always_inline int test_bit(unsigned int nr, const unsigned long *addr) -{ - return ((1UL << (nr % BITS_PER_LONG)) & - (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; -} - -static inline unsigned long hweight_long(unsigned long w) -{ - return sizeof(w) == 4 ? hweight32(w) : hweight64(w); -} - -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) - -/** - * __ffs - find first bit in word. - * @word: The word to search - * - * Undefined if no bit exists, so code should check against 0 first. - */ -static __always_inline unsigned long __ffs(unsigned long word) -{ - int num = 0; - -#if BITS_PER_LONG == 64 - if ((word & 0xffffffff) == 0) { - num += 32; - word >>= 32; - } -#endif - if ((word & 0xffff) == 0) { - num += 16; - word >>= 16; - } - if ((word & 0xff) == 0) { - num += 8; - word >>= 8; - } - if ((word & 0xf) == 0) { - num += 4; - word >>= 4; - } - if ((word & 0x3) == 0) { - num += 2; - word >>= 2; - } - if ((word & 0x1) == 0) - num += 1; - return num; -} - -typedef const unsigned long __attribute__((__may_alias__)) long_alias_t; - -/* - * Find the first set bit in a memory region. - */ -static inline unsigned long -find_first_bit(const unsigned long *addr, unsigned long size) -{ - long_alias_t *p = (long_alias_t *) addr; - unsigned long result = 0; - unsigned long tmp; - - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - - tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found: - return result + __ffs(tmp); -} - -/* - * Find the next set bit in a memory region. - */ -static inline unsigned long -find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) -{ - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp &= (~0UL << offset); - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + __ffs(tmp); -} - -#endif diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index d97309c87bd6..1bca3a9f2b16 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -12,6 +12,7 @@ #include <stdbool.h> #include <symbol/kallsyms.h> #include "unwind.h" +#include "linux/hash.h" static void dsos__init(struct dsos *dsos) { @@ -388,7 +389,6 @@ static struct thread *__machine__findnew_thread(struct machine *machine, if (th != NULL) { rb_link_node(&th->rb_node, parent, p); rb_insert_color(&th->rb_node, &machine->threads); - machine->last_match = th; /* * We have to initialize map_groups separately @@ -399,9 +399,12 @@ static struct thread *__machine__findnew_thread(struct machine *machine, * leader and that would screwed the rb tree. */ if (thread__init_map_groups(th, machine)) { + rb_erase(&th->rb_node, &machine->threads); thread__delete(th); return NULL; } + + machine->last_match = th; } return th; @@ -1106,8 +1109,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (__machine__create_kernel_maps(machine, kernel) < 0) goto out_problem; - if (strstr(dso->long_name, "vmlinux")) - dso__set_short_name(dso, "[kernel.vmlinux]", false); + if (strstr(kernel->long_name, "vmlinux")) + dso__set_short_name(kernel, "[kernel.vmlinux]", false); machine__set_kernel_mmap_len(machine, event); @@ -1384,15 +1387,46 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, static int add_callchain_ip(struct thread *thread, struct symbol **parent, struct addr_location *root_al, - int cpumode, + bool branch_history, u64 ip) { struct addr_location al; al.filtered = 0; al.sym = NULL; - thread__find_addr_location(thread, cpumode, MAP__FUNCTION, + if (branch_history) + thread__find_cpumode_addr_location(thread, MAP__FUNCTION, + ip, &al); + else { + u8 cpumode = PERF_RECORD_MISC_USER; + + if (ip >= PERF_CONTEXT_MAX) { + switch (ip) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: " + "%"PRId64"\n", (s64) ip); + /* + * It seems the callchain is corrupted. + * Discard all. + */ + callchain_cursor_reset(&callchain_cursor); + return 1; + } + return 0; + } + thread__find_addr_location(thread, cpumode, MAP__FUNCTION, ip, &al); + } + if (al.sym != NULL) { if (sort__has_parent && !*parent && symbol__match_regex(al.sym, &parent_regex)) @@ -1427,33 +1461,132 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, return bi; } +#define CHASHSZ 127 +#define CHASHBITS 7 +#define NO_ENTRY 0xff + +#define PERF_MAX_BRANCH_DEPTH 127 + +/* Remove loops. */ +static int remove_loops(struct branch_entry *l, int nr) +{ + int i, j, off; + unsigned char chash[CHASHSZ]; + + memset(chash, NO_ENTRY, sizeof(chash)); + + BUG_ON(PERF_MAX_BRANCH_DEPTH > 255); + + for (i = 0; i < nr; i++) { + int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ; + + /* no collision handling for now */ + if (chash[h] == NO_ENTRY) { + chash[h] = i; + } else if (l[chash[h]].from == l[i].from) { + bool is_loop = true; + /* check if it is a real loop */ + off = 0; + for (j = chash[h]; j < i && i + off < nr; j++, off++) + if (l[j].from != l[i + off].from) { + is_loop = false; + break; + } + if (is_loop) { + memmove(l + i, l + i + off, + (nr - (i + off)) * sizeof(*l)); + nr -= off; + } + } + } + return nr; +} + static int thread__resolve_callchain_sample(struct thread *thread, struct ip_callchain *chain, + struct branch_stack *branch, struct symbol **parent, struct addr_location *root_al, int max_stack) { - u8 cpumode = PERF_RECORD_MISC_USER; int chain_nr = min(max_stack, (int)chain->nr); - int i; - int j; - int err; - int skip_idx __maybe_unused; + int i, j, err; + int skip_idx = -1; + int first_call = 0; + + /* + * Based on DWARF debug information, some architectures skip + * a callchain entry saved by the kernel. + */ + if (chain->nr < PERF_MAX_STACK_DEPTH) + skip_idx = arch_skip_callchain_idx(thread, chain); callchain_cursor_reset(&callchain_cursor); + /* + * Add branches to call stack for easier browsing. This gives + * more context for a sample than just the callers. + * + * This uses individual histograms of paths compared to the + * aggregated histograms the normal LBR mode uses. + * + * Limitations for now: + * - No extra filters + * - No annotations (should annotate somehow) + */ + + if (branch && callchain_param.branch_callstack) { + int nr = min(max_stack, (int)branch->nr); + struct branch_entry be[nr]; + + if (branch->nr > PERF_MAX_BRANCH_DEPTH) { + pr_warning("corrupted branch chain. skipping...\n"); + goto check_calls; + } + + for (i = 0; i < nr; i++) { + if (callchain_param.order == ORDER_CALLEE) { + be[i] = branch->entries[i]; + /* + * Check for overlap into the callchain. + * The return address is one off compared to + * the branch entry. To adjust for this + * assume the calling instruction is not longer + * than 8 bytes. + */ + if (i == skip_idx || + chain->ips[first_call] >= PERF_CONTEXT_MAX) + first_call++; + else if (be[i].from < chain->ips[first_call] && + be[i].from >= chain->ips[first_call] - 8) + first_call++; + } else + be[i] = branch->entries[branch->nr - i - 1]; + } + + nr = remove_loops(be, nr); + + for (i = 0; i < nr; i++) { + err = add_callchain_ip(thread, parent, root_al, + true, be[i].to); + if (!err) + err = add_callchain_ip(thread, parent, root_al, + true, be[i].from); + if (err == -EINVAL) + break; + if (err) + return err; + } + chain_nr -= nr; + } + +check_calls: if (chain->nr > PERF_MAX_STACK_DEPTH) { pr_warning("corrupted callchain. skipping...\n"); return 0; } - /* - * Based on DWARF debug information, some architectures skip - * a callchain entry saved by the kernel. - */ - skip_idx = arch_skip_callchain_idx(thread, chain); - - for (i = 0; i < chain_nr; i++) { + for (i = first_call; i < chain_nr; i++) { u64 ip; if (callchain_param.order == ORDER_CALLEE) @@ -1467,36 +1600,10 @@ static int thread__resolve_callchain_sample(struct thread *thread, #endif ip = chain->ips[j]; - if (ip >= PERF_CONTEXT_MAX) { - switch (ip) { - case PERF_CONTEXT_HV: - cpumode = PERF_RECORD_MISC_HYPERVISOR; - break; - case PERF_CONTEXT_KERNEL: - cpumode = PERF_RECORD_MISC_KERNEL; - break; - case PERF_CONTEXT_USER: - cpumode = PERF_RECORD_MISC_USER; - break; - default: - pr_debug("invalid callchain context: " - "%"PRId64"\n", (s64) ip); - /* - * It seems the callchain is corrupted. - * Discard all. - */ - callchain_cursor_reset(&callchain_cursor); - return 0; - } - continue; - } + err = add_callchain_ip(thread, parent, root_al, false, ip); - err = add_callchain_ip(thread, parent, root_al, - cpumode, ip); - if (err == -EINVAL) - break; if (err) - return err; + return (err < 0) ? err : 0; } return 0; @@ -1517,6 +1624,7 @@ int thread__resolve_callchain(struct thread *thread, int max_stack) { int ret = thread__resolve_callchain_sample(thread, sample->callchain, + sample->branch_stack, parent, root_al, max_stack); if (ret) return ret; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 040a785c857b..62ca9f2607d5 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -360,7 +360,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, if (map && map->dso) { srcline = get_srcline(map->dso, - map__rip_2objdump(map, addr)); + map__rip_2objdump(map, addr), NULL, true); if (srcline != SRCLINE_UNKNOWN) ret = fprintf(fp, "%s%s", prefix, srcline); free_srcline(srcline); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 6951a9d42339..0e42438b1e59 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -116,6 +116,22 @@ struct thread; #define map__for_each_symbol(map, pos, n) \ dso__for_each_symbol(map->dso, pos, n, map->type) +/* map__for_each_symbol_with_name - iterate over the symbols in the given map + * that have the given name + * + * @map: the 'struct map *' in which symbols itereated + * @sym_name: the symbol name + * @pos: the 'struct symbol *' to use as a loop cursor + * @filter: to use when loading the DSO + */ +#define __map__for_each_symbol_by_name(map, sym_name, pos, filter) \ + for (pos = map__find_symbol_by_name(map, sym_name, filter); \ + pos && strcmp(pos->name, sym_name) == 0; \ + pos = symbol__next_by_name(pos)) + +#define map__for_each_symbol_by_name(map, sym_name, pos) \ + __map__for_each_symbol_by_name(map, sym_name, (pos), NULL) + typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); void map__init(struct map *map, enum map_type type, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index efa1ff4cca63..7f8ec6ce2823 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -682,6 +682,8 @@ int parse_events_add_pmu(struct list_head *list, int *idx, if (evsel) { evsel->unit = info.unit; evsel->scale = info.scale; + evsel->per_pkg = info.per_pkg; + evsel->snapshot = info.snapshot; } return evsel ? 0 : -ENOMEM; @@ -1120,7 +1122,7 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, return; for_each_subsystem(sys_dir, sys_dirent, sys_next) { - if (subsys_glob != NULL && + if (subsys_glob != NULL && !strglobmatch(sys_dirent.d_name, subsys_glob)) continue; @@ -1131,7 +1133,7 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, continue; for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { - if (event_glob != NULL && + if (event_glob != NULL && !strglobmatch(evt_dirent.d_name, event_glob)) continue; @@ -1304,7 +1306,7 @@ static void print_symbol_events(const char *event_glob, unsigned type, for (i = 0; i < max; i++, syms++) { - if (event_glob != NULL && + if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) || (syms->alias && strglobmatch(syms->alias, event_glob)))) continue; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index a19fbeb80943..ff6e1fa4111e 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -71,6 +71,7 @@ struct parse_events_term { int type_val; int type_term; struct list_head list; + bool used; }; struct parse_events_evlist { diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index f62dee7bd924..4a015f77e2b5 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -46,7 +46,7 @@ static int get_value(struct parse_opt_ctx_t *p, return opterror(opt, "is not usable", flags); if (opt->flags & PARSE_OPT_EXCLUSIVE) { - if (p->excl_opt) { + if (p->excl_opt && p->excl_opt != opt) { char msg[128]; if (((flags & OPT_SHORT) && p->excl_opt->short_name) || diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 881b75490533..48411674da0f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -163,6 +163,41 @@ error: return -1; } +static int +perf_pmu__parse_per_pkg(struct perf_pmu_alias *alias, char *dir, char *name) +{ + char path[PATH_MAX]; + int fd; + + snprintf(path, PATH_MAX, "%s/%s.per-pkg", dir, name); + + fd = open(path, O_RDONLY); + if (fd == -1) + return -1; + + close(fd); + + alias->per_pkg = true; + return 0; +} + +static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, + char *dir, char *name) +{ + char path[PATH_MAX]; + int fd; + + snprintf(path, PATH_MAX, "%s/%s.snapshot", dir, name); + + fd = open(path, O_RDONLY); + if (fd == -1) + return -1; + + alias->snapshot = true; + close(fd); + return 0; +} + static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file) { struct perf_pmu_alias *alias; @@ -181,6 +216,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI INIT_LIST_HEAD(&alias->terms); alias->scale = 1.0; alias->unit[0] = '\0'; + alias->per_pkg = false; ret = parse_events_terms(&alias->terms, buf); if (ret) { @@ -194,6 +230,8 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI */ perf_pmu__parse_unit(alias, dir, name); perf_pmu__parse_scale(alias, dir, name); + perf_pmu__parse_per_pkg(alias, dir, name); + perf_pmu__parse_snapshot(alias, dir, name); list_add_tail(&alias->list, list); @@ -209,6 +247,10 @@ static inline bool pmu_alias_info_file(char *name) return true; if (len > 6 && !strcmp(name + len - 6, ".scale")) return true; + if (len > 8 && !strcmp(name + len - 8, ".per-pkg")) + return true; + if (len > 9 && !strcmp(name + len - 9, ".snapshot")) + return true; return false; } @@ -509,31 +551,68 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, } /* + * Term is a string term, and might be a param-term. Try to look up it's value + * in the remaining terms. + * - We have a term like "base-or-format-term=param-term", + * - We need to find the value supplied for "param-term" (with param-term named + * in a config string) later on in the term list. + */ +static int pmu_resolve_param_term(struct parse_events_term *term, + struct list_head *head_terms, + __u64 *value) +{ + struct parse_events_term *t; + + list_for_each_entry(t, head_terms, list) { + if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { + if (!strcmp(t->config, term->config)) { + t->used = true; + *value = t->val.num; + return 0; + } + } + } + + if (verbose) + printf("Required parameter '%s' not specified\n", term->config); + + return -1; +} + +/* * Setup one of config[12] attr members based on the * user input data - term parameter. */ static int pmu_config_term(struct list_head *formats, struct perf_event_attr *attr, struct parse_events_term *term, + struct list_head *head_terms, bool zero) { struct perf_pmu_format *format; __u64 *vp; + __u64 val; + + /* + * If this is a parameter we've already used for parameterized-eval, + * skip it in normal eval. + */ + if (term->used) + return 0; /* - * Support only for hardcoded and numnerial terms. * Hardcoded terms should be already in, so nothing * to be done for them. */ if (parse_events__is_hardcoded_term(term)) return 0; - if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) - return -EINVAL; - format = pmu_find_format(formats, term->config); - if (!format) + if (!format) { + if (verbose) + printf("Invalid event/parameter '%s'\n", term->config); return -EINVAL; + } switch (format->value) { case PERF_PMU_FORMAT_VALUE_CONFIG: @@ -550,11 +629,25 @@ static int pmu_config_term(struct list_head *formats, } /* - * XXX If we ever decide to go with string values for - * non-hardcoded terms, here's the place to translate - * them into value. + * Either directly use a numeric term, or try to translate string terms + * using event parameters. */ - pmu_format_value(format->bits, term->val.num, vp, zero); + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) + val = term->val.num; + else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) { + if (strcmp(term->val.str, "?")) { + if (verbose) + pr_info("Invalid sysfs entry %s=%s\n", + term->config, term->val.str); + return -EINVAL; + } + + if (pmu_resolve_param_term(term, head_terms, &val)) + return -EINVAL; + } else + return -EINVAL; + + pmu_format_value(format->bits, val, vp, zero); return 0; } @@ -565,9 +658,10 @@ int perf_pmu__config_terms(struct list_head *formats, { struct parse_events_term *term; - list_for_each_entry(term, head_terms, list) - if (pmu_config_term(formats, attr, term, zero)) + list_for_each_entry(term, head_terms, list) { + if (pmu_config_term(formats, attr, term, head_terms, zero)) return -EINVAL; + } return 0; } @@ -617,23 +711,27 @@ static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu, } -static int check_unit_scale(struct perf_pmu_alias *alias, - const char **unit, double *scale) +static int check_info_data(struct perf_pmu_alias *alias, + struct perf_pmu_info *info) { /* * Only one term in event definition can - * define unit and scale, fail if there's - * more than one. + * define unit, scale and snapshot, fail + * if there's more than one. */ - if ((*unit && alias->unit) || - (*scale && alias->scale)) + if ((info->unit && alias->unit) || + (info->scale && alias->scale) || + (info->snapshot && alias->snapshot)) return -EINVAL; if (alias->unit) - *unit = alias->unit; + info->unit = alias->unit; if (alias->scale) - *scale = alias->scale; + info->scale = alias->scale; + + if (alias->snapshot) + info->snapshot = alias->snapshot; return 0; } @@ -649,12 +747,15 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, struct perf_pmu_alias *alias; int ret; + info->per_pkg = false; + /* * Mark unit and scale as not set * (different from default values, see below) */ - info->unit = NULL; - info->scale = 0.0; + info->unit = NULL; + info->scale = 0.0; + info->snapshot = false; list_for_each_entry_safe(term, h, head_terms, list) { alias = pmu_find_alias(pmu, term); @@ -664,10 +765,13 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, if (ret) return ret; - ret = check_unit_scale(alias, &info->unit, &info->scale); + ret = check_info_data(alias, info); if (ret) return ret; + if (alias->per_pkg) + info->per_pkg = true; + list_del(&term->list); free(term); } @@ -715,10 +819,36 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to) set_bit(b, bits); } +static int sub_non_neg(int a, int b) +{ + if (b > a) + return 0; + return a - b; +} + static char *format_alias(char *buf, int len, struct perf_pmu *pmu, struct perf_pmu_alias *alias) { - snprintf(buf, len, "%s/%s/", pmu->name, alias->name); + struct parse_events_term *term; + int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name); + + list_for_each_entry(term, &alias->terms, list) { + if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) + used += snprintf(buf + used, sub_non_neg(len, used), + ",%s=%s", term->config, + term->val.str); + } + + if (sub_non_neg(len, used) > 0) { + buf[used] = '/'; + used++; + } + if (sub_non_neg(len, used) > 0) { + buf[used] = '\0'; + used++; + } else + buf[len - 1] = '\0'; + return buf; } diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 8092de78e818..6b1249fbdb5f 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -29,6 +29,8 @@ struct perf_pmu { struct perf_pmu_info { const char *unit; double scale; + bool per_pkg; + bool snapshot; }; #define UNIT_MAX_LEN 31 /* max length for event unit name */ @@ -39,6 +41,8 @@ struct perf_pmu_alias { struct list_head list; /* ELEM */ char unit[UNIT_MAX_LEN+1]; double scale; + bool per_pkg; + bool snapshot; }; struct perf_pmu *perf_pmu__find(const char *name); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 28eb1417cb2a..919937eb0be2 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -446,7 +446,7 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs, } for (i = 0; i < ntevs; i++) { - if (tevs[i].point.address) { + if (tevs[i].point.address && !tevs[i].point.retprobe) { tmp = strdup(reloc_sym->name); if (!tmp) return -ENOMEM; @@ -495,9 +495,11 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, } if (ntevs == 0) { /* No error but failed to find probe point. */ - pr_warning("Probe point '%s' not found.\n", + pr_warning("Probe point '%s' not found in debuginfo.\n", synthesize_perf_probe_point(&pev->point)); - return -ENOENT; + if (need_dwarf) + return -ENOENT; + return 0; } /* Error path : ntevs < 0 */ pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); @@ -2050,9 +2052,11 @@ static int write_probe_trace_event(int fd, struct probe_trace_event *tev) pr_debug("Writing event: %s\n", buf); if (!probe_event_dry_run) { ret = write(fd, buf, strlen(buf)); - if (ret <= 0) + if (ret <= 0) { + ret = -errno; pr_warning("Failed to write event: %s\n", strerror_r(errno, sbuf, sizeof(sbuf))); + } } free(buf); return ret; @@ -2189,18 +2193,17 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, return ret; } -static char *looking_function_name; -static int num_matched_functions; - -static int probe_function_filter(struct map *map __maybe_unused, - struct symbol *sym) +static int find_probe_functions(struct map *map, char *name) { - if ((sym->binding == STB_GLOBAL || sym->binding == STB_LOCAL) && - strcmp(looking_function_name, sym->name) == 0) { - num_matched_functions++; - return 0; + int found = 0; + struct symbol *sym; + + map__for_each_symbol_by_name(map, name, sym) { + if (sym->binding == STB_GLOBAL || sym->binding == STB_LOCAL) + found++; } - return 1; + + return found; } #define strdup_or_goto(str, label) \ @@ -2218,10 +2221,10 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, struct kmap *kmap = NULL; struct ref_reloc_sym *reloc_sym = NULL; struct symbol *sym; - struct rb_node *nd; struct probe_trace_event *tev; struct perf_probe_point *pp = &pev->point; struct probe_trace_point *tp; + int num_matched_functions; int ret, i; /* Init maps of given executable or kernel */ @@ -2238,10 +2241,8 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, * Load matched symbols: Since the different local symbols may have * same name but different addresses, this lists all the symbols. */ - num_matched_functions = 0; - looking_function_name = pp->function; - ret = map__load(map, probe_function_filter); - if (ret || num_matched_functions == 0) { + num_matched_functions = find_probe_functions(map, pp->function); + if (num_matched_functions == 0) { pr_err("Failed to find symbol %s in %s\n", pp->function, target ? : "kernel"); ret = -ENOENT; @@ -2253,7 +2254,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, goto out; } - if (!pev->uprobes) { + if (!pev->uprobes && !pp->retprobe) { kmap = map__kmap(map); reloc_sym = kmap->ref_reloc_sym; if (!reloc_sym) { @@ -2271,7 +2272,8 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, } ret = 0; - map__for_each_symbol(map, sym, nd) { + + map__for_each_symbol_by_name(map, pp->function, sym) { tev = (*tevs) + ret; tp = &tev->point; if (ret == num_matched_functions) { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index c7918f83b300..b5247d777f0e 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -989,8 +989,24 @@ static int debuginfo__find_probes(struct debuginfo *dbg, int ret = 0; #if _ELFUTILS_PREREQ(0, 142) + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + /* Get the call frame information from this dwarf */ - pf->cfi = dwarf_getcfi_elf(dwarf_getelf(dbg->dbg)); + elf = dwarf_getelf(dbg->dbg); + if (elf == NULL) + return -EINVAL; + + if (gelf_getehdr(elf, &ehdr) == NULL) + return -EINVAL; + + if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) && + shdr.sh_type == SHT_PROGBITS) { + pf->cfi = dwarf_getcfi_elf(elf); + } else { + pf->cfi = dwarf_getcfi(dbg->dbg); + } #endif off = 0; diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 16a475a7d492..6c6a6953fa93 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,7 +10,7 @@ util/ctype.c util/evlist.c util/evsel.c util/cpumap.c -util/hweight.c +../../lib/hweight.c util/thread_map.c util/util.c util/xyarray.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 3dda85ca50c1..d906d0ad5d40 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -768,7 +768,7 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, Py_DECREF(file); goto free_list; } - + Py_DECREF(file); } diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index cf69325b985f..8acd0df88b5c 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -137,16 +137,7 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts) static int get_max_rate(unsigned int *rate) { - char path[PATH_MAX]; - const char *procfs = procfs__mountpoint(); - - if (!procfs) - return -1; - - snprintf(path, PATH_MAX, - "%s/sys/kernel/perf_event_max_sample_rate", procfs); - - return filename__read_int(path, (int *) rate); + return sysctl__read_int("kernel/perf_event_max_sample_rate", (int *)rate); } static int record_opts__config_freq(struct record_opts *opts) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index d808a328f4dc..0c815a40a6e8 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -89,7 +89,7 @@ static void handler_call_die(const char *handler_name) /* * Insert val into into the dictionary and decrement the reference counter. - * This is necessary for dictionaries since PyDict_SetItemString() does not + * This is necessary for dictionaries since PyDict_SetItemString() does not * steal a reference, as opposed to PyTuple_SetItem(). */ static void pydict_set_item_string_decref(PyObject *dict, const char *key, PyObject *val) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 6ac62ae6b8fa..b0ce3d6e6231 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -274,7 +274,7 @@ void perf_tool__fill_defaults(struct perf_tool *tool) if (tool->id_index == NULL) tool->id_index = process_id_index_stub; } - + static void swap_sample_id_all(union perf_event *event, void *data) { void *end = (void *) event + event->header.size; @@ -533,15 +533,11 @@ int perf_session_queue_event(struct perf_session *s, union perf_event *event, return -ETIME; if (timestamp < oe->last_flush) { - WARN_ONCE(1, "Timestamp below last timeslice flush\n"); - - pr_oe_time(timestamp, "out of order event"); + pr_oe_time(timestamp, "out of order event\n"); pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n", oe->last_flush_type); - /* We could get out of order messages after forced flush. */ - if (oe->last_flush_type != OE_FLUSH__HALF) - return -EINVAL; + s->stats.nr_unordered_events++; } new = ordered_events__new(oe, timestamp, event); @@ -1118,6 +1114,9 @@ static void perf_session__warn_about_errors(const struct perf_session *session, "Do you have a KVM guest running and not using 'perf kvm'?\n", session->stats.nr_unprocessable_samples); } + + if (session->stats.nr_unordered_events != 0) + ui__warning("%u out of order events recorded.\n", session->stats.nr_unordered_events); } volatile int session_done; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 82a5596241a7..7a39c1ed8d37 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -291,7 +291,8 @@ sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) else { struct map *map = left->ms.map; left->srcline = get_srcline(map->dso, - map__rip_2objdump(map, left->ip)); + map__rip_2objdump(map, left->ip), + left->ms.sym, true); } } if (!right->srcline) { @@ -300,7 +301,8 @@ sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) else { struct map *map = right->ms.map; right->srcline = get_srcline(map->dso, - map__rip_2objdump(map, right->ip)); + map__rip_2objdump(map, right->ip), + right->ms.sym, true); } } return strcmp(right->srcline, left->srcline); @@ -1302,6 +1304,37 @@ static int __sort__hpp_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, return hse->se->se_snprintf(he, hpp->buf, hpp->size, len); } +static int64_t __sort__hpp_cmp(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b) +{ + struct hpp_sort_entry *hse; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + return hse->se->se_cmp(a, b); +} + +static int64_t __sort__hpp_collapse(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b) +{ + struct hpp_sort_entry *hse; + int64_t (*collapse_fn)(struct hist_entry *, struct hist_entry *); + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + collapse_fn = hse->se->se_collapse ?: hse->se->se_cmp; + return collapse_fn(a, b); +} + +static int64_t __sort__hpp_sort(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b) +{ + struct hpp_sort_entry *hse; + int64_t (*sort_fn)(struct hist_entry *, struct hist_entry *); + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + sort_fn = hse->se->se_sort ?: hse->se->se_cmp; + return sort_fn(a, b); +} + static struct hpp_sort_entry * __sort_dimension__alloc_hpp(struct sort_dimension *sd) { @@ -1320,9 +1353,9 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd) hse->hpp.entry = __sort__hpp_entry; hse->hpp.color = NULL; - hse->hpp.cmp = sd->entry->se_cmp; - hse->hpp.collapse = sd->entry->se_collapse ? : sd->entry->se_cmp; - hse->hpp.sort = sd->entry->se_sort ? : hse->hpp.collapse; + hse->hpp.cmp = __sort__hpp_cmp; + hse->hpp.collapse = __sort__hpp_collapse; + hse->hpp.sort = __sort__hpp_sort; INIT_LIST_HEAD(&hse->hpp.list); INIT_LIST_HEAD(&hse->hpp.sort_list); diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 77c180637138..c93fb0c5bd0b 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -8,6 +8,8 @@ #include "util/util.h" #include "util/debug.h" +#include "symbol.h" + #ifdef HAVE_LIBBFD_SUPPORT /* @@ -18,7 +20,7 @@ struct a2l_data { const char *input; - unsigned long addr; + u64 addr; bool found; const char *filename; @@ -145,7 +147,7 @@ static void addr2line_cleanup(struct a2l_data *a2l) free(a2l); } -static int addr2line(const char *dso_name, unsigned long addr, +static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line, struct dso *dso) { int ret = 0; @@ -191,7 +193,7 @@ void dso__free_a2l(struct dso *dso) #else /* HAVE_LIBBFD_SUPPORT */ -static int addr2line(const char *dso_name, unsigned long addr, +static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line_nr, struct dso *dso __maybe_unused) { @@ -250,7 +252,8 @@ void dso__free_a2l(struct dso *dso __maybe_unused) */ #define A2L_FAIL_LIMIT 123 -char *get_srcline(struct dso *dso, unsigned long addr) +char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, + bool show_sym) { char *file = NULL; unsigned line = 0; @@ -258,7 +261,7 @@ char *get_srcline(struct dso *dso, unsigned long addr) const char *dso_name; if (!dso->has_srcline) - return SRCLINE_UNKNOWN; + goto out; if (dso->symsrc_filename) dso_name = dso->symsrc_filename; @@ -289,7 +292,13 @@ out: dso->has_srcline = 0; dso__free_a2l(dso); } - return SRCLINE_UNKNOWN; + if (sym) { + if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? sym->name : "", + addr - sym->start) < 0) + return SRCLINE_UNKNOWN; + } else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0) + return SRCLINE_UNKNOWN; + return srcline; } void free_srcline(char *srcline) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index efc7eb6b8f0f..06fcd1bf98b6 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -11,6 +11,27 @@ #include <symbol/kallsyms.h> #include "debug.h" +#ifdef HAVE_CPLUS_DEMANGLE_SUPPORT +extern char *cplus_demangle(const char *, int); + +static inline char *bfd_demangle(void __maybe_unused *v, const char *c, int i) +{ + return cplus_demangle(c, i); +} +#else +#ifdef NO_DEMANGLE +static inline char *bfd_demangle(void __maybe_unused *v, + const char __maybe_unused *c, + int __maybe_unused i) +{ + return NULL; +} +#else +#define PACKAGE 'perf' +#include <bfd.h> +#endif +#endif + #ifndef HAVE_ELF_GETPHDRNUM_SUPPORT static int elf_getphdrnum(Elf *elf, size_t *dst) { diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index fa585c63f56a..d7efb03b3f9a 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -129,6 +129,7 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { void *tmp; + long offset; if (need_swap) { phdr->p_type = bswap_32(phdr->p_type); @@ -140,12 +141,13 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) continue; buf_size = phdr->p_filesz; + offset = phdr->p_offset; tmp = realloc(buf, buf_size); if (tmp == NULL) goto out_free; buf = tmp; - fseek(fp, phdr->p_offset, SEEK_SET); + fseek(fp, offset, SEEK_SET); if (fread(buf, buf_size, 1, fp) != 1) goto out_free; @@ -178,6 +180,7 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { void *tmp; + long offset; if (need_swap) { phdr->p_type = bswap_32(phdr->p_type); @@ -189,12 +192,13 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) continue; buf_size = phdr->p_filesz; + offset = phdr->p_offset; tmp = realloc(buf, buf_size); if (tmp == NULL) goto out_free; buf = tmp; - fseek(fp, phdr->p_offset, SEEK_SET); + fseek(fp, offset, SEEK_SET); if (fread(buf, buf_size, 1, fp) != 1) goto out_free; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index c24c5b83156c..a69066865a55 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -396,6 +396,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, const char *name) { struct rb_node *n; + struct symbol_name_rb_node *s; if (symbols == NULL) return NULL; @@ -403,7 +404,6 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, n = symbols->rb_node; while (n) { - struct symbol_name_rb_node *s; int cmp; s = rb_entry(n, struct symbol_name_rb_node, rb_node); @@ -414,10 +414,24 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, else if (cmp > 0) n = n->rb_right; else - return &s->sym; + break; } - return NULL; + if (n == NULL) + return NULL; + + /* return first symbol that has same name (if any) */ + for (n = rb_prev(n); n; n = rb_prev(n)) { + struct symbol_name_rb_node *tmp; + + tmp = rb_entry(n, struct symbol_name_rb_node, rb_node); + if (strcmp(tmp->sym.name, s->sym.name)) + break; + + s = tmp; + } + + return &s->sym; } struct symbol *dso__find_symbol(struct dso *dso, @@ -436,6 +450,17 @@ struct symbol *dso__next_symbol(struct symbol *sym) return symbols__next(sym); } +struct symbol *symbol__next_by_name(struct symbol *sym) +{ + struct symbol_name_rb_node *s = container_of(sym, struct symbol_name_rb_node, sym); + struct rb_node *n = rb_next(&s->rb_node); + + return n ? &rb_entry(n, struct symbol_name_rb_node, rb_node)->sym : NULL; +} + + /* + * Teturns first symbol that matched with @name. + */ struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, const char *name) { @@ -660,7 +685,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta, struct machine *machine = kmaps->machine; struct map *curr_map = map; struct symbol *pos; - int count = 0, moved = 0; + int count = 0, moved = 0; struct rb_root *root = &dso->symbols[map->type]; struct rb_node *next = rb_first(root); int kernel_range = 0; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index ded3ca7266de..1650dcb3a67b 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -23,27 +23,6 @@ #include "dso.h" -#ifdef HAVE_CPLUS_DEMANGLE_SUPPORT -extern char *cplus_demangle(const char *, int); - -static inline char *bfd_demangle(void __maybe_unused *v, const char *c, int i) -{ - return cplus_demangle(c, i); -} -#else -#ifdef NO_DEMANGLE -static inline char *bfd_demangle(void __maybe_unused *v, - const char __maybe_unused *c, - int __maybe_unused i) -{ - return NULL; -} -#else -#define PACKAGE 'perf' -#include <bfd.h> -#endif -#endif - /* * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP; * for newer versions we can use mmap to reduce memory usage: @@ -123,7 +102,8 @@ struct symbol_conf { demangle, demangle_kernel, filter_relative, - show_hist_headers; + show_hist_headers, + branch_callstack; const char *vmlinux_name, *kallsyms_name, *source_prefix, @@ -251,6 +231,7 @@ struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, u64 addr); struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, const char *name); +struct symbol *symbol__next_by_name(struct symbol *sym); struct symbol *dso__first_symbol(struct dso *dso, enum map_type type); struct symbol *dso__next_symbol(struct symbol *sym); diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 371219a6daf1..6edf535f65c2 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -185,6 +185,28 @@ static u64 elf_section_offset(int fd, const char *name) return offset; } +#ifndef NO_LIBUNWIND_DEBUG_FRAME +static int elf_is_exec(int fd, const char *name) +{ + Elf *elf; + GElf_Ehdr ehdr; + int retval = 0; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) + return 0; + if (gelf_getehdr(elf, &ehdr) == NULL) + goto out; + + retval = (ehdr.e_type == ET_EXEC); + +out: + elf_end(elf); + pr_debug("unwind: elf_is_exec(%s): %d\n", name, retval); + return retval; +} +#endif + struct table_entry { u32 start_ip_offset; u32 fde_offset; @@ -322,8 +344,12 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, #ifndef NO_LIBUNWIND_DEBUG_FRAME /* Check the .debug_frame section for unwinding info */ if (!read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) { + int fd = dso__data_fd(map->dso, ui->machine); + int is_exec = elf_is_exec(fd, map->dso->name); + unw_word_t base = is_exec ? 0 : map->start; + memset(&di, 0, sizeof(di)); - if (dwarf_find_debug_frame(0, &di, ip, 0, map->dso->name, + if (dwarf_find_debug_frame(0, &di, ip, base, map->dso->name, map->start, map->end)) return dwarf_search_unwind_table(as, ip, &di, pi, need_unwind_info, arg); diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index d5eab3f3323f..b86744f29eef 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -442,23 +442,6 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags) return (unsigned long) -1; } -int filename__read_int(const char *filename, int *value) -{ - char line[64]; - int fd = open(filename, O_RDONLY), err = -1; - - if (fd < 0) - return -1; - - if (read(fd, line, sizeof(line)) > 0) { - *value = atoi(line); - err = 0; - } - - close(fd); - return err; -} - int filename__read_str(const char *filename, char **buf, size_t *sizep) { size_t size = 0, alloc_size = 0; @@ -523,16 +506,9 @@ const char *get_filename_for_perf_kvm(void) int perf_event_paranoid(void) { - char path[PATH_MAX]; - const char *procfs = procfs__mountpoint(); int value; - if (!procfs) - return INT_MAX; - - scnprintf(path, PATH_MAX, "%s/sys/kernel/perf_event_paranoid", procfs); - - if (filename__read_int(path, &value)) + if (sysctl__read_int("kernel/perf_event_paranoid", &value)) return INT_MAX; return value; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 76d23d83eae5..027a5153495c 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -153,7 +153,7 @@ extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))) extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); extern int prefixcmp(const char *str, const char *prefix); -extern void set_buildid_dir(void); +extern void set_buildid_dir(const char *dir); static inline const char *skip_prefix(const char *str, const char *prefix) { @@ -269,35 +269,6 @@ void event_attr_init(struct perf_event_attr *attr); #define _STR(x) #x #define STR(x) _STR(x) -/* - * Determine whether some value is a power of two, where zero is - * *not* considered a power of two. - */ - -static inline __attribute__((const)) -bool is_power_of_2(unsigned long n) -{ - return (n != 0 && ((n & (n - 1)) == 0)); -} - -static inline unsigned next_pow2(unsigned x) -{ - if (!x) - return 1; - return 1ULL << (32 - __builtin_clz(x - 1)); -} - -static inline unsigned long next_pow2_l(unsigned long x) -{ -#if BITS_PER_LONG == 64 - if (x <= (1UL << 31)) - return next_pow2(x); - return (unsigned long)next_pow2(x >> 32) << 32; -#else - return next_pow2(x); -#endif -} - size_t hex_width(u64 v); int hex2u64(const char *ptr, u64 *val); @@ -337,11 +308,12 @@ static inline int path__join3(char *bf, size_t size, } struct dso; +struct symbol; -char *get_srcline(struct dso *dso, unsigned long addr); +char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, + bool show_sym); void free_srcline(char *srcline); -int filename__read_int(const char *filename, int *value); int filename__read_str(const char *filename, char **buf, size_t *sizep); int perf_event_paranoid(void); |