From cfc5acd4c80b875d2f739d6a93562034aee5563f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 23 Sep 2015 15:15:54 -0300 Subject: perf top: Filter symbols based on __map__is_kernel(map) Instead of using dso->kernel, this is equivalent at the moment, and helps in reducing the accesses to dso->kernel. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-1pc2v63iphtifovw3bv0bo1v@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index bdaf44f24d5d..38d4d6cac823 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -655,7 +655,7 @@ static int symbol_filter(struct map *map, struct symbol *sym) { const char *name = sym->name; - if (!map->dso->kernel) + if (!__map__is_kernel(map)) return 0; /* * ppc64 uses function descriptors and appends a '.' to the -- cgit v1.2.3 From 045b80dd03403b8e61a29460987c231317ebfbd4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 23 Sep 2015 15:38:55 -0300 Subject: perf hists browser: Use the map to determine if a DSO is being used as a kernel The map is what should say if an ELF (or some other format) image is being used for some particular purpose, as a kernel, host or guest. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-zufousvfar0710p4qj71c32d@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e1f28f4cdc8e..a4e9b370c037 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1527,7 +1527,7 @@ add_thread_opt(struct hist_browser *browser, struct popup_action *act, static int do_zoom_dso(struct hist_browser *browser, struct popup_action *act) { - struct dso *dso = act->dso; + struct map *map = act->ms.map; if (browser->hists->dso_filter) { pstack__remove(browser->pstack, &browser->hists->dso_filter); @@ -1535,11 +1535,11 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act) browser->hists->dso_filter = NULL; ui_helpline__pop(); } else { - if (dso == NULL) + if (map == NULL) return 0; ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"", - dso->kernel ? "the Kernel" : dso->short_name); - browser->hists->dso_filter = dso; + __map__is_kernel(map) ? "the Kernel" : map->dso->short_name); + browser->hists->dso_filter = map->dso; perf_hpp__set_elide(HISTC_DSO, true); pstack__push(browser->pstack, &browser->hists->dso_filter); } @@ -1551,17 +1551,18 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act) static int add_dso_opt(struct hist_browser *browser, struct popup_action *act, - char **optstr, struct dso *dso) + char **optstr, struct map *map) { - if (dso == NULL) + if (map == NULL) return 0; if (asprintf(optstr, "Zoom %s %s DSO", browser->hists->dso_filter ? "out of" : "into", - dso->kernel ? "the Kernel" : dso->short_name) < 0) + __map__is_kernel(map) ? "the Kernel" : map->dso->short_name) < 0) return 0; - act->dso = dso; + act->ms.map = map; + act->dso = map->dso; act->fn = do_zoom_dso; return 1; } @@ -1814,6 +1815,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, while (1) { struct thread *thread = NULL; struct dso *dso = NULL; + struct map *map = NULL; int choice = 0; int socked_id = -1; @@ -1823,7 +1825,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (browser->he_selection != NULL) { thread = hist_browser__selected_thread(browser); - dso = browser->selection->map ? browser->selection->map->dso : NULL; + map = browser->selection->map; + if (map) + dso = map->dso; socked_id = browser->he_selection->socket; } switch (key) { @@ -2014,7 +2018,7 @@ skip_annotation: nr_options += add_thread_opt(browser, &actions[nr_options], &options[nr_options], thread); nr_options += add_dso_opt(browser, &actions[nr_options], - &options[nr_options], dso); + &options[nr_options], map); nr_options += add_map_opt(browser, &actions[nr_options], &options[nr_options], browser->selection ? -- cgit v1.2.3 From ab9c2bdc8947482057b81258c0128952763661cb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 23 Sep 2015 15:45:20 -0300 Subject: perf tools: Use __map__is_kernel() when synthesizing kernel module mmap records Equivalent and removes one more case of using dso->kernel. # perf record -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.768 MB perf.data (30 samples) ] Before: [root@zoo ~]# perf script --show-task --show-mmap | head -3 swapper 0 [0] 0.0: PERF_RECORD_MMAP -1/0: [0xffffffff81000000(0x1f000000) @ 0xffffffff81000000]: x [kernel.kallsyms]_text swapper 0 [0] 0.0: PERF_RECORD_MMAP -1/0: [0xffffffffa0000000(0xa000) @ 0]: x /lib/modules/4.3.0-rc1+/kernel/drivers/acpi/video.ko swapper 0 [0] 0.0: PERF_RECORD_MMAP -1/0: [0xffffffffa000a000(0x5000) @ 0]: x /lib/modules/4.3.0-rc1+/kernel/drivers/i2c/algos/i2c-algo-bit.ko # # perf script --show-task --show-mmap | head -3 swapper 0 [0] 0.0: PERF_RECORD_MMAP -1/0: [0xffffffff81000000(0x1f000000) @ 0xffffffff81000000]: x [kernel.kallsyms]_text swapper 0 [0] 0.0: PERF_RECORD_MMAP -1/0: [0xffffffffa0000000(0xa000) @ 0]: x /lib/modules/4.3.0-rc1+/kernel/drivers/acpi/video.ko swapper 0 [0] 0.0: PERF_RECORD_MMAP -1/0: [0xffffffffa000a000(0x5000) @ 0]: x /lib/modules/4.3.0-rc1+/kernel/drivers/i2c/algos/i2c-algo-bit.ko # Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-b65xe578dwq22mzmmj5y94wr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 6214ad47d554..b1bb348ec3b6 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -378,7 +378,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, for (pos = maps__first(maps); pos; pos = map__next(pos)) { size_t size; - if (pos->dso->kernel) + if (__map__is_kernel(pos)) continue; size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); -- cgit v1.2.3 From dc240c5dc2a02e335c5bcb50ad3a1274818c8609 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 19 Sep 2015 16:47:07 +0200 Subject: tools lib api fs: Store tracing mountpoint for better error message Storing the actual tracing path mountpoint to display correct error message hint ('Hint:' line). The error hint rediscovers mountpoints, but it could be different from what we actually used in tracing path. Before we'd display debugfs mount even though tracefs was used: $ perf record -e sched:sched_krava ls event syntax error: 'sched:sched_krava' \___ can't access trace events Error: No permissions to read /sys/kernel/debug/tracing/events/sched/sched_krava Hint: Try 'sudo mount -o remount,mode=755 /sys/kernel/debug' ... After this change, correct mountpoint is displayed: $ perf record -e sched:sched_krava ls event syntax error: 'sched:sched_krava' \___ can't access trace events Error: No permissions to read /sys/kernel/debug/tracing/events/sched/sched_krava Hint: Try 'sudo mount -o remount,mode=755 /sys/kernel/debug/tracing' ... Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Matt Fleming Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Raphael Beamonte Link: http://lkml.kernel.org/r/1442674027-19427-1-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/tracing_path.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c index 38aca2dd1946..0406a7d5c891 100644 --- a/tools/lib/api/fs/tracing_path.c +++ b/tools/lib/api/fs/tracing_path.c @@ -12,12 +12,14 @@ #include "tracing_path.h" +char tracing_mnt[PATH_MAX + 1] = "/sys/kernel/debug"; char tracing_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing"; char tracing_events_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing/events"; static void __tracing_path_set(const char *tracing, const char *mountpoint) { + snprintf(tracing_mnt, sizeof(tracing_mnt), "%s", mountpoint); snprintf(tracing_path, sizeof(tracing_path), "%s/%s", mountpoint, tracing); snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s", @@ -109,19 +111,10 @@ static int strerror_open(int err, char *buf, size_t size, const char *filename) "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'"); break; case EACCES: { - const char *mountpoint = debugfs__mountpoint(); - - if (!access(mountpoint, R_OK) && strncmp(filename, "tracing/", 8) == 0) { - const char *tracefs_mntpoint = tracefs__mountpoint(); - - if (tracefs_mntpoint) - mountpoint = tracefs__mountpoint(); - } - snprintf(buf, size, "Error:\tNo permissions to read %s/%s\n" "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", - tracing_events_path, filename, mountpoint); + tracing_events_path, filename, tracing_mnt); } break; default: -- cgit v1.2.3 From ab6201d09b1840c7ffcd6606c1d3dae68b8b3048 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 23 Sep 2015 12:33:56 +0200 Subject: tools build: Add Makefile.include To ease up build framework code setup for users. More shared code will be added in the following patches. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1443004442-32660-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Documentation/Build.txt | 45 ++++++++++++++++++++++++++----------- tools/build/Makefile.include | 1 + tools/build/tests/ex/Makefile | 3 ++- tools/lib/api/Makefile | 2 +- tools/lib/bpf/Makefile | 2 +- tools/lib/lockdep/Makefile | 2 +- tools/perf/Makefile.perf | 2 +- 7 files changed, 39 insertions(+), 18 deletions(-) create mode 100644 tools/build/Makefile.include diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt index aa5e092c4352..88824359d595 100644 --- a/tools/build/Documentation/Build.txt +++ b/tools/build/Documentation/Build.txt @@ -11,8 +11,9 @@ Unlike the kernel we don't have a single build object 'obj-y' list that where we setup source objects, but we support more. This allows one 'Build' file to carry a sources list for multiple build objects. -a) Build framework makefiles ----------------------------- + +Build framework makefiles +------------------------- The build framework consists of 2 Makefiles: @@ -23,7 +24,7 @@ While the 'Build.include' file contains just some generic definitions, the 'Makefile.build' file is the makefile used from the outside. It's interface/usage is following: - $ make -f tools/build/Makefile srctree=$(KSRC) dir=$(DIR) obj=$(OBJECT) + $ make -f tools/build/Makefile.build srctree=$(KSRC) dir=$(DIR) obj=$(OBJECT) where: @@ -38,8 +39,9 @@ called $(OBJECT)-in.o: which includes all compiled sources described in 'Build' makefiles. -a) Build makefiles ------------------- + +Build makefiles +--------------- The user supplies 'Build' makefiles that contains a objects list, and connects the build to nested directories. @@ -95,8 +97,24 @@ It's only a matter of 2 single commands to create the final binaries: You can check the 'ex' example in 'tools/build/tests/ex' for more details. -b) Rules --------- + +Makefile.include +---------------- + +The tools/build/Makefile.include makefile could be included +via user makefiles to get usefull definitions. + +It defines following interface: + + - build macro definition: + build := -f $(srctree)/tools/build/Makefile.build dir=. obj + + to make it easier to invoke build like: + make $(build)=ex + + +Rules +----- The build framework provides standard compilation rules to handle .S and .c compilation. @@ -104,8 +122,9 @@ compilation. It's possible to include special rule if needed (like we do for flex or bison code generation). -c) CFLAGS ---------- + +CFLAGS +------ It's possible to alter the standard object C flags in the following way: @@ -115,8 +134,8 @@ It's possible to alter the standard object C flags in the following way: This C flags changes has the scope of the Build makefile they are defined in. -d) Dependencies ---------------- +Dependencies +------------ For each built object file 'a.o' the '.a.cmd' is created and holds: @@ -130,8 +149,8 @@ All existing '.cmd' files are included in the Build process to follow properly the dependencies and trigger a rebuild when necessary. -e) Single rules ---------------- +Single rules +------------ It's possible to build single object file by choice, like: diff --git a/tools/build/Makefile.include b/tools/build/Makefile.include new file mode 100644 index 000000000000..91bc60616de5 --- /dev/null +++ b/tools/build/Makefile.include @@ -0,0 +1 @@ +build := -f $(srctree)/tools/build/Makefile.build dir=. obj diff --git a/tools/build/tests/ex/Makefile b/tools/build/tests/ex/Makefile index 52d2476073a3..a8f596e37fd2 100644 --- a/tools/build/tests/ex/Makefile +++ b/tools/build/tests/ex/Makefile @@ -3,7 +3,8 @@ export CC := gcc export LD := ld export AR := ar -build := -f $(srctree)/tools/build/Makefile.build dir=. obj +include $(srctree)/tools/build/Makefile.include + ex: ex-in.o libex-in.o gcc -o $@ $^ diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index fe1b02c2c95b..8806ea7c2f99 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -21,10 +21,10 @@ CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 RM = rm -f -build := -f $(srctree)/tools/build/Makefile.build dir=. obj API_IN := $(OUTPUT)libapi-in.o export srctree OUTPUT CC LD CFLAGS V +include $(srctree)/tools/build/Makefile.include all: $(LIBFILE) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index e630f9fc4fb6..c66ade68d4a1 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -124,7 +124,7 @@ endif MAKEOVERRIDES= export srctree OUTPUT CC LD CFLAGS V -build := -f $(srctree)/tools/build/Makefile.build dir=. obj +include $(srctree)/tools/build/Makefile.include BPF_IN := $(OUTPUT)libbpf-in.o LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile index 18ffccf00426..d12081da383b 100644 --- a/tools/lib/lockdep/Makefile +++ b/tools/lib/lockdep/Makefile @@ -94,7 +94,7 @@ else endif export srctree OUTPUT CC LD CFLAGS V -build := -f $(srctree)/tools/build/Makefile.build dir=. obj +include $(srctree)/tools/build/Makefile.include do_compile_shared_library = \ ($(print_shared_lib_compile) \ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6c5c699002cb..6dec86665acc 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -297,7 +297,7 @@ strip: $(PROGRAMS) $(OUTPUT)perf PERF_IN := $(OUTPUT)perf-in.o export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK -build := -f $(srctree)/tools/build/Makefile.build dir=. obj +include $(srctree)/tools/build/Makefile.include $(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE $(Q)$(MAKE) $(build)=perf -- cgit v1.2.3 From 0c00c3fb4e4a6ff714b7ad864f58e0fb33b3534c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 23 Sep 2015 12:33:57 +0200 Subject: tools build: Add test for missing include The current build framework fails to cope with header file removal. The reason is that the removed header file stays in the .cmd file target rule and forces the build to fail. This issue is fixed and explained in the following patches. Adding a new build test that simulates header removal. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1443004442-32660-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/tests/ex/Build | 1 + tools/build/tests/ex/Makefile | 2 +- tools/build/tests/ex/ex.c | 2 ++ tools/build/tests/ex/inc.c | 8 ++++++++ tools/build/tests/run.sh | 27 +++++++++++++++++++++++++++ 5 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 tools/build/tests/ex/inc.c diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build index 429c7d452101..4d502f9b1a50 100644 --- a/tools/build/tests/ex/Build +++ b/tools/build/tests/ex/Build @@ -4,6 +4,7 @@ ex-y += b.o ex-y += b.o ex-y += empty/ ex-y += empty2/ +ex-y += inc.o libex-y += c.o libex-y += d.o diff --git a/tools/build/tests/ex/Makefile b/tools/build/tests/ex/Makefile index a8f596e37fd2..f279b84cb859 100644 --- a/tools/build/tests/ex/Makefile +++ b/tools/build/tests/ex/Makefile @@ -1,4 +1,4 @@ -export srctree := ../../../.. +export srctree := $(abspath ../../../..) export CC := gcc export LD := ld export AR := ar diff --git a/tools/build/tests/ex/ex.c b/tools/build/tests/ex/ex.c index dc42eb2e1a67..57de6074d252 100644 --- a/tools/build/tests/ex/ex.c +++ b/tools/build/tests/ex/ex.c @@ -5,6 +5,7 @@ int c(void); int d(void); int e(void); int f(void); +int inc(void); int main(void) { @@ -14,6 +15,7 @@ int main(void) d(); e(); f(); + inc(); return 0; } diff --git a/tools/build/tests/ex/inc.c b/tools/build/tests/ex/inc.c new file mode 100644 index 000000000000..c20f1e9033a3 --- /dev/null +++ b/tools/build/tests/ex/inc.c @@ -0,0 +1,8 @@ +#ifdef INCLUDE +#include "krava.h" +#endif + +int inc(void) +{ + return 0; +} diff --git a/tools/build/tests/run.sh b/tools/build/tests/run.sh index 5494f8ea7567..44d2a0fade67 100755 --- a/tools/build/tests/run.sh +++ b/tools/build/tests/run.sh @@ -34,9 +34,36 @@ function test_ex_suffix { make -C ex V=1 clean > /dev/null 2>&1 rm -f ex.out } + +function test_ex_include { + make -C ex V=1 clean > ex.out 2>&1 + + # build with krava.h include + touch ex/krava.h + make -C ex V=1 CFLAGS=-DINCLUDE >> ex.out 2>&1 + + if [ ! -x ./ex/ex ]; then + echo FAILED + exit -1 + fi + + # build without the include + rm -f ex/krava.h ex/ex + make -C ex V=1 >> ex.out 2>&1 + + if [ ! -x ./ex/ex ]; then + echo FAILED + exit -1 + fi + + make -C ex V=1 clean > /dev/null 2>&1 + rm -f ex.out +} + echo -n Testing.. test_ex test_ex_suffix +test_ex_include echo OK -- cgit v1.2.3 From 9f7ef9854e800bc3bab3d9a527e8f8f960eec1a6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 23 Sep 2015 12:33:58 +0200 Subject: tools build: Add fixdep dependency helper For dependency tracking we currently use targets that fall out of the gcc -MD command. We store this info in the .cmd file and include as makefile during the build. This format put object as target and all the c and header files as dependencies, like: util/abspath.o: util/abspath.c /usr/include/stdc-predef.h util/cache.h \ /usr/include/bits/wordsize.h /usr/include/gnu/stubs.h \ ... If any of those dependency header files (krava.h below) is removed the build fails on: make[1]: *** No rule to make target 'krava.h', needed by 'inc.o'. Stop. This patch adds fixdep helper, that is used by kbuild to alter the shape of the object dependencies like: source_util/abspath.o := util/abspath.c deps_util/abspath.o := \ /usr/include/stdc-predef.h \ util/cache.h \ ... util/abspath.o: $(deps_util/abspath.o) $(deps_util/abspath.o): With this format the header removal won't make the build fail, because it'll be picked up by the last empty target defined for each header. As previously mentioned the fixdep tool is taken from kbuild. It's not complete backport, only the part that alters the standard dependency info was taken, the part that adds the CONFIG_* dependency logic will be probably taken later on. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Kai Germaschewski Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1443004442-32660-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Build | 1 + tools/build/Makefile | 43 +++++++++++++ tools/build/fixdep.c | 168 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 212 insertions(+) create mode 100644 tools/build/Build create mode 100644 tools/build/Makefile create mode 100644 tools/build/fixdep.c diff --git a/tools/build/Build b/tools/build/Build new file mode 100644 index 000000000000..63a6c34c0c88 --- /dev/null +++ b/tools/build/Build @@ -0,0 +1 @@ +fixdep-y := fixdep.o diff --git a/tools/build/Makefile b/tools/build/Makefile new file mode 100644 index 000000000000..a93036272d43 --- /dev/null +++ b/tools/build/Makefile @@ -0,0 +1,43 @@ +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +endif + +include $(srctree)/tools//scripts/Makefile.include + +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,LD,$(CROSS_COMPILE)ld) + +ifeq ($(V),1) + Q = +else + Q = @ +endif + +export Q srctree CC LD + +MAKEFLAGS := --no-print-directory +build := -f $(srctree)/tools/build/Makefile.build dir=. obj + +all: fixdep + +clean: + $(call QUIET_CLEAN, fixdep) + $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)rm -f fixdep + +$(OUTPUT)fixdep-in.o: FORCE + $(Q)$(MAKE) $(build)=fixdep + +$(OUTPUT)fixdep: $(OUTPUT)fixdep-in.o + $(QUIET_LINK)$(CC) $(LDFLAGS) -o $@ $< + +FORCE: + +.PHONY: FORCE diff --git a/tools/build/fixdep.c b/tools/build/fixdep.c new file mode 100644 index 000000000000..1521d36cef0d --- /dev/null +++ b/tools/build/fixdep.c @@ -0,0 +1,168 @@ +/* + * "Optimize" a list of dependencies as spit out by gcc -MD + * for the build framework. + * + * Original author: + * Copyright 2002 by Kai Germaschewski + * + * This code has been borrowed from kbuild's fixdep (scripts/basic/fixdep.c), + * Please check it for detailed explanation. This fixdep borow only the + * base transformation of dependecies without the CONFIG mangle. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +char *target; +char *depfile; +char *cmdline; + +static void usage(void) +{ + fprintf(stderr, "Usage: fixdep \n"); + exit(1); +} + +/* + * Print out the commandline prefixed with cmd_ := + */ +static void print_cmdline(void) +{ + printf("cmd_%s := %s\n\n", target, cmdline); +} + +/* + * Important: The below generated source_foo.o and deps_foo.o variable + * assignments are parsed not only by make, but also by the rather simple + * parser in scripts/mod/sumversion.c. + */ +static void parse_dep_file(void *map, size_t len) +{ + char *m = map; + char *end = m + len; + char *p; + char s[PATH_MAX]; + int is_target; + int saw_any_target = 0; + int is_first_dep = 0; + + while (m < end) { + /* Skip any "white space" */ + while (m < end && (*m == ' ' || *m == '\\' || *m == '\n')) + m++; + /* Find next "white space" */ + p = m; + while (p < end && *p != ' ' && *p != '\\' && *p != '\n') + p++; + /* Is the token we found a target name? */ + is_target = (*(p-1) == ':'); + /* Don't write any target names into the dependency file */ + if (is_target) { + /* The /next/ file is the first dependency */ + is_first_dep = 1; + } else { + /* Save this token/filename */ + memcpy(s, m, p-m); + s[p - m] = 0; + + /* + * Do not list the source file as dependency, + * so that kbuild is not confused if a .c file + * is rewritten into .S or vice versa. Storing + * it in source_* is needed for modpost to + * compute srcversions. + */ + if (is_first_dep) { + /* + * If processing the concatenation of + * multiple dependency files, only + * process the first target name, which + * will be the original source name, + * and ignore any other target names, + * which will be intermediate temporary + * files. + */ + if (!saw_any_target) { + saw_any_target = 1; + printf("source_%s := %s\n\n", + target, s); + printf("deps_%s := \\\n", + target); + } + is_first_dep = 0; + } else + printf(" %s \\\n", s); + } + /* + * Start searching for next token immediately after the first + * "whitespace" character that follows this token. + */ + m = p + 1; + } + + if (!saw_any_target) { + fprintf(stderr, "fixdep: parse error; no targets found\n"); + exit(1); + } + + printf("\n%s: $(deps_%s)\n\n", target, target); + printf("$(deps_%s):\n", target); +} + +static void print_deps(void) +{ + struct stat st; + int fd; + void *map; + + fd = open(depfile, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "fixdep: error opening depfile: "); + perror(depfile); + exit(2); + } + if (fstat(fd, &st) < 0) { + fprintf(stderr, "fixdep: error fstat'ing depfile: "); + perror(depfile); + exit(2); + } + if (st.st_size == 0) { + fprintf(stderr, "fixdep: %s is empty\n", depfile); + close(fd); + return; + } + map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if ((long) map == -1) { + perror("fixdep: mmap"); + close(fd); + return; + } + + parse_dep_file(map, st.st_size); + + munmap(map, st.st_size); + + close(fd); +} + +int main(int argc, char **argv) +{ + if (argc != 4) + usage(); + + depfile = argv[1]; + target = argv[2]; + cmdline = argv[3]; + + print_cmdline(); + print_deps(); + + return 0; +} -- cgit v1.2.3 From 275e2d95591e2714d6b541d4e26959381d6b2705 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 23 Sep 2015 12:33:59 +0200 Subject: tools build: Move dependency copy into function So it's easier to add more functionality in the following commit. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1443004442-32660-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Build.include | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/build/Build.include b/tools/build/Build.include index 4c8daaccb82a..851c420098e7 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -54,15 +54,20 @@ make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1))))) # PHONY targets skipped in both cases. any-prereq = $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^),$^) +### +# Copy dependency data into .cmd file +# - gcc -M dependency info +# - command line to create object 'cmd_object :=' +dep-cmd = cat $(depfile) > $(dot-target).cmd; \ + printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd + ### # if_changed_dep - execute command if any prerequisite is newer than # target, or command line has changed and update # dependencies in the cmd file if_changed_dep = $(if $(strip $(any-prereq) $(arg-check)), \ @set -e; \ - $(echo-cmd) $(cmd_$(1)); \ - cat $(depfile) > $(dot-target).cmd; \ - printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd) + $(echo-cmd) $(cmd_$(1)) && $(dep-cmd)) # if_changed - execute command if any prerequisite is newer than # target, or command line has changed -- cgit v1.2.3 From 9fb81323eb3085b6a47fe81d78541958ae7eaea3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 23 Sep 2015 12:34:00 +0200 Subject: tools build: Make the fixdep helper part of the build process Making the fixdep helper to be invoked within dep-cmd. Each user of the build framework needs to make sure fixdep exists before executing the build itself. If the build doesn't find fixdep, it falls back to the old style dependency tracking. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1443004442-32660-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Build.include | 10 ++++++++-- tools/build/Makefile.build | 7 +++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/build/Build.include b/tools/build/Build.include index 851c420098e7..4d000bc959b4 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -58,8 +58,14 @@ any-prereq = $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^),$^) # Copy dependency data into .cmd file # - gcc -M dependency info # - command line to create object 'cmd_object :=' -dep-cmd = cat $(depfile) > $(dot-target).cmd; \ - printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd +dep-cmd = $(if $(wildcard $(fixdep)), \ + $(fixdep) $(depfile) $@ '$(make-cmd)' > $(dot-target).tmp; \ + rm -f $(depfile); \ + mv -f $(dot-target).tmp $(dot-target).cmd, \ + printf '\# cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \ + printf '\# using basic dep data\n\n' >> $(dot-target).cmd; \ + cat $(depfile) >> $(dot-target).cmd; \ + printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd) ### # if_changed_dep - execute command if any prerequisite is newer than diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 0c5f485521d6..4a96473b180f 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -21,6 +21,13 @@ endif build-dir := $(srctree)/tools/build +# Define $(fixdep) for dep-cmd function +ifeq ($(OUTPUT),) + fixdep := $(build-dir)/fixdep +else + fixdep := $(OUTPUT)/fixdep +endif + # Generic definitions include $(build-dir)/Build.include -- cgit v1.2.3 From 324c824ade1cad094a21e6177b9aa7977146feeb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 23 Sep 2015 12:34:01 +0200 Subject: perf tools: Rename the 'single_dep' target to 'prepare' And use the new 'prepare' target for the $(PERF_IN) target. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1443004442-32660-7-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6dec86665acc..25c1753ffbeb 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -299,7 +299,7 @@ PERF_IN := $(OUTPUT)perf-in.o export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK include $(srctree)/tools/build/Makefile.include -$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE +$(PERF_IN): prepare FORCE $(Q)$(MAKE) $(build)=perf $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) @@ -349,27 +349,27 @@ endif __build-dir = $(subst $(OUTPUT),,$(dir $@)) build-dir = $(if $(__build-dir),$(__build-dir),.) -single_dep: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h +prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h -$(OUTPUT)%.o: %.c single_dep FORCE +$(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%.i: %.c single_dep FORCE +$(OUTPUT)%.i: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%.s: %.c single_dep FORCE +$(OUTPUT)%.s: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%-bison.o: %.c single_dep FORCE +$(OUTPUT)%-bison.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%-flex.o: %.c single_dep FORCE +$(OUTPUT)%-flex.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%.o: %.S single_dep FORCE +$(OUTPUT)%.o: %.S prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%.i: %.S single_dep FORCE +$(OUTPUT)%.i: %.S prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ $(OUTPUT)perf-%: %.o $(PERFLIBS) @@ -591,6 +591,6 @@ FORCE: .PHONY: all install clean config-clean strip install-gtk .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell -.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep +.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare .PHONY: libtraceevent_plugins -- cgit v1.2.3 From 7c422f5572667fef0db38d2046ecce69dcf0afc8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 23 Sep 2015 12:34:02 +0200 Subject: tools build: Build fixdep helper from perf and basic libs Adding the fixdep target into the Makefile.include to ease up building of fixdep helper, that needs to be built before we dive in to the build itself. The user can invoke the fixdep target to build the helper. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1443004442-32660-8-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Documentation/Build.txt | 7 +++++++ tools/build/Makefile.include | 5 +++++ tools/build/tests/ex/Makefile | 8 +++++--- tools/lib/api/Makefile | 4 +++- tools/lib/bpf/Makefile | 4 +++- tools/lib/lockdep/Makefile | 4 +++- tools/perf/Makefile.perf | 12 ++++++------ 7 files changed, 32 insertions(+), 12 deletions(-) diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt index 88824359d595..a47bffbae159 100644 --- a/tools/build/Documentation/Build.txt +++ b/tools/build/Documentation/Build.txt @@ -113,6 +113,13 @@ It defines following interface: make $(build)=ex +Fixdep +------ +It is necessary to build the fixdep helper before invoking the build. +The Makefile.include file adds the fixdep target, that could be +invoked by the user. + + Rules ----- diff --git a/tools/build/Makefile.include b/tools/build/Makefile.include index 91bc60616de5..6572bb023543 100644 --- a/tools/build/Makefile.include +++ b/tools/build/Makefile.include @@ -1 +1,6 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj + +fixdep: + $(Q)$(MAKE) -C $(srctree)/tools/build fixdep + +.PHONY: fixdep diff --git a/tools/build/tests/ex/Makefile b/tools/build/tests/ex/Makefile index f279b84cb859..c50d5782ad5a 100644 --- a/tools/build/tests/ex/Makefile +++ b/tools/build/tests/ex/Makefile @@ -3,18 +3,20 @@ export CC := gcc export LD := ld export AR := ar +ex: + include $(srctree)/tools/build/Makefile.include ex: ex-in.o libex-in.o gcc -o $@ $^ -ex.%: FORCE +ex.%: fixdep FORCE make -f $(srctree)/tools/build/Makefile.build dir=. $@ -ex-in.o: FORCE +ex-in.o: fixdep FORCE make $(build)=ex -libex-in.o: FORCE +libex-in.o: fixdep FORCE make $(build)=libex clean: diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 8806ea7c2f99..d85904dc9b38 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -23,10 +23,12 @@ RM = rm -f API_IN := $(OUTPUT)libapi-in.o +all: + export srctree OUTPUT CC LD CFLAGS V include $(srctree)/tools/build/Makefile.include -all: $(LIBFILE) +all: fixdep $(LIBFILE) $(API_IN): FORCE @$(MAKE) $(build)=libapi diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index c66ade68d4a1..fc9af57b666e 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -123,6 +123,8 @@ endif # the same command line setup. MAKEOVERRIDES= +all: + export srctree OUTPUT CC LD CFLAGS V include $(srctree)/tools/build/Makefile.include @@ -133,7 +135,7 @@ CMD_TARGETS = $(LIB_FILE) TARGETS = $(CMD_TARGETS) -all: $(VERSION_FILES) all_cmd +all: fixdep $(VERSION_FILES) all_cmd all_cmd: $(CMD_TARGETS) diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile index d12081da383b..7e319afac78a 100644 --- a/tools/lib/lockdep/Makefile +++ b/tools/lib/lockdep/Makefile @@ -93,6 +93,8 @@ else print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; endif +all: + export srctree OUTPUT CC LD CFLAGS V include $(srctree)/tools/build/Makefile.include @@ -109,7 +111,7 @@ CMD_TARGETS = $(LIB_FILE) TARGETS = $(CMD_TARGETS) -all: all_cmd +all: fixdep all_cmd all_cmd: $(CMD_TARGETS) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 25c1753ffbeb..56517d304772 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -306,7 +306,7 @@ $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \ $(PERF_IN) $(LIBS) -o $@ -$(GTK_IN): FORCE +$(GTK_IN): fixdep FORCE $(Q)$(MAKE) $(build)=gtk $(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS) @@ -349,7 +349,7 @@ endif __build-dir = $(subst $(OUTPUT),,$(dir $@)) build-dir = $(if $(__build-dir),$(__build-dir),.) -prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h +prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -389,7 +389,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) LIBPERF_IN := $(OUTPUT)libperf-in.o -$(LIBPERF_IN): FORCE +$(LIBPERF_IN): fixdep FORCE $(Q)$(MAKE) $(build)=libperf $(LIB_FILE): $(LIBPERF_IN) @@ -397,10 +397,10 @@ $(LIB_FILE): $(LIBPERF_IN) LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) -$(LIBTRACEEVENT): FORCE +$(LIBTRACEEVENT): fixdep FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a -libtraceevent_plugins: FORCE +libtraceevent_plugins: fixdep FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins $(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins @@ -413,7 +413,7 @@ $(LIBTRACEEVENT)-clean: install-traceevent-plugins: $(LIBTRACEEVENT) $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins -$(LIBAPI): FORCE +$(LIBAPI): fixdep FORCE $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a $(LIBAPI)-clean: -- cgit v1.2.3 From e1791347b5d57d13326cf0114df1a3f3b1c4ca24 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:32 +0300 Subject: perf auxtrace: Fix 'instructions' period of zero Instruction tracing options (i.e. --itrace) include an option for sampling instructions at an arbitrary period. e.g. --itrace=i10us means make an 'instructions' sample for every 10us of trace. Currently the logic does not distinguish between a period of zero and no period being specified at all, so it gets treated as the default period which is 100000. That doesn't really make sense. Fix it so that zero period is accepted and treated as meaning "as often as possible". In the case of Intel PT that is the same as a period of 1 and a unit of 'instructions' (i.e. --itrace=i1i). Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-2-git-send-email-adrian.hunter@intel.com [ Add a few lines describing this in the Documentation/intel-pt.txt file ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-pt.txt | 5 +++++ tools/perf/util/auxtrace.c | 4 +++- tools/perf/util/intel-pt.c | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index c94c9de3173e..886612b50961 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -707,6 +707,11 @@ on the sample is *not* adjusted and reflects the last known value of TSC. For Intel PT, the default period is 100us. +Setting it to a zero period means "as often as possible". + +In the case of Intel PT that is the same as a period of 1 and a unit of +'instructions' (i.e. --itrace=i1i). + Also the call chain size (default 16, max. 1024) for instructions or transactions events can be specified. e.g. diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a980e7c50ee0..c4993b2e6c50 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -950,6 +950,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, const char *p; char *endptr; bool period_type_set = false; + bool period_set = false; synth_opts->set = true; @@ -971,6 +972,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, p += 1; if (isdigit(*p)) { synth_opts->period = strtoull(p, &endptr, 10); + period_set = true; p = endptr; while (*p == ' ' || *p == ',') p += 1; @@ -1053,7 +1055,7 @@ out: if (!period_type_set) synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; - if (!synth_opts->period) + if (!period_set) synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; } diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 38942e1eac8f..c8bb5ca6a157 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -720,7 +720,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, if (!params.period) { params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS; - params.period = 1000; + params.period = 1; } } -- cgit v1.2.3 From d062ac16f53d1a24047bcc9eded5514a71c363b8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:33 +0300 Subject: perf report: Fix sample type validation for synthesized callchains Processing instruction tracing data (e.g. Intel PT) can synthesize callchains e.g. $ perf record -e intel_pt//u uname $ perf report --stdio --itrace=ige However perf report's callgraph option gets extra validation, so: $ perf report --stdio --itrace=ige -gflat Error: Selected -g or --branch-history but no callchain data. Did you call 'perf record' without -g? # To display the perf.data header info, # please use --header/--header-only options. # Fix the validation to know about instruction tracing options so above command works. A side-effect of the change is that the default option to accumulate the callchain of child functions comes into force. To get the previous behaviour the --no-children option can be used e.g. $ perf report --stdio --itrace=ige -gflat --no-children Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e4e3f1432622..0d53b485a87b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -214,6 +214,12 @@ static int report__setup_sample_type(struct report *rep) u64 sample_type = perf_evlist__combined_sample_type(session->evlist); bool is_pipe = perf_data_file__is_pipe(session->file); + if (session->itrace_synth_opts->callchain || + (!is_pipe && + perf_header__has_feat(&session->header, HEADER_AUXTRACE) && + !session->itrace_synth_opts->set)) + sample_type |= PERF_SAMPLE_CALLCHAIN; + if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { if (sort__has_parent) { ui__error("Selected --sort parent, but no " -- cgit v1.2.3 From 9992c2d50a73f442653968a98a9e5f3bf4e769e9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:34 +0300 Subject: perf intel-pt: Fix potential loop forever TSC packets contain only 7 bytes of TSC. The 8th byte is assumed to change so infrequently that its value can be inferred. However the logic must cater for a 7 byte wraparound, which it does by adding 1 to the top byte. The existing code was doing that with a while loop even though the addition should only need to be done once. That logic won't work (will loop forever) if TSC wraps around at the 8th byte. Theoretically that would take at least 10 years, unless something else went wrong. And what else could go wrong. Well, if the chunks of trace data are processed out of order, it will make it look like the 7-byte TSC has gone backwards (i.e. wrapped). If that happens 256 times then stuck in the while loop it will be. Fix that by getting rid of the unnecessary while loop. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 22ba50224319..9409d014b46c 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -650,7 +650,7 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) if (data->from_mtc && timestamp < data->timestamp && data->timestamp - timestamp < decoder->tsc_slip) return 1; - while (timestamp < data->timestamp) + if (timestamp < data->timestamp) timestamp += (1ULL << 56); if (pkt_info->last_packet_type != INTEL_PT_CYC) { if (data->from_mtc) @@ -1191,7 +1191,7 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) timestamp); timestamp = decoder->timestamp; } - while (timestamp < decoder->timestamp) { + if (timestamp < decoder->timestamp) { intel_pt_log_to("Wraparound timestamp", timestamp); timestamp += (1ULL << 56); decoder->tsc_timestamp = timestamp; -- cgit v1.2.3 From 116f349c5bf8c7aec4047dd6e06c310354b46e4f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:35 +0300 Subject: perf intel-pt: Make logging slightly more efficient Logging is only used for debugging. Use macros to save calling into the functions only to return immediately when logging is not enabled. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-log.c | 21 +++++++------- tools/perf/util/intel-pt-decoder/intel-pt-log.h | 38 +++++++++++++++++++++---- 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c index d09c7d9f9050..319bef33a64b 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -29,18 +29,18 @@ static FILE *f; static char log_name[MAX_LOG_NAME]; -static bool enable_logging; +bool intel_pt_enable_logging; void intel_pt_log_enable(void) { - enable_logging = true; + intel_pt_enable_logging = true; } void intel_pt_log_disable(void) { if (f) fflush(f); - enable_logging = false; + intel_pt_enable_logging = false; } void intel_pt_log_set_name(const char *name) @@ -80,7 +80,7 @@ static void intel_pt_print_no_data(uint64_t pos, int indent) static int intel_pt_log_open(void) { - if (!enable_logging) + if (!intel_pt_enable_logging) return -1; if (f) @@ -91,15 +91,15 @@ static int intel_pt_log_open(void) f = fopen(log_name, "w+"); if (!f) { - enable_logging = false; + intel_pt_enable_logging = false; return -1; } return 0; } -void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, - uint64_t pos, const unsigned char *buf) +void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, + uint64_t pos, const unsigned char *buf) { char desc[INTEL_PT_PKT_DESC_MAX]; @@ -111,7 +111,7 @@ void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, fprintf(f, "%s\n", desc); } -void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip) +void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip) { char desc[INTEL_PT_INSN_DESC_MAX]; size_t len = intel_pt_insn->length; @@ -128,7 +128,8 @@ void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip) fprintf(f, "Bad instruction!\n"); } -void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip) +void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, + uint64_t ip) { char desc[INTEL_PT_INSN_DESC_MAX]; @@ -142,7 +143,7 @@ void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip) fprintf(f, "Bad instruction!\n"); } -void intel_pt_log(const char *fmt, ...) +void __intel_pt_log(const char *fmt, ...) { va_list args; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h index db3942f83677..debe751dc3d6 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h @@ -25,20 +25,46 @@ void intel_pt_log_enable(void); void intel_pt_log_disable(void); void intel_pt_log_set_name(const char *name); -void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, - uint64_t pos, const unsigned char *buf); +void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, + uint64_t pos, const unsigned char *buf); struct intel_pt_insn; -void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip); -void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, - uint64_t ip); +void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip); +void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, + uint64_t ip); __attribute__((format(printf, 1, 2))) -void intel_pt_log(const char *fmt, ...); +void __intel_pt_log(const char *fmt, ...); + +#define intel_pt_log(fmt, ...) \ + do { \ + if (intel_pt_enable_logging) \ + __intel_pt_log(fmt, ##__VA_ARGS__); \ + } while (0) + +#define intel_pt_log_packet(arg, ...) \ + do { \ + if (intel_pt_enable_logging) \ + __intel_pt_log_packet(arg, ##__VA_ARGS__); \ + } while (0) + +#define intel_pt_log_insn(arg, ...) \ + do { \ + if (intel_pt_enable_logging) \ + __intel_pt_log_insn(arg, ##__VA_ARGS__); \ + } while (0) + +#define intel_pt_log_insn_no_data(arg, ...) \ + do { \ + if (intel_pt_enable_logging) \ + __intel_pt_log_insn_no_data(arg, ##__VA_ARGS__); \ + } while (0) #define x64_fmt "0x%" PRIx64 +extern bool intel_pt_enable_logging; + static inline void intel_pt_log_at(const char *msg, uint64_t u) { intel_pt_log("%s at " x64_fmt "\n", msg, u); -- cgit v1.2.3 From 83e1986032dfcd3f9e9fc0d06e11d9153edae19b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:36 +0300 Subject: perf script: Allow time to be displayed in nanoseconds Add option --ns to display time to 9 decimal places. That is useful in some cases, for example when using Intel PT cycle accurate mode. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 3 +++ tools/perf/builtin-script.c | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index dc3ec783b7bd..b3b42f9285df 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -249,6 +249,9 @@ include::itrace.txt[] --full-source-path:: Show the full path for source files for srcline output. +--ns:: + Use 9 decimal places when displaying time (i.e. show the nanoseconds) + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 284a76e04628..092843968791 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -29,6 +29,7 @@ static bool no_callchain; static bool latency_format; static bool system_wide; static bool print_flags; +static bool nanosecs; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); @@ -415,7 +416,10 @@ static void print_sample_start(struct perf_sample *sample, secs = nsecs / NSECS_PER_SEC; nsecs -= secs * NSECS_PER_SEC; usecs = nsecs / NSECS_PER_USEC; - printf("%5lu.%06lu: ", secs, usecs); + if (nanosecs) + printf("%5lu.%09llu: ", secs, nsecs); + else + printf("%5lu.%06lu: ", secs, usecs); } } @@ -1695,6 +1699,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events, "Show context switch events (if recorded)"), OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), + OPT_BOOLEAN(0, "ns", &nanosecs, + "Use 9 decimal places when displaying time"), OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", "Instruction Tracing options", itrace_parse_synth_opts), -- cgit v1.2.3 From a38f48e300f9dac30a9b2d2ce958c8dbd7def351 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:37 +0300 Subject: perf session: Warn when AUX data has been lost By default 'perf record' will postprocess the perf.data file to determine build-ids. When that happens, the number of lost perf events is displayed. Make that also happen for AUX events. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-7-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 1 + tools/perf/util/session.c | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index be5cbc7be889..a0dbcbd4f6d8 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -257,6 +257,7 @@ struct events_stats { u64 total_non_filtered_period; u64 total_lost; u64 total_lost_samples; + u64 total_aux_lost; u64 total_invalid_chains; u32 nr_events[PERF_RECORD_HEADER_MAX]; u32 nr_non_filtered_samples; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f5e000030a5e..15c84cad213a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1101,6 +1101,9 @@ static int machines__deliver_event(struct machines *machines, case PERF_RECORD_UNTHROTTLE: return tool->unthrottle(tool, event, sample, machine); case PERF_RECORD_AUX: + if (tool->aux == perf_event__process_aux && + (event->aux.flags & PERF_AUX_FLAG_TRUNCATED)) + evlist->stats.total_aux_lost += 1; return tool->aux(tool, event, sample, machine); case PERF_RECORD_ITRACE_START: return tool->itrace_start(tool, event, sample, machine); @@ -1346,6 +1349,13 @@ static void perf_session__warn_about_errors(const struct perf_session *session) } } + if (session->tool->aux == perf_event__process_aux && + stats->total_aux_lost != 0) { + ui__warning("AUX data lost %" PRIu64 " times out of %u!\n\n", + stats->total_aux_lost, + stats->nr_events[PERF_RECORD_AUX]); + } + if (stats->nr_unknown_events != 0) { ui__warning("Found %u unknown events!\n\n" "Is this an older tool processing a perf.data " -- cgit v1.2.3 From 35ca01c117da9b8e5b60204f730cdde414735596 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:38 +0300 Subject: perf tools: Add more documentation to export-to-postgresql.py script Add some comments to the script and some 'views' to the created database that better illustrate the database structure and how it can be used. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-8-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-postgresql.py | 221 ++++++++++++++++++++++ 1 file changed, 221 insertions(+) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 84a32037a80f..1b02cdc0cab6 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -61,6 +61,142 @@ import datetime # # An example of using the database is provided by the script # call-graph-from-postgresql.py. Refer to that script for details. +# +# Tables: +# +# The tables largely correspond to perf tools' data structures. They are largely self-explanatory. +# +# samples +# +# 'samples' is the main table. It represents what instruction was executing at a point in time +# when something (a selected event) happened. The memory address is the instruction pointer or 'ip'. +# +# calls +# +# 'calls' represents function calls and is related to 'samples' by 'call_id' and 'return_id'. +# 'calls' is only created when the 'calls' option to this script is specified. +# +# call_paths +# +# 'call_paths' represents all the call stacks. Each 'call' has an associated record in 'call_paths'. +# 'calls_paths' is only created when the 'calls' option to this script is specified. +# +# branch_types +# +# 'branch_types' provides descriptions for each type of branch. +# +# comm_threads +# +# 'comm_threads' shows how 'comms' relates to 'threads'. +# +# comms +# +# 'comms' contains a record for each 'comm' - the name given to the executable that is running. +# +# dsos +# +# 'dsos' contains a record for each executable file or library. +# +# machines +# +# 'machines' can be used to distinguish virtual machines if virtualization is supported. +# +# selected_events +# +# 'selected_events' contains a record for each kind of event that has been sampled. +# +# symbols +# +# 'symbols' contains a record for each symbol. Only symbols that have samples are present. +# +# threads +# +# 'threads' contains a record for each thread. +# +# Views: +# +# Most of the tables have views for more friendly display. The views are: +# +# calls_view +# call_paths_view +# comm_threads_view +# dsos_view +# machines_view +# samples_view +# symbols_view +# threads_view +# +# More examples of browsing the database with psql: +# Note that some of the examples are not the most optimal SQL query. +# Note that call information is only available if the script's 'calls' option has been used. +# +# Top 10 function calls (not aggregated by symbol): +# +# SELECT * FROM calls_view ORDER BY elapsed_time DESC LIMIT 10; +# +# Top 10 function calls (aggregated by symbol): +# +# SELECT symbol_id,(SELECT name FROM symbols WHERE id = symbol_id) AS symbol, +# SUM(elapsed_time) AS tot_elapsed_time,SUM(branch_count) AS tot_branch_count +# FROM calls_view GROUP BY symbol_id ORDER BY tot_elapsed_time DESC LIMIT 10; +# +# Note that the branch count gives a rough estimation of cpu usage, so functions +# that took a long time but have a relatively low branch count must have spent time +# waiting. +# +# Find symbols by pattern matching on part of the name (e.g. names containing 'alloc'): +# +# SELECT * FROM symbols_view WHERE name LIKE '%alloc%'; +# +# Top 10 function calls for a specific symbol (e.g. whose symbol_id is 187): +# +# SELECT * FROM calls_view WHERE symbol_id = 187 ORDER BY elapsed_time DESC LIMIT 10; +# +# Show function calls made by function in the same context (i.e. same call path) (e.g. one with call_path_id 254): +# +# SELECT * FROM calls_view WHERE parent_call_path_id = 254; +# +# Show branches made during a function call (e.g. where call_id is 29357 and return_id is 29370 and tid is 29670) +# +# SELECT * FROM samples_view WHERE id >= 29357 AND id <= 29370 AND tid = 29670 AND event LIKE 'branches%'; +# +# Show transactions: +# +# SELECT * FROM samples_view WHERE event = 'transactions'; +# +# Note transaction start has 'in_tx' true whereas, transaction end has 'in_tx' false. +# Transaction aborts have branch_type_name 'transaction abort' +# +# Show transaction aborts: +# +# SELECT * FROM samples_view WHERE event = 'transactions' AND branch_type_name = 'transaction abort'; +# +# To print a call stack requires walking the call_paths table. For example this python script: +# #!/usr/bin/python2 +# +# import sys +# from PySide.QtSql import * +# +# if __name__ == '__main__': +# if (len(sys.argv) < 3): +# print >> sys.stderr, "Usage is: printcallstack.py " +# raise Exception("Too few arguments") +# dbname = sys.argv[1] +# call_path_id = sys.argv[2] +# db = QSqlDatabase.addDatabase('QPSQL') +# db.setDatabaseName(dbname) +# if not db.open(): +# raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text()) +# query = QSqlQuery(db) +# print " id ip symbol_id symbol dso_id dso_short_name" +# while call_path_id != 0 and call_path_id != 1: +# ret = query.exec_('SELECT * FROM call_paths_view WHERE id = ' + str(call_path_id)) +# if not ret: +# raise Exception("Query failed: " + query.lastError().text()) +# if not query.next(): +# raise Exception("Query failed") +# print "{0:>6} {1:>10} {2:>9} {3:<30} {4:>6} {5:<30}".format(query.value(0), query.value(1), query.value(2), query.value(3), query.value(4), query.value(5)) +# call_path_id = query.value(6) from PySide.QtSql import * @@ -244,6 +380,91 @@ if perf_db_export_calls: 'parent_call_path_id bigint,' 'flags integer)') +do_query(query, 'CREATE VIEW machines_view AS ' + 'SELECT ' + 'id,' + 'pid,' + 'root_dir,' + 'CASE WHEN id=0 THEN \'unknown\' WHEN pid=-1 THEN \'host\' ELSE \'guest\' END AS host_or_guest' + ' FROM machines') + +do_query(query, 'CREATE VIEW dsos_view AS ' + 'SELECT ' + 'id,' + 'machine_id,' + '(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,' + 'short_name,' + 'long_name,' + 'build_id' + ' FROM dsos') + +do_query(query, 'CREATE VIEW symbols_view AS ' + 'SELECT ' + 'id,' + 'name,' + '(SELECT short_name FROM dsos WHERE id=dso_id) AS dso,' + 'dso_id,' + 'sym_start,' + 'sym_end,' + 'CASE WHEN binding=0 THEN \'local\' WHEN binding=1 THEN \'global\' ELSE \'weak\' END AS binding' + ' FROM symbols') + +do_query(query, 'CREATE VIEW threads_view AS ' + 'SELECT ' + 'id,' + 'machine_id,' + '(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,' + 'process_id,' + 'pid,' + 'tid' + ' FROM threads') + +do_query(query, 'CREATE VIEW comm_threads_view AS ' + 'SELECT ' + 'comm_id,' + '(SELECT comm FROM comms WHERE id = comm_id) AS command,' + 'thread_id,' + '(SELECT pid FROM threads WHERE id = thread_id) AS pid,' + '(SELECT tid FROM threads WHERE id = thread_id) AS tid' + ' FROM comm_threads') + +if perf_db_export_calls: + do_query(query, 'CREATE VIEW call_paths_view AS ' + 'SELECT ' + 'c.id,' + 'to_hex(c.ip) AS ip,' + 'c.symbol_id,' + '(SELECT name FROM symbols WHERE id = c.symbol_id) AS symbol,' + '(SELECT dso_id FROM symbols WHERE id = c.symbol_id) AS dso_id,' + '(SELECT dso FROM symbols_view WHERE id = c.symbol_id) AS dso_short_name,' + 'c.parent_id,' + 'to_hex(p.ip) AS parent_ip,' + 'p.symbol_id AS parent_symbol_id,' + '(SELECT name FROM symbols WHERE id = p.symbol_id) AS parent_symbol,' + '(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,' + '(SELECT dso FROM symbols_view WHERE id = p.symbol_id) AS parent_dso_short_name' + ' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id') + do_query(query, 'CREATE VIEW calls_view AS ' + 'SELECT ' + 'calls.id,' + 'thread_id,' + '(SELECT pid FROM threads WHERE id = thread_id) AS pid,' + '(SELECT tid FROM threads WHERE id = thread_id) AS tid,' + '(SELECT comm FROM comms WHERE id = comm_id) AS command,' + 'call_path_id,' + 'to_hex(ip) AS ip,' + 'symbol_id,' + '(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,' + 'call_time,' + 'return_time,' + 'return_time - call_time AS elapsed_time,' + 'branch_count,' + 'call_id,' + 'return_id,' + 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' + 'parent_call_path_id' + ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') + do_query(query, 'CREATE VIEW samples_view AS ' 'SELECT ' 'id,' -- cgit v1.2.3 From 601897b54c7ed492a89b262dccd7c6f7faf12b30 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:39 +0300 Subject: perf auxtrace: Add option to synthesize branch stacks on samples Add AUX area tracing option 'l' to synthesize branch stacks on samples just like sample type PERF_SAMPLE_BRANCH_STACK. This is taken into use by Intel PT in a subsequent patch. Based-on-patch-by: Andi Kleen Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-9-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/itrace.txt | 4 ++++ tools/perf/util/auxtrace.c | 20 ++++++++++++++++++++ tools/perf/util/auxtrace.h | 4 ++++ 3 files changed, 28 insertions(+) diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 2ff946677e3b..65453f4c7006 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -6,6 +6,7 @@ e synthesize error events d create a debug log g synthesize a call chain (use with i or x) + l synthesize last branch entries (use with i or x) The default is all events i.e. the same as --itrace=ibxe @@ -20,3 +21,6 @@ Also the call chain size (default 16, max. 1024) for instructions or transactions events can be specified. + + Also the number of last branch entries (default 64, max. 1024) for + instructions or transactions events can be specified. diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index c4993b2e6c50..7f10430af39c 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -926,6 +926,8 @@ s64 perf_event__process_auxtrace(struct perf_tool *tool, #define PERF_ITRACE_DEFAULT_PERIOD 100000 #define PERF_ITRACE_DEFAULT_CALLCHAIN_SZ 16 #define PERF_ITRACE_MAX_CALLCHAIN_SZ 1024 +#define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ 64 +#define PERF_ITRACE_MAX_LAST_BRANCH_SZ 1024 void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) { @@ -936,6 +938,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ; + synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; } /* @@ -1043,6 +1046,23 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, synth_opts->callchain_sz = val; } break; + case 'l': + synth_opts->last_branch = true; + synth_opts->last_branch_sz = + PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; + while (*p == ' ' || *p == ',') + p += 1; + if (isdigit(*p)) { + unsigned int val; + + val = strtoul(p, &endptr, 10); + p = endptr; + if (!val || + val > PERF_ITRACE_MAX_LAST_BRANCH_SZ) + goto out_err; + synth_opts->last_branch_sz = val; + } + break; case ' ': case ',': break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index bf72b77a588a..b86f90db1352 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -63,7 +63,9 @@ enum itrace_period_type { * @calls: limit branch samples to calls (can be combined with @returns) * @returns: limit branch samples to returns (can be combined with @calls) * @callchain: add callchain to 'instructions' events + * @last_branch: add branch context to 'instruction' events * @callchain_sz: maximum callchain size + * @last_branch_sz: branch context size * @period: 'instructions' events period * @period_type: 'instructions' events period type */ @@ -79,7 +81,9 @@ struct itrace_synth_opts { bool calls; bool returns; bool callchain; + bool last_branch; unsigned int callchain_sz; + unsigned int last_branch_sz; unsigned long long period; enum itrace_period_type period_type; }; -- cgit v1.2.3 From c7eced63f2f67bd06ceb2269062416db9d81d29d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:40 +0300 Subject: perf report: Adjust sample type validation for synthesized branch stacks perf report looks at event sample types to determine if branch stacks have been sampled. Adjust the validation to know about instruction tracing options. This change allows the use of the -b option which otherwise would complain with an error like: Error: Selected -b but no branch data. Did you call perf record without -b? # To display the perf.data header info, # please use --header/--header-only options. # Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-10-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0d53b485a87b..7af35af5a5e5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -220,6 +220,9 @@ static int report__setup_sample_type(struct report *rep) !session->itrace_synth_opts->set)) sample_type |= PERF_SAMPLE_CALLCHAIN; + if (session->itrace_synth_opts->last_branch) + sample_type |= PERF_SAMPLE_BRANCH_STACK; + if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { if (sort__has_parent) { ui__error("Selected --sort parent, but no " -- cgit v1.2.3 From fb9fab66e6e3ee737e521c899684c6d684b24a22 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:41 +0300 Subject: perf report: Also do default setup for synthesized branch stacks The 'perf report' tool will default to displaying branch stacks (-b option) if they are present. Make that also happen for synthesized branch stacks. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-11-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7af35af5a5e5..92f7c5a75208 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -829,6 +829,9 @@ repeat: has_br_stack = perf_header__has_feat(&session->header, HEADER_BRANCH_STACK); + if (itrace_synth_opts.last_branch) + has_br_stack = true; + /* * Branch mode is a tristate: * -1 means default, so decide based on the file having branch data. -- cgit v1.2.3 From f86225db3aa0e394915af45eea1c3cca6f3e2dba Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:42 +0300 Subject: perf report: Skip events with null branch stacks A non-synthesized event might not have a branch stack if branch stacks have been synthesized (using itrace options). An example of that is when Intel PT records sched_switch events for decoding purposes. Those sched_switch events do not have branch stacks even though the Intel PT decoder may be synthesizing other events that do due to the itrace options. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-12-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 92f7c5a75208..e94e5c7155af 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -163,14 +163,21 @@ static int process_sample_event(struct perf_tool *tool, if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) goto out_put; - if (sort__mode == SORT_MODE__BRANCH) + if (sort__mode == SORT_MODE__BRANCH) { + /* + * A non-synthesized event might not have a branch stack if + * branch stacks have been synthesized (using itrace options). + */ + if (!sample->branch_stack) + goto out_put; iter.ops = &hist_iter_branch; - else if (rep->mem_mode) + } else if (rep->mem_mode) { iter.ops = &hist_iter_mem; - else if (symbol_conf.cumulate_callchain) + } else if (symbol_conf.cumulate_callchain) { iter.ops = &hist_iter_cumulative; - else + } else { iter.ops = &hist_iter_normal; + } if (al.map != NULL) al.map->dso->hit = 1; -- cgit v1.2.3 From 051a01b9a2c1c1ef3049973a43d9ed4ddcc946f3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:43 +0300 Subject: perf inject: Set branch stack feature flag when synthesizing branch stacks The branch stack feature flag is set by 'perf record' when recording data that contains branch stacks. Consequently, when 'perf inject' synthesizes branch stacks, the feature flag should be set also. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-13-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index f62c49b35be0..8638fad8a085 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -537,9 +537,13 @@ static int __cmd_inject(struct perf_inject *inject) * The AUX areas have been removed and replaced with * synthesized hardware events, so clear the feature flag. */ - if (inject->itrace_synth_opts.set) + if (inject->itrace_synth_opts.set) { perf_header__clear_feat(&session->header, HEADER_AUXTRACE); + if (inject->itrace_synth_opts.last_branch) + perf_header__set_feat(&session->header, + HEADER_BRANCH_STACK); + } session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__write_header(session, session->evlist, fd, true); -- cgit v1.2.3 From 385e33063fb963f5cccb0a37fe539319b6481fa5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:44 +0300 Subject: perf intel-pt: Move branch filter logic intel_pt_synth_branch_sample() skips synthesizing if the branch does not match the branch filter. That logic was sitting in the middle of the function but is more efficiently placed at the start of the function, so move it. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-14-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index c8bb5ca6a157..2c01e723826a 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -891,6 +891,9 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) union perf_event *event = ptq->event_buf; struct perf_sample sample = { .ip = 0, }; + if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) + return 0; + event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = PERF_RECORD_MISC_USER; event->sample.header.size = sizeof(struct perf_event_header); @@ -909,9 +912,6 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.flags = ptq->flags; sample.insn_len = ptq->insn_len; - if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) - return 0; - if (pt->synth_opts.inject) { ret = intel_pt_inject_event(event, &sample, pt->branches_sample_type, -- cgit v1.2.3 From f14445ee72c59f32aa5cbf4d0f0330a5f62a752d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:45 +0300 Subject: perf intel-pt: Support generating branch stack Add support for generating branch stack context for PT samples. The decoder reports a configurable number of branches as branch context for each sample. Internally it keeps track of them by using a simple sliding window. We also flush the last branch buffer on each sample to avoid overlapping intervals. This is useful for: - Reporting accurate basic block edge frequencies through the perf report branch view - Using with --branch-history to get the wider context of samples - Other users of LBRs Also the Documentation is updated. Examples: Record with Intel PT: perf record -e intel_pt//u ls Branch stacks are used by default if synthesized so: perf report --itrace=ile is the same as: perf report --itrace=ile -b Branch history can be requested also: perf report --itrace=igle --branch-history Based-on-patch-by: Andi Kleen Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-15-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-pt.txt | 10 +++ tools/perf/util/intel-pt.c | 115 ++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+) diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index 886612b50961..a0fbb5d71f7d 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -671,6 +671,7 @@ The letters are: e synthesize tracing error events d create a debug log g synthesize a call chain (use with i or x) + l synthesize last branch entries (use with i or x) "Instructions" events look like they were recorded by "perf record -e instructions". @@ -718,6 +719,15 @@ transactions events can be specified. e.g. --itrace=ig32 --itrace=xg32 +Also the number of last branch entries (default 64, max. 1024) for instructions or +transactions events can be specified. e.g. + + --itrace=il10 + --itrace=xl10 + +Note that last branch entries are cleared for each sample, so there is no overlap +from one sample to the next. + To disable trace decoding entirely, use the option --no-itrace. diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 2c01e723826a..05e8fcc5188b 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -22,6 +22,7 @@ #include "../perf.h" #include "session.h" #include "machine.h" +#include "sort.h" #include "tool.h" #include "event.h" #include "evlist.h" @@ -115,6 +116,9 @@ struct intel_pt_queue { void *decoder; const struct intel_pt_state *state; struct ip_callchain *chain; + struct branch_stack *last_branch; + struct branch_stack *last_branch_rb; + size_t last_branch_pos; union perf_event *event_buf; bool on_heap; bool stop; @@ -675,6 +679,19 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, goto out_free; } + if (pt->synth_opts.last_branch) { + size_t sz = sizeof(struct branch_stack); + + sz += pt->synth_opts.last_branch_sz * + sizeof(struct branch_entry); + ptq->last_branch = zalloc(sz); + if (!ptq->last_branch) + goto out_free; + ptq->last_branch_rb = zalloc(sz); + if (!ptq->last_branch_rb) + goto out_free; + } + ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); if (!ptq->event_buf) goto out_free; @@ -732,6 +749,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, out_free: zfree(&ptq->event_buf); + zfree(&ptq->last_branch); + zfree(&ptq->last_branch_rb); zfree(&ptq->chain); free(ptq); return NULL; @@ -746,6 +765,8 @@ static void intel_pt_free_queue(void *priv) thread__zput(ptq->thread); intel_pt_decoder_free(ptq->decoder); zfree(&ptq->event_buf); + zfree(&ptq->last_branch); + zfree(&ptq->last_branch_rb); zfree(&ptq->chain); free(ptq); } @@ -876,6 +897,57 @@ static int intel_pt_setup_queues(struct intel_pt *pt) return 0; } +static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq) +{ + struct branch_stack *bs_src = ptq->last_branch_rb; + struct branch_stack *bs_dst = ptq->last_branch; + size_t nr = 0; + + bs_dst->nr = bs_src->nr; + + if (!bs_src->nr) + return; + + nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos; + memcpy(&bs_dst->entries[0], + &bs_src->entries[ptq->last_branch_pos], + sizeof(struct branch_entry) * nr); + + if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) { + memcpy(&bs_dst->entries[nr], + &bs_src->entries[0], + sizeof(struct branch_entry) * ptq->last_branch_pos); + } +} + +static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq) +{ + ptq->last_branch_pos = 0; + ptq->last_branch_rb->nr = 0; +} + +static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) +{ + const struct intel_pt_state *state = ptq->state; + struct branch_stack *bs = ptq->last_branch_rb; + struct branch_entry *be; + + if (!ptq->last_branch_pos) + ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz; + + ptq->last_branch_pos -= 1; + + be = &bs->entries[ptq->last_branch_pos]; + be->from = state->from_ip; + be->to = state->to_ip; + be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX); + be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX); + /* No support for mispredict */ + + if (bs->nr < ptq->pt->synth_opts.last_branch_sz) + bs->nr += 1; +} + static int intel_pt_inject_event(union perf_event *event, struct perf_sample *sample, u64 type, bool swapped) @@ -890,6 +962,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; struct perf_sample sample = { .ip = 0, }; + struct dummy_branch_stack { + u64 nr; + struct branch_entry entries; + } dummy_bs; if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) return 0; @@ -912,6 +988,21 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.flags = ptq->flags; sample.insn_len = ptq->insn_len; + /* + * perf report cannot handle events without a branch stack when using + * SORT_MODE__BRANCH so make a dummy one. + */ + if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { + dummy_bs = (struct dummy_branch_stack){ + .nr = 1, + .entries = { + .from = sample.ip, + .to = sample.addr, + }, + }; + sample.branch_stack = (struct branch_stack *)&dummy_bs; + } + if (pt->synth_opts.inject) { ret = intel_pt_inject_event(event, &sample, pt->branches_sample_type, @@ -961,6 +1052,11 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) sample.callchain = ptq->chain; } + if (pt->synth_opts.last_branch) { + intel_pt_copy_last_branch_rb(ptq); + sample.branch_stack = ptq->last_branch; + } + if (pt->synth_opts.inject) { ret = intel_pt_inject_event(event, &sample, pt->instructions_sample_type, @@ -974,6 +1070,9 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n", ret); + if (pt->synth_opts.last_branch) + intel_pt_reset_last_branch_rb(ptq); + return ret; } @@ -1008,6 +1107,11 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) sample.callchain = ptq->chain; } + if (pt->synth_opts.last_branch) { + intel_pt_copy_last_branch_rb(ptq); + sample.branch_stack = ptq->last_branch; + } + if (pt->synth_opts.inject) { ret = intel_pt_inject_event(event, &sample, pt->transactions_sample_type, @@ -1021,6 +1125,9 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n", ret); + if (pt->synth_opts.callchain) + intel_pt_reset_last_branch_rb(ptq); + return ret; } @@ -1116,6 +1223,9 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) return err; } + if (pt->synth_opts.last_branch) + intel_pt_update_last_branch_rb(ptq); + if (!pt->sync_switch) return 0; @@ -1763,6 +1873,8 @@ static int intel_pt_synth_events(struct intel_pt *pt, pt->instructions_sample_period = attr.sample_period; if (pt->synth_opts.callchain) attr.sample_type |= PERF_SAMPLE_CALLCHAIN; + if (pt->synth_opts.last_branch) + attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", id, (u64)attr.sample_type); err = intel_pt_synth_event(session, &attr, id); @@ -1782,6 +1894,8 @@ static int intel_pt_synth_events(struct intel_pt *pt, attr.sample_period = 1; if (pt->synth_opts.callchain) attr.sample_type |= PERF_SAMPLE_CALLCHAIN; + if (pt->synth_opts.last_branch) + attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", id, (u64)attr.sample_type); err = intel_pt_synth_event(session, &attr, id); @@ -1808,6 +1922,7 @@ static int intel_pt_synth_events(struct intel_pt *pt, attr.sample_period = 1; attr.sample_type |= PERF_SAMPLE_ADDR; attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN; + attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK; pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", id, (u64)attr.sample_type); err = intel_pt_synth_event(session, &attr, id); -- cgit v1.2.3 From 188bb5e2ce112463428994f91291e5df6fc05521 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:46 +0300 Subject: perf report: Make max_stack value allow for synthesized callchains perf report has an option (--max-stack) to set the maximum stack depth when processing callchains. The option defaults to the hard-coded maximum definition PERF_MAX_STACK_DEPTH which is 127. The intention of the option is to allow the user to reduce the processing time by reducing the amount of the callchain that is processed. It is also possible, when processing instruction traces, to synthesize callchains. Synthesized callchains do not have the kernel size limitation and are whatever size the user requests, although validation presently prevents the user requested a value greater that 1024. The default value is 16. To allow for synthesized callchains, make the max_stack value at least the same size as the synthesized callchain size. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-16-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e94e5c7155af..37c9f5125887 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -809,6 +809,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) if (report.inverted_callchain) callchain_param.order = ORDER_CALLER; + if (itrace_synth_opts.callchain && + (int)itrace_synth_opts.callchain_sz > report.max_stack) + report.max_stack = itrace_synth_opts.callchain_sz; + if (!input_name || !strlen(input_name)) { if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) input_name = "-"; -- cgit v1.2.3 From 96b40f3c05f36e061fd4dde920b9e9c795a88b69 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:47 +0300 Subject: perf hists: Allow for max_stack greater than PERF_MAX_STACK_DEPTH Use the max_stack value instead of PERF_MAX_STACK_DEPTH so that arbitrary-sized callchains can be supported. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-17-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 6 ++++-- tools/perf/util/hist.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b3567a25f0c4..0cad9e07c5b4 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -695,7 +695,7 @@ iter_finish_normal_entry(struct hist_entry_iter *iter, } static int -iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused, +iter_prepare_cumulative_entry(struct hist_entry_iter *iter, struct addr_location *al __maybe_unused) { struct hist_entry **he_cache; @@ -707,7 +707,7 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused, * cumulated only one time to prevent entries more than 100% * overhead. */ - he_cache = malloc(sizeof(*he_cache) * (PERF_MAX_STACK_DEPTH + 1)); + he_cache = malloc(sizeof(*he_cache) * (iter->max_stack + 1)); if (he_cache == NULL) return -ENOMEM; @@ -868,6 +868,8 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, if (err) return err; + iter->max_stack = max_stack_depth; + err = iter->ops->prepare_entry(iter, al); if (err) goto out; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 4d6aa1dbdaee..8c20a8f6b214 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -90,6 +90,7 @@ struct hist_entry_iter { int curr; bool hide_unresolved; + int max_stack; struct perf_evsel *evsel; struct perf_sample *sample; -- cgit v1.2.3 From 03cd1fed2b8730271d3a8dbabd87989abddc33c4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:49 +0300 Subject: perf script: Add a setting for maximum stack depth Add a setting for maximum stack depth in preparation for allowing for synthesized callchains. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-19-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 6 ++++-- tools/perf/util/session.c | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 092843968791..a65b498df097 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -33,6 +33,8 @@ static bool nanosecs; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +static unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; + enum perf_output_field { PERF_OUTPUT_COMM = 1U << 0, PERF_OUTPUT_TID = 1U << 1, @@ -475,7 +477,7 @@ static void print_sample_bts(union perf_event *event, } } perf_evsel__print_ip(evsel, sample, al, print_opts, - PERF_MAX_STACK_DEPTH); + scripting_max_stack); } /* print branch_to information */ @@ -552,7 +554,7 @@ static void process_event(union perf_event *event, struct perf_sample *sample, perf_evsel__print_ip(evsel, sample, al, output[attr->type].print_ip_opts, - PERF_MAX_STACK_DEPTH); + scripting_max_stack); } if (PRINT_FIELD(IREGS)) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 15c84cad213a..84a02eae4394 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1800,7 +1800,7 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample, if (thread__resolve_callchain(al->thread, evsel, sample, NULL, NULL, - PERF_MAX_STACK_DEPTH) != 0) { + stack_depth) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); return; -- cgit v1.2.3 From 44cbe7295c3808977159f500a5bcdebf12a7db5f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:50 +0300 Subject: perf scripting python: Allow for max_stack greater than PERF_MAX_STACK_DEPTH Use the scripting_max_stack value to allow for values greater than PERF_MAX_STACK_DEPTH. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-20-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 2 +- tools/perf/util/scripting-engines/trace-event-python.c | 2 +- tools/perf/util/trace-event.h | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index a65b498df097..5c3c02d5af53 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -33,7 +33,7 @@ static bool nanosecs; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); -static unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; +unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; enum perf_output_field { PERF_OUTPUT_COMM = 1U << 0, diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index aa9e1257c1ee..a8e825fca42a 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -319,7 +319,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample, if (thread__resolve_callchain(al->thread, evsel, sample, NULL, NULL, - PERF_MAX_STACK_DEPTH) != 0) { + scripting_max_stack) != 0) { pr_err("Failed to resolve callchain. Skipping\n"); goto exit; } diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index da6cc4cc2a4f..b85ee55cca0c 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -78,6 +78,8 @@ struct scripting_ops { int (*generate_script) (struct pevent *pevent, const char *outfile); }; +extern unsigned int scripting_max_stack; + int script_spec_register(const char *spec, struct scripting_ops *ops); void setup_perl_scripting(void); -- cgit v1.2.3 From 3c5b645faee7afbd417f6127694adbd26778a9eb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:51 +0300 Subject: perf script: Make scripting_max_stack value allow for synthesized callchains perf script has a setting to set the maximum stack depth when processing callchains. The setting defaults to the hard-coded maximum definition PERF_MAX_STACK_DEPTH which is 127. It is possible, when processing instruction traces, to synthesize callchains. Synthesized callchains do not have the kernel size limitation and are whatever size the user requests, although validation presently prevents the user requested a value greater that 1024. The default value is 16. To allow for synthesized callchains, make the scripting_max_stack value at least the same size as the synthesized callchain size. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-21-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 5c3c02d5af53..8ce1c6bbfa45 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1748,6 +1748,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) } } + if (itrace_synth_opts.callchain && + itrace_synth_opts.callchain_sz > scripting_max_stack) + scripting_max_stack = itrace_synth_opts.callchain_sz; + /* make sure PERF_EXEC_PATH is set for scripts */ perf_set_argv_exec_path(perf_exec_path()); -- cgit v1.2.3 From dddcf6abbf5946f9ec1183dd2099cede6dbe12fc Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:52 +0300 Subject: perf evlist: Add perf_evlist__id2evsel_strict() perf_evlist__id2evsel_strict() is the same as perf_evlist__id2evsel() except that it ensures that the id must match. This will be used by perf inject to find a specific evsel that is to be deleted, hence the need to match exactly. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-22-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 15 +++++++++++++++ tools/perf/util/evlist.h | 2 ++ 2 files changed, 17 insertions(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a8643735dcea..e6760380d731 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -617,6 +617,21 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) return NULL; } +struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist, + u64 id) +{ + struct perf_sample_id *sid; + + if (!id) + return NULL; + + sid = perf_evlist__id2sid(evlist, id); + if (sid) + return sid->evsel; + + return NULL; +} + static int perf_evlist__event2id(struct perf_evlist *evlist, union perf_event *event, u64 *id) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 115d8b53c601..0edf0d4f4efa 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -104,6 +104,8 @@ int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mas int perf_evlist__poll(struct perf_evlist *evlist, int timeout); struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); +struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist, + u64 id); struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); -- cgit v1.2.3 From 4768230ad57d4e4fc6d36c44e98e0062c89b0dc0 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:53 +0300 Subject: perf evlist: Add perf_evlist__remove() Add a counterpart to perf_evlist__add() that does the opposite and deletes the evsel. This will be used by perf inject to remove unwanted evsels. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-23-git-send-email-adrian.hunter@intel.com [ Renamed it from perf_evlist__del() to perf_evlist__remove() and removed the perf_evsel__delete() call ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 7 +++++++ tools/perf/util/evlist.h | 1 + 2 files changed, 8 insertions(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e6760380d731..89546228b8ed 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -165,6 +165,13 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) __perf_evlist__propagate_maps(evlist, entry); } +void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel) +{ + evsel->evlist = NULL; + list_del_init(&evsel->node); + evlist->nr_entries -= 1; +} + void perf_evlist__splice_list_tail(struct perf_evlist *evlist, struct list_head *list) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 0edf0d4f4efa..66bc9d4c0869 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -73,6 +73,7 @@ void perf_evlist__exit(struct perf_evlist *evlist); void perf_evlist__delete(struct perf_evlist *evlist); void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); +void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel); int perf_evlist__add_default(struct perf_evlist *evlist); int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); -- cgit v1.2.3 From 73117308f953afb60a1383725b7d5372feeb2433 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:54 +0300 Subject: perf inject: Remove more aux-related stuff when processing instruction traces perf inject can process instruction traces (using the --itrace option) which removes aux-related events and replaces them with the requested synthesized events. However there are still some leftovers, namely PERF_RECORD_ITRACE_START events and the original evsel (selected event) e.g. intel_pt// For the sake of completeness, remove them too. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-24-git-send-email-adrian.hunter@intel.com [ Made it use perf_evlist__remove() + perf_evsel__delete() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 8638fad8a085..9b6119f134b4 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -31,6 +31,7 @@ struct perf_inject { const char *input_name; struct perf_data_file output; u64 bytes_written; + u64 aux_id; struct list_head samples; struct itrace_synth_opts itrace_synth_opts; }; @@ -176,6 +177,19 @@ static int perf_event__repipe(struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } +static int perf_event__drop_aux(struct perf_tool *tool, + union perf_event *event __maybe_unused, + struct perf_sample *sample, + struct machine *machine __maybe_unused) +{ + struct perf_inject *inject = container_of(tool, struct perf_inject, tool); + + if (!inject->aux_id) + inject->aux_id = sample->id; + + return 0; +} + typedef int (*inject_handler)(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -512,6 +526,8 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.id_index = perf_event__repipe_id_index; inject->tool.auxtrace_info = perf_event__process_auxtrace_info; inject->tool.auxtrace = perf_event__process_auxtrace; + inject->tool.aux = perf_event__drop_aux; + inject->tool.itrace_start = perf_event__drop_aux, inject->tool.ordered_events = true; inject->tool.ordering_requires_timestamps = true; /* Allow space in the header for new attributes */ @@ -535,14 +551,25 @@ static int __cmd_inject(struct perf_inject *inject) } /* * The AUX areas have been removed and replaced with - * synthesized hardware events, so clear the feature flag. + * synthesized hardware events, so clear the feature flag and + * remove the evsel. */ if (inject->itrace_synth_opts.set) { + struct perf_evsel *evsel; + perf_header__clear_feat(&session->header, HEADER_AUXTRACE); if (inject->itrace_synth_opts.last_branch) perf_header__set_feat(&session->header, HEADER_BRANCH_STACK); + evsel = perf_evlist__id2evsel_strict(session->evlist, + inject->aux_id); + if (evsel) { + pr_debug("Deleting %s\n", + perf_evsel__name(evsel)); + perf_evlist__remove(session->evlist, evsel); + perf_evsel__delete(evsel); + } } session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; -- cgit v1.2.3 From f56fb9864c501dc85ebe40af5bf925dd07d990c0 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:55 +0300 Subject: perf inject: Add --strip option to strip out non-synthesized events Add a new option --strip which is used with --itrace to strip out non-synthesized events. This results in a perf.data file that is simpler for external tools to parse. In particular, this can be used to prepare a perf.data file for consumption by autofdo. A subsequent patch makes a change to Intel PT also to enable use with autofdo and gives an example of that use. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-25-git-send-email-adrian.hunter@intel.com [ Made it use perf_evlist__remove() + perf_evsel__delete() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-inject.txt | 3 ++ tools/perf/builtin-inject.c | 92 ++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index 0c721c3e37e1..0b1cedeef895 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -50,6 +50,9 @@ OPTIONS include::itrace.txt[] +--strip:: + Use with --itrace to strip out non-synthesized events. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1] diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 9b6119f134b4..0a945d2e8ca5 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -28,6 +28,7 @@ struct perf_inject { bool build_ids; bool sched_stat; bool have_auxtrace; + bool strip; const char *input_name; struct perf_data_file output; u64 bytes_written; @@ -177,6 +178,14 @@ static int perf_event__repipe(struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } +static int perf_event__drop(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + static int perf_event__drop_aux(struct perf_tool *tool, union perf_event *event __maybe_unused, struct perf_sample *sample, @@ -480,6 +489,78 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel, return 0; } +static int drop_sample(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct perf_evsel *evsel __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + +static void strip_init(struct perf_inject *inject) +{ + struct perf_evlist *evlist = inject->session->evlist; + struct perf_evsel *evsel; + + inject->tool.context_switch = perf_event__drop; + + evlist__for_each(evlist, evsel) + evsel->handler = drop_sample; +} + +static bool has_tracking(struct perf_evsel *evsel) +{ + return evsel->attr.mmap || evsel->attr.mmap2 || evsel->attr.comm || + evsel->attr.task; +} + +#define COMPAT_MASK (PERF_SAMPLE_ID | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_IDENTIFIER) + +/* + * In order that the perf.data file is parsable, tracking events like MMAP need + * their selected event to exist, except if there is only 1 selected event left + * and it has a compatible sample type. + */ +static bool ok_to_remove(struct perf_evlist *evlist, + struct perf_evsel *evsel_to_remove) +{ + struct perf_evsel *evsel; + int cnt = 0; + bool ok = false; + + if (!has_tracking(evsel_to_remove)) + return true; + + evlist__for_each(evlist, evsel) { + if (evsel->handler != drop_sample) { + cnt += 1; + if ((evsel->attr.sample_type & COMPAT_MASK) == + (evsel_to_remove->attr.sample_type & COMPAT_MASK)) + ok = true; + } + } + + return ok && cnt == 1; +} + +static void strip_fini(struct perf_inject *inject) +{ + struct perf_evlist *evlist = inject->session->evlist; + struct perf_evsel *evsel, *tmp; + + /* Remove non-synthesized evsels if possible */ + evlist__for_each_safe(evlist, tmp, evsel) { + if (evsel->handler == drop_sample && + ok_to_remove(evlist, evsel)) { + pr_debug("Deleting %s\n", perf_evsel__name(evsel)); + perf_evlist__remove(evlist, evsel); + perf_evsel__delete(evsel); + } + } +} + static int __cmd_inject(struct perf_inject *inject) { int ret = -EINVAL; @@ -532,6 +613,8 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.ordering_requires_timestamps = true; /* Allow space in the header for new attributes */ output_data_offset = 4096; + if (inject->strip) + strip_init(inject); } if (!inject->itrace_synth_opts.set) @@ -570,6 +653,8 @@ static int __cmd_inject(struct perf_inject *inject) perf_evlist__remove(session->evlist, evsel); perf_evsel__delete(evsel); } + if (inject->strip) + strip_fini(inject); } session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; @@ -635,6 +720,8 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts, NULL, "opts", "Instruction Tracing options", itrace_parse_synth_opts), + OPT_BOOLEAN(0, "strip", &inject.strip, + "strip non-synthesized events (use with --itrace)"), OPT_END() }; const char * const inject_usage[] = { @@ -650,6 +737,11 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) if (argc) usage_with_options(inject_usage, options); + if (inject.strip && !inject.itrace_synth_opts.set) { + pr_err("--strip option requires --itrace option\n"); + return -1; + } + if (perf_data_file__open(&inject.output)) { perror("failed to create output file"); return -1; -- cgit v1.2.3 From ba11ba65e02836c475427ae199adfc2d8cc4a900 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:56 +0300 Subject: perf intel-pt: Add mispred-all config option to aid use with autofdo autofdo incorrectly expects branch flags to include either mispred or predicted. In fact mispred = predicted = 0 is valid and means the flags are not supported, which they aren't by Intel PT. To make autofdo work, add a config option which will cause Intel PT decoder to set the mispred flag on all branches. Below is an example of using Intel PT with autofdo. The example is also added to the Intel PT documentation. It requires autofdo (https://github.com/google/autofdo) and gcc version 5. The bubble sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial) amended to take the number of elements as a parameter. $ gcc-5 -O3 sort.c -o sort_optimized $ ./sort_optimized 30000 Bubble sorting array of 30000 elements 2254 ms $ cat ~/.perfconfig [intel-pt] mispred-all $ perf record -e intel_pt//u ./sort 3000 Bubble sorting array of 3000 elements 58 ms [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 3.939 MB perf.data ] $ perf inject -i perf.data -o inj --itrace=i100usle --strip $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1 $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo $ ./sort_autofdo 30000 Bubble sorting array of 30000 elements 2155 ms Note there is currently no advantage to using Intel PT instead of LBR, but that may change in the future if greater use is made of the data. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-26-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-pt.txt | 29 +++++++++++++++++++++++++++++ tools/perf/util/intel-pt.c | 14 ++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index a0fbb5d71f7d..be764f9ec769 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -764,3 +764,32 @@ perf inject also accepts the --itrace option in which case tracing data is removed and replaced with the synthesized events. e.g. perf inject --itrace -i perf.data -o perf.data.new + +Below is an example of using Intel PT with autofdo. It requires autofdo +(https://github.com/google/autofdo) and gcc version 5. The bubble +sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial) +amended to take the number of elements as a parameter. + + $ gcc-5 -O3 sort.c -o sort_optimized + $ ./sort_optimized 30000 + Bubble sorting array of 30000 elements + 2254 ms + + $ cat ~/.perfconfig + [intel-pt] + mispred-all + + $ perf record -e intel_pt//u ./sort 3000 + Bubble sorting array of 3000 elements + 58 ms + [ perf record: Woken up 2 times to write data ] + [ perf record: Captured and wrote 3.939 MB perf.data ] + $ perf inject -i perf.data -o inj --itrace=i100usle --strip + $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1 + $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo + $ ./sort_autofdo 30000 + Bubble sorting array of 30000 elements + 2155 ms + +Note there is currently no advantage to using Intel PT instead of LBR, but +that may change in the future if greater use is made of the data. diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 05e8fcc5188b..03ff072b5993 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -64,6 +64,7 @@ struct intel_pt { bool data_queued; bool est_tsc; bool sync_switch; + bool mispred_all; int have_sched_switch; u32 pmu_type; u64 kernel_start; @@ -943,6 +944,7 @@ static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX); be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX); /* No support for mispredict */ + be->flags.mispred = ptq->pt->mispred_all; if (bs->nr < ptq->pt->synth_opts.last_branch_sz) bs->nr += 1; @@ -1967,6 +1969,16 @@ static bool intel_pt_find_switch(struct perf_evlist *evlist) return false; } +static int intel_pt_perf_config(const char *var, const char *value, void *data) +{ + struct intel_pt *pt = data; + + if (!strcmp(var, "intel-pt.mispred-all")) + pt->mispred_all = perf_config_bool(var, value); + + return 0; +} + static const char * const intel_pt_info_fmts[] = { [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", @@ -2011,6 +2023,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (!pt) return -ENOMEM; + perf_config(intel_pt_perf_config, pt); + err = auxtrace_queues__init(&pt->queues); if (err) goto err_free; -- cgit v1.2.3 From 0b8891a8e62cb537b65ebc55cfbbb4ec22333c44 Mon Sep 17 00:00:00 2001 From: He Kuang Date: Mon, 28 Sep 2015 03:52:13 +0000 Subject: perf tools: Adds the config_term callback for different type events Currently, function config_term() is used for checking config terms of all types of events, while unknown terms is not reported as an error because pmu events have valid terms in sysfs. But this is wrong when unknown terms are specificed to hw/sw events. This patch Adds the config_term callback so we can use separate check routines for each type of events. Signed-off-by: He Kuang Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Kan Liang Cc: Peter Zijlstra Cc: Wang Nan Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1443412336-120050-1-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 61c2bc20926d..9dc3fb6ee81e 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -599,9 +599,13 @@ static int check_type_val(struct parse_events_term *term, return -EINVAL; } -static int config_term(struct perf_event_attr *attr, - struct parse_events_term *term, - struct parse_events_error *err) +typedef int config_term_func_t(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err); + +static int config_term_common(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) { #define CHECK_TYPE_VAL(type) \ do { \ @@ -610,12 +614,6 @@ do { \ } while (0) switch (term->type_term) { - case PARSE_EVENTS__TERM_TYPE_USER: - /* - * Always succeed for sysfs terms, as we dont know - * at this point what type they need to have. - */ - return 0; case PARSE_EVENTS__TERM_TYPE_CONFIG: CHECK_TYPE_VAL(NUM); attr->config = term->val.num; @@ -665,9 +663,24 @@ do { \ #undef CHECK_TYPE_VAL } +static int config_term_pmu(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) +{ + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) + /* + * Always succeed for sysfs terms, as we dont know + * at this point what type they need to have. + */ + return 0; + else + return config_term_common(attr, term, err); +} + static int config_attr(struct perf_event_attr *attr, struct list_head *head, - struct parse_events_error *err) + struct parse_events_error *err, + config_term_func_t config_term) { struct parse_events_term *term; @@ -735,7 +748,8 @@ int parse_events_add_numeric(struct parse_events_evlist *data, attr.config = config; if (head_config) { - if (config_attr(&attr, head_config, data->error)) + if (config_attr(&attr, head_config, data->error, + config_term_common)) return -EINVAL; if (get_config_terms(head_config, &config_terms)) @@ -795,7 +809,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data, * Configure hardcoded terms first, no need to check * return value when called with fail == 0 ;) */ - if (config_attr(&attr, head_config, data->error)) + if (config_attr(&attr, head_config, data->error, config_term_pmu)) return -EINVAL; if (get_config_terms(head_config, &config_terms)) -- cgit v1.2.3 From ffeb883e5662e94b14948078e85812261277ad67 Mon Sep 17 00:00:00 2001 From: He Kuang Date: Mon, 28 Sep 2015 03:52:14 +0000 Subject: perf tools: Show proper error message for wrong terms of hw/sw events Show proper error message and show valid terms when wrong config terms is specified for hw/sw type perf events. This patch makes the original error format function formats_error_string() more generic, which only outputs the static config terms for hw/sw perf events, and prepends pmu formats for pmu events. Before this patch: $ perf record -e 'cpu-clock/freqx=200/' -a sleep 1 invalid or unsupported event: 'cpu-clock/freqx=200/' Run 'perf list' for a list of valid events usage: perf record [] [] or: perf record [] -- [] -e, --event event selector. use 'perf list' to list available events After this patch: $ perf record -e 'cpu-clock/freqx=200/' -a sleep 1 event syntax error: 'cpu-clock/freqx=200/' \___ unknown term valid terms: config,config1,config2,name,period,freq,branch_type,time,call-graph,stack-size Run 'perf list' for a list of valid events usage: perf record [] [] or: perf record [] -- [] -e, --event event selector. use 'perf list' to list available events Signed-off-by: He Kuang Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Kan Liang Cc: Peter Zijlstra Cc: Wang Nan Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1443412336-120050-2-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 29 +++++++++++++++++++++++++++++ tools/perf/util/parse-events.h | 1 + tools/perf/util/parse-events.l | 2 +- tools/perf/util/pmu.c | 37 ++++++++++++++----------------------- 4 files changed, 45 insertions(+), 24 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 9dc3fb6ee81e..ea64ec0720ca 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -656,6 +656,9 @@ do { \ CHECK_TYPE_VAL(STR); break; default: + err->str = strdup("unknown term"); + err->idx = term->err_term; + err->help = parse_events_formats_error_string(NULL); return -EINVAL; } @@ -1875,3 +1878,29 @@ void parse_events_evlist_error(struct parse_events_evlist *data, err->str = strdup(str); WARN_ONCE(!err->str, "WARNING: failed to allocate error string"); } + +/* + * Return string contains valid config terms of an event. + * @additional_terms: For terms such as PMU sysfs terms. + */ +char *parse_events_formats_error_string(char *additional_terms) +{ + char *str; + static const char *static_terms = "config,config1,config2,name," + "period,freq,branch_type,time," + "call-graph,stack-size\n"; + + /* valid terms */ + if (additional_terms) { + if (!asprintf(&str, "valid terms: %s,%s", + additional_terms, static_terms)) + goto fail; + } else { + if (!asprintf(&str, "valid terms: %s", static_terms)) + goto fail; + } + return str; + +fail: + return NULL; +} diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index ffee7ece75a6..c7b904a49189 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -156,5 +156,6 @@ int print_hwcache_events(const char *event_glob, bool name_only); extern int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); +char *parse_events_formats_error_string(char *additional_terms); #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 936d566f48d8..c29832bce496 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -174,7 +174,7 @@ modifier_bp [rwx]{1,3} { /* - * Please update formats_error_string any time + * Please update parse_events_formats_error_string any time * new static term is added. */ config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 89c91a1a67e7..ac42c97be9e4 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -626,38 +626,26 @@ static int pmu_resolve_param_term(struct parse_events_term *term, return -1; } -static char *formats_error_string(struct list_head *formats) +static char *pmu_formats_string(struct list_head *formats) { struct perf_pmu_format *format; - char *err, *str; - static const char *static_terms = "config,config1,config2,name," - "period,freq,branch_type,time," - "call-graph,stack-size\n"; + char *str; + struct strbuf buf; unsigned i = 0; - if (!asprintf(&str, "valid terms:")) + if (!formats) return NULL; + strbuf_init(&buf, 0); /* sysfs exported terms */ - list_for_each_entry(format, formats, list) { - char c = i++ ? ',' : ' '; - - err = str; - if (!asprintf(&str, "%s%c%s", err, c, format->name)) - goto fail; - free(err); - } + list_for_each_entry(format, formats, list) + strbuf_addf(&buf, i++ ? ",%s" : "%s", + format->name); - /* static terms */ - err = str; - if (!asprintf(&str, "%s,%s", err, static_terms)) - goto fail; + str = strbuf_detach(&buf, NULL); + strbuf_release(&buf); - free(err); return str; -fail: - free(err); - return NULL; } /* @@ -693,9 +681,12 @@ static int pmu_config_term(struct list_head *formats, if (verbose) printf("Invalid event/parameter '%s'\n", term->config); if (err) { + char *pmu_term = pmu_formats_string(formats); + err->idx = term->err_term; err->str = strdup("unknown term"); - err->help = formats_error_string(formats); + err->help = parse_events_formats_error_string(pmu_term); + free(pmu_term); } return -EINVAL; } -- cgit v1.2.3 From 865582c3f48e12b7ab9e260161868313e4a37f44 Mon Sep 17 00:00:00 2001 From: He Kuang Date: Mon, 28 Sep 2015 03:52:15 +0000 Subject: perf tools: Adds the tracepoint name parsing support Adds rules for parsing tracepoint names. Change rules of tracepoint which derives from PE_NAMEs into tracepoint names directly, so adding more rules based on tracepoint names will be easier. Changes v2-v3: - Change __event_legacy_tracepoint label in bison file to tracepoint_name - Fix formats error. Signed-off-by: He Kuang Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Kan Liang Cc: Peter Zijlstra Cc: Wang Nan Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1443412336-120050-3-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 8bcc45868457..1c598c2f8dfc 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -67,6 +67,7 @@ static inc_group_count(struct list_head *list, %type event_legacy_cache %type event_legacy_mem %type event_legacy_tracepoint +%type tracepoint_name %type event_legacy_numeric %type event_legacy_raw %type event_def @@ -84,6 +85,10 @@ static inc_group_count(struct list_head *list, u64 num; struct list_head *head; struct parse_events_term *term; + struct tracepoint_name { + char *sys; + char *event; + } tracepoint_name; } %% @@ -368,36 +373,40 @@ PE_PREFIX_MEM PE_VALUE sep_dc } event_legacy_tracepoint: -PE_NAME '-' PE_NAME ':' PE_NAME +tracepoint_name { struct parse_events_evlist *data = _data; struct parse_events_error *error = data->error; struct list_head *list; - char sys_name[128]; - snprintf(&sys_name, 128, "%s-%s", $1, $3); ALLOC_LIST(list); - if (parse_events_add_tracepoint(list, &data->idx, &sys_name, $5, error)) { + if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event, + error)) { if (error) error->idx = @1.first_column; return -1; } $$ = list; } + +tracepoint_name: +PE_NAME '-' PE_NAME ':' PE_NAME +{ + char sys_name[128]; + struct tracepoint_name tracepoint; + + snprintf(&sys_name, 128, "%s-%s", $1, $3); + tracepoint.sys = &sys_name; + tracepoint.event = $5; + + $$ = tracepoint; +} | PE_NAME ':' PE_NAME { - struct parse_events_evlist *data = _data; - struct parse_events_error *error = data->error; - struct list_head *list; + struct tracepoint_name tracepoint = {$1, $3}; - ALLOC_LIST(list); - if (parse_events_add_tracepoint(list, &data->idx, $1, $3, error)) { - if (error) - error->idx = @1.first_column; - return -1; - } - $$ = list; + $$ = tracepoint; } event_legacy_numeric: -- cgit v1.2.3 From e637d17757a10732fa5d573c18f20b3cd4d31245 Mon Sep 17 00:00:00 2001 From: He Kuang Date: Mon, 28 Sep 2015 03:52:16 +0000 Subject: perf tools: Enable event_config terms to tracepoint events This patch enables config terms for tracepoint perf events. Valid terms for tracepoint events are 'call-graph' and 'stack-size', so we can use different callgraph settings for each event and eliminate unnecessary overhead. Here is an example for using different call-graph config for each tracepoint. $ perf record -e syscalls:sys_enter_write/call-graph=fp/ -e syscalls:sys_exit_write/call-graph=no/ dd if=/dev/zero of=test bs=4k count=10 $ perf report --stdio # # Total Lost Samples: 0 # # Samples: 13 of event 'syscalls:sys_enter_write' # Event count (approx.): 13 # # Children Self Command Shared Object Symbol # ........ ........ ....... .................. ...................... # 76.92% 76.92% dd libpthread-2.20.so [.] __write_nocancel | ---__write_nocancel 23.08% 23.08% dd libc-2.20.so [.] write | ---write | |--33.33%-- 0x2031342820736574 | |--33.33%-- 0xa6e69207364726f | --33.33%-- 0x34202c7320393039 ... # Samples: 13 of event 'syscalls:sys_exit_write' # Event count (approx.): 13 # # Children Self Command Shared Object Symbol # ........ ........ ....... .................. ...................... # 76.92% 76.92% dd libpthread-2.20.so [.] __write_nocancel 23.08% 23.08% dd libc-2.20.so [.] write 7.69% 0.00% dd [unknown] [.] 0x0a6e69207364726f 7.69% 0.00% dd [unknown] [.] 0x2031342820736574 7.69% 0.00% dd [unknown] [.] 0x34202c7320393039 Signed-off-by: He Kuang Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Kan Liang Cc: Peter Zijlstra Cc: Wang Nan Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1443412336-120050-4-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 84 +++++++++++++++++++++++++++++++++--------- tools/perf/util/parse-events.h | 3 +- tools/perf/util/parse-events.y | 26 +++++++++++-- 3 files changed, 90 insertions(+), 23 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index ea64ec0720ca..5ffb356cbcc6 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -27,6 +27,8 @@ extern int parse_events_debug; #endif int parse_events_parse(void *data, void *scanner); +static int get_config_terms(struct list_head *head_config, + struct list_head *head_terms __maybe_unused); static struct perf_pmu_event_symbol *perf_pmu_events_list; /* @@ -416,7 +418,8 @@ static void tracepoint_error(struct parse_events_error *error, int err, static int add_tracepoint(struct list_head *list, int *idx, char *sys_name, char *evt_name, - struct parse_events_error *error __maybe_unused) + struct parse_events_error *error __maybe_unused, + struct list_head *head_config) { struct perf_evsel *evsel; @@ -426,13 +429,22 @@ static int add_tracepoint(struct list_head *list, int *idx, return PTR_ERR(evsel); } + if (head_config) { + LIST_HEAD(config_terms); + + if (get_config_terms(head_config, &config_terms)) + return -ENOMEM; + list_splice(&config_terms, &evsel->config_terms); + } + list_add_tail(&evsel->node, list); return 0; } static int add_tracepoint_multi_event(struct list_head *list, int *idx, char *sys_name, char *evt_name, - struct parse_events_error *error) + struct parse_events_error *error, + struct list_head *head_config) { char evt_path[MAXPATHLEN]; struct dirent *evt_ent; @@ -456,7 +468,8 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, if (!strglobmatch(evt_ent->d_name, evt_name)) continue; - ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name, error); + ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name, + error, head_config); } closedir(evt_dir); @@ -465,16 +478,20 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, static int add_tracepoint_event(struct list_head *list, int *idx, char *sys_name, char *evt_name, - struct parse_events_error *error) + struct parse_events_error *error, + struct list_head *head_config) { return strpbrk(evt_name, "*?") ? - add_tracepoint_multi_event(list, idx, sys_name, evt_name, error) : - add_tracepoint(list, idx, sys_name, evt_name, error); + add_tracepoint_multi_event(list, idx, sys_name, evt_name, + error, head_config) : + add_tracepoint(list, idx, sys_name, evt_name, + error, head_config); } static int add_tracepoint_multi_sys(struct list_head *list, int *idx, char *sys_name, char *evt_name, - struct parse_events_error *error) + struct parse_events_error *error, + struct list_head *head_config) { struct dirent *events_ent; DIR *events_dir; @@ -498,23 +515,13 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, continue; ret = add_tracepoint_event(list, idx, events_ent->d_name, - evt_name, error); + evt_name, error, head_config); } closedir(events_dir); return ret; } -int parse_events_add_tracepoint(struct list_head *list, int *idx, - char *sys, char *event, - struct parse_events_error *error) -{ - if (strpbrk(sys, "*?")) - return add_tracepoint_multi_sys(list, idx, sys, event, error); - else - return add_tracepoint_event(list, idx, sys, event, error); -} - static int parse_breakpoint_type(const char *type, struct perf_event_attr *attr) { @@ -680,6 +687,26 @@ static int config_term_pmu(struct perf_event_attr *attr, return config_term_common(attr, term, err); } +static int config_term_tracepoint(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) +{ + switch (term->type_term) { + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: + case PARSE_EVENTS__TERM_TYPE_STACKSIZE: + return config_term_common(attr, term, err); + default: + if (err) { + err->idx = term->err_term; + err->str = strdup("unknown term"); + err->help = strdup("valid terms: call-graph,stack-size\n"); + } + return -EINVAL; + } + + return 0; +} + static int config_attr(struct perf_event_attr *attr, struct list_head *head, struct parse_events_error *err, @@ -738,6 +765,27 @@ do { \ return 0; } +int parse_events_add_tracepoint(struct list_head *list, int *idx, + char *sys, char *event, + struct parse_events_error *error, + struct list_head *head_config) +{ + if (head_config) { + struct perf_event_attr attr; + + if (config_attr(&attr, head_config, error, + config_term_tracepoint)) + return -EINVAL; + } + + if (strpbrk(sys, "*?")) + return add_tracepoint_multi_sys(list, idx, sys, event, + error, head_config); + else + return add_tracepoint_event(list, idx, sys, event, + error, head_config); +} + int parse_events_add_numeric(struct parse_events_evlist *data, struct list_head *list, u32 type, u64 config, diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index c7b904a49189..f13d3ccda444 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -119,7 +119,8 @@ int parse_events__modifier_group(struct list_head *list, char *event_mod); int parse_events_name(struct list_head *list, char *name); int parse_events_add_tracepoint(struct list_head *list, int *idx, char *sys, char *event, - struct parse_events_error *error); + struct parse_events_error *error, + struct list_head *head_config); int parse_events_add_numeric(struct parse_events_evlist *data, struct list_head *list, u32 type, u64 config, diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 1c598c2f8dfc..ae6af269f9c9 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -380,12 +380,30 @@ tracepoint_name struct list_head *list; ALLOC_LIST(list); + if (error) + error->idx = @1.first_column; + if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event, - error)) { - if (error) - error->idx = @1.first_column; + error, NULL)) return -1; - } + + $$ = list; +} +| +tracepoint_name '/' event_config '/' +{ + struct parse_events_evlist *data = _data; + struct parse_events_error *error = data->error; + struct list_head *list; + + ALLOC_LIST(list); + if (error) + error->idx = @1.first_column; + + if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event, + error, $3)) + return -1; + $$ = list; } -- cgit v1.2.3