From 7ba4da100261482afcb693a1b39e483c9c03aba2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 10 Jan 2019 11:12:56 +0100 Subject: perf session: Rearrange perf_session__process_events function To reduce function arguments and the code. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190110101301.6196-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5456c84c7dd1..ad14192db811 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1820,13 +1820,14 @@ fetch_mmaped_event(struct perf_session *session, #define NUM_MMAPS 128 #endif -static int __perf_session__process_events(struct perf_session *session, - u64 data_offset, u64 data_size, - u64 file_size) +static int __perf_session__process_events(struct perf_session *session) { struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; int fd = perf_data__fd(session->data); + u64 file_size = perf_data__size(session->data); + u64 data_offset = session->header.data_offset; + u64 data_size = session->header.data_size; u64 head, page_offset, file_offset, file_pos, size; int err, mmap_prot, mmap_flags, map_idx = 0; size_t mmap_size; @@ -1944,20 +1945,13 @@ out_err: int perf_session__process_events(struct perf_session *session) { - u64 size = perf_data__size(session->data); - int err; - if (perf_session__register_idle_thread(session) < 0) return -ENOMEM; - if (!perf_data__is_pipe(session->data)) - err = __perf_session__process_events(session, - session->header.data_offset, - session->header.data_size, size); - else - err = __perf_session__process_pipe_events(session); + if (perf_data__is_pipe(session->data)) + return __perf_session__process_pipe_events(session); - return err; + return __perf_session__process_events(session); } bool perf_session__has_traces(struct perf_session *session, const char *msg) -- cgit v1.2.3 From 4f5a473d79c755d2e325c9a8dab01804d5416923 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 10 Jan 2019 11:12:57 +0100 Subject: perf session: Get rid of file_size variable It's not needed and removing it makes the code a little simpler for the upcoming changes. It's safe to replace file_size with data_size, because the perf_data__size() value is never smaller than data_offset + data_size. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190110101301.6196-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index ad14192db811..c8c2069fb01e 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1825,7 +1825,6 @@ static int __perf_session__process_events(struct perf_session *session) struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; int fd = perf_data__fd(session->data); - u64 file_size = perf_data__size(session->data); u64 data_offset = session->header.data_offset; u64 data_size = session->header.data_size; u64 head, page_offset, file_offset, file_pos, size; @@ -1845,14 +1844,13 @@ static int __perf_session__process_events(struct perf_session *session) if (data_size == 0) goto out; - if (data_offset + data_size < file_size) - file_size = data_offset + data_size; + ui_progress__init_size(&prog, data_size, "Processing events..."); - ui_progress__init_size(&prog, file_size, "Processing events..."); + data_size += data_offset; mmap_size = MMAP_SIZE; - if (mmap_size > file_size) { - mmap_size = file_size; + if (mmap_size > data_size) { + mmap_size = data_size; session->one_mmap = true; } @@ -1917,7 +1915,7 @@ more: if (session_done()) goto out; - if (file_pos < file_size) + if (file_pos < data_size) goto more; out: -- cgit v1.2.3 From 82715eb184c55b5097c8f1022f54303c2ddbc329 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 10 Jan 2019 11:12:58 +0100 Subject: perf session: Add reader object Add a session private reader object to encapsulate the reading of the event data block. Starting with a 'fd' field. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190110101301.6196-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index c8c2069fb01e..06379cc87f59 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1820,11 +1820,17 @@ fetch_mmaped_event(struct perf_session *session, #define NUM_MMAPS 128 #endif +struct reader { + int fd; +}; + static int __perf_session__process_events(struct perf_session *session) { + struct reader rd = { + .fd = perf_data__fd(session->data), + }; struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; - int fd = perf_data__fd(session->data); u64 data_offset = session->header.data_offset; u64 data_size = session->header.data_size; u64 head, page_offset, file_offset, file_pos, size; @@ -1864,7 +1870,7 @@ static int __perf_session__process_events(struct perf_session *session) mmap_flags = MAP_PRIVATE; } remap: - buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, fd, + buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, rd.fd, file_offset); if (buf == MAP_FAILED) { pr_err("failed to mmap file\n"); -- cgit v1.2.3 From f66f0950527cd070d2949a0133275c30e717ba9b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 10 Jan 2019 11:12:59 +0100 Subject: perf session: Add 'data_size' member to reader object Add a 'data_size' member to the reader object. Keep the 'data_size' variable instead of replacing it with rd.data_size as it will be used in the following patch. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190110101301.6196-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 06379cc87f59..9f29ed743425 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1822,17 +1822,19 @@ fetch_mmaped_event(struct perf_session *session, struct reader { int fd; + u64 data_size; }; static int __perf_session__process_events(struct perf_session *session) { struct reader rd = { .fd = perf_data__fd(session->data), + .data_size = session->header.data_size, }; struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; u64 data_offset = session->header.data_offset; - u64 data_size = session->header.data_size; + u64 data_size = rd.data_size; u64 head, page_offset, file_offset, file_pos, size; int err, mmap_prot, mmap_flags, map_idx = 0; size_t mmap_size; -- cgit v1.2.3 From 71002bd214822a15bee0f7bc033a0dbf95cf780b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 10 Jan 2019 11:13:00 +0100 Subject: perf session: Add 'data_offset' member to reader object Add 'data_offset' member to reader object. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190110101301.6196-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 9f29ed743425..173cf445475c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1823,6 +1823,7 @@ fetch_mmaped_event(struct perf_session *session, struct reader { int fd; u64 data_size; + u64 data_offset; }; static int __perf_session__process_events(struct perf_session *session) @@ -1830,10 +1831,10 @@ static int __perf_session__process_events(struct perf_session *session) struct reader rd = { .fd = perf_data__fd(session->data), .data_size = session->header.data_size, + .data_offset = session->header.data_offset, }; struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; - u64 data_offset = session->header.data_offset; u64 data_size = rd.data_size; u64 head, page_offset, file_offset, file_pos, size; int err, mmap_prot, mmap_flags, map_idx = 0; @@ -1845,16 +1846,16 @@ static int __perf_session__process_events(struct perf_session *session) perf_tool__fill_defaults(tool); - page_offset = page_size * (data_offset / page_size); + page_offset = page_size * (rd.data_offset / page_size); file_offset = page_offset; - head = data_offset - page_offset; + head = rd.data_offset - page_offset; if (data_size == 0) goto out; ui_progress__init_size(&prog, data_size, "Processing events..."); - data_size += data_offset; + data_size += rd.data_offset; mmap_size = MMAP_SIZE; if (mmap_size > data_size) { -- cgit v1.2.3 From 3c7b67b23eb317787d113b1e9a6d82250202a1e3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 10 Jan 2019 11:13:01 +0100 Subject: perf session: Add reader__process_events function The reader object is defined by file's fd, data offset and data size. Now we can simply define a reader object for an arbitrary file data portion and pass it to reader__process_events(). Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190110101301.6196-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 62 +++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 24 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 173cf445475c..d6f41611f504 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1826,36 +1826,25 @@ struct reader { u64 data_offset; }; -static int __perf_session__process_events(struct perf_session *session) +static int +reader__process_events(struct reader *rd, struct perf_session *session, + struct ui_progress *prog) { - struct reader rd = { - .fd = perf_data__fd(session->data), - .data_size = session->header.data_size, - .data_offset = session->header.data_offset, - }; - struct ordered_events *oe = &session->ordered_events; - struct perf_tool *tool = session->tool; - u64 data_size = rd.data_size; + u64 data_size = rd->data_size; u64 head, page_offset, file_offset, file_pos, size; - int err, mmap_prot, mmap_flags, map_idx = 0; + int err = 0, mmap_prot, mmap_flags, map_idx = 0; size_t mmap_size; char *buf, *mmaps[NUM_MMAPS]; union perf_event *event; - struct ui_progress prog; s64 skip; - perf_tool__fill_defaults(tool); - - page_offset = page_size * (rd.data_offset / page_size); + page_offset = page_size * (rd->data_offset / page_size); file_offset = page_offset; - head = rd.data_offset - page_offset; + head = rd->data_offset - page_offset; - if (data_size == 0) - goto out; + ui_progress__init_size(prog, data_size, "Processing events..."); - ui_progress__init_size(&prog, data_size, "Processing events..."); - - data_size += rd.data_offset; + data_size += rd->data_offset; mmap_size = MMAP_SIZE; if (mmap_size > data_size) { @@ -1873,12 +1862,12 @@ static int __perf_session__process_events(struct perf_session *session) mmap_flags = MAP_PRIVATE; } remap: - buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, rd.fd, + buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, rd->fd, file_offset); if (buf == MAP_FAILED) { pr_err("failed to mmap file\n"); err = -errno; - goto out_err; + goto out; } mmaps[map_idx] = buf; map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1); @@ -1910,7 +1899,7 @@ more: file_offset + head, event->header.size, event->header.type); err = -EINVAL; - goto out_err; + goto out; } if (skip) @@ -1919,7 +1908,7 @@ more: head += size; file_pos += size; - ui_progress__update(&prog, size); + ui_progress__update(prog, size); if (session_done()) goto out; @@ -1928,6 +1917,31 @@ more: goto more; out: + return err; +} + +static int __perf_session__process_events(struct perf_session *session) +{ + struct reader rd = { + .fd = perf_data__fd(session->data), + .data_size = session->header.data_size, + .data_offset = session->header.data_offset, + }; + struct ordered_events *oe = &session->ordered_events; + struct perf_tool *tool = session->tool; + struct ui_progress prog; + int err; + + perf_tool__fill_defaults(tool); + + if (rd.data_size == 0) + return -1; + + ui_progress__init_size(&prog, rd.data_size, "Processing events..."); + + err = reader__process_events(&rd, session, &prog); + if (err) + goto out_err; /* do the final flush for ordered samples */ err = ordered_events__flush(oe, OE_FLUSH__FINAL); if (err) -- cgit v1.2.3 From 3eb03a5208a4eb46443e23e3fa9eb7e4c7ae4885 Mon Sep 17 00:00:00 2001 From: Brajeswar Ghosh Date: Tue, 15 Jan 2019 19:29:16 +0530 Subject: perf tools: Remove duplicate headers Remove duplicate headers which are included more than once in the same file. Signed-off-by: Brajeswar Ghosh Acked-by: Souptick Joarder Cc: Alexander Shishkin Cc: Colin King Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sabyasachi Gupta Link: http://lkml.kernel.org/r/20190115135916.GA3629@hp-pavilion-15-notebook-pc-brajeswar Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 1 - tools/perf/tests/bp_account.c | 1 - tools/perf/util/dso.h | 1 - tools/perf/util/zlib.c | 1 - 4 files changed, 4 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 63a3afc7f32b..e587808591e8 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -83,7 +83,6 @@ #include #include #include -#include #include "sane_ctype.h" diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c index a20cbc445426..57fc544aedb0 100644 --- a/tools/perf/tests/bp_account.c +++ b/tools/perf/tests/bp_account.c @@ -15,7 +15,6 @@ #include #include #include -#include #include "tests.h" #include "debug.h" diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 8c8a7abe809d..af2eda29660f 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -8,7 +8,6 @@ #include #include #include "rwsem.h" -#include #include #include "map.h" #include "namespaces.h" diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c index 902ce6384f57..512ad7c09b13 100644 --- a/tools/perf/util/zlib.c +++ b/tools/perf/util/zlib.c @@ -6,7 +6,6 @@ #include #include #include -#include #include "util/compress.h" #include "util/util.h" -- cgit v1.2.3 From 93115d32e8666614f3578601a0b7b3f4914509f4 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 17 Jan 2019 10:37:17 -0300 Subject: perf report: Display arch specific diagnostic counter sets, starting with s390 On s390 the event bc000 (also named CF_DIAG) extracts the CPU Measurement Facility diagnostic counter sets and displays them as counter number and counter value pairs sorted by counter set number. Output: [root@s35lp76 perf]# ./perf report -D --stdio [00000000] Counterset:0 Counters:6 Counter:000 Value:0x000000000085ec36 Counter:001 Value:0x0000000000796c94 Counter:002 Value:0x0000000000005ada Counter:003 Value:0x0000000000092460 Counter:004 Value:0x0000000000006073 Counter:005 Value:0x00000000001a9a73 [0x000038] Counterset:1 Counters:2 Counter:000 Value:0x000000000007c59f Counter:001 Value:0x000000000002fad6 [0x000050] Counterset:2 Counters:16 Counter:000 Value:000000000000000000 Counter:001 Value:000000000000000000 Counter:002 Value:000000000000000000 Counter:003 Value:000000000000000000 Counter:004 Value:000000000000000000 Counter:005 Value:000000000000000000 Counter:006 Value:000000000000000000 Counter:007 Value:000000000000000000 Counter:008 Value:000000000000000000 Counter:009 Value:000000000000000000 Counter:010 Value:000000000000000000 Counter:011 Value:000000000000000000 Counter:012 Value:000000000000000000 Counter:013 Value:000000000000000000 Counter:014 Value:000000000000000000 Counter:015 Value:000000000000000000 [0x0000d8] Counterset:3 Counters:128 Counter:000 Value:0x000000000000020f Counter:001 Value:0x00000000000001d8 Counter:002 Value:0x000000000000d7fa Counter:003 Value:0x000000000000008b ... The number in brackets is the offset into the raw data field of the sample. New functions trace_event_sample_raw__init() and s390_sample_raw() are introduced in the code path to enable interpretation on non s390 platforms. This event bc000 attached raw data is generated only on s390 platform. Correct display on other platforms requires correct endianness handling. Committer notes: Added a init function that sets up a evlist function pointer to avoid repeated tests on evlist->env and calls to perf_env__name() that involves normalizing, etc, for each PERF_RECORD_SAMPLE. Removed needless __maybe_unused from the trace_event_raw() prototype in session.h, move it to be an static function in evlist. The 'offset' variable is a size_t, not an u64, fix it to avoid this on some arches: CC /tmp/build/perf/util/s390-sample-raw.o util/s390-sample-raw.c: In function 's390_cpumcfdg_testctr': util/s390-sample-raw.c:77:4: error: format '%llx' expects argument of type 'long long unsigned int', but argument 4 has type 'size_t' [-Werror=format=] pr_err("Invalid counter set entry at %#" PRIx64 "\n", ^ cc1: all warnings being treated as errors Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Link: https://lkml.kernel.org/r/9c856ac0-ef23-72b5-901d-a1f815508976@linux.ibm.com Cc: Martin Schwidefsky Cc: Heiko Carstens Link: https://lkml.kernel.org/n/tip-s3jhif06et9ug78qhclw41z1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 2 + tools/perf/util/evlist.h | 4 +- tools/perf/util/s390-cpumcf-kernel.h | 62 +++++++++++++ tools/perf/util/s390-sample-raw.c | 173 +++++++++++++++++++++++++++++++++++ tools/perf/util/sample-raw.c | 18 ++++ tools/perf/util/sample-raw.h | 14 +++ tools/perf/util/session.c | 5 + 7 files changed, 277 insertions(+), 1 deletion(-) create mode 100644 tools/perf/util/s390-cpumcf-kernel.h create mode 100644 tools/perf/util/s390-sample-raw.c create mode 100644 tools/perf/util/sample-raw.c create mode 100644 tools/perf/util/sample-raw.h (limited to 'tools/perf/util') diff --git a/tools/perf/util/Build b/tools/perf/util/Build index af72be7f5b3b..3ad6a800948d 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -44,6 +44,8 @@ libperf-y += machine.o libperf-y += map.o libperf-y += pstack.o libperf-y += session.o +libperf-y += sample-raw.o +libperf-y += s390-sample-raw.o libperf-$(CONFIG_TRACE) += syscalltbl.o libperf-y += ordered-events.o libperf-y += namespaces.o diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 868294491194..00ab43c6dd15 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -49,6 +49,9 @@ struct perf_evlist { struct perf_evsel *selected; struct events_stats stats; struct perf_env *env; + void (*trace_event_sample_raw)(struct perf_evlist *evlist, + union perf_event *event, + struct perf_sample *sample); u64 first_sample_time; u64 last_sample_time; }; @@ -314,5 +317,4 @@ void perf_evlist__force_leader(struct perf_evlist *evlist); struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evlist, struct perf_evsel *evsel); - #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h new file mode 100644 index 000000000000..d4356030b504 --- /dev/null +++ b/tools/perf/util/s390-cpumcf-kernel.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Support for s390 CPU measurement counter set diagnostic facility + * + * Copyright IBM Corp. 2019 + Author(s): Hendrik Brueckner + * Thomas Richter + */ +#ifndef S390_CPUMCF_KERNEL_H +#define S390_CPUMCF_KERNEL_H + +#define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */ +#define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */ + +struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int set:16; /* 16-23 Counter set identifier */ + unsigned int ctr:16; /* 24-39 Number of stored counters */ + unsigned int res1:16; /* 40-63 Reserved */ +}; + +struct cf_trailer_entry { /* CPU-M CF trailer for raw traces (64 byte) */ + /* 0 - 7 */ + union { + struct { + unsigned int clock_base:1; /* TOD clock base */ + unsigned int speed:1; /* CPU speed */ + /* Measurement alerts */ + unsigned int mtda:1; /* Loss of MT ctr. data alert */ + unsigned int caca:1; /* Counter auth. change alert */ + unsigned int lcda:1; /* Loss of counter data alert */ + }; + unsigned long flags; /* 0-63 All indicators */ + }; + /* 8 - 15 */ + unsigned int cfvn:16; /* 64-79 Ctr First Version */ + unsigned int csvn:16; /* 80-95 Ctr Second Version */ + unsigned int cpu_speed:32; /* 96-127 CPU speed */ + /* 16 - 23 */ + unsigned long timestamp; /* 128-191 Timestamp (TOD) */ + /* 24 - 55 */ + union { + struct { + unsigned long progusage1; + unsigned long progusage2; + unsigned long progusage3; + unsigned long tod_base; + }; + unsigned long progusage[4]; + }; + /* 56 - 63 */ + unsigned int mach_type:16; /* Machine type */ + unsigned int res1:16; /* Reserved */ + unsigned int res2:32; /* Reserved */ +}; + +#define CPUMF_CTR_SET_BASIC 0 /* Basic Counter Set */ +#define CPUMF_CTR_SET_USER 1 /* Problem-State Counter Set */ +#define CPUMF_CTR_SET_CRYPTO 2 /* Crypto-Activity Counter Set */ +#define CPUMF_CTR_SET_EXT 3 /* Extended Counter Set */ +#define CPUMF_CTR_SET_MT_DIAG 4 /* MT-diagnostic Counter Set */ +#endif diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c new file mode 100644 index 000000000000..1c5a4d63bc9d --- /dev/null +++ b/tools/perf/util/s390-sample-raw.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2019 + * Author(s): Thomas Richter + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Architecture specific trace_event function. Save event's bc000 raw data + * to file. File name is aux.ctr.## where ## stands for the CPU number the + * sample was taken from. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "debug.h" +#include "util.h" +#include "auxtrace.h" +#include "session.h" +#include "evlist.h" +#include "config.h" +#include "color.h" +#include "sample-raw.h" +#include "s390-cpumcf-kernel.h" + +static size_t ctrset_size(struct cf_ctrset_entry *set) +{ + return sizeof(*set) + set->ctr * sizeof(u64); +} + +static bool ctrset_valid(struct cf_ctrset_entry *set) +{ + return set->def == S390_CPUMCF_DIAG_DEF; +} + +/* CPU Measurement Counter Facility raw data is a byte stream. It is 8 byte + * aligned and might have trailing padding bytes. + * Display the raw data on screen. + */ +static bool s390_cpumcfdg_testctr(struct perf_sample *sample) +{ + size_t len = sample->raw_size, offset = 0; + unsigned char *buf = sample->raw_data; + struct cf_trailer_entry *te; + struct cf_ctrset_entry *cep, ce; + + if (!len) + return false; + while (offset < len) { + cep = (struct cf_ctrset_entry *)(buf + offset); + ce.def = be16_to_cpu(cep->def); + ce.set = be16_to_cpu(cep->set); + ce.ctr = be16_to_cpu(cep->ctr); + ce.res1 = be16_to_cpu(cep->res1); + + if (!ctrset_valid(&ce) || offset + ctrset_size(&ce) > len) { + /* Raw data for counter sets are always multiple of 8 + * bytes. Prepending a 4 bytes size field to the + * raw data block in the sample causes the perf tool + * to append 4 padding bytes to make the raw data part + * of the sample a multiple of eight bytes again. + * + * If the last entry (trailer) is 4 bytes off the raw + * area data end, all is good. + */ + if (len - offset - sizeof(*te) == 4) + break; + pr_err("Invalid counter set entry at %zd\n", offset); + return false; + } + offset += ctrset_size(&ce); + } + return true; +} + +/* Dump event bc000 on screen, already tested on correctness. */ +static void s390_cpumcfdg_dumptrail(const char *color, size_t offset, + struct cf_trailer_entry *tep) +{ + struct cf_trailer_entry te; + + te.flags = be64_to_cpu(tep->flags); + te.cfvn = be16_to_cpu(tep->cfvn); + te.csvn = be16_to_cpu(tep->csvn); + te.cpu_speed = be32_to_cpu(tep->cpu_speed); + te.timestamp = be64_to_cpu(tep->timestamp); + te.progusage1 = be64_to_cpu(tep->progusage1); + te.progusage2 = be64_to_cpu(tep->progusage2); + te.progusage3 = be64_to_cpu(tep->progusage3); + te.tod_base = be64_to_cpu(tep->tod_base); + te.mach_type = be16_to_cpu(tep->mach_type); + te.res1 = be16_to_cpu(tep->res1); + te.res2 = be32_to_cpu(tep->res2); + + color_fprintf(stdout, color, " [%#08zx] Trailer:%c%c%c%c%c" + " Cfvn:%d Csvn:%d Speed:%d TOD:%#llx\n", + offset, te.clock_base ? 'T' : ' ', + te.speed ? 'S' : ' ', te.mtda ? 'M' : ' ', + te.caca ? 'C' : ' ', te.lcda ? 'L' : ' ', + te.cfvn, te.csvn, te.cpu_speed, te.timestamp); + color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#llx" + " Type:%x\n\n", + te.progusage1, te.progusage2, te.progusage3, + te.tod_base, te.mach_type); +} + +static void s390_cpumcfdg_dump(struct perf_sample *sample) +{ + size_t i, len = sample->raw_size, offset = 0; + unsigned char *buf = sample->raw_data; + const char *color = PERF_COLOR_BLUE; + struct cf_ctrset_entry *cep, ce; + u64 *p; + + while (offset < len) { + cep = (struct cf_ctrset_entry *)(buf + offset); + + ce.def = be16_to_cpu(cep->def); + ce.set = be16_to_cpu(cep->set); + ce.ctr = be16_to_cpu(cep->ctr); + ce.res1 = be16_to_cpu(cep->res1); + + if (!ctrset_valid(&ce)) { /* Print trailer */ + s390_cpumcfdg_dumptrail(color, offset, + (struct cf_trailer_entry *)cep); + return; + } + + color_fprintf(stdout, color, " [%#08zx] Counterset:%d" + " Counters:%d\n", offset, ce.set, ce.ctr); + for (i = 0, p = (u64 *)(cep + 1); i < ce.ctr; i += 2, p += 2) + color_fprintf(stdout, color, + "\tCounter:%03d Value:%#018lx" + " Counter:%03d Value:%#018lx\n", + i, be64_to_cpu(*p), + i + 1, be64_to_cpu(*(p + 1))); + offset += ctrset_size(&ce); + } +} + +/* S390 specific trace event function. Check for PERF_RECORD_SAMPLE events + * and if the event was triggered by a counter set diagnostic event display + * its raw data. + * The function is only invoked when the dump flag -D is set. + */ +void perf_evlist__s390_sample_raw(struct perf_evlist *evlist, union perf_event *event, + struct perf_sample *sample) +{ + struct perf_evsel *ev_bc000; + + if (event->header.type != PERF_RECORD_SAMPLE) + return; + + ev_bc000 = perf_evlist__event2evsel(evlist, event); + if (ev_bc000 == NULL || + ev_bc000->attr.config != PERF_EVENT_CPUM_CF_DIAG) + return; + + /* Display raw data on screen */ + if (!s390_cpumcfdg_testctr(sample)) { + pr_err("Invalid counter set data encountered\n"); + return; + } + s390_cpumcfdg_dump(sample); +} diff --git a/tools/perf/util/sample-raw.c b/tools/perf/util/sample-raw.c new file mode 100644 index 000000000000..c21e1311fb0f --- /dev/null +++ b/tools/perf/util/sample-raw.c @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include "evlist.h" +#include "env.h" +#include "sample-raw.h" + +/* + * Check platform the perf data file was created on and perform platform + * specific interpretation. + */ +void perf_evlist__init_trace_event_sample_raw(struct perf_evlist *evlist) +{ + const char *arch_pf = perf_env__arch(evlist->env); + + if (arch_pf && !strcmp("s390", arch_pf)) + evlist->trace_event_sample_raw = perf_evlist__s390_sample_raw; +} diff --git a/tools/perf/util/sample-raw.h b/tools/perf/util/sample-raw.h new file mode 100644 index 000000000000..95d445c87e93 --- /dev/null +++ b/tools/perf/util/sample-raw.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SAMPLE_RAW_H +#define __SAMPLE_RAW_H 1 + +struct perf_evlist; +union perf_event; +struct perf_sample; + +void perf_evlist__s390_sample_raw(struct perf_evlist *evlist, + union perf_event *event, + struct perf_sample *sample); + +void perf_evlist__init_trace_event_sample_raw(struct perf_evlist *evlist); +#endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d6f41611f504..b26bc9c54200 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -23,6 +23,7 @@ #include "auxtrace.h" #include "thread.h" #include "thread-stack.h" +#include "sample-raw.h" #include "stat.h" #include "arch/common.h" @@ -147,6 +148,8 @@ struct perf_session *perf_session__new(struct perf_data *data, perf_session__set_id_hdr_size(session); perf_session__set_comm_exec(session); } + + perf_evlist__init_trace_event_sample_raw(session->evlist); } } else { session->machines.host.env = &perf_env; @@ -1065,6 +1068,8 @@ static void dump_event(struct perf_evlist *evlist, union perf_event *event, file_offset, event->header.size, event->header.type); trace_event(event); + if (event->header.type == PERF_RECORD_SAMPLE && evlist->trace_event_sample_raw) + evlist->trace_event_sample_raw(evlist, event, sample); if (sample) perf_evlist__print_tstamp(evlist, event, sample); -- cgit v1.2.3 From 3e4a1c536b2690cf461e930485b0ab0cdafba73b Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 17 Jan 2019 10:30:02 +0100 Subject: perf report: Display names in s390 diagnostic counter sets On s390 the CPU Measurement Facility diagnostic counter sets are displayed by counter number and value. Add the logical counter name in the output (if it is available). Otherwise "unknown" is shown. Output before: [root@s35lp76 perf]# ./perf report -D --stdio [00000000] Counterset:0 Counters:6 Counter:000 Value:0x000000000085ec36 Counter:001 Value:0x0000000000796c94 Counter:002 Value:0x0000000000005ada Counter:003 Value:0x0000000000092460 Counter:004 Value:0x0000000000006073 Counter:005 Value:0x00000000001a9a73 [0x000038] Counterset:1 Counters:2 Counter:000 Value:0x000000000007c59f Counter:001 Value:0x000000000002fad6 [0x000050] Counterset:2 Counters:16 Counter:000 Value:000000000000000000 Counter:001 Value:000000000000000000 Output after: [root@s35lp76 perf]# ./perf report -D --stdio [00000000] Counterset:0 Counters:6 Counter:000 cpu_cycles Value:0x000000000085ec36 Counter:001 instructions Value:0x0000000000796c94 Counter:002 l1i_dir_writes Value:0x0000000000005ada Counter:003 l1i_penalty_cycles Value:0x0000000000092460 Counter:004 l1d_dir_writes Value:0x0000000000006073 Counter:005 l1d_penalty_cycles Value:0x00000000001a9a73 [0x000038] Counterset:1 Counters:2 Counter:000 problem_state_cpu_cycles Value:0x000000000007c59f Counter:001 problem_state_instructions Value:0x000000000002fad6 [0x000050] Counterset:2 Counters:16 Counter:000 prng_functions Value:000000000000000000 Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20190117093003.96287-3-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/s390-sample-raw.c | 59 +++++++++++++++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 5 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 1c5a4d63bc9d..6650f599ed9c 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -30,6 +30,7 @@ #include "color.h" #include "sample-raw.h" #include "s390-cpumcf-kernel.h" +#include "pmu-events/pmu-events.h" static size_t ctrset_size(struct cf_ctrset_entry *set) { @@ -112,14 +113,61 @@ static void s390_cpumcfdg_dumptrail(const char *color, size_t offset, te.tod_base, te.mach_type); } +/* Return starting number of a counter set */ +static int get_counterset_start(int setnr) +{ + switch (setnr) { + case CPUMF_CTR_SET_BASIC: /* Basic counter set */ + return 0; + case CPUMF_CTR_SET_USER: /* Problem state counter set */ + return 32; + case CPUMF_CTR_SET_CRYPTO: /* Crypto counter set */ + return 64; + case CPUMF_CTR_SET_EXT: /* Extended counter set */ + return 128; + case CPUMF_CTR_SET_MT_DIAG: /* Diagnostic counter set */ + return 448; + default: + return -1; + } +} + +/* Scan the PMU table and extract the logical name of a counter from the + * PMU events table. Input is the counter set and counter number with in the + * set. Construct the event number and use this as key. If they match return + * the name of this counter. + * If no match is found a NULL pointer is returned. + */ +static const char *get_counter_name(int set, int nr, struct pmu_events_map *map) +{ + int rc, event_nr, wanted = get_counterset_start(set) + nr; + + if (map) { + struct pmu_event *evp = map->table; + + for (; evp->name || evp->event || evp->desc; ++evp) { + if (evp->name == NULL || evp->event == NULL) + continue; + rc = sscanf(evp->event, "event=%x", &event_nr); + if (rc == 1 && event_nr == wanted) + return evp->name; + } + } + return NULL; +} + static void s390_cpumcfdg_dump(struct perf_sample *sample) { size_t i, len = sample->raw_size, offset = 0; unsigned char *buf = sample->raw_data; const char *color = PERF_COLOR_BLUE; struct cf_ctrset_entry *cep, ce; + struct pmu_events_map *map; + struct perf_pmu pmu; u64 *p; + memset(&pmu, 0, sizeof(pmu)); + map = perf_pmu__find_map(&pmu); while (offset < len) { cep = (struct cf_ctrset_entry *)(buf + offset); @@ -136,12 +184,13 @@ static void s390_cpumcfdg_dump(struct perf_sample *sample) color_fprintf(stdout, color, " [%#08zx] Counterset:%d" " Counters:%d\n", offset, ce.set, ce.ctr); - for (i = 0, p = (u64 *)(cep + 1); i < ce.ctr; i += 2, p += 2) + for (i = 0, p = (u64 *)(cep + 1); i < ce.ctr; ++i, ++p) { + const char *ev_name = get_counter_name(ce.set, i, map); + color_fprintf(stdout, color, - "\tCounter:%03d Value:%#018lx" - " Counter:%03d Value:%#018lx\n", - i, be64_to_cpu(*p), - i + 1, be64_to_cpu(*(p + 1))); + "\tCounter:%03d %s Value:%#018lx\n", i, + ev_name ?: "", be64_to_cpu(*p)); + } offset += ctrset_size(&ce); } } -- cgit v1.2.3 From 8dabe9c43af7aa78b16ce0d61bc595eca20c7a70 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 17 Jan 2019 10:30:03 +0100 Subject: perf report: Dump s390 counter set data to file Add support for the new s390 PMU device cpum_cf_diag to extract the counter set diagnostic data. This data is available as event raw data and can be created with this command: [root@s35lp76 perf]# ./perf record -R -e '{rbd000,rbc000}' -- ~/mytests/facultaet 2500 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.009 MB perf.data ] [root@s35lp76 perf]# The new event 0xbc000 generated this counter set diagnostic trace data. The data can be extracted using command: [root@s35lp76 perf]# ./perf report --stdio --itrace=d # # Total Lost Samples: 0 # # Samples: 21 of events 'anon group { rbd000, rbc000 }' # Event count (approx.): 21 # # Overhead Command Shared Object Symbol # ................ ......... ................. ........................ # 80.95% 0.00% facultaet facultaet [.] facultaet 4.76% 0.00% facultaet [kernel.kallsyms] [k] check_chain_key 4.76% 0.00% facultaet [kernel.kallsyms] [k] ftrace_likely_update 4.76% 0.00% facultaet [kernel.kallsyms] [k] lock_release 4.76% 0.00% facultaet libc-2.26.so [.] _dl_addr [root@s35lp76 perf]# ll aux* -rw-r--r-- 1 root root 3408 Oct 16 12:40 aux.ctr.02 -rw-r--r-- 1 root root 4096 Oct 16 12:40 aux.smp.02 [root@s35lp76 perf]# The files named aux.ctr.## contain the counter set diagnostic data and the files named aux.smp.## contain the sampling diagnostic data. ## stand for the CPU number the data was taken from. Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20190117093003.96287-4-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/s390-cpumsf.c | 77 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 4 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 68b2570304ec..835249c77f56 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -162,6 +162,7 @@ #include "auxtrace.h" #include "s390-cpumsf.h" #include "s390-cpumsf-kernel.h" +#include "s390-cpumcf-kernel.h" #include "config.h" struct s390_cpumsf { @@ -184,8 +185,58 @@ struct s390_cpumsf_queue { struct auxtrace_buffer *buffer; int cpu; FILE *logfile; + FILE *logfile_ctr; }; +/* Check if the raw data should be dumped to file. If this is the case and + * the file to dump to has not been opened for writing, do so. + * + * Return 0 on success and greater zero on error so processing continues. + */ +static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf, + struct perf_sample *sample) +{ + struct s390_cpumsf_queue *sfq; + struct auxtrace_queue *q; + int rc = 0; + + if (!sf->use_logfile || sf->queues.nr_queues <= sample->cpu) + return rc; + + q = &sf->queues.queue_array[sample->cpu]; + sfq = q->priv; + if (!sfq) /* Queue not yet allocated */ + return rc; + + if (!sfq->logfile_ctr) { + char *name; + + rc = (sf->logdir) + ? asprintf(&name, "%s/aux.ctr.%02x", + sf->logdir, sample->cpu) + : asprintf(&name, "aux.ctr.%02x", sample->cpu); + if (rc > 0) + sfq->logfile_ctr = fopen(name, "w"); + if (sfq->logfile_ctr == NULL) { + pr_err("Failed to open counter set log file %s, " + "continue...\n", name); + rc = 1; + } + free(name); + } + + if (sfq->logfile_ctr) { + /* See comment above for -4 */ + size_t n = fwrite(sample->raw_data, sample->raw_size - 4, 1, + sfq->logfile_ctr); + if (n != 1) { + pr_err("Failed to write counter set data\n"); + rc = 1; + } + } + return rc; +} + /* Display s390 CPU measurement facility basic-sampling data entry */ static bool s390_cpumsf_basic_show(const char *color, size_t pos, struct hws_basic_entry *basic) @@ -792,7 +843,7 @@ static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample) } static int -s390_cpumsf_process_event(struct perf_session *session __maybe_unused, +s390_cpumsf_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, struct perf_tool *tool) @@ -801,6 +852,8 @@ s390_cpumsf_process_event(struct perf_session *session __maybe_unused, struct s390_cpumsf, auxtrace); u64 timestamp = sample->time; + struct perf_evsel *ev_bc000; + int err = 0; if (dump_trace) @@ -811,6 +864,16 @@ s390_cpumsf_process_event(struct perf_session *session __maybe_unused, return -EINVAL; } + if (event->header.type == PERF_RECORD_SAMPLE && + sample->raw_size) { + /* Handle event with raw data */ + ev_bc000 = perf_evlist__event2evsel(session->evlist, event); + if (ev_bc000 && + ev_bc000->attr.config == PERF_EVENT_CPUM_CF_DIAG) + err = s390_cpumcf_dumpctr(sf, sample); + return err; + } + if (event->header.type == PERF_RECORD_AUX && event->aux.flags & PERF_AUX_FLAG_TRUNCATED) return s390_cpumsf_lost(sf, sample); @@ -891,9 +954,15 @@ static void s390_cpumsf_free_queues(struct perf_session *session) struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *) queues->queue_array[i].priv; - if (sfq != NULL && sfq->logfile) { - fclose(sfq->logfile); - sfq->logfile = NULL; + if (sfq != NULL) { + if (sfq->logfile) { + fclose(sfq->logfile); + sfq->logfile = NULL; + } + if (sfq->logfile_ctr) { + fclose(sfq->logfile_ctr); + sfq->logfile_ctr = NULL; + } } zfree(&queues->queue_array[i].priv); } -- cgit v1.2.3 From 9aa0bfa370b278a539077002b3c660468d66b5e7 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 17 Jan 2019 08:15:17 -0800 Subject: perf tools: Handle PERF_RECORD_KSYMBOL This patch handles PERF_RECORD_KSYMBOL in perf record/report. Specifically, map and symbol are created for ksymbol register, and removed for ksymbol unregister. This patch also sets perf_event_attr.ksymbol properly. The flag is ON by default. Committer notes: Use proper inttypes.h for u64, fixing the build in some environments like in the android NDK r15c targetting ARM 32-bit. I.e. fixing this build error: util/event.c: In function 'perf_event__fprintf_ksymbol': util/event.c:1489:10: error: format '%lx' expects argument of type 'long unsigned int', but argument 3 has type 'u64' [-Werror=format=] event->ksymbol_event.flags, event->ksymbol_event.name); ^ cc1: all warnings being treated as errors Signed-off-by: Song Liu Reviewed-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Peter Zijlstra Cc: kernel-team@fb.com Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/20190117161521.1341602-6-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 21 ++++++++++++++++++ tools/perf/util/event.h | 20 +++++++++++++++++ tools/perf/util/evsel.c | 10 ++++++++- tools/perf/util/evsel.h | 1 + tools/perf/util/machine.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/machine.h | 3 +++ tools/perf/util/session.c | 4 ++++ tools/perf/util/tool.h | 4 +++- 8 files changed, 116 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 937a5a4f71cc..f06f3811b25b 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -24,6 +24,7 @@ #include "symbol/kallsyms.h" #include "asm/bug.h" #include "stat.h" +#include "session.h" #define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500 @@ -45,6 +46,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_SWITCH] = "SWITCH", [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE", [PERF_RECORD_NAMESPACES] = "NAMESPACES", + [PERF_RECORD_KSYMBOL] = "KSYMBOL", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -1329,6 +1331,14 @@ int perf_event__process_switch(struct perf_tool *tool __maybe_unused, return machine__process_switch_event(machine, event); } +int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + return machine__process_ksymbol(machine, event, sample); +} + size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) { return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n", @@ -1461,6 +1471,14 @@ static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp) return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost); } +size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " ksymbol event with addr %" PRIx64 " len %u type %u flags 0x%x name %s\n", + event->ksymbol_event.addr, event->ksymbol_event.len, + event->ksymbol_event.ksym_type, + event->ksymbol_event.flags, event->ksymbol_event.name); +} + size_t perf_event__fprintf(union perf_event *event, FILE *fp) { size_t ret = fprintf(fp, "PERF_RECORD_%s", @@ -1496,6 +1514,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_LOST: ret += perf_event__fprintf_lost(event, fp); break; + case PERF_RECORD_KSYMBOL: + ret += perf_event__fprintf_ksymbol(event, fp); + break; default: ret += fprintf(fp, "\n"); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index eb95f3384958..018322f2a13e 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -5,6 +5,7 @@ #include #include #include +#include #include "../perf.h" #include "build-id.h" @@ -84,6 +85,19 @@ struct throttle_event { u64 stream_id; }; +#ifndef KSYM_NAME_LEN +#define KSYM_NAME_LEN 256 +#endif + +struct ksymbol_event { + struct perf_event_header header; + u64 addr; + u32 len; + u16 ksym_type; + u16 flags; + char name[KSYM_NAME_LEN]; +}; + #define PERF_SAMPLE_MASK \ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | \ PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | \ @@ -651,6 +665,7 @@ union perf_event { struct stat_round_event stat_round; struct time_conv_event time_conv; struct feature_event feat; + struct ksymbol_event ksymbol_event; }; void perf_event__print_totals(void); @@ -748,6 +763,10 @@ int perf_event__process_exit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_ksymbol(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_tool__process_synth_event(struct perf_tool *tool, union perf_event *event, struct machine *machine, @@ -811,6 +830,7 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); int kallsyms__get_function_start(const char *kallsyms_filename, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index dbc0466db368..9c8dc6d1aa7f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1035,6 +1035,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->mmap = track; attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; + attr->ksymbol = track && !perf_missing_features.ksymbol; if (opts->record_namespaces) attr->namespaces = track; @@ -1652,6 +1653,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(context_switch, p_unsigned); PRINT_ATTRf(write_backward, p_unsigned); PRINT_ATTRf(namespaces, p_unsigned); + PRINT_ATTRf(ksymbol, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); @@ -1811,6 +1813,8 @@ fallback_missing_features: PERF_SAMPLE_BRANCH_NO_CYCLES); if (perf_missing_features.group_read && evsel->attr.inherit) evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID); + if (perf_missing_features.ksymbol) + evsel->attr.ksymbol = 0; retry_sample_id: if (perf_missing_features.sample_id_all) evsel->attr.sample_id_all = 0; @@ -1930,7 +1934,11 @@ try_fallback: * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.write_backward && evsel->attr.write_backward) { + if (!perf_missing_features.ksymbol && evsel->attr.ksymbol) { + perf_missing_features.ksymbol = true; + pr_debug2("switching off ksymbol\n"); + goto fallback_missing_features; + } else if (!perf_missing_features.write_backward && evsel->attr.write_backward) { perf_missing_features.write_backward = true; pr_debug2("switching off write_backward\n"); goto out_close; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 82a289ce8b0c..4a8c3e7f4808 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -168,6 +168,7 @@ struct perf_missing_features { bool lbr_flags; bool write_backward; bool group_read; + bool ksymbol; }; extern struct perf_missing_features perf_missing_features; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 143f7057d581..9bca61c7d5bf 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -681,6 +681,59 @@ int machine__process_switch_event(struct machine *machine __maybe_unused, return 0; } +static int machine__process_ksymbol_register(struct machine *machine, + union perf_event *event, + struct perf_sample *sample __maybe_unused) +{ + struct symbol *sym; + struct map *map; + + map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr); + if (!map) { + map = dso__new_map(event->ksymbol_event.name); + if (!map) + return -ENOMEM; + + map->start = event->ksymbol_event.addr; + map->pgoff = map->start; + map->end = map->start + event->ksymbol_event.len; + map_groups__insert(&machine->kmaps, map); + } + + sym = symbol__new(event->ksymbol_event.addr, event->ksymbol_event.len, + 0, 0, event->ksymbol_event.name); + if (!sym) + return -ENOMEM; + dso__insert_symbol(map->dso, sym); + return 0; +} + +static int machine__process_ksymbol_unregister(struct machine *machine, + union perf_event *event, + struct perf_sample *sample __maybe_unused) +{ + struct map *map; + + map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr); + if (map) + map_groups__remove(&machine->kmaps, map); + + return 0; +} + +int machine__process_ksymbol(struct machine *machine __maybe_unused, + union perf_event *event, + struct perf_sample *sample) +{ + if (dump_trace) + perf_event__fprintf_ksymbol(event, stdout); + + if (event->ksymbol_event.flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER) + return machine__process_ksymbol_unregister(machine, event, + sample); + return machine__process_ksymbol_register(machine, event, sample); +} + static void dso__adjust_kmod_long_name(struct dso *dso, const char *filename) { const char *dup_filename; @@ -1812,6 +1865,8 @@ int machine__process_event(struct machine *machine, union perf_event *event, case PERF_RECORD_SWITCH: case PERF_RECORD_SWITCH_CPU_WIDE: ret = machine__process_switch_event(machine, event); break; + case PERF_RECORD_KSYMBOL: + ret = machine__process_ksymbol(machine, event, sample); break; default: ret = -1; break; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index a5d1da60f751..4ecd380ce1b4 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -130,6 +130,9 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event struct perf_sample *sample); int machine__process_mmap2_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); +int machine__process_ksymbol(struct machine *machine, + union perf_event *event, + struct perf_sample *sample); int machine__process_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index b26bc9c54200..dcfacfb036a3 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -379,6 +379,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->itrace_start = perf_event__process_itrace_start; if (tool->context_switch == NULL) tool->context_switch = perf_event__process_switch; + if (tool->ksymbol == NULL) + tool->ksymbol = perf_event__process_ksymbol; if (tool->read == NULL) tool->read = process_event_sample_stub; if (tool->throttle == NULL) @@ -1310,6 +1312,8 @@ static int machines__deliver_event(struct machines *machines, case PERF_RECORD_SWITCH: case PERF_RECORD_SWITCH_CPU_WIDE: return tool->context_switch(tool, event, sample, machine); + case PERF_RECORD_KSYMBOL: + return tool->ksymbol(tool, event, sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 56e4ca54020a..9c81ca2f3cf7 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -53,7 +53,9 @@ struct perf_tool { itrace_start, context_switch, throttle, - unthrottle; + unthrottle, + ksymbol; + event_attr_op attr; event_attr_op event_update; event_op2 tracing_data; -- cgit v1.2.3 From 45178a928a4b7c6093f6621e627d09909e81cc13 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 17 Jan 2019 08:15:18 -0800 Subject: perf tools: Handle PERF_RECORD_BPF_EVENT This patch adds basic handling of PERF_RECORD_BPF_EVENT. Tracking of PERF_RECORD_BPF_EVENT is OFF by default. Option --bpf-event is added to turn it on. Committer notes: Add dummy machine__process_bpf_event() variant that returns zero for systems without HAVE_LIBBPF_SUPPORT, such as Alpine Linux, unbreaking the build in such systems. Remove the needless include from bpf->event.h, provide just forward declarations for the structs and unions in the parameters, to reduce compilation time and needless rebuilds when machine.h gets changed. Committer testing: When running with: # perf record --bpf-event On an older kernel where PERF_RECORD_BPF_EVENT and PERF_RECORD_KSYMBOL is not present, we fallback to removing those two bits from perf_event_attr, making the tool to continue to work on older kernels: perf_event_attr: size 112 { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|PERIOD read_format ID disabled 1 inherit 1 mmap 1 comm 1 freq 1 enable_on_exec 1 task 1 precise_ip 3 sample_id_all 1 exclude_guest 1 mmap2 1 comm_exec 1 ksymbol 1 bpf_event 1 ------------------------------------------------------------ sys_perf_event_open: pid 5779 cpu 0 group_fd -1 flags 0x8 sys_perf_event_open failed, error -22 switching off bpf_event ------------------------------------------------------------ perf_event_attr: size 112 { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|PERIOD read_format ID disabled 1 inherit 1 mmap 1 comm 1 freq 1 enable_on_exec 1 task 1 precise_ip 3 sample_id_all 1 exclude_guest 1 mmap2 1 comm_exec 1 ksymbol 1 ------------------------------------------------------------ sys_perf_event_open: pid 5779 cpu 0 group_fd -1 flags 0x8 sys_perf_event_open failed, error -22 switching off ksymbol ------------------------------------------------------------ perf_event_attr: size 112 { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|PERIOD read_format ID disabled 1 inherit 1 mmap 1 comm 1 freq 1 enable_on_exec 1 task 1 precise_ip 3 sample_id_all 1 exclude_guest 1 mmap2 1 comm_exec 1 ------------------------------------------------------------ And then proceeds to work without those two features. As passing --bpf-event is an explicit action performed by the user, perhaps we should emit a warning telling that the kernel has no such feature, but this can be done on top of this patch. Now with a kernel that supports these events, start the 'record --bpf-event -a' and then run 'perf trace sleep 10000' that will use the BPF augmented_raw_syscalls.o prebuilt (for another kernel version even) and thus should generate PERF_RECORD_BPF_EVENT events: [root@quaco ~]# perf record -e dummy -a --bpf-event ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.713 MB perf.data ] [root@quaco ~]# bpftool prog 13: cgroup_skb tag 7be49e3934a125ba gpl loaded_at 2019-01-19T09:09:43-0300 uid 0 xlated 296B jited 229B memlock 4096B map_ids 13,14 14: cgroup_skb tag 2a142ef67aaad174 gpl loaded_at 2019-01-19T09:09:43-0300 uid 0 xlated 296B jited 229B memlock 4096B map_ids 13,14 15: cgroup_skb tag 7be49e3934a125ba gpl loaded_at 2019-01-19T09:09:43-0300 uid 0 xlated 296B jited 229B memlock 4096B map_ids 15,16 16: cgroup_skb tag 2a142ef67aaad174 gpl loaded_at 2019-01-19T09:09:43-0300 uid 0 xlated 296B jited 229B memlock 4096B map_ids 15,16 17: cgroup_skb tag 7be49e3934a125ba gpl loaded_at 2019-01-19T09:09:44-0300 uid 0 xlated 296B jited 229B memlock 4096B map_ids 17,18 18: cgroup_skb tag 2a142ef67aaad174 gpl loaded_at 2019-01-19T09:09:44-0300 uid 0 xlated 296B jited 229B memlock 4096B map_ids 17,18 21: cgroup_skb tag 7be49e3934a125ba gpl loaded_at 2019-01-19T09:09:45-0300 uid 0 xlated 296B jited 229B memlock 4096B map_ids 21,22 22: cgroup_skb tag 2a142ef67aaad174 gpl loaded_at 2019-01-19T09:09:45-0300 uid 0 xlated 296B jited 229B memlock 4096B map_ids 21,22 31: tracepoint name sys_enter tag 12504ba9402f952f gpl loaded_at 2019-01-19T09:19:56-0300 uid 0 xlated 512B jited 374B memlock 4096B map_ids 30,29,28 32: tracepoint name sys_exit tag c1bd85c092d6e4aa gpl loaded_at 2019-01-19T09:19:56-0300 uid 0 xlated 256B jited 191B memlock 4096B map_ids 30,29 # perf report -D | grep PERF_RECORD_BPF_EVENT | nl 1 0 55834574849 0x4fc8 [0x18]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 13 2 0 60129542145 0x5118 [0x18]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 14 3 0 64424509441 0x5268 [0x18]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 15 4 0 68719476737 0x53b8 [0x18]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 16 5 0 73014444033 0x5508 [0x18]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 17 6 0 77309411329 0x5658 [0x18]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 18 7 0 90194313217 0x57a8 [0x18]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 21 8 0 94489280513 0x58f8 [0x18]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 22 9 7 620922484360 0xb6390 [0x30]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 29 10 7 620922486018 0xb6410 [0x30]: PERF_RECORD_BPF_EVENT bpf event with type 2, flags 0, id 29 11 7 620922579199 0xb6490 [0x30]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 30 12 7 620922580240 0xb6510 [0x30]: PERF_RECORD_BPF_EVENT bpf event with type 2, flags 0, id 30 13 7 620922765207 0xb6598 [0x30]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 31 14 7 620922874543 0xb6620 [0x30]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 32 # There, the 31 and 32 tracepoint BPF programs put in place by 'perf trace'. Signed-off-by: Song Liu Reviewed-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Peter Zijlstra Cc: kernel-team@fb.com Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/20190117161521.1341602-7-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 1 + tools/perf/perf.h | 1 + tools/perf/util/Build | 2 ++ tools/perf/util/bpf-event.c | 15 +++++++++++++++ tools/perf/util/bpf-event.h | 22 ++++++++++++++++++++++ tools/perf/util/event.c | 20 ++++++++++++++++++++ tools/perf/util/event.h | 16 ++++++++++++++++ tools/perf/util/evsel.c | 11 ++++++++++- tools/perf/util/evsel.h | 1 + tools/perf/util/machine.c | 3 +++ tools/perf/util/session.c | 4 ++++ tools/perf/util/tool.h | 3 ++- 12 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 tools/perf/util/bpf-event.c create mode 100644 tools/perf/util/bpf-event.h (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 882285fb9f64..deaf9b902094 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1839,6 +1839,7 @@ static struct option __record_options[] = { OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, "synthesize non-sample events at the end of output"), OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), + OPT_BOOLEAN(0, "bpf-event", &record.opts.bpf_event, "record bpf events"), OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, "Fail if the specified frequency can't be used"), OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 388c6dd128b8..5941fb6eccfc 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -66,6 +66,7 @@ struct record_opts { bool ignore_missing_thread; bool strict_freq; bool sample_id; + bool bpf_event; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 3ad6a800948d..c359af408334 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -154,6 +154,8 @@ endif libperf-y += perf-hooks.o +libperf-$(CONFIG_LIBBPF) += bpf-event.o + libperf-$(CONFIG_CXX) += c++/ CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c new file mode 100644 index 000000000000..87004706874f --- /dev/null +++ b/tools/perf/util/bpf-event.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "bpf-event.h" +#include "debug.h" +#include "symbol.h" + +int machine__process_bpf_event(struct machine *machine __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_bpf_event(event, stdout); + return 0; +} diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h new file mode 100644 index 000000000000..da0dfc035fc6 --- /dev/null +++ b/tools/perf/util/bpf-event.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_BPF_EVENT_H +#define __PERF_BPF_EVENT_H + +#include + +struct machine; +union perf_event; +struct perf_sample; + +#ifdef HAVE_LIBBPF_SUPPORT +int machine__process_bpf_event(struct machine *machine, union perf_event *event, + struct perf_sample *sample); +#else +static inline int machine__process_bpf_event(struct machine *machine __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused) +{ + return 0; +} +#endif // HAVE_LIBBPF_SUPPORT +#endif diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index f06f3811b25b..1b5091a3d14f 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -25,6 +25,7 @@ #include "asm/bug.h" #include "stat.h" #include "session.h" +#include "bpf-event.h" #define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500 @@ -47,6 +48,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE", [PERF_RECORD_NAMESPACES] = "NAMESPACES", [PERF_RECORD_KSYMBOL] = "KSYMBOL", + [PERF_RECORD_BPF_EVENT] = "BPF_EVENT", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -1339,6 +1341,14 @@ int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused, return machine__process_ksymbol(machine, event, sample); } +int perf_event__process_bpf_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + return machine__process_bpf_event(machine, event, sample); +} + size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) { return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n", @@ -1479,6 +1489,13 @@ size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp) event->ksymbol_event.flags, event->ksymbol_event.name); } +size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " bpf event with type %u, flags %u, id %u\n", + event->bpf_event.type, event->bpf_event.flags, + event->bpf_event.id); +} + size_t perf_event__fprintf(union perf_event *event, FILE *fp) { size_t ret = fprintf(fp, "PERF_RECORD_%s", @@ -1517,6 +1534,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_KSYMBOL: ret += perf_event__fprintf_ksymbol(event, fp); break; + case PERF_RECORD_BPF_EVENT: + ret += perf_event__fprintf_bpf_event(event, fp); + break; default: ret += fprintf(fp, "\n"); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 018322f2a13e..dad32b81fe71 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -98,6 +98,16 @@ struct ksymbol_event { char name[KSYM_NAME_LEN]; }; +struct bpf_event { + struct perf_event_header header; + u16 type; + u16 flags; + u32 id; + + /* for bpf_prog types */ + u8 tag[BPF_TAG_SIZE]; // prog tag +}; + #define PERF_SAMPLE_MASK \ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | \ PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | \ @@ -666,6 +676,7 @@ union perf_event { struct time_conv_event time_conv; struct feature_event feat; struct ksymbol_event ksymbol_event; + struct bpf_event bpf_event; }; void perf_event__print_totals(void); @@ -767,6 +778,10 @@ int perf_event__process_ksymbol(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_bpf_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_tool__process_synth_event(struct perf_tool *tool, union perf_event *event, struct machine *machine, @@ -831,6 +846,7 @@ size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); int kallsyms__get_function_start(const char *kallsyms_filename, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 9c8dc6d1aa7f..684c893ca6bc 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1036,6 +1036,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; attr->ksymbol = track && !perf_missing_features.ksymbol; + attr->bpf_event = track && opts->bpf_event && + !perf_missing_features.bpf_event; if (opts->record_namespaces) attr->namespaces = track; @@ -1654,6 +1656,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(write_backward, p_unsigned); PRINT_ATTRf(namespaces, p_unsigned); PRINT_ATTRf(ksymbol, p_unsigned); + PRINT_ATTRf(bpf_event, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); @@ -1815,6 +1818,8 @@ fallback_missing_features: evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID); if (perf_missing_features.ksymbol) evsel->attr.ksymbol = 0; + if (perf_missing_features.bpf_event) + evsel->attr.bpf_event = 0; retry_sample_id: if (perf_missing_features.sample_id_all) evsel->attr.sample_id_all = 0; @@ -1934,7 +1939,11 @@ try_fallback: * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.ksymbol && evsel->attr.ksymbol) { + if (!perf_missing_features.bpf_event && evsel->attr.bpf_event) { + perf_missing_features.bpf_event = true; + pr_debug2("switching off bpf_event\n"); + goto fallback_missing_features; + } else if (!perf_missing_features.ksymbol && evsel->attr.ksymbol) { perf_missing_features.ksymbol = true; pr_debug2("switching off ksymbol\n"); goto fallback_missing_features; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4a8c3e7f4808..29c5eb68c44b 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -169,6 +169,7 @@ struct perf_missing_features { bool write_backward; bool group_read; bool ksymbol; + bool bpf_event; }; extern struct perf_missing_features perf_missing_features; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 9bca61c7d5bf..ae85106bb5bf 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -21,6 +21,7 @@ #include "unwind.h" #include "linux/hash.h" #include "asm/bug.h" +#include "bpf-event.h" #include "sane_ctype.h" #include @@ -1867,6 +1868,8 @@ int machine__process_event(struct machine *machine, union perf_event *event, ret = machine__process_switch_event(machine, event); break; case PERF_RECORD_KSYMBOL: ret = machine__process_ksymbol(machine, event, sample); break; + case PERF_RECORD_BPF_EVENT: + ret = machine__process_bpf_event(machine, event, sample); break; default: ret = -1; break; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index dcfacfb036a3..24fd62528a33 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -381,6 +381,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->context_switch = perf_event__process_switch; if (tool->ksymbol == NULL) tool->ksymbol = perf_event__process_ksymbol; + if (tool->bpf_event == NULL) + tool->bpf_event = perf_event__process_bpf_event; if (tool->read == NULL) tool->read = process_event_sample_stub; if (tool->throttle == NULL) @@ -1314,6 +1316,8 @@ static int machines__deliver_event(struct machines *machines, return tool->context_switch(tool, event, sample, machine); case PERF_RECORD_KSYMBOL: return tool->ksymbol(tool, event, sample, machine); + case PERF_RECORD_BPF_EVENT: + return tool->bpf_event(tool, event, sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 9c81ca2f3cf7..250391672f9f 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -54,7 +54,8 @@ struct perf_tool { context_switch, throttle, unthrottle, - ksymbol; + ksymbol, + bpf_event; event_attr_op attr; event_attr_op event_update; -- cgit v1.2.3 From 7b612e291a5affb12b9d0b87332c71bcbe9c5db4 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 17 Jan 2019 08:15:19 -0800 Subject: perf tools: Synthesize PERF_RECORD_* for loaded BPF programs This patch synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for BPF programs loaded before perf-record. This is achieved by gathering information about all BPF programs via sys_bpf. Committer notes: Fix the build on some older systems such as amazonlinux:1 where it was breaking with: util/bpf-event.c: In function 'perf_event__synthesize_one_bpf_prog': util/bpf-event.c:52:9: error: missing initializer for field 'type' of 'struct bpf_prog_info' [-Werror=missing-field-initializers] struct bpf_prog_info info = {}; ^ In file included from /git/linux/tools/lib/bpf/bpf.h:26:0, from util/bpf-event.c:3: /git/linux/tools/include/uapi/linux/bpf.h:2699:8: note: 'type' declared here __u32 type; ^ cc1: all warnings being treated as errors Further fix on a centos:6 system: cc1: warnings being treated as errors util/bpf-event.c: In function 'perf_event__synthesize_one_bpf_prog': util/bpf-event.c:50: error: 'func_info_rec_size' may be used uninitialized in this function The compiler is wrong, but to silence it, initialize that variable to zero. One more fix, this time for debian:experimental-x-mips, x-mips64 and x-mipsel: util/bpf-event.c: In function 'perf_event__synthesize_one_bpf_prog': util/bpf-event.c:93:16: error: implicit declaration of function 'calloc' [-Werror=implicit-function-declaration] func_infos = calloc(sub_prog_cnt, func_info_rec_size); ^~~~~~ util/bpf-event.c:93:16: error: incompatible implicit declaration of built-in function 'calloc' [-Werror] util/bpf-event.c:93:16: note: include '' or provide a declaration of 'calloc' Add the missing header. Committer testing: # perf record --bpf-event sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.021 MB perf.data (7 samples) ] # perf report -D | grep PERF_RECORD_BPF_EVENT | nl 1 0 0x4b10 [0x18]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 13 2 0 0x4c60 [0x18]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 14 3 0 0x4db0 [0x18]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 15 4 0 0x4f00 [0x18]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 16 5 0 0x5050 [0x18]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 17 6 0 0x51a0 [0x18]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 18 7 0 0x52f0 [0x18]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 21 8 0 0x5440 [0x18]: PERF_RECORD_BPF_EVENT bpf event with type 1, flags 0, id 22 # bpftool prog 13: cgroup_skb tag 7be49e3934a125ba gpl loaded_at 2019-01-19T09:09:43-0300 uid 0 xlated 296B jited 229B memlock 4096B map_ids 13,14 14: cgroup_skb tag 2a142ef67aaad174 gpl loaded_at 2019-01-19T09:09:43-0300 uid 0 xlated 296B jited 229B memlock 4096B map_ids 13,14 15: cgroup_skb tag 7be49e3934a125ba gpl loaded_at 2019-01-19T09:09:43-0300 uid 0 xlated 296B jited 229B memlock 4096B map_ids 15,16 16: cgroup_skb tag 2a142ef67aaad174 gpl loaded_at 2019-01-19T09:09:43-0300 uid 0 xlated 296B jited 229B memlock 4096B map_ids 15,16 17: cgroup_skb tag 7be49e3934a125ba gpl loaded_at 2019-01-19T09:09:44-0300 uid 0 xlated 296B jited 229B memlock 4096B map_ids 17,18 18: cgroup_skb tag 2a142ef67aaad174 gpl loaded_at 2019-01-19T09:09:44-0300 uid 0 xlated 296B jited 229B memlock 4096B map_ids 17,18 21: cgroup_skb tag 7be49e3934a125ba gpl loaded_at 2019-01-19T09:09:45-0300 uid 0 xlated 296B jited 229B memlock 4096B map_ids 21,22 22: cgroup_skb tag 2a142ef67aaad174 gpl loaded_at 2019-01-19T09:09:45-0300 uid 0 xlated 296B jited 229B memlock 4096B map_ids 21,22 # # perf report -D | grep -B22 PERF_RECORD_KSYMBOL . ... raw event: size 312 bytes . 0000: 11 00 00 00 00 00 38 01 ff 44 06 c0 ff ff ff ff ......8..D...... . 0010: e5 00 00 00 01 00 00 00 62 70 66 5f 70 72 6f 67 ........bpf_prog . 0020: 5f 37 62 65 34 39 65 33 39 33 34 61 31 32 35 62 _7be49e3934a125b . 0030: 61 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 a............... . 0110: 00 00 00 00 00 00 00 00 21 00 00 00 00 00 00 00 ........!....... . 0120: 7b e4 9e 39 34 a1 25 ba 00 00 00 00 00 00 00 00 {..94.%......... . 0130: 00 00 00 00 00 00 00 00 ........ 0 0x49d8 [0x138]: PERF_RECORD_KSYMBOL ksymbol event with addr ffffffffc00644ff len 229 type 1 flags 0x0 name bpf_prog_7be49e3934a125ba -- . ... raw event: size 312 bytes . 0000: 11 00 00 00 00 00 38 01 48 6d 06 c0 ff ff ff ff ......8.Hm...... . 0010: e5 00 00 00 01 00 00 00 62 70 66 5f 70 72 6f 67 ........bpf_prog . 0020: 5f 32 61 31 34 32 65 66 36 37 61 61 61 64 31 37 _2a142ef67aaad17 . 0030: 34 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 4............... . 0110: 00 00 00 00 00 00 00 00 21 00 00 00 00 00 00 00 ........!....... . 0120: 2a 14 2e f6 7a aa d1 74 00 00 00 00 00 00 00 00 *...z..t........ . 0130: 00 00 00 00 00 00 00 00 ........ 0 0x4b28 [0x138]: PERF_RECORD_KSYMBOL ksymbol event with addr ffffffffc0066d48 len 229 type 1 flags 0x0 name bpf_prog_2a142ef67aaad174 -- . ... raw event: size 312 bytes . 0000: 11 00 00 00 00 00 38 01 04 cf 03 c0 ff ff ff ff ......8......... . 0010: e5 00 00 00 01 00 00 00 62 70 66 5f 70 72 6f 67 ........bpf_prog . 0020: 5f 37 62 65 34 39 65 33 39 33 34 61 31 32 35 62 _7be49e3934a125b . 0030: 61 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 a............... . 0110: 00 00 00 00 00 00 00 00 21 00 00 00 00 00 00 00 ........!....... . 0120: 7b e4 9e 39 34 a1 25 ba 00 00 00 00 00 00 00 00 {..94.%......... . 0130: 00 00 00 00 00 00 00 00 ........ 0 0x4c78 [0x138]: PERF_RECORD_KSYMBOL ksymbol event with addr ffffffffc003cf04 len 229 type 1 flags 0x0 name bpf_prog_7be49e3934a125ba -- . ... raw event: size 312 bytes . 0000: 11 00 00 00 00 00 38 01 96 28 04 c0 ff ff ff ff ......8..(...... . 0010: e5 00 00 00 01 00 00 00 62 70 66 5f 70 72 6f 67 ........bpf_prog . 0020: 5f 32 61 31 34 32 65 66 36 37 61 61 61 64 31 37 _2a142ef67aaad17 . 0030: 34 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 4............... . 0110: 00 00 00 00 00 00 00 00 21 00 00 00 00 00 00 00 ........!....... . 0120: 2a 14 2e f6 7a aa d1 74 00 00 00 00 00 00 00 00 *...z..t........ . 0130: 00 00 00 00 00 00 00 00 ........ 0 0x4dc8 [0x138]: PERF_RECORD_KSYMBOL ksymbol event with addr ffffffffc0042896 len 229 type 1 flags 0x0 name bpf_prog_2a142ef67aaad174 -- . ... raw event: size 312 bytes . 0000: 11 00 00 00 00 00 38 01 05 13 17 c0 ff ff ff ff ......8......... . 0010: e5 00 00 00 01 00 00 00 62 70 66 5f 70 72 6f 67 ........bpf_prog . 0020: 5f 37 62 65 34 39 65 33 39 33 34 61 31 32 35 62 _7be49e3934a125b . 0030: 61 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 a............... . 0110: 00 00 00 00 00 00 00 00 21 00 00 00 00 00 00 00 ........!....... . 0120: 7b e4 9e 39 34 a1 25 ba 00 00 00 00 00 00 00 00 {..94.%......... . 0130: 00 00 00 00 00 00 00 00 ........ 0 0x4f18 [0x138]: PERF_RECORD_KSYMBOL ksymbol event with addr ffffffffc0171305 len 229 type 1 flags 0x0 name bpf_prog_7be49e3934a125ba -- . ... raw event: size 312 bytes . 0000: 11 00 00 00 00 00 38 01 0a 8c 23 c0 ff ff ff ff ......8...#..... . 0010: e5 00 00 00 01 00 00 00 62 70 66 5f 70 72 6f 67 ........bpf_prog . 0020: 5f 32 61 31 34 32 65 66 36 37 61 61 61 64 31 37 _2a142ef67aaad17 . 0030: 34 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 4............... . 0110: 00 00 00 00 00 00 00 00 21 00 00 00 00 00 00 00 ........!....... . 0120: 2a 14 2e f6 7a aa d1 74 00 00 00 00 00 00 00 00 *...z..t........ . 0130: 00 00 00 00 00 00 00 00 ........ 0 0x5068 [0x138]: PERF_RECORD_KSYMBOL ksymbol event with addr ffffffffc0238c0a len 229 type 1 flags 0x0 name bpf_prog_2a142ef67aaad174 -- . ... raw event: size 312 bytes . 0000: 11 00 00 00 00 00 38 01 2a a5 a4 c0 ff ff ff ff ......8.*....... . 0010: e5 00 00 00 01 00 00 00 62 70 66 5f 70 72 6f 67 ........bpf_prog . 0020: 5f 37 62 65 34 39 65 33 39 33 34 61 31 32 35 62 _7be49e3934a125b . 0030: 61 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 a............... . 0110: 00 00 00 00 00 00 00 00 21 00 00 00 00 00 00 00 ........!....... . 0120: 7b e4 9e 39 34 a1 25 ba 00 00 00 00 00 00 00 00 {..94.%......... . 0130: 00 00 00 00 00 00 00 00 ........ 0 0x51b8 [0x138]: PERF_RECORD_KSYMBOL ksymbol event with addr ffffffffc0a4a52a len 229 type 1 flags 0x0 name bpf_prog_7be49e3934a125ba -- . ... raw event: size 312 bytes . 0000: 11 00 00 00 00 00 38 01 9b c9 a4 c0 ff ff ff ff ......8......... . 0010: e5 00 00 00 01 00 00 00 62 70 66 5f 70 72 6f 67 ........bpf_prog . 0020: 5f 32 61 31 34 32 65 66 36 37 61 61 61 64 31 37 _2a142ef67aaad17 . 0030: 34 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 4............... . 0110: 00 00 00 00 00 00 00 00 21 00 00 00 00 00 00 00 ........!....... . 0120: 2a 14 2e f6 7a aa d1 74 00 00 00 00 00 00 00 00 *...z..t........ . 0130: 00 00 00 00 00 00 00 00 ........ 0 0x5308 [0x138]: PERF_RECORD_KSYMBOL ksymbol event with addr ffffffffc0a4c99b len 229 type 1 flags 0x0 name bpf_prog_2a142ef67aaad174 Signed-off-by: Song Liu Reviewed-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Peter Zijlstra Cc: kernel-team@fb.com Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/20190117161521.1341602-8-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 6 ++ tools/perf/util/bpf-event.c | 242 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/bpf-event.h | 16 +++ 3 files changed, 264 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index deaf9b902094..88ea11d57c6f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -41,6 +41,7 @@ #include "util/perf-hooks.h" #include "util/time-utils.h" #include "util/units.h" +#include "util/bpf-event.h" #include "asm/bug.h" #include @@ -1082,6 +1083,11 @@ static int record__synthesize(struct record *rec, bool tail) return err; } + err = perf_event__synthesize_bpf_events(tool, process_synthesized_event, + machine, opts); + if (err < 0) + pr_warning("Couldn't synthesize bpf events.\n"); + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, process_synthesized_event, opts->sample_address, 1); diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 87004706874f..01e1dc1bb7fb 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -1,10 +1,25 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include +#include +#include #include "bpf-event.h" #include "debug.h" #include "symbol.h" +#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) + +static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len) +{ + int ret = 0; + size_t i; + + for (i = 0; i < len; i++) + ret += snprintf(buf + ret, size - ret, "%02x", data[i]); + return ret; +} + int machine__process_bpf_event(struct machine *machine __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused) @@ -13,3 +28,230 @@ int machine__process_bpf_event(struct machine *machine __maybe_unused, perf_event__fprintf_bpf_event(event, stdout); return 0; } + +/* + * Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf + * program. One PERF_RECORD_BPF_EVENT is generated for the program. And + * one PERF_RECORD_KSYMBOL is generated for each sub program. + * + * Returns: + * 0 for success; + * -1 for failures; + * -2 for lack of kernel support. + */ +static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine, + int fd, + union perf_event *event, + struct record_opts *opts) +{ + struct ksymbol_event *ksymbol_event = &event->ksymbol_event; + struct bpf_event *bpf_event = &event->bpf_event; + u32 sub_prog_cnt, i, func_info_rec_size = 0; + u8 (*prog_tags)[BPF_TAG_SIZE] = NULL; + struct bpf_prog_info info = { .type = 0, }; + u32 info_len = sizeof(info); + void *func_infos = NULL; + u64 *prog_addrs = NULL; + struct btf *btf = NULL; + u32 *prog_lens = NULL; + bool has_btf = false; + char errbuf[512]; + int err = 0; + + /* Call bpf_obj_get_info_by_fd() to get sizes of arrays */ + err = bpf_obj_get_info_by_fd(fd, &info, &info_len); + + if (err) { + pr_debug("%s: failed to get BPF program info: %s, aborting\n", + __func__, str_error_r(errno, errbuf, sizeof(errbuf))); + return -1; + } + if (info_len < offsetof(struct bpf_prog_info, prog_tags)) { + pr_debug("%s: the kernel is too old, aborting\n", __func__); + return -2; + } + + /* number of ksyms, func_lengths, and tags should match */ + sub_prog_cnt = info.nr_jited_ksyms; + if (sub_prog_cnt != info.nr_prog_tags || + sub_prog_cnt != info.nr_jited_func_lens) + return -1; + + /* check BTF func info support */ + if (info.btf_id && info.nr_func_info && info.func_info_rec_size) { + /* btf func info number should be same as sub_prog_cnt */ + if (sub_prog_cnt != info.nr_func_info) { + pr_debug("%s: mismatch in BPF sub program count and BTF function info count, aborting\n", __func__); + return -1; + } + if (btf__get_from_id(info.btf_id, &btf)) { + pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info.btf_id); + return -1; + } + func_info_rec_size = info.func_info_rec_size; + func_infos = calloc(sub_prog_cnt, func_info_rec_size); + if (!func_infos) { + pr_debug("%s: failed to allocate memory for func_infos, aborting\n", __func__); + return -1; + } + has_btf = true; + } + + /* + * We need address, length, and tag for each sub program. + * Allocate memory and call bpf_obj_get_info_by_fd() again + */ + prog_addrs = calloc(sub_prog_cnt, sizeof(u64)); + if (!prog_addrs) { + pr_debug("%s: failed to allocate memory for prog_addrs, aborting\n", __func__); + goto out; + } + prog_lens = calloc(sub_prog_cnt, sizeof(u32)); + if (!prog_lens) { + pr_debug("%s: failed to allocate memory for prog_lens, aborting\n", __func__); + goto out; + } + prog_tags = calloc(sub_prog_cnt, BPF_TAG_SIZE); + if (!prog_tags) { + pr_debug("%s: failed to allocate memory for prog_tags, aborting\n", __func__); + goto out; + } + + memset(&info, 0, sizeof(info)); + info.nr_jited_ksyms = sub_prog_cnt; + info.nr_jited_func_lens = sub_prog_cnt; + info.nr_prog_tags = sub_prog_cnt; + info.jited_ksyms = ptr_to_u64(prog_addrs); + info.jited_func_lens = ptr_to_u64(prog_lens); + info.prog_tags = ptr_to_u64(prog_tags); + info_len = sizeof(info); + if (has_btf) { + info.nr_func_info = sub_prog_cnt; + info.func_info_rec_size = func_info_rec_size; + info.func_info = ptr_to_u64(func_infos); + } + + err = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (err) { + pr_debug("%s: failed to get BPF program info, aborting\n", __func__); + goto out; + } + + /* Synthesize PERF_RECORD_KSYMBOL */ + for (i = 0; i < sub_prog_cnt; i++) { + const struct bpf_func_info *finfo; + const char *short_name = NULL; + const struct btf_type *t; + int name_len; + + *ksymbol_event = (struct ksymbol_event){ + .header = { + .type = PERF_RECORD_KSYMBOL, + .size = sizeof(struct ksymbol_event), + }, + .addr = prog_addrs[i], + .len = prog_lens[i], + .ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF, + .flags = 0, + }; + name_len = snprintf(ksymbol_event->name, KSYM_NAME_LEN, + "bpf_prog_"); + name_len += snprintf_hex(ksymbol_event->name + name_len, + KSYM_NAME_LEN - name_len, + prog_tags[i], BPF_TAG_SIZE); + if (has_btf) { + finfo = func_infos + i * info.func_info_rec_size; + t = btf__type_by_id(btf, finfo->type_id); + short_name = btf__name_by_offset(btf, t->name_off); + } else if (i == 0 && sub_prog_cnt == 1) { + /* no subprog */ + if (info.name[0]) + short_name = info.name; + } else + short_name = "F"; + if (short_name) + name_len += snprintf(ksymbol_event->name + name_len, + KSYM_NAME_LEN - name_len, + "_%s", short_name); + + ksymbol_event->header.size += PERF_ALIGN(name_len + 1, + sizeof(u64)); + err = perf_tool__process_synth_event(tool, event, + machine, process); + } + + /* Synthesize PERF_RECORD_BPF_EVENT */ + if (opts->bpf_event) { + *bpf_event = (struct bpf_event){ + .header = { + .type = PERF_RECORD_BPF_EVENT, + .size = sizeof(struct bpf_event), + }, + .type = PERF_BPF_EVENT_PROG_LOAD, + .flags = 0, + .id = info.id, + }; + memcpy(bpf_event->tag, prog_tags[i], BPF_TAG_SIZE); + err = perf_tool__process_synth_event(tool, event, + machine, process); + } + +out: + free(prog_tags); + free(prog_lens); + free(prog_addrs); + free(func_infos); + free(btf); + return err ? -1 : 0; +} + +int perf_event__synthesize_bpf_events(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine, + struct record_opts *opts) +{ + union perf_event *event; + __u32 id = 0; + int err; + int fd; + + event = malloc(sizeof(event->bpf_event) + KSYM_NAME_LEN); + if (!event) + return -1; + while (true) { + err = bpf_prog_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) { + err = 0; + break; + } + pr_debug("%s: can't get next program: %s%s", + __func__, strerror(errno), + errno == EINVAL ? " -- kernel too old?" : ""); + /* don't report error on old kernel */ + err = (errno == EINVAL) ? 0 : -1; + break; + } + fd = bpf_prog_get_fd_by_id(id); + if (fd < 0) { + pr_debug("%s: failed to get fd for prog_id %u\n", + __func__, id); + continue; + } + + err = perf_event__synthesize_one_bpf_prog(tool, process, + machine, fd, + event, opts); + close(fd); + if (err) { + /* do not return error for old kernel */ + if (err == -2) + err = 0; + break; + } + } + free(event); + return err; +} diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index da0dfc035fc6..7890067e1a37 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -3,14 +3,22 @@ #define __PERF_BPF_EVENT_H #include +#include "event.h" struct machine; union perf_event; struct perf_sample; +struct perf_tool; +struct record_opts; #ifdef HAVE_LIBBPF_SUPPORT int machine__process_bpf_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); + +int perf_event__synthesize_bpf_events(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine, + struct record_opts *opts); #else static inline int machine__process_bpf_event(struct machine *machine __maybe_unused, union perf_event *event __maybe_unused, @@ -18,5 +26,13 @@ static inline int machine__process_bpf_event(struct machine *machine __maybe_unu { return 0; } + +static inline int perf_event__synthesize_bpf_events(struct perf_tool *tool __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused, + struct record_opts *opts __maybe_unused) +{ + return 0; +} #endif // HAVE_LIBBPF_SUPPORT #endif -- cgit v1.2.3 From a5dcc4ca9129f12a07722f41b6c5e8d7f71f4063 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 18 Jan 2019 11:34:15 -0300 Subject: perf python: Remove -fstack-clash-protection when building with some clang versions These options are not present in some (all?) clang versions, so when we build for a distro that has a gcc new enough to have these options and that the distro python build config settings use them but clang doesn't support, b00m. This is the case with fedora rawhide (now gearing towards f30), so check if clang has the and remove the missing ones from CFLAGS. Cc: Eduardo Habkost Cc: Thiago Macieira Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-5q50q9w458yawgxf9ez54jbp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/setup.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 63f758c655d5..64d1f36dee99 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -17,6 +17,8 @@ if cc == "clang": vars[var] = sub("-mcet", "", vars[var]) if not clang_has_option("-fcf-protection"): vars[var] = sub("-fcf-protection", "", vars[var]) + if not clang_has_option("-fstack-clash-protection"): + vars[var] = sub("-fstack-clash-protection", "", vars[var]) from distutils.core import setup, Extension -- cgit v1.2.3 From 32e9136e37840a62c659259a394ed3735e3b3c84 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Jan 2019 15:45:13 -0300 Subject: perf utils: Move perf_config using routines from color.c to separate object To untangle objects a bit more, avoiding rebuilding the color_fprintf routines when changes are made to the perf config headers. Cc: Adrian Hunter Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Richter Link: https://lkml.kernel.org/n/tip-8qvu2ek26antm3a8jyl4ocbq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/color.c | 39 ----------------------------------- tools/perf/util/color_config.c | 47 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 39 deletions(-) create mode 100644 tools/perf/util/color_config.c (limited to 'tools/perf/util') diff --git a/tools/perf/util/Build b/tools/perf/util/Build index c359af408334..b69d6294c88c 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -35,6 +35,7 @@ libperf-y += dso.o libperf-y += symbol.o libperf-y += symbol_fprintf.o libperf-y += color.o +libperf-y += color_config.o libperf-y += metricgroup.o libperf-y += header.o libperf-y += callchain.o diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index 39e628b8938e..39b8c4ec4e2e 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include #include "cache.h" -#include "config.h" #include #include #include "color.h" @@ -10,44 +9,6 @@ int perf_use_color_default = -1; -int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) -{ - if (value) { - if (!strcasecmp(value, "never")) - return 0; - if (!strcasecmp(value, "always")) - return 1; - if (!strcasecmp(value, "auto")) - goto auto_color; - } - - /* Missing or explicit false to turn off colorization */ - if (!perf_config_bool(var, value)) - return 0; - - /* any normal truth value defaults to 'auto' */ - auto_color: - if (stdout_is_tty < 0) - stdout_is_tty = isatty(1); - if (stdout_is_tty || pager_in_use()) { - char *term = getenv("TERM"); - if (term && strcmp(term, "dumb")) - return 1; - } - return 0; -} - -int perf_color_default_config(const char *var, const char *value, - void *cb __maybe_unused) -{ - if (!strcmp(var, "color.ui")) { - perf_use_color_default = perf_config_colorbool(var, value, -1); - return 0; - } - - return 0; -} - static int __color_vsnprintf(char *bf, size_t size, const char *color, const char *fmt, va_list args, const char *trail) { diff --git a/tools/perf/util/color_config.c b/tools/perf/util/color_config.c new file mode 100644 index 000000000000..817dc56e7e95 --- /dev/null +++ b/tools/perf/util/color_config.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "cache.h" +#include "config.h" +#include +#include +#include "color.h" +#include +#include + +int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) +{ + if (value) { + if (!strcasecmp(value, "never")) + return 0; + if (!strcasecmp(value, "always")) + return 1; + if (!strcasecmp(value, "auto")) + goto auto_color; + } + + /* Missing or explicit false to turn off colorization */ + if (!perf_config_bool(var, value)) + return 0; + + /* any normal truth value defaults to 'auto' */ + auto_color: + if (stdout_is_tty < 0) + stdout_is_tty = isatty(1); + if (stdout_is_tty || pager_in_use()) { + char *term = getenv("TERM"); + if (term && strcmp(term, "dumb")) + return 1; + } + return 0; +} + +int perf_color_default_config(const char *var, const char *value, + void *cb __maybe_unused) +{ + if (!strcmp(var, "color.ui")) { + perf_use_color_default = perf_config_colorbool(var, value, -1); + return 0; + } + + return 0; +} -- cgit v1.2.3 From b2251c327a09caa622047fe74363f650d31e4349 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 25 Jan 2019 15:11:08 +0100 Subject: perf color: Add missing stdarg.g to color.h It was getting the va_list definition by luck. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-4mavb7pgt2nw9lsew1xuez09@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/color.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 22777b1812ee..01f7bed21c9b 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -3,6 +3,7 @@ #define __PERF_COLOR_H #include +#include /* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */ #define COLOR_MAXLEN 24 -- cgit v1.2.3 From 19ea1b6f63dda171eb5f2a41a9d7badc722221cd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 22 Jan 2019 10:32:27 -0200 Subject: perf symbols: Move symbol_conf to separate file So that we don't drag all the headers included in symbol.h when needing to access symbol_conf in another header, such as annotate.h. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-rvo9dzflkneqmprb0dgbfybx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.h | 64 +------------------------------------ tools/perf/util/symbol_conf.h | 73 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 63 deletions(-) create mode 100644 tools/perf/util/symbol_conf.h (limited to 'tools/perf/util') diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 14d9d438e7e2..277841e3dbd1 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -15,6 +15,7 @@ #include "build-id.h" #include "event.h" #include "path.h" +#include "symbol_conf.h" #ifdef HAVE_LIBELF_SUPPORT #include @@ -89,69 +90,6 @@ static inline size_t symbol__size(const struct symbol *sym) struct strlist; struct intlist; -struct symbol_conf { - unsigned short priv_size; - bool try_vmlinux_path, - init_annotation, - force, - ignore_vmlinux, - ignore_vmlinux_buildid, - show_kernel_path, - use_modules, - allow_aliases, - sort_by_name, - show_nr_samples, - show_total_period, - use_callchain, - cumulate_callchain, - show_branchflag_count, - exclude_other, - show_cpu_utilization, - initialized, - kptr_restrict, - event_group, - demangle, - demangle_kernel, - filter_relative, - show_hist_headers, - branch_callstack, - has_filter, - show_ref_callgraph, - hide_unresolved, - raw_trace, - report_hierarchy, - inline_name; - const char *vmlinux_name, - *kallsyms_name, - *source_prefix, - *field_sep, - *graph_function; - const char *default_guest_vmlinux_name, - *default_guest_kallsyms, - *default_guest_modules; - const char *guestmount; - const char *dso_list_str, - *comm_list_str, - *pid_list_str, - *tid_list_str, - *sym_list_str, - *col_width_list_str, - *bt_stop_list_str; - struct strlist *dso_list, - *comm_list, - *sym_list, - *dso_from_list, - *dso_to_list, - *sym_from_list, - *sym_to_list, - *bt_stop_list; - struct intlist *pid_list, - *tid_list; - const char *symfs; -}; - -extern struct symbol_conf symbol_conf; - struct symbol_name_rb_node { struct rb_node rb_node; struct symbol sym; diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h new file mode 100644 index 000000000000..fffea68c1203 --- /dev/null +++ b/tools/perf/util/symbol_conf.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_SYMBOL_CONF +#define __PERF_SYMBOL_CONF 1 + +#include + +struct strlist; +struct intlist; + +struct symbol_conf { + unsigned short priv_size; + bool try_vmlinux_path, + init_annotation, + force, + ignore_vmlinux, + ignore_vmlinux_buildid, + show_kernel_path, + use_modules, + allow_aliases, + sort_by_name, + show_nr_samples, + show_total_period, + use_callchain, + cumulate_callchain, + show_branchflag_count, + exclude_other, + show_cpu_utilization, + initialized, + kptr_restrict, + event_group, + demangle, + demangle_kernel, + filter_relative, + show_hist_headers, + branch_callstack, + has_filter, + show_ref_callgraph, + hide_unresolved, + raw_trace, + report_hierarchy, + inline_name; + const char *vmlinux_name, + *kallsyms_name, + *source_prefix, + *field_sep, + *graph_function; + const char *default_guest_vmlinux_name, + *default_guest_kallsyms, + *default_guest_modules; + const char *guestmount; + const char *dso_list_str, + *comm_list_str, + *pid_list_str, + *tid_list_str, + *sym_list_str, + *col_width_list_str, + *bt_stop_list_str; + struct strlist *dso_list, + *comm_list, + *sym_list, + *dso_from_list, + *dso_to_list, + *sym_from_list, + *sym_to_list, + *bt_stop_list; + struct intlist *pid_list, + *tid_list; + const char *symfs; +}; + +extern struct symbol_conf symbol_conf; + +#endif // __PERF_SYMBOL_CONF -- cgit v1.2.3 From 8a249c73a5cc0f578ef2c5a392e20336203ebe3a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 22 Jan 2019 10:47:38 -0200 Subject: perf annotate: Remove lots of headers from annotate.h To reduce the chances changes trigger tons of rebuilds, more to come. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-ytbykaku63862guk7muflcy4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/gtk/annotate.c | 1 + tools/perf/util/annotate.h | 14 +++++++++++--- tools/perf/util/stat-display.c | 1 + 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 48428c9acd89..f22a7d142ada 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "gtk.h" +#include "util/sort.h" #include "util/debug.h" #include "util/annotate.h" #include "util/evsel.h" diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index fb6463730ba4..95053cab41fe 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -4,16 +4,24 @@ #include #include +#include #include -#include "symbol.h" -#include "hist.h" -#include "sort.h" #include #include #include #include +#include "symbol_conf.h" +struct hist_browser_timer; +struct hist_entry; struct ins_ops; +struct map; +struct map_symbol; +struct addr_map_symbol; +struct option; +struct perf_sample; +struct perf_evsel; +struct symbol; struct ins { const char *name; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 665ee374fc01..6d043c78f3c2 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -2,6 +2,7 @@ #include #include #include +#include "color.h" #include "evlist.h" #include "evsel.h" #include "stat.h" -- cgit v1.2.3 From f1a397f33743bbb7346cb01e41cc70ca318426e8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 22 Jan 2019 10:58:22 -0200 Subject: perf tools: Move branch structs to branch.h We already have it, move those there from events.h so that we untangle the header dependencies a bit more. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-pnbkqo8jxbi49d4f3yd3b5w3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/branch.h | 27 ++++++++++++++++++++++++--- tools/perf/util/event.h | 21 +-------------------- tools/perf/util/symbol.h | 1 + 3 files changed, 26 insertions(+), 23 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 1e3c7c5cdc63..64f96b79f1d7 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -1,8 +1,31 @@ #ifndef _PERF_BRANCH_H #define _PERF_BRANCH_H 1 +#include #include -#include "../perf.h" +#include +#include + +struct branch_flags { + u64 mispred:1; + u64 predicted:1; + u64 in_tx:1; + u64 abort:1; + u64 cycles:16; + u64 type:4; + u64 reserved:40; +}; + +struct branch_entry { + u64 from; + u64 to; + struct branch_flags flags; +}; + +struct branch_stack { + u64 nr; + struct branch_entry entries[0]; +}; struct branch_type_stat { bool branch_to; @@ -13,8 +36,6 @@ struct branch_type_stat { u64 cross_2m; }; -struct branch_flags; - void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags, u64 from, u64 to); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index dad32b81fe71..feba1aa819b4 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -161,26 +161,7 @@ struct ip_callchain { u64 ips[0]; }; -struct branch_flags { - u64 mispred:1; - u64 predicted:1; - u64 in_tx:1; - u64 abort:1; - u64 cycles:16; - u64 type:4; - u64 reserved:40; -}; - -struct branch_entry { - u64 from; - u64 to; - struct branch_flags flags; -}; - -struct branch_stack { - u64 nr; - struct branch_entry entries[0]; -}; +struct branch_stack; enum { PERF_IP_FLAG_BRANCH = 1ULL << 0, diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 277841e3dbd1..956d9916e364 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -12,6 +12,7 @@ #include #include #include +#include "branch.h" #include "build-id.h" #include "event.h" #include "path.h" -- cgit v1.2.3 From e7a795d3ba62897d326f1732d01325594a6ef64f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 22 Jan 2019 11:03:49 -0200 Subject: perf block-range: Add missing headers Some are being obtained indirectly and as we prune unnecessary includes, this stops working, fix it by adding the headers for things used in these file. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-1p65lyeebc2ose0lbozvemda@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/block-range.c | 2 ++ tools/perf/util/block-range.h | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c index f1451c987eec..1be432657501 100644 --- a/tools/perf/util/block-range.c +++ b/tools/perf/util/block-range.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include "block-range.h" #include "annotate.h" +#include +#include struct { struct rb_root root; diff --git a/tools/perf/util/block-range.h b/tools/perf/util/block-range.h index a5ba719d69fb..ec0fb534bf56 100644 --- a/tools/perf/util/block-range.h +++ b/tools/perf/util/block-range.h @@ -2,7 +2,11 @@ #ifndef __PERF_BLOCK_RANGE_H #define __PERF_BLOCK_RANGE_H -#include "symbol.h" +#include +#include +#include + +struct symbol; /* * struct block_range - non-overlapping parts of basic blocks -- cgit v1.2.3 From d328e305ea1fe78f569a4ea2cacca7fa5beec06e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 22 Jan 2019 11:10:59 -0200 Subject: perf symbols: Remove include map.h from dso.h Disentangling the dependency tree, to reduce build time. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-n2gcrfmh480rm44p7fra13vv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/map.c | 1 + tools/perf/util/dso.h | 4 +++- tools/perf/util/symbol_fprintf.c | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c index 5b8b8c637686..c70d9337405b 100644 --- a/tools/perf/ui/browsers/map.c +++ b/tools/perf/ui/browsers/map.c @@ -6,6 +6,7 @@ #include #include "../../util/util.h" #include "../../util/debug.h" +#include "../../util/map.h" #include "../../util/symbol.h" #include "../browser.h" #include "../helpline.h" diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index af2eda29660f..56001b82d448 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -7,12 +7,14 @@ #include #include #include +#include #include "rwsem.h" #include -#include "map.h" #include "namespaces.h" #include "build-id.h" +struct map; + enum dso_binary_type { DSO_BINARY_TYPE__KALLSYMS = 0, DSO_BINARY_TYPE__GUEST_KALLSYMS, diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c index ed0205cc7942..50472c75da59 100644 --- a/tools/perf/util/symbol_fprintf.c +++ b/tools/perf/util/symbol_fprintf.c @@ -3,6 +3,7 @@ #include #include +#include "map.h" #include "symbol.h" size_t symbol__fprintf(struct symbol *sym, FILE *fp) -- cgit v1.2.3 From 68c0188ea77891266c048d0768250eba51441cb2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 22 Jan 2019 11:14:55 -0200 Subject: perf symbols: Remove some unnecessary includes from symbol.h And fixup the fallout in places like annotation and jitdump that were using things like dirname() but weren't including libgen.h, etc. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-wrii9hy1a1wathc0398f9fgt@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 1 + tools/perf/util/dso.c | 1 + tools/perf/util/jitdump.c | 1 + tools/perf/util/symbol-minimal.c | 1 + tools/perf/util/symbol.h | 10 ++++------ 5 files changed, 8 insertions(+), 6 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 70de8f6b3aee..3d79add5f7ae 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -9,6 +9,7 @@ #include #include +#include #include "util.h" #include "ui/ui.h" #include "sort.h" diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 62c8cf622607..1b17f6de957d 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "compress.h" #include "path.h" #include "symbol.h" diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index bf249552a9b0..eda28d3570bc 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 7119df77dc0b..17edbd4f6f85 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -3,6 +3,7 @@ #include "util.h" #include +#include #include #include #include diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 956d9916e364..2162a5d9b3ac 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -5,16 +5,10 @@ #include #include #include -#include "map.h" -#include "../perf.h" #include #include #include -#include -#include #include "branch.h" -#include "build-id.h" -#include "event.h" #include "path.h" #include "symbol_conf.h" @@ -26,6 +20,10 @@ #include "dso.h" +struct map; +struct map_groups; +struct option; + /* * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP; * for newer versions we can use mmap to reduce memory usage: -- cgit v1.2.3 From 40f3b2d20b52b090976a60fe56fb838a45eb362f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 22 Jan 2019 11:24:34 -0200 Subject: perf namespaces: Remove namespaces.h from .h headers There we need just forward declarations, so remove it and add it just on the .c files that actually touch the struct definitions. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-wsjxzt99p83jubt6hu0med0f@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-probe.c | 1 + tools/perf/util/build-id.c | 1 + tools/perf/util/build-id.h | 3 ++- tools/perf/util/dso.c | 1 + tools/perf/util/dso.h | 2 +- tools/perf/util/probe-event.c | 1 + tools/perf/util/probe-event.h | 5 +++-- tools/perf/util/probe-file.c | 1 + tools/perf/util/thread.h | 1 + tools/perf/util/util.c | 1 + 10 files changed, 13 insertions(+), 4 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 99de91698de1..46d3c2deeb40 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -32,6 +32,7 @@ #include "perf.h" #include "builtin.h" +#include "namespaces.h" #include "util/util.h" #include "util/strlist.h" #include "util/strfilter.h" diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 04b1d53e4bf9..07ef7fad689c 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -15,6 +15,7 @@ #include #include "build-id.h" #include "event.h" +#include "namespaces.h" #include "symbol.h" #include "thread.h" #include diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index f0c565164a97..93668f38f1ed 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -6,9 +6,10 @@ #define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1) #include "tool.h" -#include "namespaces.h" #include +struct nsinfo; + extern struct perf_tool build_id__mark_dso_hit_ops; struct dso; struct feat_fd; diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 1b17f6de957d..cbe6e7dc6af3 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -10,6 +10,7 @@ #include #include #include "compress.h" +#include "namespaces.h" #include "path.h" #include "symbol.h" #include "srcline.h" diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 56001b82d448..2de54c0b26b2 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -10,9 +10,9 @@ #include #include "rwsem.h" #include -#include "namespaces.h" #include "build-id.h" +struct machine; struct map; enum dso_binary_type { diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 18a59fba97ff..736787a18b9e 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -35,6 +35,7 @@ #include "util.h" #include "event.h" +#include "namespaces.h" #include "strlist.h" #include "strfilter.h" #include "debug.h" diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 15a98c3a2a2f..05c8d571a901 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -4,8 +4,9 @@ #include #include -#include "intlist.h" -#include "namespaces.h" + +struct intlist; +struct nsinfo; /* Probe related configurations */ struct probe_conf { diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 0b1195cad0e5..4062bc4412a9 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -20,6 +20,7 @@ #include #include #include +#include "namespaces.h" #include "util.h" #include "event.h" #include "strlist.h" diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 712dd48cc0ca..f3c939150f90 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -13,6 +13,7 @@ #include #include "rwsem.h" +struct namespaces_event; struct thread_stack; struct unwind_libunwind_ops; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 093352e93d50..320b0fef249a 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -2,6 +2,7 @@ #include "../perf.h" #include "util.h" #include "debug.h" +#include "namespaces.h" #include #include #include -- cgit v1.2.3 From f0049f2c3ee80d0808efe6fb4400b572e6858cb9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 22 Jan 2019 11:28:42 -0200 Subject: perf comm: Remove needless headers from comm.h There we don't need rbtree, only in comm.c, also ditch perf.h, not needed at all. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-vr1jnwwujh99skrgldtimpmu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/comm.c | 1 + tools/perf/util/comm.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 31279a7bd919..1066de92af12 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "rwsem.h" struct comm_str { diff --git a/tools/perf/util/comm.h b/tools/perf/util/comm.h index 3e5c438fe85e..f35d8fbfa2dd 100644 --- a/tools/perf/util/comm.h +++ b/tools/perf/util/comm.h @@ -2,9 +2,9 @@ #ifndef __PERF_COMM_H #define __PERF_COMM_H -#include "../perf.h" -#include #include +#include +#include struct comm_str; -- cgit v1.2.3 From 95420d338e2d802ea0dce4d770d3292beb587f71 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 22 Jan 2019 11:32:41 -0200 Subject: perf callchain: No need to include perf.h So ditch it. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-bodhwdvcds9ahk26dy4w8m71@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.h | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 99d38ac019b8..8afe1622edf3 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -2,7 +2,6 @@ #ifndef __PERF_CALLCHAIN_H #define __PERF_CALLCHAIN_H -#include "../perf.h" #include #include #include "event.h" -- cgit v1.2.3 From f3acb3a8a2081344801974ac5ec8e1b0d6f0ef36 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 6 Dec 2018 11:18:14 -0800 Subject: perf machine: Use cached rbtrees At the cost of an extra pointer, we can avoid the O(logN) cost of finding the first element in the tree (smallest node), which is something required for nearly every operation dealing with machine->guests and threads->entries. The conversion is straightforward, however, it's worth noticing that the rb_erase_init() calls have been replaced by rb_erase_cached() which has no _init() flavor, however, the node is explicitly cleared next anyway, which was redundant until now. Signed-off-by: Davidlohr Bueso Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20181206191819.30182-3-dave@stgolabs.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 3 ++- tools/perf/util/build-id.c | 12 ++++++---- tools/perf/util/machine.c | 53 ++++++++++++++++++++++++++------------------- tools/perf/util/machine.h | 12 +++++----- tools/perf/util/rb_resort.h | 6 ++--- 5 files changed, 50 insertions(+), 36 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 794a0de5ac1d..c9ceaf88759c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -753,7 +753,8 @@ static int tasks_print(struct report *rep, FILE *fp) for (i = 0; i < THREADS__TABLE_SIZE; i++) { struct threads *threads = &machine->threads[i]; - for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&threads->entries); nd; + nd = rb_next(nd)) { task = tasks + itask++; task->thread = rb_entry(nd, struct thread, rb_node); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 07ef7fad689c..2ff802068f06 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -364,7 +364,8 @@ int perf_session__write_buildid_table(struct perf_session *session, if (err) return err; - for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&session->machines.guests); nd; + nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); err = machine__write_buildid_table(pos, fd); if (err) @@ -397,7 +398,8 @@ int dsos__hit_all(struct perf_session *session) if (err) return err; - for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&session->machines.guests); nd; + nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); err = machine__hit_all_dsos(pos); @@ -850,7 +852,8 @@ int perf_session__cache_build_ids(struct perf_session *session) ret = machine__cache_build_ids(&session->machines.host); - for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&session->machines.guests); nd; + nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); ret |= machine__cache_build_ids(pos); } @@ -867,7 +870,8 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) struct rb_node *nd; bool ret = machine__read_build_ids(&session->machines.host, with_hits); - for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&session->machines.guests); nd; + nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); ret |= machine__read_build_ids(pos, with_hits); } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index ae85106bb5bf..66f019fdc510 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -42,7 +42,7 @@ static void machine__threads_init(struct machine *machine) for (i = 0; i < THREADS__TABLE_SIZE; i++) { struct threads *threads = &machine->threads[i]; - threads->entries = RB_ROOT; + threads->entries = RB_ROOT_CACHED; init_rwsem(&threads->lock); threads->nr = 0; INIT_LIST_HEAD(&threads->dead); @@ -180,7 +180,7 @@ void machine__delete_threads(struct machine *machine) for (i = 0; i < THREADS__TABLE_SIZE; i++) { struct threads *threads = &machine->threads[i]; down_write(&threads->lock); - nd = rb_first(&threads->entries); + nd = rb_first_cached(&threads->entries); while (nd) { struct thread *t = rb_entry(nd, struct thread, rb_node); @@ -223,7 +223,7 @@ void machine__delete(struct machine *machine) void machines__init(struct machines *machines) { machine__init(&machines->host, "", HOST_KERNEL_ID); - machines->guests = RB_ROOT; + machines->guests = RB_ROOT_CACHED; } void machines__exit(struct machines *machines) @@ -235,9 +235,10 @@ void machines__exit(struct machines *machines) struct machine *machines__add(struct machines *machines, pid_t pid, const char *root_dir) { - struct rb_node **p = &machines->guests.rb_node; + struct rb_node **p = &machines->guests.rb_root.rb_node; struct rb_node *parent = NULL; struct machine *pos, *machine = malloc(sizeof(*machine)); + bool leftmost = true; if (machine == NULL) return NULL; @@ -252,12 +253,14 @@ struct machine *machines__add(struct machines *machines, pid_t pid, pos = rb_entry(parent, struct machine, rb_node); if (pid < pos->pid) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } rb_link_node(&machine->rb_node, parent, p); - rb_insert_color(&machine->rb_node, &machines->guests); + rb_insert_color_cached(&machine->rb_node, &machines->guests, leftmost); return machine; } @@ -268,7 +271,7 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec) machines->host.comm_exec = comm_exec; - for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { struct machine *machine = rb_entry(nd, struct machine, rb_node); machine->comm_exec = comm_exec; @@ -277,7 +280,7 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec) struct machine *machines__find(struct machines *machines, pid_t pid) { - struct rb_node **p = &machines->guests.rb_node; + struct rb_node **p = &machines->guests.rb_root.rb_node; struct rb_node *parent = NULL; struct machine *machine; struct machine *default_machine = NULL; @@ -340,7 +343,7 @@ void machines__process_guests(struct machines *machines, { struct rb_node *nd; - for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); process(pos, data); } @@ -353,7 +356,8 @@ void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size) machines->host.id_hdr_size = id_hdr_size; - for (node = rb_first(&machines->guests); node; node = rb_next(node)) { + for (node = rb_first_cached(&machines->guests); node; + node = rb_next(node)) { machine = rb_entry(node, struct machine, rb_node); machine->id_hdr_size = id_hdr_size; } @@ -466,9 +470,10 @@ static struct thread *____machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid, bool create) { - struct rb_node **p = &threads->entries.rb_node; + struct rb_node **p = &threads->entries.rb_root.rb_node; struct rb_node *parent = NULL; struct thread *th; + bool leftmost = true; th = threads__get_last_match(threads, machine, pid, tid); if (th) @@ -486,8 +491,10 @@ static struct thread *____machine__findnew_thread(struct machine *machine, if (tid < th->tid) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } if (!create) @@ -496,7 +503,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, th = thread__new(pid, tid); if (th != NULL) { rb_link_node(&th->rb_node, parent, p); - rb_insert_color(&th->rb_node, &threads->entries); + rb_insert_color_cached(&th->rb_node, &threads->entries, leftmost); /* * We have to initialize map_groups separately @@ -507,7 +514,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, * leader and that would screwed the rb tree. */ if (thread__init_map_groups(th, machine)) { - rb_erase_init(&th->rb_node, &threads->entries); + rb_erase_cached(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); thread__put(th); return NULL; @@ -798,7 +805,7 @@ size_t machines__fprintf_dsos(struct machines *machines, FILE *fp) struct rb_node *nd; size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp); - for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); ret += __dsos__fprintf(&pos->dsos.head, fp); } @@ -818,7 +825,7 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp, struct rb_node *nd; size_t ret = machine__fprintf_dsos_buildid(&machines->host, fp, skip, parm); - for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); ret += machine__fprintf_dsos_buildid(pos, fp, skip, parm); } @@ -858,7 +865,8 @@ size_t machine__fprintf(struct machine *machine, FILE *fp) ret = fprintf(fp, "Threads: %u\n", threads->nr); - for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&threads->entries); nd; + nd = rb_next(nd)) { struct thread *pos = rb_entry(nd, struct thread, rb_node); ret += thread__fprintf(pos, fp); @@ -1161,7 +1169,7 @@ failure: void machines__destroy_kernel_maps(struct machines *machines) { - struct rb_node *next = rb_first(&machines->guests); + struct rb_node *next = rb_first_cached(&machines->guests); machine__destroy_kernel_maps(&machines->host); @@ -1169,7 +1177,7 @@ void machines__destroy_kernel_maps(struct machines *machines) struct machine *pos = rb_entry(next, struct machine, rb_node); next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, &machines->guests); + rb_erase_cached(&pos->rb_node, &machines->guests); machine__delete(pos); } } @@ -1734,7 +1742,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, BUG_ON(refcount_read(&th->refcnt) == 0); if (lock) down_write(&threads->lock); - rb_erase_init(&th->rb_node, &threads->entries); + rb_erase_cached(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); --threads->nr; /* @@ -2511,7 +2519,8 @@ int machine__for_each_thread(struct machine *machine, for (i = 0; i < THREADS__TABLE_SIZE; i++) { threads = &machine->threads[i]; - for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&threads->entries); nd; + nd = rb_next(nd)) { thread = rb_entry(nd, struct thread, rb_node); rc = fn(thread, priv); if (rc != 0) @@ -2538,7 +2547,7 @@ int machines__for_each_thread(struct machines *machines, if (rc != 0) return rc; - for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { struct machine *machine = rb_entry(nd, struct machine, rb_node); rc = machine__for_each_thread(machine, fn, priv); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 4ecd380ce1b4..e2f3df45410a 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -29,11 +29,11 @@ struct vdso_info; #define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS) struct threads { - struct rb_root entries; - struct rw_semaphore lock; - unsigned int nr; - struct list_head dead; - struct thread *last_match; + struct rb_root_cached entries; + struct rw_semaphore lock; + unsigned int nr; + struct list_head dead; + struct thread *last_match; }; struct machine { @@ -140,7 +140,7 @@ typedef void (*machine__process_t)(struct machine *machine, void *data); struct machines { struct machine host; - struct rb_root guests; + struct rb_root_cached guests; }; void machines__init(struct machines *machines); diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h index a920f702a74d..f272e181d3d6 100644 --- a/tools/perf/util/rb_resort.h +++ b/tools/perf/util/rb_resort.h @@ -144,8 +144,8 @@ struct __name##_sorted *__name = __name##_sorted__new __ilist->rblist.nr_entries) /* For 'struct machine->threads' */ -#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \ - DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries, \ - __machine->threads[hash_bucket].nr) +#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \ + DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries.rb_root, \ + __machine->threads[hash_bucket].nr) #endif /* _PERF_RESORT_RB_H_ */ -- cgit v1.2.3 From 55ecd6310f9fe48cf7e435be408862da1e0e6baa Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 6 Dec 2018 11:18:15 -0800 Subject: perf callchain: Use cached rbtrees At the cost of an extra pointer, we can avoid the O(logN) cost of finding the first element in the tree (smallest node), which is something required for nearly every in/srcline callchain node deletion (in/srcline__tree_delete()). Signed-off-by: Davidlohr Bueso Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20181206191819.30182-4-dave@stgolabs.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 4 ++-- tools/perf/util/dso.h | 4 ++-- tools/perf/util/srcline.c | 43 +++++++++++++++++++++++++------------------ tools/perf/util/srcline.h | 13 +++++++------ 4 files changed, 36 insertions(+), 28 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index cbe6e7dc6af3..9ec4b2e6d4ac 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1199,8 +1199,8 @@ struct dso *dso__new(const char *name) dso__set_short_name(dso, dso->name, false); dso->symbols = dso->symbol_names = RB_ROOT; dso->data.cache = RB_ROOT; - dso->inlined_nodes = RB_ROOT; - dso->srclines = RB_ROOT; + dso->inlined_nodes = RB_ROOT_CACHED; + dso->srclines = RB_ROOT_CACHED; dso->data.fd = -1; dso->data.status = DSO_DATA_STATUS_UNKNOWN; dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 2de54c0b26b2..40edfb375a8b 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -143,8 +143,8 @@ struct dso { struct rb_root *root; /* root of rbtree that rb_node is in */ struct rb_root symbols; struct rb_root symbol_names; - struct rb_root inlined_nodes; - struct rb_root srclines; + struct rb_root_cached inlined_nodes; + struct rb_root_cached srclines; struct { u64 addr; struct symbol *symbol; diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index dc86597d0cc4..00f215580b5a 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -594,11 +594,12 @@ struct srcline_node { struct rb_node rb_node; }; -void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline) +void srcline__tree_insert(struct rb_root_cached *tree, u64 addr, char *srcline) { - struct rb_node **p = &tree->rb_node; + struct rb_node **p = &tree->rb_root.rb_node; struct rb_node *parent = NULL; struct srcline_node *i, *node; + bool leftmost = true; node = zalloc(sizeof(struct srcline_node)); if (!node) { @@ -614,16 +615,18 @@ void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline) i = rb_entry(parent, struct srcline_node, rb_node); if (addr < i->addr) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } rb_link_node(&node->rb_node, parent, p); - rb_insert_color(&node->rb_node, tree); + rb_insert_color_cached(&node->rb_node, tree, leftmost); } -char *srcline__tree_find(struct rb_root *tree, u64 addr) +char *srcline__tree_find(struct rb_root_cached *tree, u64 addr) { - struct rb_node *n = tree->rb_node; + struct rb_node *n = tree->rb_root.rb_node; while (n) { struct srcline_node *i = rb_entry(n, struct srcline_node, @@ -640,15 +643,15 @@ char *srcline__tree_find(struct rb_root *tree, u64 addr) return NULL; } -void srcline__tree_delete(struct rb_root *tree) +void srcline__tree_delete(struct rb_root_cached *tree) { struct srcline_node *pos; - struct rb_node *next = rb_first(tree); + struct rb_node *next = rb_first_cached(tree); while (next) { pos = rb_entry(next, struct srcline_node, rb_node); next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, tree); + rb_erase_cached(&pos->rb_node, tree); free_srcline(pos->srcline); zfree(&pos); } @@ -682,28 +685,32 @@ void inline_node__delete(struct inline_node *node) free(node); } -void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines) +void inlines__tree_insert(struct rb_root_cached *tree, + struct inline_node *inlines) { - struct rb_node **p = &tree->rb_node; + struct rb_node **p = &tree->rb_root.rb_node; struct rb_node *parent = NULL; const u64 addr = inlines->addr; struct inline_node *i; + bool leftmost = true; while (*p != NULL) { parent = *p; i = rb_entry(parent, struct inline_node, rb_node); if (addr < i->addr) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } rb_link_node(&inlines->rb_node, parent, p); - rb_insert_color(&inlines->rb_node, tree); + rb_insert_color_cached(&inlines->rb_node, tree, leftmost); } -struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr) +struct inline_node *inlines__tree_find(struct rb_root_cached *tree, u64 addr) { - struct rb_node *n = tree->rb_node; + struct rb_node *n = tree->rb_root.rb_node; while (n) { struct inline_node *i = rb_entry(n, struct inline_node, @@ -720,15 +727,15 @@ struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr) return NULL; } -void inlines__tree_delete(struct rb_root *tree) +void inlines__tree_delete(struct rb_root_cached *tree) { struct inline_node *pos; - struct rb_node *next = rb_first(tree); + struct rb_node *next = rb_first_cached(tree); while (next) { pos = rb_entry(next, struct inline_node, rb_node); next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, tree); + rb_erase_cached(&pos->rb_node, tree); inline_node__delete(pos); } } diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h index 5762212dc342..b11a0aaaa676 100644 --- a/tools/perf/util/srcline.h +++ b/tools/perf/util/srcline.h @@ -19,11 +19,11 @@ void free_srcline(char *srcline); char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line); /* insert the srcline into the DSO, which will take ownership */ -void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline); +void srcline__tree_insert(struct rb_root_cached *tree, u64 addr, char *srcline); /* find previously inserted srcline */ -char *srcline__tree_find(struct rb_root *tree, u64 addr); +char *srcline__tree_find(struct rb_root_cached *tree, u64 addr); /* delete all srclines within the tree */ -void srcline__tree_delete(struct rb_root *tree); +void srcline__tree_delete(struct rb_root_cached *tree); #define SRCLINE_UNKNOWN ((char *) "??:0") @@ -46,10 +46,11 @@ struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr, void inline_node__delete(struct inline_node *node); /* insert the inline node list into the DSO, which will take ownership */ -void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines); +void inlines__tree_insert(struct rb_root_cached *tree, + struct inline_node *inlines); /* find previously inserted inline node list */ -struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr); +struct inline_node *inlines__tree_find(struct rb_root_cached *tree, u64 addr); /* delete all nodes within the tree of inline_node s */ -void inlines__tree_delete(struct rb_root *tree); +void inlines__tree_delete(struct rb_root_cached *tree); #endif /* PERF_SRCLINE_H */ -- cgit v1.2.3 From ca2270292e6c3415102242bf9dc3d05f622b7b28 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 6 Dec 2018 11:18:16 -0800 Subject: perf util: Use cached rbtree for rblists At the cost of an extra pointer, we can avoid the O(logN) cost of finding the first element in the tree (smallest node), which is something required for any of the strlist or intlist traversals (XXX_for_each_entry()). There are a number of users in perf of these (particularly strlists), including probes, and buildid. Signed-off-by: Davidlohr Bueso Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20181206191819.30182-5-dave@stgolabs.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intlist.h | 2 +- tools/perf/util/metricgroup.c | 2 +- tools/perf/util/rb_resort.h | 2 +- tools/perf/util/rblist.c | 28 ++++++++++++++++++---------- tools/perf/util/rblist.h | 2 +- tools/perf/util/stat-shadow.c | 2 +- tools/perf/util/strlist.h | 2 +- 7 files changed, 24 insertions(+), 16 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h index 85bab8735fa9..5c19ee001299 100644 --- a/tools/perf/util/intlist.h +++ b/tools/perf/util/intlist.h @@ -45,7 +45,7 @@ static inline unsigned int intlist__nr_entries(const struct intlist *ilist) /* For intlist iteration */ static inline struct int_node *intlist__first(struct intlist *ilist) { - struct rb_node *rn = rb_first(&ilist->rblist.entries); + struct rb_node *rn = rb_first_cached(&ilist->rblist.entries); return rn ? rb_entry(rn, struct int_node, rb_node) : NULL; } static inline struct int_node *intlist__next(struct int_node *in) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index a28f9b5cc4ff..8529cbd3955b 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -352,7 +352,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, else if (metrics && !raw) printf("\nMetrics:\n\n"); - for (node = rb_first(&groups.entries); node; node = next) { + for (node = rb_first_cached(&groups.entries); node; node = next) { struct mep *me = container_of(node, struct mep, nd); if (metricgroups) diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h index f272e181d3d6..376e86cb4c3c 100644 --- a/tools/perf/util/rb_resort.h +++ b/tools/perf/util/rb_resort.h @@ -140,7 +140,7 @@ struct __name##_sorted *__name = __name##_sorted__new /* For 'struct intlist' */ #define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \ - DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries, \ + DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root, \ __ilist->rblist.nr_entries) /* For 'struct machine->threads' */ diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c index 0efc3258c648..11e07fab20dc 100644 --- a/tools/perf/util/rblist.c +++ b/tools/perf/util/rblist.c @@ -13,8 +13,9 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry) { - struct rb_node **p = &rblist->entries.rb_node; + struct rb_node **p = &rblist->entries.rb_root.rb_node; struct rb_node *parent = NULL, *new_node; + bool leftmost = true; while (*p != NULL) { int rc; @@ -24,8 +25,10 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry) rc = rblist->node_cmp(parent, new_entry); if (rc > 0) p = &(*p)->rb_left; - else if (rc < 0) + else if (rc < 0) { p = &(*p)->rb_right; + leftmost = false; + } else return -EEXIST; } @@ -35,7 +38,7 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry) return -ENOMEM; rb_link_node(new_node, parent, p); - rb_insert_color(new_node, &rblist->entries); + rb_insert_color_cached(new_node, &rblist->entries, leftmost); ++rblist->nr_entries; return 0; @@ -43,7 +46,7 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry) void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node) { - rb_erase(rb_node, &rblist->entries); + rb_erase_cached(rb_node, &rblist->entries); --rblist->nr_entries; rblist->node_delete(rblist, rb_node); } @@ -52,8 +55,9 @@ static struct rb_node *__rblist__findnew(struct rblist *rblist, const void *entry, bool create) { - struct rb_node **p = &rblist->entries.rb_node; + struct rb_node **p = &rblist->entries.rb_root.rb_node; struct rb_node *parent = NULL, *new_node = NULL; + bool leftmost = true; while (*p != NULL) { int rc; @@ -63,8 +67,10 @@ static struct rb_node *__rblist__findnew(struct rblist *rblist, rc = rblist->node_cmp(parent, entry); if (rc > 0) p = &(*p)->rb_left; - else if (rc < 0) + else if (rc < 0) { p = &(*p)->rb_right; + leftmost = false; + } else return parent; } @@ -73,7 +79,8 @@ static struct rb_node *__rblist__findnew(struct rblist *rblist, new_node = rblist->node_new(rblist, entry); if (new_node) { rb_link_node(new_node, parent, p); - rb_insert_color(new_node, &rblist->entries); + rb_insert_color_cached(new_node, + &rblist->entries, leftmost); ++rblist->nr_entries; } } @@ -94,7 +101,7 @@ struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry) void rblist__init(struct rblist *rblist) { if (rblist != NULL) { - rblist->entries = RB_ROOT; + rblist->entries = RB_ROOT_CACHED; rblist->nr_entries = 0; } @@ -103,7 +110,7 @@ void rblist__init(struct rblist *rblist) void rblist__exit(struct rblist *rblist) { - struct rb_node *pos, *next = rb_first(&rblist->entries); + struct rb_node *pos, *next = rb_first_cached(&rblist->entries); while (next) { pos = next; @@ -124,7 +131,8 @@ struct rb_node *rblist__entry(const struct rblist *rblist, unsigned int idx) { struct rb_node *node; - for (node = rb_first(&rblist->entries); node; node = rb_next(node)) { + for (node = rb_first_cached(&rblist->entries); node; + node = rb_next(node)) { if (!idx--) return node; } diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h index 76df15c27f5f..14b232a4d0b6 100644 --- a/tools/perf/util/rblist.h +++ b/tools/perf/util/rblist.h @@ -20,7 +20,7 @@ */ struct rblist { - struct rb_root entries; + struct rb_root_cached entries; unsigned int nr_entries; int (*node_cmp)(struct rb_node *rbn, const void *entry); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 3c22c58b3e90..83d8094be4fe 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -168,7 +168,7 @@ static void reset_stat(struct runtime_stat *st) struct rb_node *pos, *next; rblist = &st->value_list; - next = rb_first(&rblist->entries); + next = rb_first_cached(&rblist->entries); while (next) { pos = next; next = rb_next(pos); diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h index d58f1e08b170..7e82c71dcc42 100644 --- a/tools/perf/util/strlist.h +++ b/tools/perf/util/strlist.h @@ -57,7 +57,7 @@ static inline unsigned int strlist__nr_entries(const struct strlist *slist) /* For strlist iteration */ static inline struct str_node *strlist__first(struct strlist *slist) { - struct rb_node *rn = rb_first(&slist->rblist.entries); + struct rb_node *rn = rb_first_cached(&slist->rblist.entries); return rn ? rb_entry(rn, struct str_node, rb_node) : NULL; } static inline struct str_node *strlist__next(struct str_node *sn) -- cgit v1.2.3 From 7137ff50b68a48bc28270c91b1c313259ab0c1c4 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 6 Dec 2018 11:18:17 -0800 Subject: perf symbols: Use cached rbtrees At the cost of an extra pointer, we can avoid the O(logN) cost of finding the first element in the tree (smallest node). Signed-off-by: Davidlohr Bueso Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20181206191819.30182-6-dave@stgolabs.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 2 +- tools/perf/util/dso.c | 4 +- tools/perf/util/dso.h | 6 +-- tools/perf/util/map.c | 8 ++-- tools/perf/util/probe-event.c | 3 +- tools/perf/util/symbol.c | 87 ++++++++++++++++++++++------------------ tools/perf/util/symbol.h | 13 +++--- tools/perf/util/symbol_fprintf.c | 2 +- 8 files changed, 67 insertions(+), 58 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 93d679eaf1f4..cc3da5564300 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -227,7 +227,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, * the DSO? */ if (al->sym != NULL) { - rb_erase(&al->sym->rb_node, + rb_erase_cached(&al->sym->rb_node, &al->map->dso->symbols); symbol__delete(al->sym); dso__reset_find_symbol_cache(al->map->dso); diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 9ec4b2e6d4ac..a8a54115b115 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1197,7 +1197,7 @@ struct dso *dso__new(const char *name) strcpy(dso->name, name); dso__set_long_name(dso, dso->name, false); dso__set_short_name(dso, dso->name, false); - dso->symbols = dso->symbol_names = RB_ROOT; + dso->symbols = dso->symbol_names = RB_ROOT_CACHED; dso->data.cache = RB_ROOT; dso->inlined_nodes = RB_ROOT_CACHED; dso->srclines = RB_ROOT_CACHED; @@ -1469,7 +1469,7 @@ size_t dso__fprintf(struct dso *dso, FILE *fp) ret += fprintf(fp, "%sloaded, ", dso__loaded(dso) ? "" : "NOT "); ret += dso__fprintf_buildid(dso, fp); ret += fprintf(fp, ")\n"); - for (nd = rb_first(&dso->symbols); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&dso->symbols); nd; nd = rb_next(nd)) { struct symbol *pos = rb_entry(nd, struct symbol, rb_node); ret += symbol__fprintf(pos, fp); } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 40edfb375a8b..bb417c54c25a 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -141,8 +141,8 @@ struct dso { struct list_head node; struct rb_node rb_node; /* rbtree node sorted by long name */ struct rb_root *root; /* root of rbtree that rb_node is in */ - struct rb_root symbols; - struct rb_root symbol_names; + struct rb_root_cached symbols; + struct rb_root_cached symbol_names; struct rb_root_cached inlined_nodes; struct rb_root_cached srclines; struct { @@ -236,7 +236,7 @@ bool dso__loaded(const struct dso *dso); static inline bool dso__has_symbols(const struct dso *dso) { - return !RB_EMPTY_ROOT(&dso->symbols); + return !RB_EMPTY_ROOT(&dso->symbols.rb_root); } bool dso__sorted_by_name(const struct dso *dso); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 6751301a755c..cf407cc9d915 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -286,8 +286,8 @@ void map__put(struct map *map) void map__fixup_start(struct map *map) { - struct rb_root *symbols = &map->dso->symbols; - struct rb_node *nd = rb_first(symbols); + struct rb_root_cached *symbols = &map->dso->symbols; + struct rb_node *nd = rb_first_cached(symbols); if (nd != NULL) { struct symbol *sym = rb_entry(nd, struct symbol, rb_node); map->start = sym->start; @@ -296,8 +296,8 @@ void map__fixup_start(struct map *map) void map__fixup_end(struct map *map) { - struct rb_root *symbols = &map->dso->symbols; - struct rb_node *nd = rb_last(symbols); + struct rb_root_cached *symbols = &map->dso->symbols; + struct rb_node *nd = rb_last(&symbols->rb_root); if (nd != NULL) { struct symbol *sym = rb_entry(nd, struct symbol, rb_node); map->end = sym->end; diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 736787a18b9e..4008f0cc36e5 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -3529,7 +3529,8 @@ int show_available_funcs(const char *target, struct nsinfo *nsi, /* Show all (filtered) symbols */ setup_pager(); - for (nd = rb_first(&map->dso->symbol_names); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&map->dso->symbol_names); nd; + nd = rb_next(nd)) { struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); if (strfilter__compare(_filter, pos->sym.name)) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 48efad6d0f90..bcbcbd610460 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -163,7 +163,7 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb) return arch__choose_best_symbol(syma, symb); } -void symbols__fixup_duplicate(struct rb_root *symbols) +void symbols__fixup_duplicate(struct rb_root_cached *symbols) { struct rb_node *nd; struct symbol *curr, *next; @@ -171,7 +171,7 @@ void symbols__fixup_duplicate(struct rb_root *symbols) if (symbol_conf.allow_aliases) return; - nd = rb_first(symbols); + nd = rb_first_cached(symbols); while (nd) { curr = rb_entry(nd, struct symbol, rb_node); @@ -186,20 +186,20 @@ again: continue; if (choose_best_symbol(curr, next) == SYMBOL_A) { - rb_erase(&next->rb_node, symbols); + rb_erase_cached(&next->rb_node, symbols); symbol__delete(next); goto again; } else { nd = rb_next(&curr->rb_node); - rb_erase(&curr->rb_node, symbols); + rb_erase_cached(&curr->rb_node, symbols); symbol__delete(curr); } } } -void symbols__fixup_end(struct rb_root *symbols) +void symbols__fixup_end(struct rb_root_cached *symbols) { - struct rb_node *nd, *prevnd = rb_first(symbols); + struct rb_node *nd, *prevnd = rb_first_cached(symbols); struct symbol *curr, *prev; if (prevnd == NULL) @@ -282,25 +282,27 @@ void symbol__delete(struct symbol *sym) free(((void *)sym) - symbol_conf.priv_size); } -void symbols__delete(struct rb_root *symbols) +void symbols__delete(struct rb_root_cached *symbols) { struct symbol *pos; - struct rb_node *next = rb_first(symbols); + struct rb_node *next = rb_first_cached(symbols); while (next) { pos = rb_entry(next, struct symbol, rb_node); next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, symbols); + rb_erase_cached(&pos->rb_node, symbols); symbol__delete(pos); } } -void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel) +void __symbols__insert(struct rb_root_cached *symbols, + struct symbol *sym, bool kernel) { - struct rb_node **p = &symbols->rb_node; + struct rb_node **p = &symbols->rb_root.rb_node; struct rb_node *parent = NULL; const u64 ip = sym->start; struct symbol *s; + bool leftmost = true; if (kernel) { const char *name = sym->name; @@ -318,26 +320,28 @@ void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel) s = rb_entry(parent, struct symbol, rb_node); if (ip < s->start) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } rb_link_node(&sym->rb_node, parent, p); - rb_insert_color(&sym->rb_node, symbols); + rb_insert_color_cached(&sym->rb_node, symbols, leftmost); } -void symbols__insert(struct rb_root *symbols, struct symbol *sym) +void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym) { __symbols__insert(symbols, sym, false); } -static struct symbol *symbols__find(struct rb_root *symbols, u64 ip) +static struct symbol *symbols__find(struct rb_root_cached *symbols, u64 ip) { struct rb_node *n; if (symbols == NULL) return NULL; - n = symbols->rb_node; + n = symbols->rb_root.rb_node; while (n) { struct symbol *s = rb_entry(n, struct symbol, rb_node); @@ -353,9 +357,9 @@ static struct symbol *symbols__find(struct rb_root *symbols, u64 ip) return NULL; } -static struct symbol *symbols__first(struct rb_root *symbols) +static struct symbol *symbols__first(struct rb_root_cached *symbols) { - struct rb_node *n = rb_first(symbols); + struct rb_node *n = rb_first_cached(symbols); if (n) return rb_entry(n, struct symbol, rb_node); @@ -363,9 +367,9 @@ static struct symbol *symbols__first(struct rb_root *symbols) return NULL; } -static struct symbol *symbols__last(struct rb_root *symbols) +static struct symbol *symbols__last(struct rb_root_cached *symbols) { - struct rb_node *n = rb_last(symbols); + struct rb_node *n = rb_last(&symbols->rb_root); if (n) return rb_entry(n, struct symbol, rb_node); @@ -383,11 +387,12 @@ static struct symbol *symbols__next(struct symbol *sym) return NULL; } -static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym) +static void symbols__insert_by_name(struct rb_root_cached *symbols, struct symbol *sym) { - struct rb_node **p = &symbols->rb_node; + struct rb_node **p = &symbols->rb_root.rb_node; struct rb_node *parent = NULL; struct symbol_name_rb_node *symn, *s; + bool leftmost = true; symn = container_of(sym, struct symbol_name_rb_node, sym); @@ -396,19 +401,21 @@ static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym) s = rb_entry(parent, struct symbol_name_rb_node, rb_node); if (strcmp(sym->name, s->sym.name) < 0) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } rb_link_node(&symn->rb_node, parent, p); - rb_insert_color(&symn->rb_node, symbols); + rb_insert_color_cached(&symn->rb_node, symbols, leftmost); } -static void symbols__sort_by_name(struct rb_root *symbols, - struct rb_root *source) +static void symbols__sort_by_name(struct rb_root_cached *symbols, + struct rb_root_cached *source) { struct rb_node *nd; - for (nd = rb_first(source); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(source); nd; nd = rb_next(nd)) { struct symbol *pos = rb_entry(nd, struct symbol, rb_node); symbols__insert_by_name(symbols, pos); } @@ -431,7 +438,7 @@ int symbol__match_symbol_name(const char *name, const char *str, return arch__compare_symbol_names(name, str); } -static struct symbol *symbols__find_by_name(struct rb_root *symbols, +static struct symbol *symbols__find_by_name(struct rb_root_cached *symbols, const char *name, enum symbol_tag_include includes) { @@ -441,7 +448,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, if (symbols == NULL) return NULL; - n = symbols->rb_node; + n = symbols->rb_root.rb_node; while (n) { int cmp; @@ -644,7 +651,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name, { struct symbol *sym; struct dso *dso = arg; - struct rb_root *root = &dso->symbols; + struct rb_root_cached *root = &dso->symbols; if (!symbol_type__filter(type)) return 0; @@ -681,14 +688,14 @@ static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct struct map *curr_map; struct symbol *pos; int count = 0; - struct rb_root old_root = dso->symbols; - struct rb_root *root = &dso->symbols; - struct rb_node *next = rb_first(root); + struct rb_root_cached old_root = dso->symbols; + struct rb_root_cached *root = &dso->symbols; + struct rb_node *next = rb_first_cached(root); if (!kmaps) return -1; - *root = RB_ROOT; + *root = RB_ROOT_CACHED; while (next) { char *module; @@ -696,8 +703,8 @@ static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct pos = rb_entry(next, struct symbol, rb_node); next = rb_next(&pos->rb_node); - rb_erase_init(&pos->rb_node, &old_root); - + rb_erase_cached(&pos->rb_node, &old_root); + RB_CLEAR_NODE(&pos->rb_node); module = strchr(pos->name, '\t'); if (module) *module = '\0'; @@ -734,8 +741,8 @@ static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso, struct map *curr_map = initial_map; struct symbol *pos; int count = 0, moved = 0; - struct rb_root *root = &dso->symbols; - struct rb_node *next = rb_first(root); + struct rb_root_cached *root = &dso->symbols; + struct rb_node *next = rb_first_cached(root); int kernel_range = 0; bool x86_64; @@ -849,7 +856,7 @@ static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso, } add_symbol: if (curr_map != initial_map) { - rb_erase(&pos->rb_node, root); + rb_erase_cached(&pos->rb_node, root); symbols__insert(&curr_map->dso->symbols, pos); ++moved; } else @@ -857,7 +864,7 @@ add_symbol: continue; discard_symbol: - rb_erase(&pos->rb_node, root); + rb_erase_cached(&pos->rb_node, root); symbol__delete(pos); } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2162a5d9b3ac..56e2bcb907cc 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -68,7 +68,7 @@ struct symbol { }; void symbol__delete(struct symbol *sym); -void symbols__delete(struct rb_root *symbols); +void symbols__delete(struct rb_root_cached *symbols); /* symbols__for_each_entry - iterate over symbols (rb_root) * @@ -77,7 +77,7 @@ void symbols__delete(struct rb_root *symbols); * @nd: the 'struct rb_node *' to use as a temporary storage */ #define symbols__for_each_entry(symbols, pos, nd) \ - for (nd = rb_first(symbols); \ + for (nd = rb_first_cached(symbols); \ nd && (pos = rb_entry(nd, struct symbol, rb_node)); \ nd = rb_next(nd)) @@ -247,10 +247,11 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss); char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name); -void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel); -void symbols__insert(struct rb_root *symbols, struct symbol *sym); -void symbols__fixup_duplicate(struct rb_root *symbols); -void symbols__fixup_end(struct rb_root *symbols); +void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym, + bool kernel); +void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym); +void symbols__fixup_duplicate(struct rb_root_cached *symbols); +void symbols__fixup_end(struct rb_root_cached *symbols); void map_groups__fixup_end(struct map_groups *mg); typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c index 50472c75da59..02e89b02c2ce 100644 --- a/tools/perf/util/symbol_fprintf.c +++ b/tools/perf/util/symbol_fprintf.c @@ -65,7 +65,7 @@ size_t dso__fprintf_symbols_by_name(struct dso *dso, struct rb_node *nd; struct symbol_name_rb_node *pos; - for (nd = rb_first(&dso->symbol_names); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&dso->symbol_names); nd; nd = rb_next(nd)) { pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); fprintf(fp, "%s\n", pos->sym.name); } -- cgit v1.2.3 From 2eb3d6894ae3b9cc8a94c91458a041c45773f23d Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 6 Dec 2018 11:18:18 -0800 Subject: perf hist: Use cached rbtrees At the cost of an extra pointer, we can avoid the O(logN) cost of finding the first element in the tree (smallest node), which is something heavily required for histograms. Specifically, the following are converted to rb_root_cached, and users accordingly: hist::entries_in_array hist::entries_in hist::entries hist::entries_collapsed hist_entry::hroot_in hist_entry::hroot_out Signed-off-by: Davidlohr Bueso Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20181206191819.30182-7-dave@stgolabs.net [ Added some missing conversions to rb_first_cached() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-c2c.c | 6 +- tools/perf/builtin-diff.c | 10 +- tools/perf/builtin-top.c | 2 +- tools/perf/tests/hists_common.c | 8 +- tools/perf/tests/hists_cumulate.c | 14 +-- tools/perf/tests/hists_link.c | 8 +- tools/perf/tests/hists_output.c | 32 +++--- tools/perf/ui/browsers/hists.c | 16 +-- tools/perf/ui/gtk/hists.c | 6 +- tools/perf/ui/stdio/hist.c | 3 +- tools/perf/util/hist.c | 199 +++++++++++++++++++++----------------- tools/perf/util/hist.h | 10 +- tools/perf/util/sort.h | 4 +- 14 files changed, 173 insertions(+), 147 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index cc3da5564300..20db635347e5 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -305,7 +305,7 @@ static void hists__find_annotations(struct hists *hists, struct perf_evsel *evsel, struct perf_annotate *ann) { - struct rb_node *nd = rb_first(&hists->entries), *next; + struct rb_node *nd = rb_first_cached(&hists->entries), *next; int key = K_RIGHT; while (nd) { diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index f2863496dede..72ec0ae7d8ad 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2088,7 +2088,7 @@ static int resort_hitm_cb(struct hist_entry *he) static int hists__iterate_cb(struct hists *hists, hists__resort_cb_t cb) { - struct rb_node *next = rb_first(&hists->entries); + struct rb_node *next = rb_first_cached(&hists->entries); int ret = 0; while (next) { @@ -2215,7 +2215,7 @@ static void print_pareto(FILE *out) if (WARN_ONCE(ret, "failed to setup sort entries\n")) return; - nd = rb_first(&c2c.hists.hists.entries); + nd = rb_first_cached(&c2c.hists.hists.entries); for (; nd; nd = rb_next(nd)) { struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); @@ -2283,7 +2283,7 @@ static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session) static void c2c_browser__update_nr_entries(struct hist_browser *hb) { u64 nr_entries = 0; - struct rb_node *nd = rb_first(&hb->hists->entries); + struct rb_node *nd = rb_first_cached(&hb->hists->entries); while (nd) { struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 39db2ee32d48..751e1971456b 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -429,7 +429,7 @@ get_pair_fmt(struct hist_entry *he, struct diff_hpp_fmt *dfmt) static void hists__baseline_only(struct hists *hists) { - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *next; if (hists__has(hists, need_collapse)) @@ -437,13 +437,13 @@ static void hists__baseline_only(struct hists *hists) else root = hists->entries_in; - next = rb_first(root); + next = rb_first_cached(root); while (next != NULL) { struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&he->rb_node_in); if (!hist_entry__next_pair(he)) { - rb_erase(&he->rb_node_in, root); + rb_erase_cached(&he->rb_node_in, root); hist_entry__delete(he); } } @@ -451,7 +451,7 @@ static void hists__baseline_only(struct hists *hists) static void hists__precompute(struct hists *hists) { - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *next; if (hists__has(hists, need_collapse)) @@ -459,7 +459,7 @@ static void hists__precompute(struct hists *hists) else root = hists->entries_in; - next = rb_first(root); + next = rb_first_cached(root); while (next != NULL) { struct hist_entry *he, *pair; struct data__file *d; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5a486d4de56e..57b1d7495d02 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -367,7 +367,7 @@ static void perf_top__prompt_symbol(struct perf_top *top, const char *msg) if (p) *p = 0; - next = rb_first(&hists->entries); + next = rb_first_cached(&hists->entries); while (next) { n = rb_entry(next, struct hist_entry, rb_node); if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) { diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index b889a28fd80b..6b0649c8b33e 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -161,7 +161,7 @@ out: void print_hists_in(struct hists *hists) { int i = 0; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; if (hists__has(hists, need_collapse)) @@ -170,7 +170,7 @@ void print_hists_in(struct hists *hists) root = hists->entries_in; pr_info("----- %s --------\n", __func__); - node = rb_first(root); + node = rb_first_cached(root); while (node) { struct hist_entry *he; @@ -191,13 +191,13 @@ void print_hists_in(struct hists *hists) void print_hists_out(struct hists *hists) { int i = 0; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; root = &hists->entries; pr_info("----- %s --------\n", __func__); - node = rb_first(root); + node = rb_first_cached(root); while (node) { struct hist_entry *he; diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 65fe02bebbee..4ca417d1a06b 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -125,8 +125,8 @@ out: static void del_hist_entries(struct hists *hists) { struct hist_entry *he; - struct rb_root *root_in; - struct rb_root *root_out; + struct rb_root_cached *root_in; + struct rb_root_cached *root_out; struct rb_node *node; if (hists__has(hists, need_collapse)) @@ -136,12 +136,12 @@ static void del_hist_entries(struct hists *hists) root_out = &hists->entries; - while (!RB_EMPTY_ROOT(root_out)) { - node = rb_first(root_out); + while (!RB_EMPTY_ROOT(&root_out->rb_root)) { + node = rb_first_cached(root_out); he = rb_entry(node, struct hist_entry, rb_node); - rb_erase(node, root_out); - rb_erase(&he->rb_node_in, root_in); + rb_erase_cached(node, root_out); + rb_erase_cached(&he->rb_node_in, root_in); hist_entry__delete(he); } } @@ -198,7 +198,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec print_hists_out(hists); } - root = &hists->entries; + root = &hists->entries.rb_root; for (node = rb_first(root), i = 0; node && (he = rb_entry(node, struct hist_entry, rb_node)); node = rb_next(node), i++) { diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 9a9d06cb0222..af633db63f4d 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -142,7 +142,7 @@ static int find_sample(struct sample *samples, size_t nr_samples, static int __validate_match(struct hists *hists) { size_t count = 0; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; /* @@ -153,7 +153,7 @@ static int __validate_match(struct hists *hists) else root = hists->entries_in; - node = rb_first(root); + node = rb_first_cached(root); while (node) { struct hist_entry *he; @@ -192,7 +192,7 @@ static int __validate_link(struct hists *hists, int idx) size_t count = 0; size_t count_pair = 0; size_t count_dummy = 0; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; /* @@ -205,7 +205,7 @@ static int __validate_link(struct hists *hists, int idx) else root = hists->entries_in; - node = rb_first(root); + node = rb_first_cached(root); while (node) { struct hist_entry *he; diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index faacb4f41460..6c4bc68a77ee 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -91,8 +91,8 @@ out: static void del_hist_entries(struct hists *hists) { struct hist_entry *he; - struct rb_root *root_in; - struct rb_root *root_out; + struct rb_root_cached *root_in; + struct rb_root_cached *root_out; struct rb_node *node; if (hists__has(hists, need_collapse)) @@ -102,12 +102,12 @@ static void del_hist_entries(struct hists *hists) root_out = &hists->entries; - while (!RB_EMPTY_ROOT(root_out)) { - node = rb_first(root_out); + while (!RB_EMPTY_ROOT(&root_out->rb_root)) { + node = rb_first_cached(root_out); he = rb_entry(node, struct hist_entry, rb_node); - rb_erase(node, root_out); - rb_erase(&he->rb_node_in, root_in); + rb_erase_cached(node, root_out); + rb_erase_cached(&he->rb_node_in, root_in); hist_entry__delete(he); } } @@ -126,7 +126,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) int err; struct hists *hists = evsel__hists(evsel); struct hist_entry *he; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; field_order = NULL; @@ -162,7 +162,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) } root = &hists->entries; - node = rb_first(root); + node = rb_first_cached(root); he = rb_entry(node, struct hist_entry, rb_node); TEST_ASSERT_VAL("Invalid hist entry", !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") && @@ -228,7 +228,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) int err; struct hists *hists = evsel__hists(evsel); struct hist_entry *he; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; field_order = "overhead,cpu"; @@ -262,7 +262,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) } root = &hists->entries; - node = rb_first(root); + node = rb_first_cached(root); he = rb_entry(node, struct hist_entry, rb_node); TEST_ASSERT_VAL("Invalid hist entry", CPU(he) == 1 && PID(he) == 100 && he->stat.period == 300); @@ -284,7 +284,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) int err; struct hists *hists = evsel__hists(evsel); struct hist_entry *he; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; field_order = "comm,overhead,dso"; @@ -316,7 +316,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) } root = &hists->entries; - node = rb_first(root); + node = rb_first_cached(root); he = rb_entry(node, struct hist_entry, rb_node); TEST_ASSERT_VAL("Invalid hist entry", !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") && @@ -358,7 +358,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) int err; struct hists *hists = evsel__hists(evsel); struct hist_entry *he; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; field_order = "dso,sym,comm,overhead,dso"; @@ -394,7 +394,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) } root = &hists->entries; - node = rb_first(root); + node = rb_first_cached(root); he = rb_entry(node, struct hist_entry, rb_node); TEST_ASSERT_VAL("Invalid hist entry", !strcmp(DSO(he), "perf") && !strcmp(SYM(he), "cmd_record") && @@ -460,7 +460,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine) int err; struct hists *hists = evsel__hists(evsel); struct hist_entry *he; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; field_order = "cpu,pid,comm,dso,sym"; @@ -497,7 +497,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine) } root = &hists->entries; - node = rb_first(root); + node = rb_first_cached(root); he = rb_entry(node, struct hist_entry, rb_node); TEST_ASSERT_VAL("Invalid hist entry", diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index b32f505a7608..85790a4d1842 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -49,7 +49,7 @@ static int hist_browser__get_folding(struct hist_browser *browser) struct hists *hists = browser->hists; int unfolded_rows = 0; - for (nd = rb_first(&hists->entries); + for (nd = rb_first_cached(&hists->entries); (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; nd = rb_hierarchy_next(nd)) { struct hist_entry *he = @@ -267,7 +267,7 @@ static int hierarchy_count_rows(struct hist_browser *hb, struct hist_entry *he, if (he->has_no_entry) return 1; - node = rb_first(&he->hroot_out); + node = rb_first_cached(&he->hroot_out); while (node) { float percent; @@ -372,7 +372,7 @@ static void hist_entry__init_have_children(struct hist_entry *he) he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain); callchain__init_have_children(&he->sorted_chain); } else { - he->has_children = !RB_EMPTY_ROOT(&he->hroot_out); + he->has_children = !RB_EMPTY_ROOT(&he->hroot_out.rb_root); } he->init_have_children = true; @@ -508,7 +508,7 @@ static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he, struct hist_entry *child; int n = 0; - for (nd = rb_first(&he->hroot_out); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&he->hroot_out); nd; nd = rb_next(nd)) { child = rb_entry(nd, struct hist_entry, rb_node); percent = hist_entry__get_percent_limit(child); if (!child->filtered && percent >= hb->min_pcnt) @@ -566,7 +566,7 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold) struct rb_node *nd; struct hist_entry *he; - nd = rb_first(&browser->hists->entries); + nd = rb_first_cached(&browser->hists->entries); while (nd) { he = rb_entry(nd, struct hist_entry, rb_node); @@ -1738,7 +1738,7 @@ static void ui_browser__hists_init_top(struct ui_browser *browser) struct hist_browser *hb; hb = container_of(browser, struct hist_browser, b); - browser->top = rb_first(&hb->hists->entries); + browser->top = rb_first_cached(&hb->hists->entries); } } @@ -2649,7 +2649,7 @@ add_socket_opt(struct hist_browser *browser, struct popup_action *act, static void hist_browser__update_nr_entries(struct hist_browser *hb) { u64 nr_entries = 0; - struct rb_node *nd = rb_first(&hb->hists->entries); + struct rb_node *nd = rb_first_cached(&hb->hists->entries); if (hb->min_pcnt == 0 && !symbol_conf.report_hierarchy) { hb->nr_non_filtered_entries = hb->hists->nr_non_filtered_entries; @@ -2669,7 +2669,7 @@ static void hist_browser__update_percent_limit(struct hist_browser *hb, double percent) { struct hist_entry *he; - struct rb_node *nd = rb_first(&hb->hists->entries); + struct rb_node *nd = rb_first_cached(&hb->hists->entries); u64 total = hists__total_period(hb->hists); u64 min_callchain_hits = total * (percent / 100); diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 4ab663ec3e5e..74414ccd8c84 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -353,7 +353,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, g_object_unref(GTK_TREE_MODEL(store)); - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); GtkTreeIter iter; u64 total = hists__total_period(h->hists); @@ -401,7 +401,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, } static void perf_gtk__add_hierarchy_entries(struct hists *hists, - struct rb_root *root, + struct rb_root_cached *root, GtkTreeStore *store, GtkTreeIter *parent, struct perf_hpp *hpp, @@ -415,7 +415,7 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists, u64 total = hists__total_period(hists); int size; - for (node = rb_first(root); node; node = rb_next(node)) { + for (node = rb_first_cached(root); node; node = rb_next(node)) { GtkTreeIter iter; float percent; char *bf; diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 74c4ae1f0a05..2a9fa4dcbc2a 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -788,7 +788,8 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, indent = hists__overhead_width(hists) + 4; - for (nd = rb_first(&hists->entries); nd; nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) { + for (nd = rb_first_cached(&hists->entries); nd; + nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); float percent; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 8aad8330e392..9e7a8e044a0a 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -209,7 +209,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) void hists__output_recalc_col_len(struct hists *hists, int max_rows) { - struct rb_node *next = rb_first(&hists->entries); + struct rb_node *next = rb_first_cached(&hists->entries); struct hist_entry *n; int row = 0; @@ -296,7 +296,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) if (!he->leaf) { struct hist_entry *child; - struct rb_node *node = rb_first(&he->hroot_out); + struct rb_node *node = rb_first_cached(&he->hroot_out); while (node) { child = rb_entry(node, struct hist_entry, rb_node); node = rb_next(node); @@ -311,8 +311,8 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) static void hists__delete_entry(struct hists *hists, struct hist_entry *he) { - struct rb_root *root_in; - struct rb_root *root_out; + struct rb_root_cached *root_in; + struct rb_root_cached *root_out; if (he->parent_he) { root_in = &he->parent_he->hroot_in; @@ -325,8 +325,8 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he) root_out = &hists->entries; } - rb_erase(&he->rb_node_in, root_in); - rb_erase(&he->rb_node, root_out); + rb_erase_cached(&he->rb_node_in, root_in); + rb_erase_cached(&he->rb_node, root_out); --hists->nr_entries; if (!he->filtered) @@ -337,7 +337,7 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he) void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) { - struct rb_node *next = rb_first(&hists->entries); + struct rb_node *next = rb_first_cached(&hists->entries); struct hist_entry *n; while (next) { @@ -353,7 +353,7 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) void hists__delete_entries(struct hists *hists) { - struct rb_node *next = rb_first(&hists->entries); + struct rb_node *next = rb_first_cached(&hists->entries); struct hist_entry *n; while (next) { @@ -435,8 +435,8 @@ static int hist_entry__init(struct hist_entry *he, } INIT_LIST_HEAD(&he->pairs.node); thread__get(he->thread); - he->hroot_in = RB_ROOT; - he->hroot_out = RB_ROOT; + he->hroot_in = RB_ROOT_CACHED; + he->hroot_out = RB_ROOT_CACHED; if (!symbol_conf.report_hierarchy) he->leaf = true; @@ -513,8 +513,9 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, int64_t cmp; u64 period = entry->stat.period; u64 weight = entry->stat.weight; + bool leftmost = true; - p = &hists->entries_in->rb_node; + p = &hists->entries_in->rb_root.rb_node; while (*p != NULL) { parent = *p; @@ -557,8 +558,10 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (cmp < 0) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } he = hist_entry__new(entry, sample_self); @@ -570,7 +573,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, hists->nr_entries++; rb_link_node(&he->rb_node_in, parent, p); - rb_insert_color(&he->rb_node_in, hists->entries_in); + rb_insert_color_cached(&he->rb_node_in, hists->entries_in, leftmost); out: if (sample_self) he_stat__add_cpumode_period(&he->stat, al->cpumode, period); @@ -1279,16 +1282,17 @@ static void hist_entry__apply_hierarchy_filters(struct hist_entry *he) } static struct hist_entry *hierarchy_insert_entry(struct hists *hists, - struct rb_root *root, + struct rb_root_cached *root, struct hist_entry *he, struct hist_entry *parent_he, struct perf_hpp_list *hpp_list) { - struct rb_node **p = &root->rb_node; + struct rb_node **p = &root->rb_root.rb_node; struct rb_node *parent = NULL; struct hist_entry *iter, *new; struct perf_hpp_fmt *fmt; int64_t cmp; + bool leftmost = true; while (*p != NULL) { parent = *p; @@ -1308,8 +1312,10 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, if (cmp < 0) p = &parent->rb_left; - else + else { p = &parent->rb_right; + leftmost = false; + } } new = hist_entry__new(he, true); @@ -1343,12 +1349,12 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, } rb_link_node(&new->rb_node_in, parent, p); - rb_insert_color(&new->rb_node_in, root); + rb_insert_color_cached(&new->rb_node_in, root, leftmost); return new; } static int hists__hierarchy_insert_entry(struct hists *hists, - struct rb_root *root, + struct rb_root_cached *root, struct hist_entry *he) { struct perf_hpp_list_node *node; @@ -1395,13 +1401,14 @@ static int hists__hierarchy_insert_entry(struct hists *hists, } static int hists__collapse_insert_entry(struct hists *hists, - struct rb_root *root, + struct rb_root_cached *root, struct hist_entry *he) { - struct rb_node **p = &root->rb_node; + struct rb_node **p = &root->rb_root.rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; int64_t cmp; + bool leftmost = true; if (symbol_conf.report_hierarchy) return hists__hierarchy_insert_entry(hists, root, he); @@ -1432,19 +1439,21 @@ static int hists__collapse_insert_entry(struct hists *hists, if (cmp < 0) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } hists->nr_entries++; rb_link_node(&he->rb_node_in, parent, p); - rb_insert_color(&he->rb_node_in, root); + rb_insert_color_cached(&he->rb_node_in, root, leftmost); return 1; } -struct rb_root *hists__get_rotate_entries_in(struct hists *hists) +struct rb_root_cached *hists__get_rotate_entries_in(struct hists *hists) { - struct rb_root *root; + struct rb_root_cached *root; pthread_mutex_lock(&hists->lock); @@ -1467,7 +1476,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he) int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) { - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *next; struct hist_entry *n; int ret; @@ -1479,7 +1488,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) root = hists__get_rotate_entries_in(hists); - next = rb_first(root); + next = rb_first_cached(root); while (next) { if (session_done()) @@ -1487,7 +1496,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) n = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&n->rb_node_in); - rb_erase(&n->rb_node_in, root); + rb_erase_cached(&n->rb_node_in, root); ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, n); if (ret < 0) return -1; @@ -1558,7 +1567,7 @@ static void hierarchy_recalc_total_periods(struct hists *hists) struct rb_node *node; struct hist_entry *he; - node = rb_first(&hists->entries); + node = rb_first_cached(&hists->entries); hists->stats.total_period = 0; hists->stats.total_non_filtered_period = 0; @@ -1578,13 +1587,14 @@ static void hierarchy_recalc_total_periods(struct hists *hists) } } -static void hierarchy_insert_output_entry(struct rb_root *root, +static void hierarchy_insert_output_entry(struct rb_root_cached *root, struct hist_entry *he) { - struct rb_node **p = &root->rb_node; + struct rb_node **p = &root->rb_root.rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; struct perf_hpp_fmt *fmt; + bool leftmost = true; while (*p != NULL) { parent = *p; @@ -1592,12 +1602,14 @@ static void hierarchy_insert_output_entry(struct rb_root *root, if (hist_entry__sort(he, iter) > 0) p = &parent->rb_left; - else + else { p = &parent->rb_right; + leftmost = false; + } } rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, root); + rb_insert_color_cached(&he->rb_node, root, leftmost); /* update column width of dynamic entry */ perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { @@ -1608,16 +1620,16 @@ static void hierarchy_insert_output_entry(struct rb_root *root, static void hists__hierarchy_output_resort(struct hists *hists, struct ui_progress *prog, - struct rb_root *root_in, - struct rb_root *root_out, + struct rb_root_cached *root_in, + struct rb_root_cached *root_out, u64 min_callchain_hits, bool use_callchain) { struct rb_node *node; struct hist_entry *he; - *root_out = RB_ROOT; - node = rb_first(root_in); + *root_out = RB_ROOT_CACHED; + node = rb_first_cached(root_in); while (node) { he = rb_entry(node, struct hist_entry, rb_node_in); @@ -1660,15 +1672,16 @@ static void hists__hierarchy_output_resort(struct hists *hists, } } -static void __hists__insert_output_entry(struct rb_root *entries, +static void __hists__insert_output_entry(struct rb_root_cached *entries, struct hist_entry *he, u64 min_callchain_hits, bool use_callchain) { - struct rb_node **p = &entries->rb_node; + struct rb_node **p = &entries->rb_root.rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; struct perf_hpp_fmt *fmt; + bool leftmost = true; if (use_callchain) { if (callchain_param.mode == CHAIN_GRAPH_REL) { @@ -1689,12 +1702,14 @@ static void __hists__insert_output_entry(struct rb_root *entries, if (hist_entry__sort(he, iter) > 0) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, entries); + rb_insert_color_cached(&he->rb_node, entries, leftmost); perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { if (perf_hpp__is_dynamic_entry(fmt) && @@ -1706,7 +1721,7 @@ static void __hists__insert_output_entry(struct rb_root *entries, static void output_resort(struct hists *hists, struct ui_progress *prog, bool use_callchain, hists__resort_cb_t cb) { - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *next; struct hist_entry *n; u64 callchain_total; @@ -1736,8 +1751,8 @@ static void output_resort(struct hists *hists, struct ui_progress *prog, else root = hists->entries_in; - next = rb_first(root); - hists->entries = RB_ROOT; + next = rb_first_cached(root); + hists->entries = RB_ROOT_CACHED; while (next) { n = rb_entry(next, struct hist_entry, rb_node_in); @@ -1798,7 +1813,7 @@ struct rb_node *rb_hierarchy_last(struct rb_node *node) struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); while (can_goto_child(he, HMD_NORMAL)) { - node = rb_last(&he->hroot_out); + node = rb_last(&he->hroot_out.rb_root); he = rb_entry(node, struct hist_entry, rb_node); } return node; @@ -1809,7 +1824,7 @@ struct rb_node *__rb_hierarchy_next(struct rb_node *node, enum hierarchy_move_di struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); if (can_goto_child(he, hmd)) - node = rb_first(&he->hroot_out); + node = rb_first_cached(&he->hroot_out); else node = rb_next(node); @@ -1847,7 +1862,7 @@ bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit) if (he->leaf) return false; - node = rb_first(&he->hroot_out); + node = rb_first_cached(&he->hroot_out); child = rb_entry(node, struct hist_entry, rb_node); while (node && child->filtered) { @@ -1965,7 +1980,7 @@ static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t fil hists__reset_filter_stats(hists); hists__reset_col_len(hists); - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); if (filter(hists, h)) @@ -1975,13 +1990,15 @@ static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t fil } } -static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he) +static void resort_filtered_entry(struct rb_root_cached *root, + struct hist_entry *he) { - struct rb_node **p = &root->rb_node; + struct rb_node **p = &root->rb_root.rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; - struct rb_root new_root = RB_ROOT; + struct rb_root_cached new_root = RB_ROOT_CACHED; struct rb_node *nd; + bool leftmost = true; while (*p != NULL) { parent = *p; @@ -1989,22 +2006,24 @@ static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he) if (hist_entry__sort(he, iter) > 0) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, root); + rb_insert_color_cached(&he->rb_node, root, leftmost); if (he->leaf || he->filtered) return; - nd = rb_first(&he->hroot_out); + nd = rb_first_cached(&he->hroot_out); while (nd) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); nd = rb_next(nd); - rb_erase(&h->rb_node, &he->hroot_out); + rb_erase_cached(&h->rb_node, &he->hroot_out); resort_filtered_entry(&new_root, h); } @@ -2015,14 +2034,14 @@ static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he) static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg) { struct rb_node *nd; - struct rb_root new_root = RB_ROOT; + struct rb_root_cached new_root = RB_ROOT_CACHED; hists->stats.nr_non_filtered_samples = 0; hists__reset_filter_stats(hists); hists__reset_col_len(hists); - nd = rb_first(&hists->entries); + nd = rb_first_cached(&hists->entries); while (nd) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); int ret; @@ -2066,12 +2085,12 @@ static void hists__filter_hierarchy(struct hists *hists, int type, const void *a * resort output after applying a new filter since filter in a lower * hierarchy can change periods in a upper hierarchy. */ - nd = rb_first(&hists->entries); + nd = rb_first_cached(&hists->entries); while (nd) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); nd = rb_next(nd); - rb_erase(&h->rb_node, &hists->entries); + rb_erase_cached(&h->rb_node, &hists->entries); resort_filtered_entry(&new_root, h); } @@ -2140,18 +2159,19 @@ void hists__inc_nr_samples(struct hists *hists, bool filtered) static struct hist_entry *hists__add_dummy_entry(struct hists *hists, struct hist_entry *pair) { - struct rb_root *root; + struct rb_root_cached *root; struct rb_node **p; struct rb_node *parent = NULL; struct hist_entry *he; int64_t cmp; + bool leftmost = true; if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; - p = &root->rb_node; + p = &root->rb_root.rb_node; while (*p != NULL) { parent = *p; @@ -2164,8 +2184,10 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists, if (cmp < 0) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } he = hist_entry__new(pair, true); @@ -2175,7 +2197,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists, if (symbol_conf.cumulate_callchain) memset(he->stat_acc, 0, sizeof(he->stat)); rb_link_node(&he->rb_node_in, parent, p); - rb_insert_color(&he->rb_node_in, root); + rb_insert_color_cached(&he->rb_node_in, root, leftmost); hists__inc_stats(hists, he); he->dummy = true; } @@ -2184,15 +2206,16 @@ out: } static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists, - struct rb_root *root, + struct rb_root_cached *root, struct hist_entry *pair) { struct rb_node **p; struct rb_node *parent = NULL; struct hist_entry *he; struct perf_hpp_fmt *fmt; + bool leftmost = true; - p = &root->rb_node; + p = &root->rb_root.rb_node; while (*p != NULL) { int64_t cmp = 0; @@ -2209,14 +2232,16 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists, if (cmp < 0) p = &parent->rb_left; - else + else { p = &parent->rb_right; + leftmost = false; + } } he = hist_entry__new(pair, true); if (he) { rb_link_node(&he->rb_node_in, parent, p); - rb_insert_color(&he->rb_node_in, root); + rb_insert_color_cached(&he->rb_node_in, root, leftmost); he->dummy = true; he->hists = hists; @@ -2233,9 +2258,9 @@ static struct hist_entry *hists__find_entry(struct hists *hists, struct rb_node *n; if (hists__has(hists, need_collapse)) - n = hists->entries_collapsed.rb_node; + n = hists->entries_collapsed.rb_root.rb_node; else - n = hists->entries_in->rb_node; + n = hists->entries_in->rb_root.rb_node; while (n) { struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node_in); @@ -2252,10 +2277,10 @@ static struct hist_entry *hists__find_entry(struct hists *hists, return NULL; } -static struct hist_entry *hists__find_hierarchy_entry(struct rb_root *root, +static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *root, struct hist_entry *he) { - struct rb_node *n = root->rb_node; + struct rb_node *n = root->rb_root.rb_node; while (n) { struct hist_entry *iter; @@ -2280,13 +2305,13 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root *root, return NULL; } -static void hists__match_hierarchy(struct rb_root *leader_root, - struct rb_root *other_root) +static void hists__match_hierarchy(struct rb_root_cached *leader_root, + struct rb_root_cached *other_root) { struct rb_node *nd; struct hist_entry *pos, *pair; - for (nd = rb_first(leader_root); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(leader_root); nd; nd = rb_next(nd)) { pos = rb_entry(nd, struct hist_entry, rb_node_in); pair = hists__find_hierarchy_entry(other_root, pos); @@ -2302,7 +2327,7 @@ static void hists__match_hierarchy(struct rb_root *leader_root, */ void hists__match(struct hists *leader, struct hists *other) { - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *nd; struct hist_entry *pos, *pair; @@ -2317,7 +2342,7 @@ void hists__match(struct hists *leader, struct hists *other) else root = leader->entries_in; - for (nd = rb_first(root); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(root); nd; nd = rb_next(nd)) { pos = rb_entry(nd, struct hist_entry, rb_node_in); pair = hists__find_entry(other, pos); @@ -2328,13 +2353,13 @@ void hists__match(struct hists *leader, struct hists *other) static int hists__link_hierarchy(struct hists *leader_hists, struct hist_entry *parent, - struct rb_root *leader_root, - struct rb_root *other_root) + struct rb_root_cached *leader_root, + struct rb_root_cached *other_root) { struct rb_node *nd; struct hist_entry *pos, *leader; - for (nd = rb_first(other_root); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(other_root); nd; nd = rb_next(nd)) { pos = rb_entry(nd, struct hist_entry, rb_node_in); if (hist_entry__has_pairs(pos)) { @@ -2377,7 +2402,7 @@ static int hists__link_hierarchy(struct hists *leader_hists, */ int hists__link(struct hists *leader, struct hists *other) { - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *nd; struct hist_entry *pos, *pair; @@ -2393,7 +2418,7 @@ int hists__link(struct hists *leader, struct hists *other) else root = other->entries_in; - for (nd = rb_first(root); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(root); nd; nd = rb_next(nd)) { pos = rb_entry(nd, struct hist_entry, rb_node_in); if (!hist_entry__has_pairs(pos)) { @@ -2566,10 +2591,10 @@ int perf_hist_config(const char *var, const char *value) int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) { memset(hists, 0, sizeof(*hists)); - hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT; + hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT_CACHED; hists->entries_in = &hists->entries_in_array[0]; - hists->entries_collapsed = RB_ROOT; - hists->entries = RB_ROOT; + hists->entries_collapsed = RB_ROOT_CACHED; + hists->entries = RB_ROOT_CACHED; pthread_mutex_init(&hists->lock, NULL); hists->socket_filter = -1; hists->hpp_list = hpp_list; @@ -2577,14 +2602,14 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) return 0; } -static void hists__delete_remaining_entries(struct rb_root *root) +static void hists__delete_remaining_entries(struct rb_root_cached *root) { struct rb_node *node; struct hist_entry *he; - while (!RB_EMPTY_ROOT(root)) { - node = rb_first(root); - rb_erase(node, root); + while (!RB_EMPTY_ROOT(&root->rb_root)) { + node = rb_first_cached(root); + rb_erase_cached(node, root); he = rb_entry(node, struct hist_entry, rb_node_in); hist_entry__delete(he); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 664b5eda8d51..08267af7439c 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -70,10 +70,10 @@ struct thread; struct dso; struct hists { - struct rb_root entries_in_array[2]; - struct rb_root *entries_in; - struct rb_root entries; - struct rb_root entries_collapsed; + struct rb_root_cached entries_in_array[2]; + struct rb_root_cached *entries_in; + struct rb_root_cached entries; + struct rb_root_cached entries_collapsed; u64 nr_entries; u64 nr_non_filtered_entries; u64 callchain_period; @@ -230,7 +230,7 @@ static __pure inline bool hists__has_callchains(struct hists *hists) int hists__init(void); int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list); -struct rb_root *hists__get_rotate_entries_in(struct hists *hists); +struct rb_root_cached *hists__get_rotate_entries_in(struct hists *hists); struct perf_hpp { char *buf; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 130fe37fe2df..dd6312876492 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -145,8 +145,8 @@ struct hist_entry { union { /* this is for hierarchical entry structure */ struct { - struct rb_root hroot_in; - struct rb_root hroot_out; + struct rb_root_cached hroot_in; + struct rb_root_cached hroot_out; }; /* non-leaf entries */ struct rb_root sorted_chain; /* leaf entry has callchains */ }; -- cgit v1.2.3 From 811184fb6977bb02c21512d8af6a613a7ebce329 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 22 Jan 2019 13:02:18 -0800 Subject: perf bpf: Fix synthesized PERF_RECORD_KSYMBOL/BPF_EVENT Added missing machine->id_hdr_size to event->header.size. Also fixed size of PERF_RECORD_KSYMBOL by removing extra bytes for name. Committer notes: We need to malloc that extra machine->id_hdr_size at the start of perf_event__synthesize_bpf_events() and also need to cast the event to (void *) otherwise we segfault, fix it. Reported-by: Arnaldo Carvalho de Melo Suggested-by: Jiri Olsa Signed-off-by: Song Liu Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Peter Zijlstra Cc: kernel-team@fb.com Fixes: 7b612e291a5a ("perf tools: Synthesize PERF_RECORD_* for loaded BPF programs") Link: http://lkml.kernel.org/r/20190122210218.358664-1-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-event.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 01e1dc1bb7fb..796ef793f4ce 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -7,6 +7,7 @@ #include "bpf-event.h" #include "debug.h" #include "symbol.h" +#include "machine.h" #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) @@ -149,7 +150,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, *ksymbol_event = (struct ksymbol_event){ .header = { .type = PERF_RECORD_KSYMBOL, - .size = sizeof(struct ksymbol_event), + .size = offsetof(struct ksymbol_event, name), }, .addr = prog_addrs[i], .len = prog_lens[i], @@ -178,6 +179,9 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, ksymbol_event->header.size += PERF_ALIGN(name_len + 1, sizeof(u64)); + + memset((void *)event + event->header.size, 0, machine->id_hdr_size); + event->header.size += machine->id_hdr_size; err = perf_tool__process_synth_event(tool, event, machine, process); } @@ -194,6 +198,8 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, .id = info.id, }; memcpy(bpf_event->tag, prog_tags[i], BPF_TAG_SIZE); + memset((void *)event + event->header.size, 0, machine->id_hdr_size); + event->header.size += machine->id_hdr_size; err = perf_tool__process_synth_event(tool, event, machine, process); } @@ -217,7 +223,7 @@ int perf_event__synthesize_bpf_events(struct perf_tool *tool, int err; int fd; - event = malloc(sizeof(event->bpf_event) + KSYM_NAME_LEN); + event = malloc(sizeof(event->bpf_event) + KSYM_NAME_LEN + machine->id_hdr_size); if (!event) return -1; while (true) { -- cgit v1.2.3 From cc437642255224e4140fed1f3e3156fc8ad91903 Mon Sep 17 00:00:00 2001 From: Tony Jones Date: Wed, 23 Jan 2019 16:52:24 -0800 Subject: perf script python: Add trace_context extension module to sys.modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Python3, the result of PyModule_Create (called from scripts/python/Perf-Trace-Util/Context.c) is not automatically added to sys.modules. See: https://bugs.python.org/issue4592 Below is the observed behavior without the fix: # ldd /usr/bin/perf | grep -i python libpython3.6m.so.1.0 => /usr/lib64/libpython3.6m.so.1.0 (0x00007f8e1dfb2000) # perf record /bin/false [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.015 MB perf.data (17 samples) ] # perf script -g python | cat generated Python script: perf-script.py # perf script -s ./perf-script.py Traceback (most recent call last): File "./perf-script.py", line 18, in from perf_trace_context import * ModuleNotFoundError: No module named 'perf_trace_context' Error running python script ./perf-script.py # Committer notes: To build with python3 use: $ make -C tools/perf PYTHON=python3 Use a non-const variable to pass the 'name' arg to PyImport_AppendInittab(), as python2.6 has that as 'char *', which ends up trowing this in some environments: CC /tmp/build/perf/util/parse-branch-options.o util/scripting-engines/trace-event-python.c: In function 'python_start_script': util/scripting-engines/trace-event-python.c:1520:2: error: passing argument 1 of 'PyImport_AppendInittab' discards 'const' qualifier from pointer target type [-Werror] PyImport_AppendInittab("perf_trace_context", initfunc); ^ In file included from /usr/include/python2.6/Python.h:130:0, from util/scripting-engines/trace-event-python.c:22: /usr/include/python2.6/import.h:54:17: note: expected 'char *' but argument is of type 'const char *' PyAPI_FUNC(int) PyImport_AppendInittab(char *name, void (*initfunc)(void)); ^ cc1: all warnings being treated as errors Signed-off-by: Tony Jones Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Jaroslav Škarvada Cc: Jonathan Corbet Cc: Ravi Bangoria Cc: Seeteena Thoufeek Fixes: 66dfdff03d19 ("perf tools: Add Python 3 support") Link: http://lkml.kernel.org/r/20190124005229.16146-2-tonyj@suse.de Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-python.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 87ef16a1b17e..315905c748fa 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1494,34 +1494,40 @@ static void _free_command_line(wchar_t **command_line, int num) static int python_start_script(const char *script, int argc, const char **argv) { struct tables *tables = &tables_global; + PyMODINIT_FUNC (*initfunc)(void); #if PY_MAJOR_VERSION < 3 const char **command_line; #else wchar_t **command_line; #endif - char buf[PATH_MAX]; + /* + * Use a non-const name variable to cope with python 2.6's + * PyImport_AppendInittab prototype + */ + char buf[PATH_MAX], name[19] = "perf_trace_context"; int i, err = 0; FILE *fp; #if PY_MAJOR_VERSION < 3 + initfunc = initperf_trace_context; command_line = malloc((argc + 1) * sizeof(const char *)); command_line[0] = script; for (i = 1; i < argc + 1; i++) command_line[i] = argv[i - 1]; #else + initfunc = PyInit_perf_trace_context; command_line = malloc((argc + 1) * sizeof(wchar_t *)); command_line[0] = Py_DecodeLocale(script, NULL); for (i = 1; i < argc + 1; i++) command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); #endif + PyImport_AppendInittab(name, initfunc); Py_Initialize(); #if PY_MAJOR_VERSION < 3 - initperf_trace_context(); PySys_SetArgv(argc + 1, (char **)command_line); #else - PyInit_perf_trace_context(); PySys_SetArgv(argc + 1, command_line); #endif -- cgit v1.2.3 From 72e0b15cb24a497d7d0d4707cf51ff40c185ae8c Mon Sep 17 00:00:00 2001 From: Tony Jones Date: Wed, 23 Jan 2019 16:52:25 -0800 Subject: perf script python: Use PyBytes for attr in trace-event-python MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With Python3. PyUnicode_FromStringAndSize is unsafe to call on attr and will return NULL. Use _PyBytes_FromStringAndSize (as with raw_buf). Below is the observed behavior without the fix. Note it is first necessary to apply the prior fix (Add trace_context extension module to sys,modules): # ldd /usr/bin/perf | grep -i python libpython3.6m.so.1.0 => /usr/lib64/libpython3.6m.so.1.0 (0x00007f8e1dfb2000) # perf record -e raw_syscalls:sys_enter /bin/false [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.018 MB perf.data (21 samples) ] # perf script -g python | cat generated Python script: perf-script.py # perf script -s ./perf-script.py in trace_begin Segmentation fault (core dumped) Signed-off-by: Tony Jones Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Jaroslav Škarvada Cc: Jonathan Corbet Cc: Ravi Bangoria Cc: Seeteena Thoufeek Fixes: 66dfdff03d19 ("perf tools: Add Python 3 support") Link: http://lkml.kernel.org/r/20190124005229.16146-3-tonyj@suse.de Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-python.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 315905c748fa..7059d1be2d09 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -733,8 +733,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, Py_FatalError("couldn't create Python dictionary"); pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel))); - pydict_set_item_string_decref(dict, "attr", _PyUnicode_FromStringAndSize( - (const char *)&evsel->attr, sizeof(evsel->attr))); + pydict_set_item_string_decref(dict, "attr", _PyBytes_FromStringAndSize((const char *)&evsel->attr, sizeof(evsel->attr))); pydict_set_item_string_decref(dict_sample, "pid", _PyLong_FromLong(sample->pid)); -- cgit v1.2.3 From 099b79ca25c507ecbb25fb04f434e10415b68de0 Mon Sep 17 00:00:00 2001 From: Tony Jones Date: Wed, 23 Jan 2019 16:52:26 -0800 Subject: perf script python: Remove explicit shebang from setup.py Makefile.perf invokes setup.py via an explicit invocation of python (PYTHON_WORD) so there is therefore no need for an explicit shebang. Also most distros follow pep-0394 which recommends that /usr/bin/python refer only to v2 and so may not exist on the system (if PYTHON=python3). Signed-off-by: Tony Jones Acked-by: Jiri Olsa Cc: Jonathan Corbet Cc: Ravi Bangoria Cc: Seeteena Thoufeek Link: http://lkml.kernel.org/r/20190124005229.16146-4-tonyj@suse.de Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/setup.py | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 64d1f36dee99..d3ffc18424b5 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -1,5 +1,3 @@ -#!/usr/bin/python - from os import getenv from subprocess import Popen, PIPE from re import sub -- cgit v1.2.3 From 4fed072609b8aae27eac7169033f762867a5ab4c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 27 Jan 2019 11:31:39 +0100 Subject: perf srccode: Move struct definition from map.h to srccode.h To reduce the header dependencies, since we already have a srccode.h header, then there is where the 'struct srccode_state' should be, and map.h, that is more widely used should have just a forward declaraion of 'struct srccode_state'. Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-64lrkjjaa7wlo1zi2gr5u3es@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.h | 13 +------------ tools/perf/util/srccode.h | 13 +++++++++++++ tools/perf/util/thread.h | 2 +- 3 files changed, 15 insertions(+), 13 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 09282aa45c80..5a889db4fca5 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -174,18 +174,7 @@ char *map__srcline(struct map *map, u64 addr, struct symbol *sym); int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, FILE *fp); -struct srccode_state { - char *srcfile; - unsigned line; -}; - -static inline void srccode_state_init(struct srccode_state *state) -{ - state->srcfile = NULL; - state->line = 0; -} - -void srccode_state_free(struct srccode_state *state); +struct srccode_state; int map__fprintf_srccode(struct map *map, u64 addr, FILE *fp, struct srccode_state *state); diff --git a/tools/perf/util/srccode.h b/tools/perf/util/srccode.h index e500a746d5f1..1b5ed769779c 100644 --- a/tools/perf/util/srccode.h +++ b/tools/perf/util/srccode.h @@ -1,6 +1,19 @@ #ifndef SRCCODE_H #define SRCCODE_H 1 +struct srccode_state { + char *srcfile; + unsigned line; +}; + +static inline void srccode_state_init(struct srccode_state *state) +{ + state->srcfile = NULL; + state->line = 0; +} + +void srccode_state_free(struct srccode_state *state); + /* Result is not 0 terminated */ char *find_sourceline(char *fn, unsigned line, int *lenp); diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index f3c939150f90..856cf1a9040a 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -7,8 +7,8 @@ #include #include #include +#include "srccode.h" #include "symbol.h" -#include "map.h" #include #include #include "rwsem.h" -- cgit v1.2.3 From 7b644f9ad18f74aeac42360bee9c4c1cf874f4c0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 27 Jan 2019 11:55:39 +0100 Subject: perf callchain: Uninline callchain_cursor_reset() to remove map.h dependency That was the only thing that made including map.h in callchain.h a requiriment, so uninline it and just add a 'struct map' forward declaration. Cc: Adrian Hunter Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-7fjz4hvv1bpzqaeriku44fn4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/gtk/annotate.c | 1 + tools/perf/ui/stdio/hist.c | 1 + tools/perf/util/callchain.c | 15 +++++++++++++++ tools/perf/util/callchain.h | 18 +++--------------- 4 files changed, 20 insertions(+), 15 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index f22a7d142ada..f42666ee0672 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -4,6 +4,7 @@ #include "util/debug.h" #include "util/annotate.h" #include "util/evsel.h" +#include "util/map.h" #include "ui/helpline.h" #include #include diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 2a9fa4dcbc2a..814abf5e9a5c 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -4,6 +4,7 @@ #include "../../util/util.h" #include "../../util/hist.h" +#include "../../util/map.h" #include "../../util/sort.h" #include "../../util/evsel.h" #include "../../util/srcline.h" diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index dc2212e12184..2974950039ac 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1577,3 +1577,18 @@ int callchain_cursor__copy(struct callchain_cursor *dst, return rc; } + +/* + * Initialize a cursor before adding entries inside, but keep + * the previously allocated entries as a cache. + */ +void callchain_cursor_reset(struct callchain_cursor *cursor) +{ + struct callchain_cursor_node *node; + + cursor->nr = 0; + cursor->last = &cursor->first; + + for (node = cursor->first; node != NULL; node = node->next) + map__zput(node->map); +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 8afe1622edf3..b6f18c2d4b14 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -5,10 +5,11 @@ #include #include #include "event.h" -#include "map.h" #include "symbol.h" #include "branch.h" +struct map; + #define HELP_PAD "\t\t\t\t" #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n" @@ -187,20 +188,7 @@ int callchain_append(struct callchain_root *root, int callchain_merge(struct callchain_cursor *cursor, struct callchain_root *dst, struct callchain_root *src); -/* - * Initialize a cursor before adding entries inside, but keep - * the previously allocated entries as a cache. - */ -static inline void callchain_cursor_reset(struct callchain_cursor *cursor) -{ - struct callchain_cursor_node *node; - - cursor->nr = 0; - cursor->last = &cursor->first; - - for (node = cursor->first; node != NULL; node = node->next) - map__zput(node->map); -} +void callchain_cursor_reset(struct callchain_cursor *cursor); int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, struct map *map, struct symbol *sym, -- cgit v1.2.3 From 9f4e8ff27a807dd6919faa0ecb2a152c57cfa5b2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 27 Jan 2019 12:02:41 +0100 Subject: perf symbols: Introduce map_symbol.h To allow headers just wanting this definition to be able to get it without all the things in symbol.h, to reduce the include dep tree. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-l32z2qyhs6fe8unf4gk2ead2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.h | 2 +- tools/perf/util/hist.h | 1 + tools/perf/util/map_symbol.h | 22 ++++++++++++++++++++++ tools/perf/util/sort.h | 3 ++- tools/perf/util/symbol.h | 14 +------------- 5 files changed, 27 insertions(+), 15 deletions(-) create mode 100644 tools/perf/util/map_symbol.h (limited to 'tools/perf/util') diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index b6f18c2d4b14..80e056a3d882 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -5,7 +5,7 @@ #include #include #include "event.h" -#include "symbol.h" +#include "map_symbol.h" #include "branch.h" struct map; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 08267af7439c..f50aad24928e 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -8,6 +8,7 @@ #include "evsel.h" #include "header.h" #include "color.h" +#include "symbol.h" #include "ui/progress.h" struct hist_entry; diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h new file mode 100644 index 000000000000..5a1aed9f6bb4 --- /dev/null +++ b/tools/perf/util/map_symbol.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __PERF_MAP_SYMBOL +#define __PERF_MAP_SYMBOL 1 + +#include + +struct map; +struct symbol; + +struct map_symbol { + struct map *map; + struct symbol *sym; +}; + +struct addr_map_symbol { + struct map *map; + struct symbol *sym; + u64 addr; + u64 al_addr; + u64 phys_addr; +}; +#endif // __PERF_MAP_SYMBOL diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index dd6312876492..2fbee0b1011c 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -9,7 +9,8 @@ #include #include "cache.h" #include -#include "symbol.h" +#include "map_symbol.h" +#include "symbol_conf.h" #include "string.h" #include "callchain.h" #include "values.h" diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 56e2bcb907cc..9a8fe012910a 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -8,6 +8,7 @@ #include #include #include +#include "map_symbol.h" #include "branch.h" #include "path.h" #include "symbol_conf.h" @@ -115,19 +116,6 @@ struct ref_reloc_sym { u64 unrelocated_addr; }; -struct map_symbol { - struct map *map; - struct symbol *sym; -}; - -struct addr_map_symbol { - struct map *map; - struct symbol *sym; - u64 addr; - u64 al_addr; - u64 phys_addr; -}; - struct branch_info { struct addr_map_symbol from; struct addr_map_symbol to; -- cgit v1.2.3 From 1101f69af5335a863765100d1df1999fd1e8c5bf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 27 Jan 2019 13:42:37 +0100 Subject: pref tools: Add missing map.h includes Lots of places get the map.h file indirectly, and since we're going to remove it from machine.h, then those need to include it directly, do it now, before we remove that dep. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-ob8jehdjda8h5jsrv9dqj9tf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 1 + tools/perf/builtin-inject.c | 1 + tools/perf/builtin-kallsyms.c | 1 + tools/perf/builtin-kmem.c | 1 + tools/perf/builtin-mem.c | 1 + tools/perf/builtin-report.c | 1 + tools/perf/builtin-script.c | 1 + tools/perf/builtin-top.c | 1 + tools/perf/builtin-trace.c | 1 + tools/perf/tests/code-reading.c | 1 + tools/perf/tests/hists_common.c | 1 + tools/perf/tests/hists_cumulate.c | 1 + tools/perf/tests/hists_filter.c | 1 + tools/perf/tests/hists_output.c | 1 + tools/perf/tests/mmap-thread-lookup.c | 1 + tools/perf/ui/browsers/annotate.c | 1 + tools/perf/ui/browsers/hists.c | 1 + tools/perf/util/annotate.c | 1 + tools/perf/util/build-id.c | 1 + tools/perf/util/callchain.c | 1 + tools/perf/util/db-export.c | 1 + tools/perf/util/dso.c | 1 + tools/perf/util/event.c | 1 + tools/perf/util/intel-bts.c | 1 + tools/perf/util/probe-event.c | 1 + tools/perf/util/scripting-engines/trace-event-perl.c | 1 + tools/perf/util/scripting-engines/trace-event-python.c | 1 + tools/perf/util/session.c | 1 + tools/perf/util/sort.c | 1 + tools/perf/util/symbol-elf.c | 1 + tools/perf/util/symbol.c | 1 + tools/perf/util/unwind-libdw.c | 1 + tools/perf/util/unwind-libunwind-local.c | 1 + tools/perf/util/unwind-libunwind.c | 1 + tools/perf/util/vdso.c | 1 + 35 files changed, 35 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 20db635347e5..7f3c3fea67b4 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -27,6 +27,7 @@ #include "util/thread.h" #include "util/sort.h" #include "util/hist.h" +#include "util/map.h" #include "util/session.h" #include "util/tool.h" #include "util/data.h" diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index eda41673c4f3..3499addcfc12 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -12,6 +12,7 @@ #include "util/color.h" #include "util/evlist.h" #include "util/evsel.h" +#include "util/map.h" #include "util/session.h" #include "util/tool.h" #include "util/debug.h" diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c index 90d1a2305b72..bc7a2bc7aed7 100644 --- a/tools/perf/builtin-kallsyms.c +++ b/tools/perf/builtin-kallsyms.c @@ -13,6 +13,7 @@ #include #include "debug.h" #include "machine.h" +#include "map.h" #include "symbol.h" static int __cmd_kallsyms(int argc, const char **argv) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 088705c167bf..b80ec0883537 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -6,6 +6,7 @@ #include "util/evsel.h" #include "util/util.h" #include "util/config.h" +#include "util/map.h" #include "util/symbol.h" #include "util/thread.h" #include "util/header.h" diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 57393e94d156..ba7e8d87dec3 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -13,6 +13,7 @@ #include "util/data.h" #include "util/mem-events.h" #include "util/debug.h" +#include "util/map.h" #include "util/symbol.h" #define MEM_OPERATION_LOAD 0x1 diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index c9ceaf88759c..a007ea9a3874 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -16,6 +16,7 @@ #include #include #include +#include "util/map.h" #include "util/symbol.h" #include "util/callchain.h" #include "util/values.h" diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ac221f137ed2..8d5fe092525c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -10,6 +10,7 @@ #include "util/perf_regs.h" #include "util/session.h" #include "util/tool.h" +#include "util/map.h" #include "util/symbol.h" #include "util/thread.h" #include "util/trace-event.h" diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 57b1d7495d02..619406339e4b 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -30,6 +30,7 @@ #include "util/evsel.h" #include "util/event.h" #include "util/machine.h" +#include "util/map.h" #include "util/session.h" #include "util/symbol.h" #include "util/thread.h" diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 1447993e1ee3..c0b91595d6e3 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -29,6 +29,7 @@ #include "util/evlist.h" #include #include "util/machine.h" +#include "util/map.h" #include "util/path.h" #include "util/session.h" #include "util/thread.h" diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index dbf2c69944d2..59d00b3d0a74 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -15,6 +15,7 @@ #include "thread_map.h" #include "cpumap.h" #include "machine.h" +#include "map.h" #include "event.h" #include "thread.h" diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index 6b0649c8b33e..469958cd7fe0 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -2,6 +2,7 @@ #include #include "perf.h" #include "util/debug.h" +#include "util/map.h" #include "util/symbol.h" #include "util/sort.h" #include "util/evsel.h" diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 4ca417d1a06b..7a2eed6c783e 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -2,6 +2,7 @@ #include "perf.h" #include "util/debug.h" #include "util/event.h" +#include "util/map.h" #include "util/symbol.h" #include "util/sort.h" #include "util/evsel.h" diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 1c5bedab3c2c..975844807fe2 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "perf.h" #include "util/debug.h" +#include "util/map.h" #include "util/symbol.h" #include "util/sort.h" #include "util/evsel.h" diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index 6c4bc68a77ee..0a510c524a5d 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -2,6 +2,7 @@ #include "perf.h" #include "util/debug.h" #include "util/event.h" +#include "util/map.h" #include "util/symbol.h" #include "util/sort.h" #include "util/evsel.h" diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 5ede9b561d32..ba87e6e8d18c 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -11,6 +11,7 @@ #include "tests.h" #include "machine.h" #include "thread_map.h" +#include "map.h" #include "symbol.h" #include "thread.h" #include "util.h" diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 82e16bf84466..35bdfd8b1e71 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -7,6 +7,7 @@ #include "../../util/annotate.h" #include "../../util/hist.h" #include "../../util/sort.h" +#include "../../util/map.h" #include "../../util/symbol.h" #include "../../util/evsel.h" #include "../../util/evlist.h" diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 85790a4d1842..8ada0c690771 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -11,6 +11,7 @@ #include "../../util/evsel.h" #include "../../util/evlist.h" #include "../../util/hist.h" +#include "../../util/map.h" #include "../../util/pstack.h" #include "../../util/sort.h" #include "../../util/util.h" diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 3d79add5f7ae..2468b8aa0b6b 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -17,6 +17,7 @@ #include "color.h" #include "config.h" #include "cache.h" +#include "map.h" #include "symbol.h" #include "units.h" #include "debug.h" diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 2ff802068f06..bff0d17920ed 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -16,6 +16,7 @@ #include "build-id.h" #include "event.h" #include "namespaces.h" +#include "map.h" #include "symbol.h" #include "thread.h" #include diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 2974950039ac..cc47dba973f2 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -23,6 +23,7 @@ #include "util.h" #include "sort.h" #include "machine.h" +#include "map.h" #include "callchain.h" #include "branch.h" diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 69fbb0a72d0c..de9b4769d06c 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -20,6 +20,7 @@ #include "thread.h" #include "comm.h" #include "symbol.h" +#include "map.h" #include "event.h" #include "util.h" #include "thread-stack.h" diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index a8a54115b115..ba58ba603b69 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -12,6 +12,7 @@ #include "compress.h" #include "namespaces.h" #include "path.h" +#include "map.h" #include "symbol.h" #include "srcline.h" #include "dso.h" diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 1b5091a3d14f..4d84e7754be9 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -21,6 +21,7 @@ #include "thread.h" #include "thread_map.h" #include "sane_ctype.h" +#include "map.h" #include "symbol/kallsyms.h" #include "asm/bug.h" #include "stat.h" diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index ee6ca65f81f4..f7fe8ccf6d65 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -27,6 +27,7 @@ #include "evsel.h" #include "evlist.h" #include "machine.h" +#include "map.h" #include "session.h" #include "util.h" #include "thread.h" diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 4008f0cc36e5..b0192160513d 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -41,6 +41,7 @@ #include "debug.h" #include "cache.h" #include "color.h" +#include "map.h" #include "symbol.h" #include "thread.h" #include diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index b93f36b887b5..a9f22694eed1 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -37,6 +37,7 @@ #include "../../perf.h" #include "../callchain.h" #include "../machine.h" +#include "../map.h" #include "../thread.h" #include "../event.h" #include "../trace-event.h" diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 7059d1be2d09..d9c83776a80f 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -44,6 +44,7 @@ #include "../thread-stack.h" #include "../trace-event.h" #include "../call-path.h" +#include "map.h" #include "thread_map.h" #include "cpumap.h" #include "print_binary.h" diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 24fd62528a33..e9755a649993 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -13,6 +13,7 @@ #include "evlist.h" #include "evsel.h" #include "memswap.h" +#include "map.h" #include "session.h" #include "tool.h" #include "sort.h" diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 6c1a83768eb0..79e794406bef 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -6,6 +6,7 @@ #include "sort.h" #include "hist.h" #include "comm.h" +#include "map.h" #include "symbol.h" #include "thread.h" #include "evsel.h" diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 66a84d5846c8..848d1a0f8dc0 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -6,6 +6,7 @@ #include #include +#include "map.h" #include "symbol.h" #include "demangle-java.h" #include "demangle-rust.h" diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index bcbcbd610460..7b194cf53cc0 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -17,6 +17,7 @@ #include "util.h" #include "debug.h" #include "machine.h" +#include "map.h" #include "symbol.h" #include "strlist.h" #include "intlist.h" diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 5eff9bfc5758..8ea7c89e73fd 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -8,6 +8,7 @@ #include "unwind.h" #include "unwind-libdw.h" #include "machine.h" +#include "map.h" #include "thread.h" #include #include "event.h" diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 79f521a552cf..f3c666a84e4d 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -34,6 +34,7 @@ #include "session.h" #include "perf_regs.h" #include "unwind.h" +#include "map.h" #include "symbol.h" #include "util.h" #include "debug.h" diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index b029a5e9ae49..9778b3133b77 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "unwind.h" +#include "map.h" #include "thread.h" #include "session.h" #include "debug.h" diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 3702cba11d7d..5031b7b22bbd 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -11,6 +11,7 @@ #include "vdso.h" #include "util.h" +#include "map.h" #include "symbol.h" #include "machine.h" #include "thread.h" -- cgit v1.2.3 From 41f30914fc33116cfd0fd0982c8effe435d97698 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 27 Jan 2019 13:44:29 +0100 Subject: perf map: Move structs and prototypes for map groups to a separate header And since machine.h only needs what is in there, make it stop including map.h and instead include this newly introduced map_groups.h instead. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-dbob25fv5rp2rjpwlnterf38@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/tests/dwarf-unwind.c | 1 + tools/perf/arch/arm64/tests/dwarf-unwind.c | 1 + tools/perf/arch/powerpc/tests/dwarf-unwind.c | 1 + tools/perf/arch/x86/tests/dwarf-unwind.c | 1 + tools/perf/ui/stdio/hist.c | 1 + tools/perf/util/machine.h | 2 +- tools/perf/util/map.c | 6 ++ tools/perf/util/map.h | 87 +------------------------- tools/perf/util/map_groups.h | 91 ++++++++++++++++++++++++++++ tools/perf/util/probe-event.c | 1 + tools/perf/util/symbol-elf.c | 1 + 11 files changed, 106 insertions(+), 87 deletions(-) create mode 100644 tools/perf/util/map_groups.h (limited to 'tools/perf/util') diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c index 9a0242e74cfc..2c35e532bc9a 100644 --- a/tools/perf/arch/arm/tests/dwarf-unwind.c +++ b/tools/perf/arch/arm/tests/dwarf-unwind.c @@ -3,6 +3,7 @@ #include "perf_regs.h" #include "thread.h" #include "map.h" +#include "map_groups.h" #include "event.h" #include "debug.h" #include "tests/tests.h" diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c index 5522ce384723..a6a407fa1b8b 100644 --- a/tools/perf/arch/arm64/tests/dwarf-unwind.c +++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c @@ -3,6 +3,7 @@ #include "perf_regs.h" #include "thread.h" #include "map.h" +#include "map_groups.h" #include "event.h" #include "debug.h" #include "tests/tests.h" diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c index 5f39efef0856..5c178e4a1995 100644 --- a/tools/perf/arch/powerpc/tests/dwarf-unwind.c +++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c @@ -3,6 +3,7 @@ #include "perf_regs.h" #include "thread.h" #include "map.h" +#include "map_groups.h" #include "event.h" #include "debug.h" #include "tests/tests.h" diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c index 7879df34569a..6ad0a1cedb13 100644 --- a/tools/perf/arch/x86/tests/dwarf-unwind.c +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c @@ -3,6 +3,7 @@ #include "perf_regs.h" #include "thread.h" #include "map.h" +#include "map_groups.h" #include "event.h" #include "debug.h" #include "tests/tests.h" diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 814abf5e9a5c..bb653a47f47a 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -5,6 +5,7 @@ #include "../../util/util.h" #include "../../util/hist.h" #include "../../util/map.h" +#include "../../util/map_groups.h" #include "../../util/sort.h" #include "../../util/evsel.h" #include "../../util/srcline.h" diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index e2f3df45410a..f70ab98a7bde 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -4,7 +4,7 @@ #include #include -#include "map.h" +#include "map_groups.h" #include "dso.h" #include "event.h" #include "rwsem.h" diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index cf407cc9d915..fbeb0c6efaa6 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -557,6 +557,12 @@ void map_groups__init(struct map_groups *mg, struct machine *machine) refcount_set(&mg->refcnt, 1); } +void map_groups__insert(struct map_groups *mg, struct map *map) +{ + maps__insert(&mg->maps, map); + map->groups = mg; +} + static void __maps__purge(struct maps *maps) { struct rb_root *root = &maps->entries; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 5a889db4fca5..0e20749f2c55 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -6,12 +6,10 @@ #include #include #include -#include #include #include #include #include -#include "rwsem.h" struct dso; struct ip_callchain; @@ -48,38 +46,7 @@ struct map { refcount_t refcnt; }; -#define KMAP_NAME_LEN 256 - -struct kmap { - struct ref_reloc_sym *ref_reloc_sym; - struct map_groups *kmaps; - char name[KMAP_NAME_LEN]; -}; - -struct maps { - struct rb_root entries; - struct rb_root names; - struct rw_semaphore lock; -}; - -struct map_groups { - struct maps maps; - struct machine *machine; - refcount_t refcnt; -}; - -struct map_groups *map_groups__new(struct machine *machine); -void map_groups__delete(struct map_groups *mg); -bool map_groups__empty(struct map_groups *mg); - -static inline struct map_groups *map_groups__get(struct map_groups *mg) -{ - if (mg) - refcount_inc(&mg->refcnt); - return mg; -} - -void map_groups__put(struct map_groups *mg); +struct kmap; struct kmap *__map__kmap(struct map *map); struct kmap *map__kmap(struct map *map); @@ -187,61 +154,9 @@ void map__fixup_end(struct map *map); void map__reloc_vmlinux(struct map *map); -void maps__insert(struct maps *maps, struct map *map); -void maps__remove(struct maps *maps, struct map *map); -struct map *maps__find(struct maps *maps, u64 addr); -struct map *maps__first(struct maps *maps); -struct map *map__next(struct map *map); -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, - struct map **mapp); -void map_groups__init(struct map_groups *mg, struct machine *machine); -void map_groups__exit(struct map_groups *mg); -int map_groups__clone(struct thread *thread, - struct map_groups *parent); -size_t map_groups__fprintf(struct map_groups *mg, FILE *fp); - int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr); -static inline void map_groups__insert(struct map_groups *mg, struct map *map) -{ - maps__insert(&mg->maps, map); - map->groups = mg; -} - -static inline void map_groups__remove(struct map_groups *mg, struct map *map) -{ - maps__remove(&mg->maps, map); -} - -static inline struct map *map_groups__find(struct map_groups *mg, u64 addr) -{ - return maps__find(&mg->maps, addr); -} - -struct map *map_groups__first(struct map_groups *mg); - -static inline struct map *map_groups__next(struct map *map) -{ - return map__next(map); -} - -struct symbol *map_groups__find_symbol(struct map_groups *mg, - u64 addr, struct map **mapp); - -struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, - const char *name, - struct map **mapp); - -struct addr_map_symbol; - -int map_groups__find_ams(struct addr_map_symbol *ams); - -int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, - FILE *fp); - -struct map *map_groups__find_by_name(struct map_groups *mg, const char *name); - bool __map__is_kernel(const struct map *map); bool __map__is_extra_kernel_map(const struct map *map); diff --git a/tools/perf/util/map_groups.h b/tools/perf/util/map_groups.h new file mode 100644 index 000000000000..4dcda33e0fdf --- /dev/null +++ b/tools/perf/util/map_groups.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_MAP_GROUPS_H +#define __PERF_MAP_GROUPS_H + +#include +#include +#include +#include +#include +#include "rwsem.h" + +struct ref_reloc_sym; +struct machine; +struct map; +struct thread; + +struct maps { + struct rb_root entries; + struct rb_root names; + struct rw_semaphore lock; +}; + +void maps__insert(struct maps *maps, struct map *map); +void maps__remove(struct maps *maps, struct map *map); +struct map *maps__find(struct maps *maps, u64 addr); +struct map *maps__first(struct maps *maps); +struct map *map__next(struct map *map); +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp); + +struct map_groups { + struct maps maps; + struct machine *machine; + refcount_t refcnt; +}; + +#define KMAP_NAME_LEN 256 + +struct kmap { + struct ref_reloc_sym *ref_reloc_sym; + struct map_groups *kmaps; + char name[KMAP_NAME_LEN]; +}; + +struct map_groups *map_groups__new(struct machine *machine); +void map_groups__delete(struct map_groups *mg); +bool map_groups__empty(struct map_groups *mg); + +static inline struct map_groups *map_groups__get(struct map_groups *mg) +{ + if (mg) + refcount_inc(&mg->refcnt); + return mg; +} + +void map_groups__put(struct map_groups *mg); +void map_groups__init(struct map_groups *mg, struct machine *machine); +void map_groups__exit(struct map_groups *mg); +int map_groups__clone(struct thread *thread, struct map_groups *parent); +size_t map_groups__fprintf(struct map_groups *mg, FILE *fp); + +void map_groups__insert(struct map_groups *mg, struct map *map); + +static inline void map_groups__remove(struct map_groups *mg, struct map *map) +{ + maps__remove(&mg->maps, map); +} + +static inline struct map *map_groups__find(struct map_groups *mg, u64 addr) +{ + return maps__find(&mg->maps, addr); +} + +struct map *map_groups__first(struct map_groups *mg); + +static inline struct map *map_groups__next(struct map *map) +{ + return map__next(map); +} + +struct symbol *map_groups__find_symbol(struct map_groups *mg, u64 addr, struct map **mapp); +struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, const char *name, struct map **mapp); + +struct addr_map_symbol; + +int map_groups__find_ams(struct addr_map_symbol *ams); + +int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE *fp); + +struct map *map_groups__find_by_name(struct map_groups *mg, const char *name); + +#endif // __PERF_MAP_GROUPS_H diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index b0192160513d..0030f9b9bf7e 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -42,6 +42,7 @@ #include "cache.h" #include "color.h" #include "map.h" +#include "map_groups.h" #include "symbol.h" #include "thread.h" #include diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 848d1a0f8dc0..6b9a2a70da29 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -7,6 +7,7 @@ #include #include "map.h" +#include "map_groups.h" #include "symbol.h" #include "demangle-java.h" #include "demangle-rust.h" -- cgit v1.2.3 From 7cadca8e1b4ae95f8be37d8da246eae8ebf5fb3a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 27 Jan 2019 14:10:31 +0100 Subject: perf hist: Remove symbol.h from hist.h, just fwd decls are needed To reduce the includes dependencies. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-cmvg5ght75mmfg1efeyna9rn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/gtk/annotate.c | 1 + tools/perf/util/auxtrace.c | 1 + tools/perf/util/callchain.c | 1 + tools/perf/util/hist.h | 3 ++- 4 files changed, 5 insertions(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index f42666ee0672..df49c9ba1785 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -5,6 +5,7 @@ #include "util/annotate.h" #include "util/evsel.h" #include "util/map.h" +#include "util/symbol.h" #include "ui/helpline.h" #include #include diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index f69961c4a4f3..94a22cc8004c 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -41,6 +41,7 @@ #include "pmu.h" #include "evsel.h" #include "cpumap.h" +#include "symbol.h" #include "thread_map.h" #include "asm/bug.h" #include "auxtrace.h" diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index cc47dba973f2..abb608b09269 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -26,6 +26,7 @@ #include "map.h" #include "callchain.h" #include "branch.h" +#include "symbol.h" #define CALLCHAIN_PARAM_DEFAULT \ .mode = CHAIN_GRAPH_ABS, \ diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index f50aad24928e..38a72eb81427 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -8,12 +8,13 @@ #include "evsel.h" #include "header.h" #include "color.h" -#include "symbol.h" #include "ui/progress.h" struct hist_entry; struct hist_entry_ops; struct addr_location; +struct mem_info; +struct branch_info; struct symbol; enum hist_filter { -- cgit v1.2.3 From daecf9e0fa8e1bb3b227fcc15c4070caccbbb14f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Jan 2019 00:03:34 +0100 Subject: perf tools: Add missing include for symbols.h Several places were using definitions found in symbols.h but not including it, getting it by sheer luck from some other headers that now are in the process of removing that include because they don't need it or because simply having struct forward declarations is enough, fix it. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-xbcvvx296d70kpg9wb0qmeq9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 1 + tools/perf/builtin-inject.c | 1 + tools/perf/builtin-trace.c | 1 + tools/perf/tests/code-reading.c | 1 + tools/perf/tests/dwarf-unwind.c | 1 + tools/perf/ui/browsers/hists.c | 1 + tools/perf/ui/stdio/hist.c | 1 + tools/perf/util/event.c | 1 + tools/perf/util/hist.c | 1 + tools/perf/util/intel-bts.c | 1 + tools/perf/util/machine.c | 1 + tools/perf/util/scripting-engines/trace-event-perl.c | 1 + tools/perf/util/scripting-engines/trace-event-python.c | 1 + tools/perf/util/session.c | 1 + tools/perf/util/thread.c | 1 + tools/perf/util/unwind-libdw.c | 1 + 16 files changed, 16 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 72ec0ae7d8ad..b2bf117881f1 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -33,6 +33,7 @@ #include "ui/browsers/hists.h" #include "thread.h" #include "mem2node.h" +#include "symbol.h" struct c2c_hists { struct hists hists; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 3499addcfc12..9bb1f35d5cb7 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -20,6 +20,7 @@ #include "util/data.h" #include "util/auxtrace.h" #include "util/jit.h" +#include "util/symbol.h" #include "util/thread.h" #include diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index c0b91595d6e3..81a44954d435 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -30,6 +30,7 @@ #include #include "util/machine.h" #include "util/map.h" +#include "util/symbol.h" #include "util/path.h" #include "util/session.h" #include "util/thread.h" diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 59d00b3d0a74..4ebd2681e760 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -16,6 +16,7 @@ #include "cpumap.h" #include "machine.h" #include "map.h" +#include "symbol.h" #include "event.h" #include "thread.h" diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 7c8d2e422401..077c306c1cae 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -10,6 +10,7 @@ #include "../util/unwind.h" #include "perf_regs.h" #include "map.h" +#include "symbol.h" #include "thread.h" #include "callchain.h" diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 8ada0c690771..ac176b48178c 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -12,6 +12,7 @@ #include "../../util/evlist.h" #include "../../util/hist.h" #include "../../util/map.h" +#include "../../util/symbol.h" #include "../../util/pstack.h" #include "../../util/sort.h" #include "../../util/util.h" diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index bb653a47f47a..f25116f70878 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -6,6 +6,7 @@ #include "../../util/hist.h" #include "../../util/map.h" #include "../../util/map_groups.h" +#include "../../util/symbol.h" #include "../../util/sort.h" #include "../../util/evsel.h" #include "../../util/srcline.h" diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 4d84e7754be9..ba7be74fad6e 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -22,6 +22,7 @@ #include "thread_map.h" #include "sane_ctype.h" #include "map.h" +#include "symbol.h" #include "symbol/kallsyms.h" #include "asm/bug.h" #include "stat.h" diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 9e7a8e044a0a..3560ad2e5551 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -11,6 +11,7 @@ #include "evsel.h" #include "annotate.h" #include "srcline.h" +#include "symbol.h" #include "thread.h" #include "ui/progress.h" #include diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index f7fe8ccf6d65..f99ac0cbe3ff 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -28,6 +28,7 @@ #include "evlist.h" #include "machine.h" #include "map.h" +#include "symbol.h" #include "session.h" #include "util.h" #include "thread.h" diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 66f019fdc510..61959aba7e27 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -10,6 +10,7 @@ #include "hist.h" #include "machine.h" #include "map.h" +#include "symbol.h" #include "sort.h" #include "strlist.h" #include "thread.h" diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index a9f22694eed1..5f06378a482b 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -38,6 +38,7 @@ #include "../callchain.h" #include "../machine.h" #include "../map.h" +#include "../symbol.h" #include "../thread.h" #include "../event.h" #include "../trace-event.h" diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index d9c83776a80f..0e17db41b49b 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -45,6 +45,7 @@ #include "../trace-event.h" #include "../call-path.h" #include "map.h" +#include "symbol.h" #include "thread_map.h" #include "cpumap.h" #include "print_binary.h" diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e9755a649993..2012396abb7c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -14,6 +14,7 @@ #include "evsel.h" #include "memswap.h" #include "map.h" +#include "symbol.h" #include "session.h" #include "tool.h" #include "sort.h" diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index c83372329f89..4c179fef442d 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -12,6 +12,7 @@ #include "debug.h" #include "namespaces.h" #include "comm.h" +#include "symbol.h" #include "unwind.h" #include diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 8ea7c89e73fd..407d0167b942 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -9,6 +9,7 @@ #include "unwind-libdw.h" #include "machine.h" #include "map.h" +#include "symbol.h" #include "thread.h" #include #include "event.h" -- cgit v1.2.3 From 9cd997f85e29c1c2b53e23fa98df7ec44c344d67 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Jan 2019 00:06:16 +0100 Subject: perf evsel: No need to include symbol.h in evsel.h, symbol_conf.h is enough To reduce the header dependency and avoid unnecessary rebuilds when things change in symbol.h. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-6duflwliprh2tr47w5x4t260@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 29c5eb68c44b..cc578e02e08f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -8,7 +8,7 @@ #include #include #include "xyarray.h" -#include "symbol.h" +#include "symbol_conf.h" #include "cpumap.h" #include "counts.h" -- cgit v1.2.3 From e22c1c751140aeacc5bba9596d7d5a21c5acbd8c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Jan 2019 00:07:40 +0100 Subject: perf thread: Don't include symbol.h, symbol_conf.h is enough Also add stdio.h to get the FILE definition. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-8vx5396phynuxhdsxxfbdhsk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 856cf1a9040a..8276ffeec556 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -5,14 +5,17 @@ #include #include #include +#include #include #include #include "srccode.h" -#include "symbol.h" +#include "symbol_conf.h" #include #include #include "rwsem.h" +struct addr_location; +struct map; struct namespaces_event; struct thread_stack; struct unwind_libunwind_ops; -- cgit v1.2.3 From b10ba7f1a278ce04d272b2b662f231552ab000ee Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 29 Jan 2019 11:11:04 +0100 Subject: perf tools: Add missing include in various places Its getting it from hist.h and that will go away, as that header doesn't need callchain.h at all. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-6ebl3mwwiqocl79yts44qltu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 1 + tools/perf/ui/gtk/hists.c | 1 + tools/perf/ui/hist.c | 1 + tools/perf/ui/stdio/hist.c | 1 + tools/perf/util/config.c | 1 + tools/perf/util/hist.c | 1 + 6 files changed, 6 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index ac176b48178c..aef800d97ea1 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -8,6 +8,7 @@ #include #include +#include "../../util/callchain.h" #include "../../util/evsel.h" #include "../../util/evlist.h" #include "../../util/hist.h" diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 74414ccd8c84..0c08890f006a 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "../evlist.h" #include "../cache.h" +#include "../callchain.h" #include "../evsel.h" #include "../sort.h" #include "../hist.h" diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index fe3dfaa64a91..412d6f1626e3 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -3,6 +3,7 @@ #include #include +#include "../util/callchain.h" #include "../util/hist.h" #include "../util/util.h" #include "../util/sort.h" diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index f25116f70878..a60f2993d390 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -2,6 +2,7 @@ #include #include +#include "../../util/callchain.h" #include "../../util/util.h" #include "../../util/hist.h" #include "../../util/map.h" diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 1ea8f898f1a1..fa092511c52b 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -13,6 +13,7 @@ #include #include "util.h" #include "cache.h" +#include "callchain.h" #include #include "util/event.h" /* proc_map_timeout */ #include "util/hist.h" /* perf_hist_config */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 3560ad2e5551..7f9df74ab893 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include "callchain.h" #include "util.h" #include "build-id.h" #include "hist.h" -- cgit v1.2.3 From 71551288d2fde0c24df3c248e3e80be92783376d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 29 Jan 2019 12:04:58 +0100 Subject: perf hist: Remove the needless callchain.h include from hist.h Nothing that is provided by callchain.h is used there, just things that should've be directly included in hist.h, such as rbtree.h and a map_symbol forward declaration. Remove it so that we reduce the headers dependency tree. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-zivvqfx93w5zzur7hr7h0nlh@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 38a72eb81427..6b872079ead5 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -2,9 +2,9 @@ #ifndef __PERF_HIST_H #define __PERF_HIST_H +#include #include #include -#include "callchain.h" #include "evsel.h" #include "header.h" #include "color.h" @@ -13,6 +13,7 @@ struct hist_entry; struct hist_entry_ops; struct addr_location; +struct map_symbol; struct mem_info; struct branch_info; struct symbol; -- cgit v1.2.3 From eb563d6604b0f21396e21ac73619c9a5e8ec5aee Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 29 Jan 2019 12:24:52 +0100 Subject: perf pmu: Remove needless evsel.h include, only needs one fwd decl To reduce the header dependency tree. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-rc389o1z0htwukqv6ni1viun@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 76fecec7b3f9..ffed23fb920a 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -6,9 +6,10 @@ #include #include #include -#include "evsel.h" #include "parse-events.h" +struct perf_evsel_config_term; + enum { PERF_PMU_FORMAT_VALUE_CONFIG, PERF_PMU_FORMAT_VALUE_CONFIG1, -- cgit v1.2.3 From 5691903a6f5223b162c0c56a03b04d46bf653ae4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 29 Jan 2019 12:42:55 +0100 Subject: perf kvm stat: Replace kvm-stat.h includes with forward declarations To reduce the include header dependency tree and speed up perf builds. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-dngwaxuhfnhksawgdpo6e74n@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/kvm-stat.c | 1 + tools/perf/arch/s390/util/kvm-stat.c | 1 + tools/perf/arch/x86/util/kvm-stat.c | 1 + tools/perf/util/kvm-stat.h | 7 ++++--- 4 files changed, 7 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index 596ad6aedaac..86e64c47d8ea 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -3,6 +3,7 @@ #include "util/kvm-stat.h" #include "util/parse-events.h" #include "util/debug.h" +#include "util/evsel.h" #include "book3s_hv_exits.h" #include "book3s_hcalls.h" diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c index aaabab5e2830..7e3961a4b292 100644 --- a/tools/perf/arch/s390/util/kvm-stat.c +++ b/tools/perf/arch/s390/util/kvm-stat.c @@ -11,6 +11,7 @@ #include #include "../../util/kvm-stat.h" +#include "../../util/evsel.h" #include define_exit_reasons_table(sie_exit_reasons, sie_intercept_code); diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c index 081353d7b095..865a9762f22e 100644 --- a/tools/perf/arch/x86/util/kvm-stat.c +++ b/tools/perf/arch/x86/util/kvm-stat.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include "../../util/kvm-stat.h" +#include "../../util/evsel.h" #include #include #include diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index 7b1f06567521..1403dec189b4 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -3,12 +3,13 @@ #define __PERF_KVM_STAT_H #include "../perf.h" -#include "evsel.h" -#include "evlist.h" -#include "session.h" #include "tool.h" #include "stat.h" +struct perf_evsel; +struct perf_evlist; +struct perf_session; + struct event_key { #define INVALID_KEY (~0ULL) u64 key; -- cgit v1.2.3 From ebc52aee6130653b934a86637108ac2a7c95c74c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 29 Jan 2019 13:06:18 +0100 Subject: perf bpf-loader: Remove unecessary includes from bpf-loader.h To cut the header dep tree, to get unecessary object rebuilds to be reduced when a change happens in headers. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-ph72xhl9moqa0g1hxcyudwfn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-loader.c | 1 + tools/perf/util/bpf-loader.h | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 2f3eb6d293ee..6e68698aa0ad 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -15,6 +15,7 @@ #include #include "perf.h" #include "debug.h" +#include "evlist.h" #include "bpf-loader.h" #include "bpf-prologue.h" #include "probe-event.h" diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index 62d245a90e1d..3f46856e3330 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h @@ -8,11 +8,7 @@ #include #include -#include #include -#include "probe-event.h" -#include "evlist.h" -#include "debug.h" enum bpf_loader_errno { __BPF_LOADER_ERRNO__START = __LIBBPF_ERRNO__START - 100, @@ -44,6 +40,7 @@ enum bpf_loader_errno { }; struct perf_evsel; +struct perf_evlist; struct bpf_object; struct parse_events_term; #define PERF_BPF_PROBE_GROUP "perf_bpf_probe" @@ -87,6 +84,8 @@ struct perf_evsel *bpf__setup_output_event(struct perf_evlist *evlist, const cha int bpf__strerror_setup_output_event(struct perf_evlist *evlist, int err, char *buf, size_t size); #else #include +#include +#include "debug.h" static inline struct bpf_object * bpf__prepare_load(const char *filename __maybe_unused, -- cgit v1.2.3 From ffe8881eb20b0452e47efe151e870b4fae8bd5d2 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 31 Jan 2019 11:47:11 -0700 Subject: perf pmu: Move EVENT_SOURCE_DEVICE_PATH to PMU header file Move definition of EVENT_SOURCE_DEVICE_PATH to pmu.h so that it can be used by other files than pmu.c Signed-off-by: Mathieu Poirier Acked-by: Suzuki K Poulouse Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Heiko Carstens Cc: Jiri Olsa Cc: Mark Rutland Cc: Martin Schwidefsky Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-s390@vger.kernel.org Link: http://lkml.kernel.org/r/20190131184714.20388-5-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 2 -- tools/perf/util/pmu.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 11a234740632..51d437f55d18 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -29,8 +29,6 @@ struct perf_pmu_format { struct list_head list; }; -#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" - int perf_pmu_parse(struct list_head *list, char *name); extern FILE *perf_pmu_in; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index ffed23fb920a..1c12a61ed0dc 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -17,6 +17,7 @@ enum { }; #define PERF_PMU_FORMAT_BITS 64 +#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" struct perf_event_attr; -- cgit v1.2.3 From 159b0da50adb021fe452d849fb73b408f21de3f8 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 31 Jan 2019 11:47:14 -0700 Subject: perf pmu: Remove set_drv_config API CoreSight was the only client of the PMU's set_drv_config() API. Now that it is no longer needed by CoreSight remove it from the code base. Signed-off-by: Mathieu Poirier Acked-by: Suzuki K Poulouse Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Heiko Carstens Cc: Jiri Olsa Cc: Mark Rutland Cc: Martin Schwidefsky Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-s390@vger.kernel.org Link: http://lkml.kernel.org/r/20190131184714.20388-8-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 10 ------ tools/perf/builtin-stat.c | 9 ----- tools/perf/builtin-top.c | 13 -------- tools/perf/util/Build | 1 - tools/perf/util/drv_configs.c | 78 ------------------------------------------- tools/perf/util/drv_configs.h | 26 --------------- tools/perf/util/pmu.h | 1 - 7 files changed, 138 deletions(-) delete mode 100644 tools/perf/util/drv_configs.c delete mode 100644 tools/perf/util/drv_configs.h (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 88ea11d57c6f..56e9d9e8c174 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -23,7 +23,6 @@ #include "util/evlist.h" #include "util/evsel.h" #include "util/debug.h" -#include "util/drv_configs.h" #include "util/session.h" #include "util/tool.h" #include "util/symbol.h" @@ -567,7 +566,6 @@ static int record__open(struct record *rec) struct perf_evlist *evlist = rec->evlist; struct perf_session *session = rec->session; struct record_opts *opts = &rec->opts; - struct perf_evsel_config_term *err_term; int rc = 0; /* @@ -620,14 +618,6 @@ try_again: goto out; } - if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) { - pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", - err_term->val.drv_cfg, perf_evsel__name(pos), errno, - str_error_r(errno, msg, sizeof(msg))); - rc = -1; - goto out; - } - rc = record__mmap(rec); if (rc) goto out; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e587808591e8..bb24f9c17f9a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -52,7 +52,6 @@ #include "util/evlist.h" #include "util/evsel.h" #include "util/debug.h" -#include "util/drv_configs.h" #include "util/color.h" #include "util/stat.h" #include "util/header.h" @@ -417,7 +416,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) int status = 0; const bool forks = (argc > 0); bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; - struct perf_evsel_config_term *err_term; if (interval) { ts.tv_sec = interval / USEC_PER_MSEC; @@ -514,13 +512,6 @@ try_again: return -1; } - if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) { - pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", - err_term->val.drv_cfg, perf_evsel__name(counter), errno, - str_error_r(errno, msg, sizeof(msg))); - return -1; - } - if (STAT_RECORD) { int err, fd = perf_data__fd(&perf_stat.data); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 619406339e4b..231a90daa958 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -25,7 +25,6 @@ #include "util/bpf-event.h" #include "util/config.h" #include "util/color.h" -#include "util/drv_configs.h" #include "util/evlist.h" #include "util/evsel.h" #include "util/event.h" @@ -1186,10 +1185,6 @@ static void init_process_thread(struct perf_top *top) static int __cmd_top(struct perf_top *top) { - char msg[512]; - struct perf_evsel *pos; - struct perf_evsel_config_term *err_term; - struct perf_evlist *evlist = top->evlist; struct record_opts *opts = &top->record_opts; pthread_t thread, thread_process; int ret; @@ -1240,14 +1235,6 @@ static int __cmd_top(struct perf_top *top) if (ret) goto out_delete; - ret = perf_evlist__apply_drv_configs(evlist, &pos, &err_term); - if (ret) { - pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", - err_term->val.drv_cfg, perf_evsel__name(pos), errno, - str_error_r(errno, msg, sizeof(msg))); - goto out_delete; - } - top->session->evlist = top->evlist; perf_session__set_id_hdr_size(top->session); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index b69d6294c88c..a36e6e5a6f4f 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -107,7 +107,6 @@ libperf-y += term.o libperf-y += help-unknown-cmd.o libperf-y += mem-events.o libperf-y += vsprintf.o -libperf-y += drv_configs.o libperf-y += units.o libperf-y += time-utils.o libperf-y += expr-bison.o diff --git a/tools/perf/util/drv_configs.c b/tools/perf/util/drv_configs.c deleted file mode 100644 index eec754243f4d..000000000000 --- a/tools/perf/util/drv_configs.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * drv_configs.h: Interface to apply PMU specific configuration - * Copyright (c) 2016-2018, Linaro Ltd. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -#include "drv_configs.h" -#include "evlist.h" -#include "evsel.h" -#include "pmu.h" -#include - -static int -perf_evsel__apply_drv_configs(struct perf_evsel *evsel, - struct perf_evsel_config_term **err_term) -{ - bool found = false; - int err = 0; - struct perf_evsel_config_term *term; - struct perf_pmu *pmu = NULL; - - while ((pmu = perf_pmu__scan(pmu)) != NULL) - if (pmu->type == evsel->attr.type) { - found = true; - break; - } - - list_for_each_entry(term, &evsel->config_terms, list) { - if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG) - continue; - - /* - * We have a configuration term, report an error if we - * can't find the PMU or if the PMU driver doesn't support - * cmd line driver configuration. - */ - if (!found || !pmu->set_drv_config) { - err = -EINVAL; - *err_term = term; - break; - } - - err = pmu->set_drv_config(term); - if (err) { - *err_term = term; - break; - } - } - - return err; -} - -int perf_evlist__apply_drv_configs(struct perf_evlist *evlist, - struct perf_evsel **err_evsel, - struct perf_evsel_config_term **err_term) -{ - struct perf_evsel *evsel; - int err = 0; - - evlist__for_each_entry(evlist, evsel) { - err = perf_evsel__apply_drv_configs(evsel, err_term); - if (err) { - *err_evsel = evsel; - break; - } - } - - return err; -} diff --git a/tools/perf/util/drv_configs.h b/tools/perf/util/drv_configs.h deleted file mode 100644 index 32bc9babc2e0..000000000000 --- a/tools/perf/util/drv_configs.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * drv_configs.h: Interface to apply PMU specific configuration - * Copyright (c) 2016-2018, Linaro Ltd. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -#ifndef __PERF_DRV_CONFIGS_H -#define __PERF_DRV_CONFIGS_H - -#include "drv_configs.h" -#include "evlist.h" -#include "evsel.h" - -int perf_evlist__apply_drv_configs(struct perf_evlist *evlist, - struct perf_evsel **err_evsel, - struct perf_evsel_config_term **term); -#endif diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 1c12a61ed0dc..47253c3daf55 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -31,7 +31,6 @@ struct perf_pmu { struct list_head format; /* HEAD struct perf_pmu_format -> list */ struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ struct list_head list; /* ELEM */ - int (*set_drv_config) (struct perf_evsel_config_term *term); }; struct perf_pmu_info { -- cgit v1.2.3 From 9d2ed64587c045304efe8872b0258c30803d370c Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Tue, 22 Jan 2019 20:47:43 +0300 Subject: perf record: Allocate affinity masks Allocate affinity option and masks for mmap data buffers and record thread as well as initialize allocated objects. Signed-off-by: Alexey Budankov Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/526fa2b0-07de-6dbd-a7e9-26ba875593c9@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 13 ++++++++++++- tools/perf/perf.h | 8 ++++++++ tools/perf/util/evlist.c | 6 +++--- tools/perf/util/evlist.h | 2 +- tools/perf/util/mmap.c | 2 ++ tools/perf/util/mmap.h | 3 ++- 6 files changed, 28 insertions(+), 6 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 56e9d9e8c174..7ced5f3e8100 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -81,12 +81,17 @@ struct record { bool timestamp_boundary; struct switch_output switch_output; unsigned long long samples; + cpu_set_t affinity_mask; }; static volatile int auxtrace_record__snapshot_started; static DEFINE_TRIGGER(auxtrace_snapshot_trigger); static DEFINE_TRIGGER(switch_output_trigger); +static const char *affinity_tags[PERF_AFFINITY_MAX] = { + "SYS", "NODE", "CPU" +}; + static bool switch_output_signal(struct record *rec) { return rec->switch_output.signal && @@ -533,7 +538,8 @@ static int record__mmap_evlist(struct record *rec, if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, opts->auxtrace_mmap_pages, - opts->auxtrace_snapshot_mode, opts->nr_cblocks) < 0) { + opts->auxtrace_snapshot_mode, + opts->nr_cblocks, opts->affinity) < 0) { if (errno == EPERM) { pr_err("Permission error mapping pages.\n" "Consider increasing " @@ -1977,6 +1983,9 @@ int cmd_record(int argc, const char **argv) # undef REASON #endif + CPU_ZERO(&rec->affinity_mask); + rec->opts.affinity = PERF_AFFINITY_SYS; + rec->evlist = perf_evlist__new(); if (rec->evlist == NULL) return -ENOMEM; @@ -2140,6 +2149,8 @@ int cmd_record(int argc, const char **argv) if (verbose > 0) pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks); + pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); + err = __cmd_record(&record, argc, argv); out: perf_evlist__delete(rec->evlist); diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 5941fb6eccfc..b120e547ddc7 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -84,6 +84,14 @@ struct record_opts { clockid_t clockid; u64 clockid_res_ns; int nr_cblocks; + int affinity; +}; + +enum perf_affinity { + PERF_AFFINITY_SYS = 0, + PERF_AFFINITY_NODE, + PERF_AFFINITY_CPU, + PERF_AFFINITY_MAX }; struct option; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 8c902276d4b4..08cedb643ea6 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1022,7 +1022,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, */ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, unsigned int auxtrace_pages, - bool auxtrace_overwrite, int nr_cblocks) + bool auxtrace_overwrite, int nr_cblocks, int affinity) { struct perf_evsel *evsel; const struct cpu_map *cpus = evlist->cpus; @@ -1032,7 +1032,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, * Its value is decided by evsel's write_backward. * So &mp should not be passed through const pointer. */ - struct mmap_params mp = { .nr_cblocks = nr_cblocks }; + struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity }; if (!evlist->mmap) evlist->mmap = perf_evlist__alloc_mmap(evlist, false); @@ -1064,7 +1064,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) { - return perf_evlist__mmap_ex(evlist, pages, 0, false, 0); + return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS); } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 00ab43c6dd15..744906dd4887 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -165,7 +165,7 @@ unsigned long perf_event_mlock_kb_in_pages(void); int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, unsigned int auxtrace_pages, - bool auxtrace_overwrite, int nr_cblocks); + bool auxtrace_overwrite, int nr_cblocks, int affinity); int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); void perf_evlist__munmap(struct perf_evlist *evlist); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 8fc39311a30d..e68ba754a8e2 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -343,6 +343,8 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c map->fd = fd; map->cpu = cpu; + CPU_ZERO(&map->affinity_mask); + if (auxtrace_mmap__mmap(&map->auxtrace_mmap, &mp->auxtrace_mp, map->base, fd)) return -1; diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index aeb6942fdb00..e566c19b242b 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -38,6 +38,7 @@ struct perf_mmap { int nr_cblocks; } aio; #endif + cpu_set_t affinity_mask; }; /* @@ -69,7 +70,7 @@ enum bkw_mmap_state { }; struct mmap_params { - int prot, mask, nr_cblocks; + int prot, mask, nr_cblocks, affinity; struct auxtrace_mmap_params auxtrace_mp; }; -- cgit v1.2.3 From c44a8b44ca9f156a5395597109987d1c462ba655 Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Tue, 22 Jan 2019 20:48:54 +0300 Subject: perf record: Bind the AIO user space buffers to nodes Allocate and bind AIO user space buffers to the memory nodes that mmap kernel buffers are bound to. Signed-off-by: Alexey Budankov Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5a5adebc-afe0-4806-81cd-180d49ec043f@linux.intel.com [ Do not use 'index' as a variable name, it is a define in older glibcs ] Link: http://lkml.kernel.org/r/20190205151526.GC10613@kernel.org [ Add -lnuma to the python build when -DHAVE_LIBNUMA_SUPPORT is present, fixing 'perf test python' ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.c | 77 +++++++++++++++++++++++++++++++++++++++++++++--- tools/perf/util/setup.py | 5 ++++ 2 files changed, 78 insertions(+), 4 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index e68ba754a8e2..d882f43148c3 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -10,6 +10,9 @@ #include #include #include +#ifdef HAVE_LIBNUMA_SUPPORT +#include +#endif #include "debug.h" #include "event.h" #include "mmap.h" @@ -154,9 +157,72 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb } #ifdef HAVE_AIO_SUPPORT + +#ifdef HAVE_LIBNUMA_SUPPORT +static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) +{ + map->aio.data[idx] = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); + if (map->aio.data[idx] == MAP_FAILED) { + map->aio.data[idx] = NULL; + return -1; + } + + return 0; +} + +static void perf_mmap__aio_free(struct perf_mmap *map, int idx) +{ + if (map->aio.data[idx]) { + munmap(map->aio.data[idx], perf_mmap__mmap_len(map)); + map->aio.data[idx] = NULL; + } +} + +static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affinity) +{ + void *data; + size_t mmap_len; + unsigned long node_mask; + + if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) { + data = map->aio.data[idx]; + mmap_len = perf_mmap__mmap_len(map); + node_mask = 1UL << cpu__get_node(cpu); + if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) { + pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n", + data, data + mmap_len, cpu__get_node(cpu)); + return -1; + } + } + + return 0; +} +#else +static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) +{ + map->aio.data[idx] = malloc(perf_mmap__mmap_len(map)); + if (map->aio.data[idx] == NULL) + return -1; + + return 0; +} + +static void perf_mmap__aio_free(struct perf_mmap *map, int idx) +{ + zfree(&(map->aio.data[idx])); +} + +static int perf_mmap__aio_bind(struct perf_mmap *map __maybe_unused, int idx __maybe_unused, + int cpu __maybe_unused, int affinity __maybe_unused) +{ + return 0; +} +#endif + static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp) { - int delta_max, i, prio; + int delta_max, i, prio, ret; map->aio.nr_cblocks = mp->nr_cblocks; if (map->aio.nr_cblocks) { @@ -177,11 +243,14 @@ static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp) } delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX); for (i = 0; i < map->aio.nr_cblocks; ++i) { - map->aio.data[i] = malloc(perf_mmap__mmap_len(map)); - if (!map->aio.data[i]) { + ret = perf_mmap__aio_alloc(map, i); + if (ret == -1) { pr_debug2("failed to allocate data buffer area, error %m"); return -1; } + ret = perf_mmap__aio_bind(map, i, map->cpu, mp->affinity); + if (ret == -1) + return -1; /* * Use cblock.aio_fildes value different from -1 * to denote started aio write operation on the @@ -210,7 +279,7 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map) int i; for (i = 0; i < map->aio.nr_cblocks; ++i) - zfree(&map->aio.data[i]); + perf_mmap__aio_free(map, i); if (map->aio.data) zfree(&map->aio.data); zfree(&map->aio.cblocks); diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index d3ffc18424b5..5b5a167b43ce 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -53,9 +53,14 @@ ext_sources = [f.strip() for f in open('util/python-ext-sources') # use full paths with source files ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)) +extra_libraries = [] +if '-DHAVE_LIBNUMA_SUPPORT' in cflags: + extra_libraries = [ 'numa' ] + perf = Extension('perf', sources = ext_sources, include_dirs = ['util/include'], + libraries = extra_libraries, extra_compile_args = cflags, extra_objects = [libtraceevent, libapikfs], ) -- cgit v1.2.3 From de20e3200c962213cdc67c0933f0ddaeb9285e62 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 5 Feb 2019 15:02:56 -0300 Subject: perf tools: Add fallback versions for CPU_{OR,EQUAL}() From the glibc sources, so that we can keep the tooling buildable in older systems while using recent sched.h CPU_ macros. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lkml.kernel.org/n/tip-hvm9ysmrjip75ebdzhzoh429@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpu-set-sched.h | 50 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 tools/perf/util/cpu-set-sched.h (limited to 'tools/perf/util') diff --git a/tools/perf/util/cpu-set-sched.h b/tools/perf/util/cpu-set-sched.h new file mode 100644 index 000000000000..8cf4e40d322a --- /dev/null +++ b/tools/perf/util/cpu-set-sched.h @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: LGPL-2.1 +// Definitions taken from glibc for use with older systems, same licensing. +#ifndef _CPU_SET_SCHED_PERF_H +#define _CPU_SET_SCHED_PERF_H + +#include +#include + +#ifndef CPU_EQUAL +#ifndef __CPU_EQUAL_S +#if __GNUC_PREREQ (2, 91) +# define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \ + (__builtin_memcmp (cpusetp1, cpusetp2, setsize) == 0) +#else +# define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \ + (__extension__ \ + ({ const __cpu_mask *__arr1 = (cpusetp1)->__bits; \ + const __cpu_mask *__arr2 = (cpusetp2)->__bits; \ + size_t __imax = (setsize) / sizeof (__cpu_mask); \ + size_t __i; \ + for (__i = 0; __i < __imax; ++__i) \ + if (__arr1[__i] != __arr2[__i]) \ + break; \ + __i == __imax; })) +#endif +#endif // __CPU_EQUAL_S + +#define CPU_EQUAL(cpusetp1, cpusetp2) \ + __CPU_EQUAL_S (sizeof (cpu_set_t), cpusetp1, cpusetp2) +#endif // CPU_EQUAL + +#ifndef CPU_OR +#ifndef __CPU_OP_S +#define __CPU_OP_S(setsize, destset, srcset1, srcset2, op) \ + (__extension__ \ + ({ cpu_set_t *__dest = (destset); \ + const __cpu_mask *__arr1 = (srcset1)->__bits; \ + const __cpu_mask *__arr2 = (srcset2)->__bits; \ + size_t __imax = (setsize) / sizeof (__cpu_mask); \ + size_t __i; \ + for (__i = 0; __i < __imax; ++__i) \ + ((__cpu_mask *) __dest->__bits)[__i] = __arr1[__i] op __arr2[__i]; \ + __dest; })) +#endif // __CPU_OP_S + +#define CPU_OR(destset, srcset1, srcset2) \ + __CPU_OP_S (sizeof (cpu_set_t), destset, srcset1, srcset2, |) +#endif // CPU_OR + +#endif // _CPU_SET_SCHED_PERF_H -- cgit v1.2.3 From f13de6609a9a25ce4d6dc37c4427f5bc90072fb0 Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Tue, 22 Jan 2019 20:50:57 +0300 Subject: perf record: Apply affinity masks when reading mmap buffers Build node cpu masks for mmap data buffers. Apply node cpu masks to tool thread every time it references data buffers cross node or cross cpu. Signed-off-by: Alexey Budankov Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/b25e4ebc-078d-2c7b-216c-f0bed108d073@linux.intel.com [ Use cpu-set-sched.h to get the CPU_{EQUAL,OR}() fallbacks for older systems ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 15 +++++++++++++++ tools/perf/util/cpumap.c | 10 ++++++++++ tools/perf/util/cpumap.h | 1 + tools/perf/util/mmap.c | 28 +++++++++++++++++++++++++++- 4 files changed, 53 insertions(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 7ced5f3e8100..3fdfbaebd95e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -38,6 +38,7 @@ #include "util/bpf-loader.h" #include "util/trigger.h" #include "util/perf-hooks.h" +#include "util/cpu-set-sched.h" #include "util/time-utils.h" #include "util/units.h" #include "util/bpf-event.h" @@ -536,6 +537,9 @@ static int record__mmap_evlist(struct record *rec, struct record_opts *opts = &rec->opts; char msg[512]; + if (opts->affinity != PERF_AFFINITY_SYS) + cpu__setup_cpunode_map(); + if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, opts->auxtrace_mmap_pages, opts->auxtrace_snapshot_mode, @@ -719,6 +723,16 @@ static struct perf_event_header finished_round_event = { .type = PERF_RECORD_FINISHED_ROUND, }; +static void record__adjust_affinity(struct record *rec, struct perf_mmap *map) +{ + if (rec->opts.affinity != PERF_AFFINITY_SYS && + !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) { + CPU_ZERO(&rec->affinity_mask); + CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask); + sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask); + } +} + static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, bool overwrite) { @@ -746,6 +760,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli struct perf_mmap *map = &maps[i]; if (map->base) { + record__adjust_affinity(rec, map); if (!record__aio_enabled(rec)) { if (perf_mmap__push(map, rec, record__pushfn) != 0) { rc = -1; diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 383674f448fc..0bbc3feb0894 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -730,3 +730,13 @@ size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size) buf[size - 1] = '\0'; return ptr - buf; } + +const struct cpu_map *cpu_map__online(void) /* thread unsafe */ +{ + static const struct cpu_map *online = NULL; + + if (!online) + online = cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */ + + return online; +} diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index ed8999d1a640..f00ce624b9f7 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -29,6 +29,7 @@ int cpu_map__get_core_id(int cpu); int cpu_map__get_core(struct cpu_map *map, int idx, void *data); int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); +const struct cpu_map *cpu_map__online(void); /* thread unsafe */ struct cpu_map *cpu_map__get(struct cpu_map *map); void cpu_map__put(struct cpu_map *map); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index d882f43148c3..cdc7740fc181 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -383,6 +383,32 @@ void perf_mmap__munmap(struct perf_mmap *map) auxtrace_mmap__munmap(&map->auxtrace_mmap); } +static void build_node_mask(int node, cpu_set_t *mask) +{ + int c, cpu, nr_cpus; + const struct cpu_map *cpu_map = NULL; + + cpu_map = cpu_map__online(); + if (!cpu_map) + return; + + nr_cpus = cpu_map__nr(cpu_map); + for (c = 0; c < nr_cpus; c++) { + cpu = cpu_map->map[c]; /* map c index to online cpu index */ + if (cpu__get_node(cpu) == node) + CPU_SET(cpu, mask); + } +} + +static void perf_mmap__setup_affinity_mask(struct perf_mmap *map, struct mmap_params *mp) +{ + CPU_ZERO(&map->affinity_mask); + if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1) + build_node_mask(cpu__get_node(map->cpu), &map->affinity_mask); + else if (mp->affinity == PERF_AFFINITY_CPU) + CPU_SET(map->cpu, &map->affinity_mask); +} + int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu) { /* @@ -412,7 +438,7 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c map->fd = fd; map->cpu = cpu; - CPU_ZERO(&map->affinity_mask); + perf_mmap__setup_affinity_mask(map, mp); if (auxtrace_mmap__mmap(&map->auxtrace_mmap, &mp->auxtrace_mp, map->base, fd)) -- cgit v1.2.3 From 5f40fa97669bb3d97a43cd5c8f69f520d8dcb106 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 4 Feb 2019 11:04:20 -0300 Subject: perf clang: Do not use 'return std::move(something)' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It prevents copy elision, generating this warning when building with fedora:rawhide's clang: clang version 7.0.1 (Fedora 7.0.1-2.fc30) Target: x86_64-unknown-linux-gnu Thread model: posix InstalledDir: /usr/bin Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-redhat-linux/9 Found candidate GCC installation: /usr/lib/gcc/x86_64-redhat-linux/9 Selected GCC installation: /usr/bin/../lib/gcc/x86_64-redhat-linux/9 Candidate multilib: .;@m64 Candidate multilib: 32;@m32 Selected multilib: .;@m64 $ make -C tools/perf CC=clang LIBCLANGLLVM=1 util/c++/clang.cpp: In function 'std::unique_ptr > perf::getBPFObjectFromModule(llvm::Module*)': util/c++/clang.cpp:163:18: error: moving a local object in a return statement prevents copy elision [-Werror=pessimizing-move] 163 | return std::move(Buffer); | ~~~~~~~~~^~~~~~~~ util/c++/clang.cpp:163:18: note: remove 'std::move' call cc1plus: all warnings being treated as errors References: http://www.cplusplus.com/forum/general/186411/#msg908572 https://en.cppreference.com/w/cpp/language/return#Notes https://en.cppreference.com/w/cpp/language/copy_elision Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-lehqf5x5q96l0o8myhb6blz6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/c++/clang.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp index 89512504551b..39c0004f2886 100644 --- a/tools/perf/util/c++/clang.cpp +++ b/tools/perf/util/c++/clang.cpp @@ -160,7 +160,7 @@ getBPFObjectFromModule(llvm::Module *Module) } PM.run(*Module); - return std::move(Buffer); + return Buffer; } } -- cgit v1.2.3 From e4c38fd4a0f594d8ce0313db028af98b36edc8b3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 4 Feb 2019 15:18:06 +0100 Subject: perf hists: Add argument to hists__resort_cb_t callback Add argument to hists__resort_cb_t so that we can pass data from upper layers to the callback function. It will be used in the following patches. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Jin Yao Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190204141808.23031-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 8 ++++---- tools/perf/util/hist.c | 11 ++++++----- tools/perf/util/hist.h | 2 +- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index b2bf117881f1..efaaab23c6fd 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1970,7 +1970,7 @@ static void calc_width(struct c2c_hist_entry *c2c_he) set_nodestr(c2c_he); } -static int filter_cb(struct hist_entry *he) +static int filter_cb(struct hist_entry *he, void *arg __maybe_unused) { struct c2c_hist_entry *c2c_he; @@ -1987,7 +1987,7 @@ static int filter_cb(struct hist_entry *he) return 0; } -static int resort_cl_cb(struct hist_entry *he) +static int resort_cl_cb(struct hist_entry *he, void *arg __maybe_unused) { struct c2c_hist_entry *c2c_he; struct c2c_hists *c2c_hists; @@ -2074,7 +2074,7 @@ static int setup_nodes(struct perf_session *session) #define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm) -static int resort_hitm_cb(struct hist_entry *he) +static int resort_hitm_cb(struct hist_entry *he, void *arg __maybe_unused) { struct c2c_hist_entry *c2c_he; c2c_he = container_of(he, struct c2c_hist_entry, he); @@ -2096,7 +2096,7 @@ static int hists__iterate_cb(struct hists *hists, hists__resort_cb_t cb) struct hist_entry *he; he = rb_entry(next, struct hist_entry, rb_node); - ret = cb(he); + ret = cb(he, NULL); if (ret) break; next = rb_next(&he->rb_node); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 7f9df74ab893..64ad603a0592 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1721,7 +1721,8 @@ static void __hists__insert_output_entry(struct rb_root_cached *entries, } static void output_resort(struct hists *hists, struct ui_progress *prog, - bool use_callchain, hists__resort_cb_t cb) + bool use_callchain, hists__resort_cb_t cb, + void *cb_arg) { struct rb_root_cached *root; struct rb_node *next; @@ -1760,7 +1761,7 @@ static void output_resort(struct hists *hists, struct ui_progress *prog, n = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&n->rb_node_in); - if (cb && cb(n)) + if (cb && cb(n, cb_arg)) continue; __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain); @@ -1785,18 +1786,18 @@ void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *pro use_callchain |= symbol_conf.show_branchflag_count; - output_resort(evsel__hists(evsel), prog, use_callchain, NULL); + output_resort(evsel__hists(evsel), prog, use_callchain, NULL, NULL); } void hists__output_resort(struct hists *hists, struct ui_progress *prog) { - output_resort(hists, prog, symbol_conf.use_callchain, NULL); + output_resort(hists, prog, symbol_conf.use_callchain, NULL, NULL); } void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog, hists__resort_cb_t cb) { - output_resort(hists, prog, symbol_conf.use_callchain, cb); + output_resort(hists, prog, symbol_conf.use_callchain, cb, NULL); } static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 6b872079ead5..69a367d4aeaf 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -163,7 +163,7 @@ int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, struct perf_hpp_fmt *fmt, int printed); void hist_entry__delete(struct hist_entry *he); -typedef int (*hists__resort_cb_t)(struct hist_entry *he); +typedef int (*hists__resort_cb_t)(struct hist_entry *he, void *arg); void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog); void hists__output_resort(struct hists *hists, struct ui_progress *prog); -- cgit v1.2.3 From 5749618764c2dbbb0091035d84dd763529afe4e6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 4 Feb 2019 15:18:07 +0100 Subject: perf evsel: Add output_resort_cb method Add perf_evsel__output_resort_cb() so we have an interface with a callback for each hist entry. It will be used in the following patch. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Jin Yao Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190204141808.23031-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 10 ++++++++-- tools/perf/util/hist.h | 2 ++ 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 64ad603a0592..669f961316f0 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1775,7 +1775,8 @@ static void output_resort(struct hists *hists, struct ui_progress *prog, } } -void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog) +void perf_evsel__output_resort_cb(struct perf_evsel *evsel, struct ui_progress *prog, + hists__resort_cb_t cb, void *cb_arg) { bool use_callchain; @@ -1786,7 +1787,12 @@ void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *pro use_callchain |= symbol_conf.show_branchflag_count; - output_resort(evsel__hists(evsel), prog, use_callchain, NULL, NULL); + output_resort(evsel__hists(evsel), prog, use_callchain, cb, cb_arg); +} + +void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog) +{ + return perf_evsel__output_resort_cb(evsel, prog, NULL, NULL); } void hists__output_resort(struct hists *hists, struct ui_progress *prog) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 69a367d4aeaf..4af27fbab24f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -165,6 +165,8 @@ void hist_entry__delete(struct hist_entry *he); typedef int (*hists__resort_cb_t)(struct hist_entry *he, void *arg); +void perf_evsel__output_resort_cb(struct perf_evsel *evsel, struct ui_progress *prog, + hists__resort_cb_t cb, void *cb_arg); void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog); void hists__output_resort(struct hists *hists, struct ui_progress *prog); void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog, -- cgit v1.2.3 From dbd2a1d57f4ee6b909c94072ae9484199308acbd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 4 Feb 2019 15:18:08 +0100 Subject: perf report: Move symbol annotation to the resort phase Currently we make the annotation for the IPC column during the entry display, already outside of the progress bar scope, so it appears like 'perf report' is stuck. Move the annotation retrieval to the resort phase, so that all the data are ready for display. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Jin Yao Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190204141808.23031-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 21 +++++++++++++++++++-- tools/perf/util/sort.c | 7 ------- 2 files changed, 19 insertions(+), 9 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index a007ea9a3874..2e8c74d6430c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -616,6 +616,21 @@ static int report__collapse_hists(struct report *rep) return ret; } +static int hists__resort_cb(struct hist_entry *he, void *arg) +{ + struct report *rep = arg; + struct symbol *sym = he->ms.sym; + + if (rep->symbol_ipc && sym && !sym->annotate2) { + struct perf_evsel *evsel = hists_to_evsel(he->hists); + + symbol__annotate2(sym, he->ms.map, evsel, + &annotation__default_options, NULL); + } + + return 0; +} + static void report__output_resort(struct report *rep) { struct ui_progress prog; @@ -623,8 +638,10 @@ static void report__output_resort(struct report *rep) ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); - evlist__for_each_entry(rep->session->evlist, pos) - perf_evsel__output_resort(pos, &prog); + evlist__for_each_entry(rep->session->evlist, pos) { + perf_evsel__output_resort_cb(pos, &prog, + hists__resort_cb, rep); + } ui_progress__finish(); } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 79e794406bef..2b6c1ccb878c 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -429,8 +429,6 @@ static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf, { struct symbol *sym = he->ms.sym; - struct map *map = he->ms.map; - struct perf_evsel *evsel = hists_to_evsel(he->hists); struct annotation *notes; double ipc = 0.0, coverage = 0.0; char tmp[64]; @@ -438,11 +436,6 @@ static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf, if (!sym) return repsep_snprintf(bf, size, "%-*s", width, "-"); - if (!sym->annotate2 && symbol__annotate2(sym, map, evsel, - &annotation__default_options, NULL) < 0) { - return 0; - } - notes = symbol__annotation(sym); if (notes->hit_cycles) -- cgit v1.2.3 From ca45d843a599b0812a655ba58d42dddacd538e31 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 29 Jan 2019 20:28:35 +0800 Subject: perf cs-etm: Add last instruction information in packet Decoder provides last instruction related information, these information can be used for trace analysis; specifically we can get to know what kind of branch instruction has been executed, mainly the information are contained in three element fields: last_i_type: this is significant type for waypoint calculation, it indicates the last instruction is one of immediate branch instruction, indirect branch instruction, instruction barrier (ISB), or data barrier (DSB/DMB). last_i_subtype: this is used for instruction sub type, it can be branch with link, ARMv8 return instruction, ARMv8 eret instruction (return from exception), or ARMv7 instruction which could imply return (e.g. MOV PC, LR; POP { ,PC}). last_instr_cond: it indicates if the last instruction was conditional. But these three fields are not saved into cs_etm_packet struct, thus cs-etm layer don't know related information and cannot generate sample flags for branch instructions. This patch add corresponding three new fields in cs_etm_packet struct and save related value into the packet structure, it is preparation for supporting sample flags. Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mike Leach Cc: Namhyung Kim Cc: Robert Walker Cc: Suzuki K Poulouse Cc: coresight ml Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190129122842.32041-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 9 +++++++++ tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 3 +++ 2 files changed, 12 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 8c155575c6c5..8a19310500d9 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -290,6 +290,9 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) decoder->packet_buffer[i].instr_count = 0; decoder->packet_buffer[i].last_instr_taken_branch = false; decoder->packet_buffer[i].last_instr_size = 0; + decoder->packet_buffer[i].last_instr_type = 0; + decoder->packet_buffer[i].last_instr_subtype = 0; + decoder->packet_buffer[i].last_instr_cond = 0; decoder->packet_buffer[i].cpu = INT_MIN; } } @@ -323,6 +326,9 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, decoder->packet_buffer[et].instr_count = 0; decoder->packet_buffer[et].last_instr_taken_branch = false; decoder->packet_buffer[et].last_instr_size = 0; + decoder->packet_buffer[et].last_instr_type = 0; + decoder->packet_buffer[et].last_instr_subtype = 0; + decoder->packet_buffer[et].last_instr_cond = 0; if (decoder->packet_count == MAX_BUFFER - 1) return OCSD_RESP_WAIT; @@ -366,6 +372,9 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, packet->start_addr = elem->st_addr; packet->end_addr = elem->en_addr; packet->instr_count = elem->num_instr_range; + packet->last_instr_type = elem->last_i_type; + packet->last_instr_subtype = elem->last_i_subtype; + packet->last_instr_cond = elem->last_instr_cond; switch (elem->last_i_type) { case OCSD_INSTR_BR: diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index a6407d41598f..7cdd6a9c68a7 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -43,6 +43,9 @@ struct cs_etm_packet { u64 start_addr; u64 end_addr; u32 instr_count; + u32 last_instr_type; + u32 last_instr_subtype; + u8 last_instr_cond; u8 last_instr_taken_branch; u8 last_instr_size; int cpu; -- cgit v1.2.3 From 06220bf472f2bd8f67bb776df73090945ee786ab Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 29 Jan 2019 20:28:36 +0800 Subject: perf cs-etm: Set sample flags for instruction range packet The perf sample data contains flags to indicate the hardware trace data is belonging to which type branch instruction, thus this can be used to print out the human readable string. Arm CoreSight ETM sample data is missed to set flags and it is always set to zeros, this results in perf tool skips to print string for instruction types. This patch is to set branch instruction flags for instruction range packet. Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mike Leach Cc: Namhyung Kim Cc: Robert Walker Cc: Suzuki K Poulouse Cc: coresight ml Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190129122842.32041-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 2 + tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 1 + tools/perf/util/cs-etm.c | 90 ++++++++++++++++++++++++- 3 files changed, 91 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 8a19310500d9..e98ee49a1527 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -293,6 +293,7 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) decoder->packet_buffer[i].last_instr_type = 0; decoder->packet_buffer[i].last_instr_subtype = 0; decoder->packet_buffer[i].last_instr_cond = 0; + decoder->packet_buffer[i].flags = 0; decoder->packet_buffer[i].cpu = INT_MIN; } } @@ -329,6 +330,7 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, decoder->packet_buffer[et].last_instr_type = 0; decoder->packet_buffer[et].last_instr_subtype = 0; decoder->packet_buffer[et].last_instr_cond = 0; + decoder->packet_buffer[et].flags = 0; if (decoder->packet_count == MAX_BUFFER - 1) return OCSD_RESP_WAIT; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 7cdd6a9c68a7..23600e57a215 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -45,6 +45,7 @@ struct cs_etm_packet { u32 instr_count; u32 last_instr_type; u32 last_instr_subtype; + u32 flags; u8 last_instr_cond; u8 last_instr_taken_branch; u8 last_instr_size; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 27a374ddf661..d05cac5295f1 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -12,6 +12,7 @@ #include #include +#include #include #include "auxtrace.h" @@ -719,7 +720,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, sample.stream_id = etmq->etm->instructions_id; sample.period = period; sample.cpu = etmq->packet->cpu; - sample.flags = 0; + sample.flags = etmq->prev_packet->flags; sample.insn_len = 1; sample.cpumode = event->sample.header.misc; @@ -778,7 +779,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) sample.stream_id = etmq->etm->branches_id; sample.period = 1; sample.cpu = etmq->packet->cpu; - sample.flags = 0; + sample.flags = etmq->prev_packet->flags; sample.cpumode = event->sample.header.misc; /* @@ -1107,6 +1108,80 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq) return 0; } +static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) +{ + struct cs_etm_packet *packet = etmq->packet; + + switch (packet->sample_type) { + case CS_ETM_RANGE: + /* + * Immediate branch instruction without neither link nor + * return flag, it's normal branch instruction within + * the function. + */ + if (packet->last_instr_type == OCSD_INSTR_BR && + packet->last_instr_subtype == OCSD_S_INSTR_NONE) { + packet->flags = PERF_IP_FLAG_BRANCH; + + if (packet->last_instr_cond) + packet->flags |= PERF_IP_FLAG_CONDITIONAL; + } + + /* + * Immediate branch instruction with link (e.g. BL), this is + * branch instruction for function call. + */ + if (packet->last_instr_type == OCSD_INSTR_BR && + packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) + packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_CALL; + + /* + * Indirect branch instruction with link (e.g. BLR), this is + * branch instruction for function call. + */ + if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && + packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) + packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_CALL; + + /* + * Indirect branch instruction with subtype of + * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for + * function return for A32/T32. + */ + if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && + packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET) + packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_RETURN; + + /* + * Indirect branch instruction without link (e.g. BR), usually + * this is used for function return, especially for functions + * within dynamic link lib. + */ + if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && + packet->last_instr_subtype == OCSD_S_INSTR_NONE) + packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_RETURN; + + /* Return instruction for function return. */ + if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && + packet->last_instr_subtype == OCSD_S_INSTR_V8_RET) + packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_RETURN; + break; + case CS_ETM_DISCONTINUITY: + case CS_ETM_EXCEPTION: + case CS_ETM_EXCEPTION_RET: + case CS_ETM_EMPTY: + default: + break; + } + + return 0; +} + static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { struct cs_etm_auxtrace *etm = etmq->etm; @@ -1158,6 +1233,17 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) */ break; + /* + * Since packet addresses are swapped in packet + * handling within below switch() statements, + * thus setting sample flags must be called + * prior to switch() statement to use address + * information before packets swapping. + */ + err = cs_etm__set_sample_flags(etmq); + if (err < 0) + break; + switch (etmq->packet->sample_type) { case CS_ETM_RANGE: /* -- cgit v1.2.3 From 465eaaa89e2b7e450d53f4d3a6845f0c595a5f59 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 29 Jan 2019 20:28:37 +0800 Subject: perf cs-etm: Set sample flags for trace discontinuity In the middle of trace stream, it might be interrupted thus the trace data is not continuous, the trace stream firstly is ended for previous trace block and restarted for next block. To display related information for showing trace is restarted, this patch set sample flags for trace discontinuity: - If one discontinuity packet is coming, append flag PERF_IP_FLAG_TRACE_END to the previous packet to indicate the trace has been ended; - If one instruction packet is following discontinuity packet, this instruction packet is the first one packet to restarting trace. So set flag PERF_IP_FLAG_TRACE_START to discontinuity packet, this flag will be used to generate sample when connect with the sequential instruction packet. Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mike Leach Cc: Namhyung Kim Cc: Robert Walker Cc: Suzuki K Poulouse Cc: coresight ml Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190129122842.32041-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index d05cac5295f1..1aa29633ce77 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1111,6 +1111,7 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq) static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) { struct cs_etm_packet *packet = etmq->packet; + struct cs_etm_packet *prev_packet = etmq->prev_packet; switch (packet->sample_type) { case CS_ETM_RANGE: @@ -1170,8 +1171,26 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) packet->last_instr_subtype == OCSD_S_INSTR_V8_RET) packet->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN; + + /* + * Decoder might insert a discontinuity in the middle of + * instruction packets, fixup prev_packet with flag + * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace. + */ + if (prev_packet->sample_type == CS_ETM_DISCONTINUITY) + prev_packet->flags |= PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_BEGIN; break; case CS_ETM_DISCONTINUITY: + /* + * The trace is discontinuous, if the previous packet is + * instruction packet, set flag PERF_IP_FLAG_TRACE_END + * for previous packet. + */ + if (prev_packet->sample_type == CS_ETM_RANGE) + prev_packet->flags |= PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_END; + break; case CS_ETM_EXCEPTION: case CS_ETM_EXCEPTION_RET: case CS_ETM_EMPTY: -- cgit v1.2.3 From 47106e7413f9fc10780d1ec7df3eaa725df6ebe8 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 29 Jan 2019 20:28:38 +0800 Subject: perf cs-etm: Add exception number in exception packet When an exception packet comes, it contains the information for exception number; the exception number indicates the exception types, so from it we can know if the exception is taken for interrupt, system call or other traps, etc. This patch simply adds a field in cs_etm_packet struct, it records exception number for exception packet that will then be used to properly identify exception types to the perf synthesize mechanic. Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mike Leach Cc: Namhyung Kim Cc: Robert Walker Cc: Suzuki K Poulouse Cc: coresight ml Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190129122842.32041-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 20 ++++++++++++++++---- tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 1 + 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index e98ee49a1527..294efa76c9e3 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -294,6 +294,7 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) decoder->packet_buffer[i].last_instr_subtype = 0; decoder->packet_buffer[i].last_instr_cond = 0; decoder->packet_buffer[i].flags = 0; + decoder->packet_buffer[i].exception_number = UINT32_MAX; decoder->packet_buffer[i].cpu = INT_MIN; } } @@ -331,6 +332,7 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, decoder->packet_buffer[et].last_instr_subtype = 0; decoder->packet_buffer[et].last_instr_cond = 0; decoder->packet_buffer[et].flags = 0; + decoder->packet_buffer[et].exception_number = UINT32_MAX; if (decoder->packet_count == MAX_BUFFER - 1) return OCSD_RESP_WAIT; @@ -406,10 +408,20 @@ cs_etm_decoder__buffer_discontinuity(struct cs_etm_decoder *decoder, static ocsd_datapath_resp_t cs_etm_decoder__buffer_exception(struct cs_etm_decoder *decoder, + const ocsd_generic_trace_elem *elem, const uint8_t trace_chan_id) -{ - return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, - CS_ETM_EXCEPTION); +{ int ret = 0; + struct cs_etm_packet *packet; + + ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + CS_ETM_EXCEPTION); + if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) + return ret; + + packet = &decoder->packet_buffer[decoder->tail]; + packet->exception_number = elem->exception_number; + + return ret; } static ocsd_datapath_resp_t @@ -443,7 +455,7 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( trace_chan_id); break; case OCSD_GEN_TRC_ELEM_EXCEPTION: - resp = cs_etm_decoder__buffer_exception(decoder, + resp = cs_etm_decoder__buffer_exception(decoder, elem, trace_chan_id); break; case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 23600e57a215..012b4728a46f 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -46,6 +46,7 @@ struct cs_etm_packet { u32 last_instr_type; u32 last_instr_subtype; u32 flags; + u32 exception_number; u8 last_instr_cond; u8 last_instr_taken_branch; u8 last_instr_size; -- cgit v1.2.3 From 95c6fe970a0160cb770c5dce9f80311b42d030c0 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 29 Jan 2019 20:28:39 +0800 Subject: perf cs-etm: Change tuple from traceID-CPU# to traceID-metadata If packet processing wants to know the packet is bound with which ETM version, it needs to access metadata to decide that based on metadata magic number; but we cannot simply to use CPU logic ID number as index to access metadata sequential array, especially when system have hotplugged off CPUs, the metadata array are only allocated for online CPUs but not offline CPUs, so the CPU logic number doesn't match with its index in the array. This patch is to change tuple from traceID-CPU# to traceID-metadata, thus it can use the tuple to retrieve metadata pointer according to traceID. For safe accessing metadata fields, this patch provides helper function cs_etm__get_cpu() which is used to return CPU number according to traceID; cs_etm_decoder__buffer_packet() is the first consumer for this helper function. Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mike Leach Cc: Namhyung Kim Cc: Robert Walker Cc: Suzuki K Poulouse Cc: coresight ml Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190129122842.32041-6-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 8 +++----- tools/perf/util/cs-etm.c | 26 +++++++++++++++++++------ tools/perf/util/cs-etm.h | 9 ++++++++- 3 files changed, 31 insertions(+), 12 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 294efa76c9e3..cdd38ffd10d2 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -305,14 +305,12 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, enum cs_etm_sample_type sample_type) { u32 et = 0; - struct int_node *inode = NULL; + int cpu; if (decoder->packet_count >= MAX_BUFFER - 1) return OCSD_RESP_FATAL_SYS_ERR; - /* Search the RB tree for the cpu associated with this traceID */ - inode = intlist__find(traceid_list, trace_chan_id); - if (!inode) + if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0) return OCSD_RESP_FATAL_SYS_ERR; et = decoder->tail; @@ -322,7 +320,7 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, decoder->packet_buffer[et].sample_type = sample_type; decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; - decoder->packet_buffer[et].cpu = *((int *)inode->priv); + decoder->packet_buffer[et].cpu = cpu; decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; decoder->packet_buffer[et].instr_count = 0; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 1aa29633ce77..a5497a761db7 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -97,6 +97,20 @@ static u32 cs_etm__get_v7_protocol_version(u32 etmidr) return CS_ETM_PROTO_ETMV3; } +int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) +{ + struct int_node *inode; + u64 *metadata; + + inode = intlist__find(traceid_list, trace_chan_id); + if (!inode) + return -EINVAL; + + metadata = inode->priv; + *cpu = (int)metadata[CS_ETM_CPU]; + return 0; +} + static void cs_etm__packet_dump(const char *pkt_string) { const char *color = PERF_COLOR_BLUE; @@ -252,7 +266,7 @@ static void cs_etm__free(struct perf_session *session) cs_etm__free_events(session); session->auxtrace = NULL; - /* First remove all traceID/CPU# nodes for the RB tree */ + /* First remove all traceID/metadata nodes for the RB tree */ intlist__for_each_entry_safe(inode, tmp, traceid_list) intlist__remove(traceid_list, inode); /* Then the RB tree itself */ @@ -1519,9 +1533,9 @@ int cs_etm__process_auxtrace_info(union perf_event *event, 0xffffffff); /* - * Create an RB tree for traceID-CPU# tuple. Since the conversion has - * to be made for each packet that gets decoded, optimizing access in - * anything other than a sequential array is worth doing. + * Create an RB tree for traceID-metadata tuple. Since the conversion + * has to be made for each packet that gets decoded, optimizing access + * in anything other than a sequential array is worth doing. */ traceid_list = intlist__new(NULL); if (!traceid_list) { @@ -1587,8 +1601,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event, err = -EINVAL; goto err_free_metadata; } - /* All good, associate the traceID with the CPU# */ - inode->priv = &metadata[j][CS_ETM_CPU]; + /* All good, associate the traceID with the metadata pointer */ + inode->priv = metadata[j]; } /* diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 37f8d48179ca..fb5fc6538b7f 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -53,7 +53,7 @@ enum { CS_ETMV4_PRIV_MAX, }; -/* RB tree for quick conversion between traceID and CPUs */ +/* RB tree for quick conversion between traceID and metadata pointers */ struct intlist *traceid_list; #define KiB(x) ((x) * 1024) @@ -69,6 +69,7 @@ static const u64 __perf_cs_etmv4_magic = 0x4040404040404040ULL; #ifdef HAVE_CSTRACE_SUPPORT int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session); +int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); #else static inline int cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, @@ -76,6 +77,12 @@ cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, { return -1; } + +static inline int cs_etm__get_cpu(u8 trace_chan_id __maybe_unused, + int *cpu __maybe_unused) +{ + return -1; +} #endif #endif -- cgit v1.2.3 From 03919e526b29be8d0c77142e008b397a2c089398 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 29 Jan 2019 20:28:40 +0800 Subject: perf cs-etm: Add traceID in packet Add traceID in packet, thus we can use traceID to retrieve metadata pointer from traceID-metadata tuple. Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mike Leach Cc: Namhyung Kim Cc: Robert Walker Cc: Suzuki K Poulouse Cc: coresight ml Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190129122842.32041-7-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 2 ++ tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 1 + 2 files changed, 3 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index cdd38ffd10d2..ba4c623cd8de 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -295,6 +295,7 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) decoder->packet_buffer[i].last_instr_cond = 0; decoder->packet_buffer[i].flags = 0; decoder->packet_buffer[i].exception_number = UINT32_MAX; + decoder->packet_buffer[i].trace_chan_id = UINT8_MAX; decoder->packet_buffer[i].cpu = INT_MIN; } } @@ -331,6 +332,7 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, decoder->packet_buffer[et].last_instr_cond = 0; decoder->packet_buffer[et].flags = 0; decoder->packet_buffer[et].exception_number = UINT32_MAX; + decoder->packet_buffer[et].trace_chan_id = trace_chan_id; if (decoder->packet_count == MAX_BUFFER - 1) return OCSD_RESP_WAIT; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 012b4728a46f..7e6a8850be4a 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -50,6 +50,7 @@ struct cs_etm_packet { u8 last_instr_cond; u8 last_instr_taken_branch; u8 last_instr_size; + u8 trace_chan_id; int cpu; }; -- cgit v1.2.3 From 96dce7f4f38793b80b1ddfa48d5dbc1aba40df84 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 29 Jan 2019 20:28:41 +0800 Subject: perf cs-etm: Set sample flags for exception packet The exception taken and returning are typical flow for instruction jump but it needs to be handled with exception packets. This patch is to set sample flags for exception packet. Since the exception packet contains the exception number, according to the exception number this patch makes decision for belonging to which exception types. The decoder have defined different exception number for ETMv3 and ETMv4 separately, hence this patch needs firstly decide the ETM version by using the metadata magic number, and this patch adds helper function cs_etm__get_magic() for easily getting magic number. Based on different ETM version, the exception packet contains the exception number, according to the exception number this patch makes decision for the exception belonging to which exception types. In this patch, it introduces helper function cs_etm__is_svc_instr(); for ETMv4 CS_ETMV4_EXC_CALL covers SVC, SMC and HVC cases in the single exception number, thus need to use cs_etm__is_svc_instr() to decide an exception taken for system call. Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Reviewed-by: Robert Walker Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki K Poulouse Cc: coresight ml Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190129122842.32041-8-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 215 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/cs-etm.h | 44 ++++++++++ 2 files changed, 259 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index a5497a761db7..a714b31656ea 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -97,6 +97,20 @@ static u32 cs_etm__get_v7_protocol_version(u32 etmidr) return CS_ETM_PROTO_ETMV3; } +static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic) +{ + struct int_node *inode; + u64 *metadata; + + inode = intlist__find(traceid_list, trace_chan_id); + if (!inode) + return -EINVAL; + + metadata = inode->priv; + *magic = metadata[CS_ETM_MAGIC]; + return 0; +} + int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) { struct int_node *inode; @@ -1122,10 +1136,174 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq) return 0; } +static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, + struct cs_etm_packet *packet, + u64 end_addr) +{ + u16 instr16; + u32 instr32; + u64 addr; + + switch (packet->isa) { + case CS_ETM_ISA_T32: + /* + * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247: + * + * b'15 b'8 + * +-----------------+--------+ + * | 1 1 0 1 1 1 1 1 | imm8 | + * +-----------------+--------+ + * + * According to the specifiction, it only defines SVC for T32 + * with 16 bits instruction and has no definition for 32bits; + * so below only read 2 bytes as instruction size for T32. + */ + addr = end_addr - 2; + cs_etm__mem_access(etmq, addr, sizeof(instr16), (u8 *)&instr16); + if ((instr16 & 0xFF00) == 0xDF00) + return true; + + break; + case CS_ETM_ISA_A32: + /* + * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247: + * + * b'31 b'28 b'27 b'24 + * +---------+---------+-------------------------+ + * | !1111 | 1 1 1 1 | imm24 | + * +---------+---------+-------------------------+ + */ + addr = end_addr - 4; + cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); + if ((instr32 & 0x0F000000) == 0x0F000000 && + (instr32 & 0xF0000000) != 0xF0000000) + return true; + + break; + case CS_ETM_ISA_A64: + /* + * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294: + * + * b'31 b'21 b'4 b'0 + * +-----------------------+---------+-----------+ + * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 | + * +-----------------------+---------+-----------+ + */ + addr = end_addr - 4; + cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); + if ((instr32 & 0xFFE0001F) == 0xd4000001) + return true; + + break; + case CS_ETM_ISA_UNKNOWN: + default: + break; + } + + return false; +} + +static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic) +{ + struct cs_etm_packet *packet = etmq->packet; + struct cs_etm_packet *prev_packet = etmq->prev_packet; + + if (magic == __perf_cs_etmv3_magic) + if (packet->exception_number == CS_ETMV3_EXC_SVC) + return true; + + /* + * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and + * HVC cases; need to check if it's SVC instruction based on + * packet address. + */ + if (magic == __perf_cs_etmv4_magic) { + if (packet->exception_number == CS_ETMV4_EXC_CALL && + cs_etm__is_svc_instr(etmq, prev_packet, + prev_packet->end_addr)) + return true; + } + + return false; +} + +static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic) +{ + struct cs_etm_packet *packet = etmq->packet; + + if (magic == __perf_cs_etmv3_magic) + if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT || + packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT || + packet->exception_number == CS_ETMV3_EXC_PE_RESET || + packet->exception_number == CS_ETMV3_EXC_IRQ || + packet->exception_number == CS_ETMV3_EXC_FIQ) + return true; + + if (magic == __perf_cs_etmv4_magic) + if (packet->exception_number == CS_ETMV4_EXC_RESET || + packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT || + packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR || + packet->exception_number == CS_ETMV4_EXC_INST_DEBUG || + packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG || + packet->exception_number == CS_ETMV4_EXC_IRQ || + packet->exception_number == CS_ETMV4_EXC_FIQ) + return true; + + return false; +} + +static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic) +{ + struct cs_etm_packet *packet = etmq->packet; + struct cs_etm_packet *prev_packet = etmq->prev_packet; + + if (magic == __perf_cs_etmv3_magic) + if (packet->exception_number == CS_ETMV3_EXC_SMC || + packet->exception_number == CS_ETMV3_EXC_HYP || + packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE || + packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR || + packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT || + packet->exception_number == CS_ETMV3_EXC_DATA_FAULT || + packet->exception_number == CS_ETMV3_EXC_GENERIC) + return true; + + if (magic == __perf_cs_etmv4_magic) { + if (packet->exception_number == CS_ETMV4_EXC_TRAP || + packet->exception_number == CS_ETMV4_EXC_ALIGNMENT || + packet->exception_number == CS_ETMV4_EXC_INST_FAULT || + packet->exception_number == CS_ETMV4_EXC_DATA_FAULT) + return true; + + /* + * For CS_ETMV4_EXC_CALL, except SVC other instructions + * (SMC, HVC) are taken as sync exceptions. + */ + if (packet->exception_number == CS_ETMV4_EXC_CALL && + !cs_etm__is_svc_instr(etmq, prev_packet, + prev_packet->end_addr)) + return true; + + /* + * ETMv4 has 5 bits for exception number; if the numbers + * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ] + * they are implementation defined exceptions. + * + * For this case, simply take it as sync exception. + */ + if (packet->exception_number > CS_ETMV4_EXC_FIQ && + packet->exception_number <= CS_ETMV4_EXC_END) + return true; + } + + return false; +} + static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) { struct cs_etm_packet *packet = etmq->packet; struct cs_etm_packet *prev_packet = etmq->prev_packet; + u64 magic; + int ret; switch (packet->sample_type) { case CS_ETM_RANGE: @@ -1206,6 +1384,43 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) PERF_IP_FLAG_TRACE_END; break; case CS_ETM_EXCEPTION: + ret = cs_etm__get_magic(packet->trace_chan_id, &magic); + if (ret) + return ret; + + /* The exception is for system call. */ + if (cs_etm__is_syscall(etmq, magic)) + packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_CALL | + PERF_IP_FLAG_SYSCALLRET; + /* + * The exceptions are triggered by external signals from bus, + * interrupt controller, debug module, PE reset or halt. + */ + else if (cs_etm__is_async_exception(etmq, magic)) + packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_CALL | + PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_INTERRUPT; + /* + * Otherwise, exception is caused by trap, instruction & + * data fault, or alignment errors. + */ + else if (cs_etm__is_sync_exception(etmq, magic)) + packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_CALL | + PERF_IP_FLAG_INTERRUPT; + + /* + * When the exception packet is inserted, since exception + * packet is not used standalone for generating samples + * and it's affiliation to the previous instruction range + * packet; so set previous range packet flags to tell perf + * it is an exception taken branch. + */ + if (prev_packet->sample_type == CS_ETM_RANGE) + prev_packet->flags = packet->flags; + break; case CS_ETM_EXCEPTION_RET: case CS_ETM_EMPTY: default: diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index fb5fc6538b7f..d76126e0e3d0 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -53,6 +53,50 @@ enum { CS_ETMV4_PRIV_MAX, }; +/* + * ETMv3 exception encoding number: + * See Embedded Trace Macrocell spcification (ARM IHI 0014Q) + * table 7-12 Encoding of Exception[3:0] for non-ARMv7-M processors. + */ +enum { + CS_ETMV3_EXC_NONE = 0, + CS_ETMV3_EXC_DEBUG_HALT = 1, + CS_ETMV3_EXC_SMC = 2, + CS_ETMV3_EXC_HYP = 3, + CS_ETMV3_EXC_ASYNC_DATA_ABORT = 4, + CS_ETMV3_EXC_JAZELLE_THUMBEE = 5, + CS_ETMV3_EXC_PE_RESET = 8, + CS_ETMV3_EXC_UNDEFINED_INSTR = 9, + CS_ETMV3_EXC_SVC = 10, + CS_ETMV3_EXC_PREFETCH_ABORT = 11, + CS_ETMV3_EXC_DATA_FAULT = 12, + CS_ETMV3_EXC_GENERIC = 13, + CS_ETMV3_EXC_IRQ = 14, + CS_ETMV3_EXC_FIQ = 15, +}; + +/* + * ETMv4 exception encoding number: + * See ARM Embedded Trace Macrocell Architecture Specification (ARM IHI 0064D) + * table 6-12 Possible values for the TYPE field in an Exception instruction + * trace packet, for ARMv7-A/R and ARMv8-A/R PEs. + */ +enum { + CS_ETMV4_EXC_RESET = 0, + CS_ETMV4_EXC_DEBUG_HALT = 1, + CS_ETMV4_EXC_CALL = 2, + CS_ETMV4_EXC_TRAP = 3, + CS_ETMV4_EXC_SYSTEM_ERROR = 4, + CS_ETMV4_EXC_INST_DEBUG = 6, + CS_ETMV4_EXC_DATA_DEBUG = 7, + CS_ETMV4_EXC_ALIGNMENT = 10, + CS_ETMV4_EXC_INST_FAULT = 11, + CS_ETMV4_EXC_DATA_FAULT = 12, + CS_ETMV4_EXC_IRQ = 14, + CS_ETMV4_EXC_FIQ = 15, + CS_ETMV4_EXC_END = 31, +}; + /* RB tree for quick conversion between traceID and metadata pointers */ struct intlist *traceid_list; -- cgit v1.2.3 From 173e65f6bc354dc07c6be399727a8a48335bccf2 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 29 Jan 2019 20:28:42 +0800 Subject: perf cs-etm: Set sample flags for exception return packet When return from exception, we need to distinguish if it's system call return or for other type exceptions for setting sample flags. Due to the exception return packet doesn't contain exception number, so we cannot decide sample flags based on exception number. On the other hand, the exception return packet is followed by an instruction range packet; this range packet deliveries the start address after exception handling, we can check if it is a SVC instruction just before the start address. If there has one SVC instruction is found ahead the return address, this means it's an exception return for system call; otherwise it is an normal return for other exceptions. This patch is to set sample flags for exception return packet, firstly it simply set sample flags as PERF_IP_FLAG_INTERRUPT for all exception returns since at this point it doesn't know what's exactly the exception type. We will defer to decide if it's an exception return for system call when the next instruction range packet comes, it checks if there has one SVC instruction prior to the start address and if so we will change sample flags to PERF_IP_FLAG_SYSCALLRET for system call return. Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mike Leach Cc: Namhyung Kim Cc: Robert Walker Cc: Suzuki K Poulouse Cc: coresight ml Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190129122842.32041-9-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index a714b31656ea..8b3f882d6e2f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1372,6 +1372,20 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) if (prev_packet->sample_type == CS_ETM_DISCONTINUITY) prev_packet->flags |= PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN; + + /* + * If the previous packet is an exception return packet + * and the return address just follows SVC instuction, + * it needs to calibrate the previous packet sample flags + * as PERF_IP_FLAG_SYSCALLRET. + */ + if (prev_packet->flags == (PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_INTERRUPT) && + cs_etm__is_svc_instr(etmq, packet, packet->start_addr)) + prev_packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_SYSCALLRET; break; case CS_ETM_DISCONTINUITY: /* @@ -1422,6 +1436,36 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) prev_packet->flags = packet->flags; break; case CS_ETM_EXCEPTION_RET: + /* + * When the exception return packet is inserted, since + * exception return packet is not used standalone for + * generating samples and it's affiliation to the previous + * instruction range packet; so set previous range packet + * flags to tell perf it is an exception return branch. + * + * The exception return can be for either system call or + * other exception types; unfortunately the packet doesn't + * contain exception type related info so we cannot decide + * the exception type purely based on exception return packet. + * If we record the exception number from exception packet and + * reuse it for excpetion return packet, this is not reliable + * due the trace can be discontinuity or the interrupt can + * be nested, thus the recorded exception number cannot be + * used for exception return packet for these two cases. + * + * For exception return packet, we only need to distinguish the + * packet is for system call or for other types. Thus the + * decision can be deferred when receive the next packet which + * contains the return address, based on the return address we + * can read out the previous instruction and check if it's a + * system call instruction and then calibrate the sample flag + * as needed. + */ + if (prev_packet->sample_type == CS_ETM_RANGE) + prev_packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_INTERRUPT; + break; case CS_ETM_EMPTY: default: break; -- cgit v1.2.3 From d6d457451eb94fa747dc202765592eb8885a7352 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 9 Jan 2019 11:18:30 +0200 Subject: perf tools: Fix split_kallsyms_for_kcore() for trampoline symbols Kallsyms symbols do not have a size, so the size becomes the distance to the next symbol. Consequently the recently added trampoline symbols end up with large sizes because the trampolines are some distance from one another and the main kernel map. However, symbols that end outside their map can disrupt the symbol tree because, after mapping, it can appear incorrectly that they overlap other symbols. Add logic to truncate symbol size to the end of the corresponding map. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: stable@vger.kernel.org Fixes: d83212d5dd67 ("kallsyms, x86: Export addresses of PTI entry trampolines") Link: http://lkml.kernel.org/r/20190109091835.5570-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 7b194cf53cc0..758bf5f74e6e 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -718,6 +718,8 @@ static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct } pos->start -= curr_map->start - curr_map->pgoff; + if (pos->end > curr_map->end) + pos->end = curr_map->end; if (pos->end) pos->end -= curr_map->start - curr_map->pgoff; symbols__insert(&curr_map->dso->symbols, pos); -- cgit v1.2.3 From e7a3a055f2b88ebd0bdae8b0aade1e7d80c8a81e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 9 Jan 2019 11:18:31 +0200 Subject: perf thread-stack: Tidy thread_stack__push_cp() usage If 'cp' is checked in thread_stack__push_cp() a number of error checks can be removed, reducing code size and improving readability. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190109091835.5570-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread-stack.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index d52f27f373ce..93ba3ab6a602 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -493,6 +493,9 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, struct thread_stack_entry *tse; int err; + if (!cp) + return -ENOMEM; + if (ts->cnt == ts->sz) { err = thread_stack__grow(ts); if (err) @@ -576,8 +579,6 @@ static int thread_stack__bottom(struct thread_stack *ts, cp = call_path__findnew(cpr, &cpr->call_path, sym, ip, ts->kernel_start); - if (!cp) - return -ENOMEM; return thread_stack__push_cp(ts, ip, sample->time, ref, cp, true, false); @@ -609,8 +610,6 @@ static int thread_stack__no_call_return(struct thread *thread, cp = call_path__findnew(cpr, &cpr->call_path, to_al->sym, sample->addr, ts->kernel_start); - if (!cp) - return -ENOMEM; return thread_stack__push_cp(ts, 0, sample->time, ref, cp, true, false); } @@ -633,8 +632,6 @@ static int thread_stack__no_call_return(struct thread *thread, /* This 'return' had no 'call', so push and pop top of stack */ cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip, ts->kernel_start); - if (!cp) - return -ENOMEM; err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp, true, false); @@ -680,8 +677,6 @@ static int thread_stack__trace_end(struct thread_stack *ts, cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0, ts->kernel_start); - if (!cp) - return -ENOMEM; ret_addr = sample->ip + sample->insn_len; @@ -745,8 +740,6 @@ int thread_stack__process(struct thread *thread, struct comm *comm, cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, to_al->sym, sample->addr, ts->kernel_start); - if (!cp) - return -ENOMEM; err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp, false, trace_end); } else if (sample->flags & PERF_IP_FLAG_RETURN) { -- cgit v1.2.3 From 90c2cda7056e3a7555d874a27aae12fd46ca802e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 9 Jan 2019 11:18:32 +0200 Subject: perf thread-stack: Tidy thread_stack__no_call_return() by adding more local variables Make thread_stack__no_call_return() more readable by adding more local variables. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190109091835.5570-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread-stack.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 93ba3ab6a602..7f8eff018c16 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -591,34 +591,36 @@ static int thread_stack__no_call_return(struct thread *thread, struct addr_location *to_al, u64 ref) { struct call_path_root *cpr = ts->crp->cpr; + struct call_path *root = &cpr->call_path; + struct symbol *fsym = from_al->sym; + struct symbol *tsym = to_al->sym; struct call_path *cp, *parent; u64 ks = ts->kernel_start; + u64 addr = sample->addr; + u64 tm = sample->time; + u64 ip = sample->ip; int err; - if (sample->ip >= ks && sample->addr < ks) { + if (ip >= ks && addr < ks) { /* Return to userspace, so pop all kernel addresses */ while (thread_stack__in_kernel(ts)) { err = thread_stack__call_return(thread, ts, --ts->cnt, - sample->time, ref, - true); + tm, ref, true); if (err) return err; } /* If the stack is empty, push the userspace address */ if (!ts->cnt) { - cp = call_path__findnew(cpr, &cpr->call_path, - to_al->sym, sample->addr, - ts->kernel_start); - return thread_stack__push_cp(ts, 0, sample->time, ref, - cp, true, false); + cp = call_path__findnew(cpr, root, tsym, addr, ks); + return thread_stack__push_cp(ts, 0, tm, ref, cp, true, + false); } - } else if (thread_stack__in_kernel(ts) && sample->ip < ks) { + } else if (thread_stack__in_kernel(ts) && ip < ks) { /* Return to userspace, so pop all kernel addresses */ while (thread_stack__in_kernel(ts)) { err = thread_stack__call_return(thread, ts, --ts->cnt, - sample->time, ref, - true); + tm, ref, true); if (err) return err; } @@ -627,19 +629,16 @@ static int thread_stack__no_call_return(struct thread *thread, if (ts->cnt) parent = ts->stack[ts->cnt - 1].cp; else - parent = &cpr->call_path; + parent = root; /* This 'return' had no 'call', so push and pop top of stack */ - cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip, - ts->kernel_start); + cp = call_path__findnew(cpr, parent, fsym, ip, ks); - err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp, - true, false); + err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false); if (err) return err; - return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref, - to_al->sym); + return thread_stack__pop_cp(thread, ts, addr, tm, ref, tsym); } static int thread_stack__trace_begin(struct thread *thread, -- cgit v1.2.3 From f08046cb3082b313e7b08dc35838cf8bd902c36b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 9 Jan 2019 11:18:33 +0200 Subject: perf thread-stack: Represent jmps to the start of a different symbol The compiler might optimize a call/ret combination by making it a jmp. However the thread-stack does not presently cater for that, so that such control flow is not visible in the call graph. Make it visible by recording on the stack a branch to the start of a different symbol. Note, that means when a ret pops the stack, all jmps must be popped off first. Example: $ cat jmp-to-fn.c __attribute__((noinline)) int bar(void) { return -1; } __attribute__((noinline)) int foo(void) { return bar() + 1; } int main() { return foo(); } $ gcc -ggdb3 -Wall -Wextra -O2 -o jmp-to-fn jmp-to-fn.c $ objdump -d jmp-to-fn 0000000000001040
: 1040: 31 c0 xor %eax,%eax 1042: e9 09 01 00 00 jmpq 1150 0000000000001140 : 1140: b8 ff ff ff ff mov $0xffffffff,%eax 1145: c3 retq 0000000000001150 : 1150: 31 c0 xor %eax,%eax 1152: e8 e9 ff ff ff callq 1140 1157: 83 c0 01 add $0x1,%eax 115a: c3 retq $ perf record -o jmp-to-fn.perf.data -e intel_pt/cyc/u ./jmp-to-fn [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0,017 MB jmp-to-fn.perf.data ] $ perf script -i jmp-to-fn.perf.data --itrace=be -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py jmp-to-fn.db branches calls 2019-01-08 13:24:58.783069 Creating database... 2019-01-08 13:24:58.794650 Writing records... 2019-01-08 13:24:59.008050 Adding indexes 2019-01-08 13:24:59.015802 Done $ ~/libexec/perf-core/scripts/python/exported-sql-viewer.py jmp-to-fn.db Before: main -> bar After: main -> foo -> bar Committer testing: Install the python2-pyside package, then select these menu options on the GUI: "Reports" "Context sensitive callgraphs" Then go on expanding the symbols, to get, full picture when doing this on a fedora:29 with gcc version 8.2.1 20181215 (Red Hat 8.2.1-6) (GCC): jmp-to-fn PID:TID _start (ld-2.28.so) __libc_start_main main foo bar To verify that indeed, this fixes the problem. Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190109091835.5570-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-postgresql.py | 2 +- tools/perf/scripts/python/export-to-sqlite.py | 2 +- tools/perf/util/thread-stack.c | 30 +++++++++++++++++++++-- tools/perf/util/thread-stack.h | 3 +++ 4 files changed, 33 insertions(+), 4 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 0564dd7377f2..30130213da7e 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -478,7 +478,7 @@ if perf_db_export_calls: 'branch_count,' 'call_id,' 'return_id,' - 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' + 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' 'parent_call_path_id' ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index 245caf2643ed..ed237f2ed03f 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -320,7 +320,7 @@ if perf_db_export_calls: 'branch_count,' 'call_id,' 'return_id,' - 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' + 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' 'parent_call_path_id' ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 7f8eff018c16..f52c0f90915d 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -38,6 +38,7 @@ * @cp: call path * @no_call: a 'call' was not seen * @trace_end: a 'call' but trace ended + * @non_call: a branch but not a 'call' to the start of a different symbol */ struct thread_stack_entry { u64 ret_addr; @@ -47,6 +48,7 @@ struct thread_stack_entry { struct call_path *cp; bool no_call; bool trace_end; + bool non_call; }; /** @@ -268,6 +270,8 @@ static int thread_stack__call_return(struct thread *thread, cr.flags |= CALL_RETURN_NO_CALL; if (no_return) cr.flags |= CALL_RETURN_NO_RETURN; + if (tse->non_call) + cr.flags |= CALL_RETURN_NON_CALL; return crp->process(&cr, crp->data); } @@ -510,6 +514,7 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, tse->cp = cp; tse->no_call = no_call; tse->trace_end = trace_end; + tse->non_call = false; return 0; } @@ -531,14 +536,16 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts, timestamp, ref, false); } - if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) { + if (ts->stack[ts->cnt - 1].ret_addr == ret_addr && + !ts->stack[ts->cnt - 1].non_call) { return thread_stack__call_return(thread, ts, --ts->cnt, timestamp, ref, false); } else { size_t i = ts->cnt - 1; while (i--) { - if (ts->stack[i].ret_addr != ret_addr) + if (ts->stack[i].ret_addr != ret_addr || + ts->stack[i].non_call) continue; i += 1; while (ts->cnt > i) { @@ -757,6 +764,25 @@ int thread_stack__process(struct thread *thread, struct comm *comm, err = thread_stack__trace_begin(thread, ts, sample->time, ref); } else if (sample->flags & PERF_IP_FLAG_TRACE_END) { err = thread_stack__trace_end(ts, sample, ref); + } else if (sample->flags & PERF_IP_FLAG_BRANCH && + from_al->sym != to_al->sym && to_al->sym && + to_al->addr == to_al->sym->start) { + struct call_path_root *cpr = ts->crp->cpr; + struct call_path *cp; + + /* + * The compiler might optimize a call/ret combination by making + * it a jmp. Make that visible by recording on the stack a + * branch to the start of a different symbol. Note, that means + * when a ret pops the stack, all jmps must be popped off first. + */ + cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, + to_al->sym, sample->addr, + ts->kernel_start); + err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false, + false); + if (!err) + ts->stack[ts->cnt - 1].non_call = true; } return err; diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index 1f626f4a1c40..b7c04e19ad41 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -35,10 +35,13 @@ struct call_path; * * CALL_RETURN_NO_CALL: 'return' but no matching 'call' * CALL_RETURN_NO_RETURN: 'call' but no matching 'return' + * CALL_RETURN_NON_CALL: a branch but not a 'call' to the start of a different + * symbol */ enum { CALL_RETURN_NO_CALL = 1 << 0, CALL_RETURN_NO_RETURN = 1 << 1, + CALL_RETURN_NON_CALL = 1 << 2, }; /** -- cgit v1.2.3 From c3fcadf0bb765faf45d6d562246e1d08885466df Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 6 Feb 2019 12:39:43 +0200 Subject: perf auxtrace: Define auxtrace record alignment Define auxtrace record alignment so that it can be referenced elsewhere. Note this is preparation for patch "perf intel-pt: Fix overlap calculation for padding" Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20190206103947.15750-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.c | 4 ++-- tools/perf/util/auxtrace.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 94a22cc8004c..ad186d3255d1 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1279,9 +1279,9 @@ static int __auxtrace_mmap__read(struct perf_mmap *map, } /* padding must be written by fn() e.g. record__process_auxtrace() */ - padding = size & 7; + padding = size & (PERF_AUXTRACE_RECORD_ALIGNMENT - 1); if (padding) - padding = 8 - padding; + padding = PERF_AUXTRACE_RECORD_ALIGNMENT - padding; memset(&ev, 0, sizeof(ev)); ev.auxtrace.header.type = PERF_RECORD_AUXTRACE; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 8e50f96d4b23..fac32482db61 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -40,6 +40,9 @@ struct record_opts; struct auxtrace_info_event; struct events_stats; +/* Auxtrace records must have the same alignment as perf event records */ +#define PERF_AUXTRACE_RECORD_ALIGNMENT 8 + enum auxtrace_type { PERF_AUXTRACE_UNKNOWN, PERF_AUXTRACE_INTEL_PT, -- cgit v1.2.3 From 5a99d99e3310a565b0cf63f785b347be9ee0da45 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 6 Feb 2019 12:39:44 +0200 Subject: perf intel-pt: Fix overlap calculation for padding Auxtrace records might have up to 7 bytes of padding appended. Adjust the overlap accordingly. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20190206103947.15750-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/intel-pt-decoder/intel-pt-decoder.c | 36 ++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 4503f3ca45ab..ecd25cdc1d3e 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -26,6 +26,7 @@ #include "../cache.h" #include "../util.h" +#include "../auxtrace.h" #include "intel-pt-insn-decoder.h" #include "intel-pt-pkt-decoder.h" @@ -2575,6 +2576,34 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2) } } +#define MAX_PADDING (PERF_AUXTRACE_RECORD_ALIGNMENT - 1) + +/** + * adj_for_padding - adjust overlap to account for padding. + * @buf_b: second buffer + * @buf_a: first buffer + * @len_a: size of first buffer + * + * @buf_a might have up to 7 bytes of padding appended. Adjust the overlap + * accordingly. + * + * Return: A pointer into @buf_b from where non-overlapped data starts + */ +static unsigned char *adj_for_padding(unsigned char *buf_b, + unsigned char *buf_a, size_t len_a) +{ + unsigned char *p = buf_b - MAX_PADDING; + unsigned char *q = buf_a + len_a - MAX_PADDING; + int i; + + for (i = MAX_PADDING; i; i--, p++, q++) { + if (*p != *q) + break; + } + + return p; +} + /** * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data * using TSC. @@ -2625,8 +2654,11 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, /* Same TSC, so buffers are consecutive */ if (!cmp && rem_b >= rem_a) { + unsigned char *start; + *consecutive = true; - return buf_b + len_b - (rem_b - rem_a); + start = buf_b + len_b - (rem_b - rem_a); + return adj_for_padding(start, buf_a, len_a); } if (cmp < 0) return buf_b; /* tsc_a < tsc_b => no overlap */ @@ -2689,7 +2721,7 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, found = memmem(buf_a, len_a, buf_b, len_a); if (found) { *consecutive = true; - return buf_b + len_a; + return adj_for_padding(buf_b + len_a, buf_a, len_a); } /* Try again at next PSB in buffer 'a' */ -- cgit v1.2.3 From 03997612904866abe7cdcc992784ef65cb3a4b81 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 6 Feb 2019 12:39:45 +0200 Subject: perf intel-pt: Fix CYC timestamp calculation after OVF CYC packet timestamp calculation depends upon CBR which was being cleared upon overflow (OVF). That can cause errors due to failing to synchronize with sideband events. Even if a CBR change has been lost, the old CBR is still a better estimate than zero. So remove the clearing of CBR. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20190206103947.15750-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index ecd25cdc1d3e..a54d6c9a4601 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1395,7 +1395,6 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) { intel_pt_log("ERROR: Buffer overflow\n"); intel_pt_clear_tx_flags(decoder); - decoder->cbr = 0; decoder->timestamp_insn_cnt = 0; decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; decoder->overflow = true; -- cgit v1.2.3 From 26ee2bcdea33c60aa833cc32a1624ef5d49c9c6f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 6 Feb 2019 12:39:46 +0200 Subject: perf intel-pt: Packet splitting can happen only on 32-bit Data is copied when the trace is stopped, so packets are never split between buffers except when processing if the buffer cannot fit in the address space which can only happen on 32-bit systems. Change the logic to reflect that. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190206103947.15750-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index a54d6c9a4601..6e03db142091 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -868,7 +868,7 @@ static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder) ret = intel_pt_get_packet(decoder->buf, decoder->len, &decoder->packet); - if (ret == INTEL_PT_NEED_MORE_BYTES && + if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 && decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) { ret = intel_pt_get_split_packet(decoder); if (ret < 0) -- cgit v1.2.3 From 16bd4321c2425d37031a902cdbf183e2cd099946 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 6 Feb 2019 12:39:47 +0200 Subject: perf auxtrace: Add timestamp to auxtrace errors The timestamp can use useful to find part of a trace that has an error without outputting all of the trace e.g. using the itrace 's' option to skip initial number of events. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190206103947.15750-6-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.c | 22 ++++++++++++++++++++-- tools/perf/util/auxtrace.h | 2 +- tools/perf/util/event.h | 3 ++- tools/perf/util/intel-bts.c | 4 ++-- tools/perf/util/intel-pt.c | 23 ++++++++++++++++------- tools/perf/util/s390-cpumsf.c | 7 ++++--- tools/perf/util/session.c | 3 +++ 7 files changed, 48 insertions(+), 16 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index ad186d3255d1..267e54df511b 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -858,7 +859,7 @@ void auxtrace_buffer__free(struct auxtrace_buffer *buffer) void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type, int code, int cpu, pid_t pid, pid_t tid, u64 ip, - const char *msg) + const char *msg, u64 timestamp) { size_t size; @@ -870,7 +871,9 @@ void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type, auxtrace_error->cpu = cpu; auxtrace_error->pid = pid; auxtrace_error->tid = tid; + auxtrace_error->fmt = 1; auxtrace_error->ip = ip; + auxtrace_error->time = timestamp; strlcpy(auxtrace_error->msg, msg, MAX_AUXTRACE_ERROR_MSG); size = (void *)auxtrace_error->msg - (void *)auxtrace_error + @@ -1160,12 +1163,27 @@ static const char *auxtrace_error_name(int type) size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp) { struct auxtrace_error_event *e = &event->auxtrace_error; + unsigned long long nsecs = e->time; + const char *msg = e->msg; int ret; ret = fprintf(fp, " %s error type %u", auxtrace_error_name(e->type), e->type); + + if (e->fmt && nsecs) { + unsigned long secs = nsecs / NSEC_PER_SEC; + + nsecs -= secs * NSEC_PER_SEC; + ret += fprintf(fp, " time %lu.%09llu", secs, nsecs); + } else { + ret += fprintf(fp, " time 0"); + } + + if (!e->fmt) + msg = (const char *)&e->time; + ret += fprintf(fp, " cpu %d pid %d tid %d ip %#"PRIx64" code %u: %s\n", - e->cpu, e->pid, e->tid, e->ip, e->code, e->msg); + e->cpu, e->pid, e->tid, e->ip, e->code, msg); return ret; } diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index fac32482db61..c69bcd9a3091 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -519,7 +519,7 @@ void auxtrace_index__free(struct list_head *head); void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type, int code, int cpu, pid_t pid, pid_t tid, u64 ip, - const char *msg); + const char *msg, u64 timestamp); int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, struct perf_tool *tool, diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index feba1aa819b4..36ae7e92dab1 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -532,8 +532,9 @@ struct auxtrace_error_event { u32 cpu; u32 pid; u32 tid; - u32 reserved__; /* For alignment */ + u32 fmt; u64 ip; + u64 time; char msg[MAX_AUXTRACE_ERROR_MSG]; }; diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index f99ac0cbe3ff..0c0180c67574 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -144,7 +144,7 @@ static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample) auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, INTEL_BTS_ERR_LOST, sample->cpu, sample->pid, - sample->tid, 0, "Lost trace data"); + sample->tid, 0, "Lost trace data", sample->time); err = perf_session__deliver_synth_event(bts->session, &event, NULL); if (err) @@ -374,7 +374,7 @@ static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid, auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip, - "Failed to get instruction"); + "Failed to get instruction", 0); err = perf_session__deliver_synth_event(bts->session, &event, NULL); if (err) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 2e72373ec6df..3b497bab4324 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1411,7 +1411,7 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) } static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, - pid_t pid, pid_t tid, u64 ip) + pid_t pid, pid_t tid, u64 ip, u64 timestamp) { union perf_event event; char msg[MAX_AUXTRACE_ERROR_MSG]; @@ -1420,7 +1420,7 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, - code, cpu, pid, tid, ip, msg); + code, cpu, pid, tid, ip, msg, timestamp); err = perf_session__deliver_synth_event(pt->session, &event, NULL); if (err) @@ -1430,6 +1430,18 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, return err; } +static int intel_ptq_synth_error(struct intel_pt_queue *ptq, + const struct intel_pt_state *state) +{ + struct intel_pt *pt = ptq->pt; + u64 tm = ptq->timestamp; + + tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc); + + return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid, + ptq->tid, state->from_ip, tm); +} + static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) { struct auxtrace_queue *queue; @@ -1676,10 +1688,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) intel_pt_next_tid(pt, ptq); } if (pt->synth_opts.errors) { - err = intel_pt_synth_error(pt, state->err, - ptq->cpu, ptq->pid, - ptq->tid, - state->from_ip); + err = intel_ptq_synth_error(ptq, state); if (err) return err; } @@ -1804,7 +1813,7 @@ static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid, static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) { return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, - sample->pid, sample->tid, 0); + sample->pid, sample->tid, 0, sample->time); } static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 835249c77f56..d9525d220db1 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -819,7 +819,7 @@ static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp) } static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, - pid_t pid, pid_t tid, u64 ip) + pid_t pid, pid_t tid, u64 ip, u64 timestamp) { char msg[MAX_AUXTRACE_ERROR_MSG]; union perf_event event; @@ -827,7 +827,7 @@ static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1); auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, - code, cpu, pid, tid, ip, msg); + code, cpu, pid, tid, ip, msg, timestamp); err = perf_session__deliver_synth_event(sf->session, &event, NULL); if (err) @@ -839,7 +839,8 @@ static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample) { return s390_cpumsf_synth_error(sf, 1, sample->cpu, - sample->pid, sample->tid, 0); + sample->pid, sample->tid, 0, + sample->time); } static int diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 2012396abb7c..18fb9c8cbf9c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -703,7 +703,10 @@ static void perf_event__auxtrace_error_swap(union perf_event *event, event->auxtrace_error.cpu = bswap_32(event->auxtrace_error.cpu); event->auxtrace_error.pid = bswap_32(event->auxtrace_error.pid); event->auxtrace_error.tid = bswap_32(event->auxtrace_error.tid); + event->auxtrace_error.fmt = bswap_32(event->auxtrace_error.fmt); event->auxtrace_error.ip = bswap_64(event->auxtrace_error.ip); + if (event->auxtrace_error.fmt) + event->auxtrace_error.time = bswap_64(event->auxtrace_error.time); } static void perf_event__thread_map_swap(union perf_event *event, -- cgit v1.2.3 From 859dcf64389c93a647f230a7cfd206d30bc9d286 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 8 Feb 2019 15:35:43 -0700 Subject: perf cs-etm: Add proper header file for symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After 'commit e22c1c751140 ("perf thread: Don't include symbol.h, symbol_conf.h is enough")' Compilation of the perf tools is broken when using the functionality provided by the openCSD library: [...] ... timerfd: [ on ] ... sched_getcpu: [ on ] ... sdt: [ OFF ] ... setns: [ on ] ... libopencsd: [ on ] [...] CC util/arm-spe.o CC util/arm-spe-pkt-decoder.o CC util/s390-cpumsf.o CC util/cs-etm.o CC util/parse-branch-options.o util/cs-etm.c: In function ‘cs_etm__mem_access’: util/cs-etm.c:297:24: error: storage size of ‘al’ isn’t known struct addr_location al; And rightly so since file cs-etm.c doesn't include symbol.h, something that is rectified in this patch. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190208223543.31836-1-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 8b3f882d6e2f..0b11d653cfbe 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -25,6 +25,7 @@ #include "machine.h" #include "map.h" #include "perf.h" +#include "symbol.h" #include "thread.h" #include "thread_map.h" #include "thread-stack.h" -- cgit v1.2.3 From 2187d87eacd46f6214ce3dc9cfd7a558375a4153 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 11 Feb 2019 11:06:27 +0100 Subject: perf report: Add s390 diagnosic sampling descriptor size On IBM z13 machine types 2964 and 2965 the descriptor sizes for sampling and diagnostic sampling entries might be missing in the trailer entry and are set to zero. This leads to a perf report failure when processing diagnostic sampling entries. This patch adds missing descriptor sizes when the trailer entry contains zero for these fields. Output before: [root@s38lp82 perf]# ./perf report --stdio | fgrep Samples 0xabbf0 [0x8]: failed to process type: 68 Error: failed to process sample [root@s38lp82 perf]# Output after: [root@s38lp82 perf]# ./perf report --stdio | fgrep Samples # Total Lost Samples: 0 # Samples: 3K of event 'SF_CYCLES_BASIC_DIAG' # Samples: 162 of event 'CF_DIAG' [root@s38lp82 perf]# Fixes: 2b1444f2e28b ("perf report: Add raw report support for s390 auxiliary trace") Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20190211100627.85714-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/s390-cpumsf.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index d9525d220db1..c215704931dc 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -352,6 +352,11 @@ static bool s390_cpumsf_validate(int machine_type, *dsdes = 85; *bsdes = 32; break; + case 2964: + case 2965: + *dsdes = 112; + *bsdes = 32; + break; default: /* Illegal trailer entry */ return false; -- cgit v1.2.3 From 39f4a913d6d439178177cae8aa2e9a232160fd51 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 4 Feb 2019 11:31:40 -0800 Subject: perf utils: Silence "Couldn't synthesize bpf events" warning for EPERM Synthesizing BPF events is only supported for root. Silent warning msg when non-root user runs perf-record. Reported-by: David Carrillo-Cisneros Signed-off-by: Song Liu Tested-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: kernel-team@fb.com Link: http://lkml.kernel.org/r/20190204193140.719740-1-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 796ef793f4ce..62dda96b0096 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -236,8 +236,8 @@ int perf_event__synthesize_bpf_events(struct perf_tool *tool, pr_debug("%s: can't get next program: %s%s", __func__, strerror(errno), errno == EINVAL ? " -- kernel too old?" : ""); - /* don't report error on old kernel */ - err = (errno == EINVAL) ? 0 : -1; + /* don't report error on old kernel or EPERM */ + err = (errno == EINVAL || errno == EPERM) ? 0 : -1; break; } fd = bpf_prog_get_fd_by_id(id); -- cgit v1.2.3 From b611f63bb1b7ba20e87d9d0d7af88d247bcf98eb Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:06 -0700 Subject: perf cs-etm: Remove unused structure field "state" Field "state" in structure cs_etm_queue is no longer used and needs to be removed. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-2-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 0b11d653cfbe..ebd68eb43da9 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -65,7 +65,6 @@ struct cs_etm_queue { struct thread *thread; struct cs_etm_decoder *decoder; struct auxtrace_buffer *buffer; - const struct cs_etm_state *state; union perf_event *event_buf; unsigned int queue_nr; pid_t pid, tid; -- cgit v1.2.3 From fc7ac4138cf5ab76850bca2b9a8f43449c762a37 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:07 -0700 Subject: perf cs-etm: Remove unused structure field "time" and "timestamp" Field "time" and "timestamp" in structure cs_etm_queue are no longer used and need to be removed. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-3-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index ebd68eb43da9..1d9419a0cf0c 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -69,8 +69,6 @@ struct cs_etm_queue { unsigned int queue_nr; pid_t pid, tid; int cpu; - u64 time; - u64 timestamp; u64 offset; u64 period_instructions; struct branch_stack *last_branch; @@ -82,7 +80,7 @@ struct cs_etm_queue { static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, - pid_t tid, u64 time_); + pid_t tid); /* PTMs ETMIDR [11:8] set to b0011 */ #define ETMIDR_PTM_VERSION 0x00000300 @@ -234,7 +232,7 @@ static int cs_etm__flush_events(struct perf_session *session, if (ret < 0) return ret; - return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1); + return cs_etm__process_timeless_queues(etm, -1); } static void cs_etm__free_queue(void *priv) @@ -1583,7 +1581,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) } static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, - pid_t tid, u64 time_) + pid_t tid) { unsigned int i; struct auxtrace_queues *queues = &etm->queues; @@ -1593,7 +1591,6 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, struct cs_etm_queue *etmq = queue->priv; if (etmq && ((tid == -1) || (etmq->tid == tid))) { - etmq->time = time_; cs_etm__set_pid_tid_cpu(etm, queue); cs_etm__run_decoder(etmq); } @@ -1637,8 +1634,7 @@ static int cs_etm__process_event(struct perf_session *session, if (event->header.type == PERF_RECORD_EXIT) return cs_etm__process_timeless_queues(etm, - event->fork.tid, - sample->time); + event->fork.tid); return 0; } -- cgit v1.2.3 From d3267ad43dd8b3778f9b3deb9a3946c844bdc9dc Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:08 -0700 Subject: perf cs-etm: Fix wrong return values in error path Function cs_etm__mem_access() is supposed to return a u32 but the error path returns negative values at a couple of places, something that really throws off the clients using it. Fix the situation by return '0'. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-4-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 1d9419a0cf0c..f396fee9bb95 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -324,7 +324,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, struct addr_location al; if (!etmq) - return -1; + return 0; machine = etmq->etm->machine; cpumode = cs_etm__cpu_mode(etmq, address); @@ -332,7 +332,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, thread = etmq->thread; if (!thread) { if (cpumode != PERF_RECORD_MISC_KERNEL) - return -EINVAL; + return 0; thread = etmq->etm->unknown_thread; } -- cgit v1.2.3 From 65963e5b4dfa9b9ba4bdf7ef8dc7d4424cfd2097 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:09 -0700 Subject: perf cs-etm: Introducing function cs_etm_decoder__init_dparams() Introducing function cs_etm_decoder__init_dparams() to avoid repeating code at two different places. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-5-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 3 +- tools/perf/util/cs-etm.c | 41 +++++++++++++++++-------- 2 files changed, 30 insertions(+), 14 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 7e6a8850be4a..663309486784 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -105,9 +105,10 @@ enum { CS_ETM_PROTO_PTM, }; -enum { +enum cs_etm_decoder_operation { CS_ETM_OPERATION_PRINT = 1, CS_ETM_OPERATION_DECODE, + CS_ETM_OPERATION_MAX, }; int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder, diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index f396fee9bb95..3011c6cae531 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -136,6 +136,28 @@ static void cs_etm__packet_dump(const char *pkt_string) fflush(stdout); } +static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, + struct cs_etm_queue *etmq, + enum cs_etm_decoder_operation mode) +{ + int ret = -EINVAL; + + if (!(mode < CS_ETM_OPERATION_MAX)) + goto out; + + d_params->packet_printer = cs_etm__packet_dump; + d_params->operation = mode; + d_params->data = etmq; + d_params->formatted = true; + d_params->fsyncs = false; + d_params->hsyncs = false; + d_params->frame_aligned = true; + + ret = 0; +out: + return ret; +} + static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, struct auxtrace_buffer *buffer) { @@ -182,12 +204,9 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, } /* Set decoder parameters to simply print the trace packets */ - d_params.packet_printer = cs_etm__packet_dump; - d_params.operation = CS_ETM_OPERATION_PRINT; - d_params.formatted = true; - d_params.fsyncs = false; - d_params.hsyncs = false; - d_params.frame_aligned = true; + if (cs_etm__init_decoder_params(&d_params, NULL, + CS_ETM_OPERATION_PRINT)) + return; decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); @@ -436,13 +455,9 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, } /* Set decoder parameters to simply print the trace packets */ - d_params.packet_printer = cs_etm__packet_dump; - d_params.operation = CS_ETM_OPERATION_DECODE; - d_params.formatted = true; - d_params.fsyncs = false; - d_params.hsyncs = false; - d_params.frame_aligned = true; - d_params.data = etmq; + if (cs_etm__init_decoder_params(&d_params, etmq, + CS_ETM_OPERATION_DECODE)) + goto out_free; etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); -- cgit v1.2.3 From ae4d9f5236439ef08f9963f39d5984959b1ae04d Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:10 -0700 Subject: perf cs-etm: Fix memory leak in error path Memory allocated for variable 't_params' isn't released properly in the error path of function cs_etm_queue *cs_etm__alloc_queue() and cs_etm__dump_event(), something this patch addresses. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-6-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 3011c6cae531..aac07f950074 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -175,6 +175,10 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, /* Use metadata to fill in trace parameters for trace decoder */ t_params = zalloc(sizeof(*t_params) * etm->num_cpu); + + if (!t_params) + return; + for (i = 0; i < etm->num_cpu; i++) { if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR]; @@ -206,14 +210,12 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, /* Set decoder parameters to simply print the trace packets */ if (cs_etm__init_decoder_params(&d_params, NULL, CS_ETM_OPERATION_PRINT)) - return; + goto out_free; decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); - zfree(&t_params); - if (!decoder) - return; + goto out_free; do { size_t consumed; @@ -228,6 +230,9 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, } while (buffer_used < buffer->size); cs_etm_decoder__free(decoder); + +out_free: + zfree(&t_params); } static int cs_etm__flush_events(struct perf_session *session, @@ -379,7 +384,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, { int i; struct cs_etm_decoder_params d_params; - struct cs_etm_trace_params *t_params; + struct cs_etm_trace_params *t_params = NULL; struct cs_etm_queue *etmq; size_t szp = sizeof(struct cs_etm_packet); @@ -461,8 +466,6 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); - zfree(&t_params); - if (!etmq->decoder) goto out_free; @@ -475,6 +478,8 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, cs_etm__mem_access)) goto out_free_decoder; + zfree(&t_params); + etmq->offset = 0; etmq->period_instructions = 0; @@ -483,6 +488,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, out_free_decoder: cs_etm_decoder__free(etmq->decoder); out_free: + zfree(&t_params); zfree(&etmq->event_buf); zfree(&etmq->last_branch); zfree(&etmq->last_branch_rb); -- cgit v1.2.3 From 2507a3d982f2968168b53f4d1e67774d68f79b0c Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:11 -0700 Subject: perf cs-etm: Introducing function cs_etm__init_trace_params() The trace parameter initialisation code is repeated in two different places, something that bloats the file and can lead to errors. This is fixed by introducing a helper function and calling the right protocol initialisation code when required. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-7-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 112 +++++++++++++++++++++++------------------------ tools/perf/util/cs-etm.h | 4 +- 2 files changed, 58 insertions(+), 58 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index aac07f950074..f3a6dfaf3026 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -136,6 +136,57 @@ static void cs_etm__packet_dump(const char *pkt_string) fflush(stdout); } +static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, + struct cs_etm_auxtrace *etm, int idx, + u32 etmidr) +{ + u64 **metadata = etm->metadata; + + t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr); + t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR]; + t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR]; +} + +static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, + struct cs_etm_auxtrace *etm, int idx) +{ + u64 **metadata = etm->metadata; + + t_params[idx].protocol = CS_ETM_PROTO_ETMV4i; + t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0]; + t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1]; + t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2]; + t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8]; + t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR]; + t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; +} + +static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, + struct cs_etm_auxtrace *etm) +{ + int i; + u32 etmidr; + u64 architecture; + + for (i = 0; i < etm->num_cpu; i++) { + architecture = etm->metadata[i][CS_ETM_MAGIC]; + + switch (architecture) { + case __perf_cs_etmv3_magic: + etmidr = etm->metadata[i][CS_ETM_ETMIDR]; + cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr); + break; + case __perf_cs_etmv4_magic: + cs_etm__set_trace_param_etmv4(t_params, etm, i); + break; + default: + return -EINVAL; + } + } + + return 0; +} + static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, struct cs_etm_queue *etmq, enum cs_etm_decoder_operation mode) @@ -161,7 +212,7 @@ out: static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, struct auxtrace_buffer *buffer) { - int i, ret; + int ret; const char *color = PERF_COLOR_BLUE; struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params; @@ -179,33 +230,8 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, if (!t_params) return; - for (i = 0; i < etm->num_cpu; i++) { - if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { - u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR]; - - t_params[i].protocol = - cs_etm__get_v7_protocol_version(etmidr); - t_params[i].etmv3.reg_ctrl = - etm->metadata[i][CS_ETM_ETMCR]; - t_params[i].etmv3.reg_trc_id = - etm->metadata[i][CS_ETM_ETMTRACEIDR]; - } else if (etm->metadata[i][CS_ETM_MAGIC] == - __perf_cs_etmv4_magic) { - t_params[i].protocol = CS_ETM_PROTO_ETMV4i; - t_params[i].etmv4.reg_idr0 = - etm->metadata[i][CS_ETMV4_TRCIDR0]; - t_params[i].etmv4.reg_idr1 = - etm->metadata[i][CS_ETMV4_TRCIDR1]; - t_params[i].etmv4.reg_idr2 = - etm->metadata[i][CS_ETMV4_TRCIDR2]; - t_params[i].etmv4.reg_idr8 = - etm->metadata[i][CS_ETMV4_TRCIDR8]; - t_params[i].etmv4.reg_configr = - etm->metadata[i][CS_ETMV4_TRCCONFIGR]; - t_params[i].etmv4.reg_traceidr = - etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; - } - } + if (cs_etm__init_trace_params(t_params, etm)) + goto out_free; /* Set decoder parameters to simply print the trace packets */ if (cs_etm__init_decoder_params(&d_params, NULL, @@ -382,7 +408,6 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, unsigned int queue_nr) { - int i; struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params = NULL; struct cs_etm_queue *etmq; @@ -431,33 +456,8 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, if (!t_params) goto out_free; - for (i = 0; i < etm->num_cpu; i++) { - if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { - u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR]; - - t_params[i].protocol = - cs_etm__get_v7_protocol_version(etmidr); - t_params[i].etmv3.reg_ctrl = - etm->metadata[i][CS_ETM_ETMCR]; - t_params[i].etmv3.reg_trc_id = - etm->metadata[i][CS_ETM_ETMTRACEIDR]; - } else if (etm->metadata[i][CS_ETM_MAGIC] == - __perf_cs_etmv4_magic) { - t_params[i].protocol = CS_ETM_PROTO_ETMV4i; - t_params[i].etmv4.reg_idr0 = - etm->metadata[i][CS_ETMV4_TRCIDR0]; - t_params[i].etmv4.reg_idr1 = - etm->metadata[i][CS_ETMV4_TRCIDR1]; - t_params[i].etmv4.reg_idr2 = - etm->metadata[i][CS_ETMV4_TRCIDR2]; - t_params[i].etmv4.reg_idr8 = - etm->metadata[i][CS_ETMV4_TRCIDR8]; - t_params[i].etmv4.reg_configr = - etm->metadata[i][CS_ETMV4_TRCCONFIGR]; - t_params[i].etmv4.reg_traceidr = - etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; - } - } + if (cs_etm__init_trace_params(t_params, etm)) + goto out_free; /* Set decoder parameters to simply print the trace packets */ if (cs_etm__init_decoder_params(&d_params, etmq, diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index d76126e0e3d0..0e97c196147a 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -105,8 +105,8 @@ struct intlist *traceid_list; #define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) -static const u64 __perf_cs_etmv3_magic = 0x3030303030303030ULL; -static const u64 __perf_cs_etmv4_magic = 0x4040404040404040ULL; +#define __perf_cs_etmv3_magic 0x3030303030303030ULL +#define __perf_cs_etmv4_magic 0x4040404040404040ULL #define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64)) #define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) -- cgit v1.2.3 From e4aa592d183228e5fbd3b49a317248c2895d3819 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:12 -0700 Subject: perf cs-etm: Fix erroneous comment The comment just before initialising the decoder is plane wrong since it is part of the decoding queue setup function and the operation code specifically mention that trace data is to be decoded rather than printed out. This patch simply fix the comment to prevent people from getting really confused. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-8-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index f3a6dfaf3026..4cc9fce97a86 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -459,7 +459,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, if (cs_etm__init_trace_params(t_params, etm)) goto out_free; - /* Set decoder parameters to simply print the trace packets */ + /* Set decoder parameters to decode trace packets */ if (cs_etm__init_decoder_params(&d_params, etmq, CS_ETM_OPERATION_DECODE)) goto out_free; -- cgit v1.2.3 From 4f5b37139fb3178b3db4e876eec0f2e92c82ac45 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:13 -0700 Subject: perf cs-etm: Cleaning up function cs_etm__alloc_queue() Function cs_etm__alloc_queue() should only be concerned with the allocation of memory for the etmq and accompanying decoder. Everything else should be done in the calling function. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-9-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 4cc9fce97a86..c9a5b4935209 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -405,8 +405,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, return len; } -static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, - unsigned int queue_nr) +static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) { struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params = NULL; @@ -444,12 +443,6 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, if (!etmq->event_buf) goto out_free; - etmq->etm = etm; - etmq->queue_nr = queue_nr; - etmq->pid = -1; - etmq->tid = -1; - etmq->cpu = -1; - /* Use metadata to fill in trace parameters for trace decoder */ t_params = zalloc(sizeof(*t_params) * etm->num_cpu); @@ -479,10 +472,6 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, goto out_free_decoder; zfree(&t_params); - - etmq->offset = 0; - etmq->period_instructions = 0; - return etmq; out_free_decoder: @@ -503,24 +492,30 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, struct auxtrace_queue *queue, unsigned int queue_nr) { + int ret = 0; struct cs_etm_queue *etmq = queue->priv; if (list_empty(&queue->head) || etmq) - return 0; + goto out; - etmq = cs_etm__alloc_queue(etm, queue_nr); + etmq = cs_etm__alloc_queue(etm); - if (!etmq) - return -ENOMEM; + if (!etmq) { + ret = -ENOMEM; + goto out; + } queue->priv = etmq; - - if (queue->cpu != -1) - etmq->cpu = queue->cpu; - + etmq->etm = etm; + etmq->queue_nr = queue_nr; + etmq->cpu = queue->cpu; etmq->tid = queue->tid; + etmq->pid = -1; + etmq->offset = 0; + etmq->period_instructions = 0; - return 0; +out: + return ret; } static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) -- cgit v1.2.3 From 4b6df11ab6cf13c5babe9ed1a935cd99d50bf3fe Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:14 -0700 Subject: perf cs-etm: Rethink kernel address initialisation Moving initialisation of the kernel start address to function cs_etm__setup_queues(), considered to be the common denominator for queue initialisation. That way we don't have to repeat the same code at different places. No change of functionatlity is introduced by this patch. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-10-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index c9a5b4935209..2d2de898ea68 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -523,6 +523,9 @@ static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) unsigned int i; int ret; + if (!etm->kernel_start) + etm->kernel_start = machine__kernel_start(etm->machine); + for (i = 0; i < etm->queues.nr_queues; i++) { ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); if (ret) @@ -1490,14 +1493,10 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { - struct cs_etm_auxtrace *etm = etmq->etm; struct cs_etm_buffer buffer; size_t buffer_used, processed; int err = 0; - if (!etm->kernel_start) - etm->kernel_start = machine__kernel_start(etm->machine); - /* Go through each buffer in the queue and decode them one by one */ while (1) { buffer_used = 0; -- cgit v1.2.3 From 23cfcd6d75cc0c1a7f95c44658dc91380fb40770 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:15 -0700 Subject: perf cs-etm: Make cs_etm__run_decoder() queue independent This patch makes decoding of auxtrace buffer centered around a struct cs_etm_queue. This eliminates surperflous variables and is a precursor for work that simplifies the main decoder loop. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-11-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 7 ---- tools/perf/util/cs-etm.c | 52 ++++++++++++------------- 2 files changed, 26 insertions(+), 33 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 663309486784..3ab11dfa92ae 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -15,13 +15,6 @@ struct cs_etm_decoder; -struct cs_etm_buffer { - const unsigned char *buf; - size_t len; - u64 offset; - u64 ref_timestamp; -}; - enum cs_etm_sample_type { CS_ETM_EMPTY, CS_ETM_RANGE, diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 2d2de898ea68..d2c90b369e7c 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -76,6 +76,8 @@ struct cs_etm_queue { size_t last_branch_pos; struct cs_etm_packet *prev_packet; struct cs_etm_packet *packet; + const unsigned char *buf; + size_t buf_len, buf_used; }; static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); @@ -683,7 +685,7 @@ static int cs_etm__inject_event(union perf_event *event, static int -cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) +cs_etm__get_trace(struct cs_etm_queue *etmq) { struct auxtrace_buffer *aux_buffer = etmq->buffer; struct auxtrace_buffer *old_buffer = aux_buffer; @@ -697,7 +699,7 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) if (!aux_buffer) { if (old_buffer) auxtrace_buffer__drop_data(old_buffer); - buff->len = 0; + etmq->buf_len = 0; return 0; } @@ -717,13 +719,11 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) if (old_buffer) auxtrace_buffer__drop_data(old_buffer); - buff->offset = aux_buffer->offset; - buff->len = aux_buffer->size; - buff->buf = aux_buffer->data; + etmq->buf_used = 0; + etmq->buf_len = aux_buffer->size; + etmq->buf = aux_buffer->data; - buff->ref_timestamp = aux_buffer->reference; - - return buff->len; + return etmq->buf_len; } static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, @@ -1493,24 +1493,23 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { - struct cs_etm_buffer buffer; - size_t buffer_used, processed; + size_t processed; int err = 0; /* Go through each buffer in the queue and decode them one by one */ while (1) { - buffer_used = 0; - memset(&buffer, 0, sizeof(buffer)); - err = cs_etm__get_trace(&buffer, etmq); - if (err <= 0) - return err; - /* - * We cannot assume consecutive blocks in the data file are - * contiguous, reset the decoder to force re-sync. - */ - err = cs_etm_decoder__reset(etmq->decoder); - if (err != 0) - return err; + if (!etmq->buf_len) { + err = cs_etm__get_trace(etmq); + if (err <= 0) + return err; + /* + * We cannot assume consecutive blocks in the data file + * are contiguous, reset the decoder to force re-sync. + */ + err = cs_etm_decoder__reset(etmq->decoder); + if (err != 0) + return err; + } /* Run trace decoder until buffer consumed or end of trace */ do { @@ -1518,14 +1517,15 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) err = cs_etm_decoder__process_data_block( etmq->decoder, etmq->offset, - &buffer.buf[buffer_used], - buffer.len - buffer_used, + &etmq->buf[etmq->buf_used], + etmq->buf_len, &processed); if (err) return err; etmq->offset += processed; - buffer_used += processed; + etmq->buf_used += processed; + etmq->buf_len -= processed; /* Process each packet in this chunk */ while (1) { @@ -1585,7 +1585,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) break; } } - } while (buffer.len > buffer_used); + } while (etmq->buf_len); if (err == 0) /* Flush any remaining branch stack entries */ -- cgit v1.2.3 From f74f349c211e3e62bc7fe35a132918c7f2c0fafb Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:16 -0700 Subject: perf cs-etm: Modularize main decoder function Making the main decoder block modular so that it can be called from different decoding context (timeless vs. non-timeless), avoiding to repeat code. No change in functionality is introduced by this patch. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-12-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index d2c90b369e7c..cfa686fe223e 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1491,9 +1491,36 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) return 0; } +static int cs_etm__decode_data_block(struct cs_etm_queue *etmq) +{ + int ret = 0; + size_t processed = 0; + + /* + * Packets are decoded and added to the decoder's packet queue + * until the decoder packet processing callback has requested that + * processing stops or there is nothing left in the buffer. Normal + * operations that stop processing are a timestamp packet or a full + * decoder buffer queue. + */ + ret = cs_etm_decoder__process_data_block(etmq->decoder, + etmq->offset, + &etmq->buf[etmq->buf_used], + etmq->buf_len, + &processed); + if (ret) + goto out; + + etmq->offset += processed; + etmq->buf_used += processed; + etmq->buf_len -= processed; + +out: + return ret; +} + static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { - size_t processed; int err = 0; /* Go through each buffer in the queue and decode them one by one */ @@ -1513,20 +1540,10 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) /* Run trace decoder until buffer consumed or end of trace */ do { - processed = 0; - err = cs_etm_decoder__process_data_block( - etmq->decoder, - etmq->offset, - &etmq->buf[etmq->buf_used], - etmq->buf_len, - &processed); + err = cs_etm__decode_data_block(etmq); if (err) return err; - etmq->offset += processed; - etmq->buf_used += processed; - etmq->buf_len -= processed; - /* Process each packet in this chunk */ while (1) { err = cs_etm_decoder__get_packet(etmq->decoder, -- cgit v1.2.3 From 3fa0e83e29488547b39f321fd5a239f08f129c72 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:17 -0700 Subject: perf cs-etm: Modularize main packet processing loop Making the main packet processing loop modular so that it can be called from different decoding context (timeless vs. non-timless), avoiding to repeat code. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-13-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 129 ++++++++++++++++++++++++++--------------------- 1 file changed, 72 insertions(+), 57 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index cfa686fe223e..f607bc58bd03 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1519,6 +1519,72 @@ out: return ret; } +static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq) +{ + int ret; + + /* Process each packet in this chunk */ + while (1) { + ret = cs_etm_decoder__get_packet(etmq->decoder, + etmq->packet); + if (ret <= 0) + /* + * Stop processing this chunk on + * end of data or error + */ + break; + + /* + * Since packet addresses are swapped in packet + * handling within below switch() statements, + * thus setting sample flags must be called + * prior to switch() statement to use address + * information before packets swapping. + */ + ret = cs_etm__set_sample_flags(etmq); + if (ret < 0) + break; + + switch (etmq->packet->sample_type) { + case CS_ETM_RANGE: + /* + * If the packet contains an instruction + * range, generate instruction sequence + * events. + */ + cs_etm__sample(etmq); + break; + case CS_ETM_EXCEPTION: + case CS_ETM_EXCEPTION_RET: + /* + * If the exception packet is coming, + * make sure the previous instruction + * range packet to be handled properly. + */ + cs_etm__exception(etmq); + break; + case CS_ETM_DISCONTINUITY: + /* + * Discontinuity in trace, flush + * previous branch stack + */ + cs_etm__flush(etmq); + break; + case CS_ETM_EMPTY: + /* + * Should not receive empty packet, + * report error. + */ + pr_err("CS ETM Trace: empty packet\n"); + return -EINVAL; + default: + break; + } + } + + return ret; +} + static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { int err = 0; @@ -1544,64 +1610,13 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) if (err) return err; - /* Process each packet in this chunk */ - while (1) { - err = cs_etm_decoder__get_packet(etmq->decoder, - etmq->packet); - if (err <= 0) - /* - * Stop processing this chunk on - * end of data or error - */ - break; + /* + * Process each packet in this chunk, nothing to do if + * an error occurs other than hoping the next one will + * be better. + */ + err = cs_etm__process_decoder_queue(etmq); - /* - * Since packet addresses are swapped in packet - * handling within below switch() statements, - * thus setting sample flags must be called - * prior to switch() statement to use address - * information before packets swapping. - */ - err = cs_etm__set_sample_flags(etmq); - if (err < 0) - break; - - switch (etmq->packet->sample_type) { - case CS_ETM_RANGE: - /* - * If the packet contains an instruction - * range, generate instruction sequence - * events. - */ - cs_etm__sample(etmq); - break; - case CS_ETM_EXCEPTION: - case CS_ETM_EXCEPTION_RET: - /* - * If the exception packet is coming, - * make sure the previous instruction - * range packet to be handled properly. - */ - cs_etm__exception(etmq); - break; - case CS_ETM_DISCONTINUITY: - /* - * Discontinuity in trace, flush - * previous branch stack - */ - cs_etm__flush(etmq); - break; - case CS_ETM_EMPTY: - /* - * Should not receive empty packet, - * report error. - */ - pr_err("CS ETM Trace: empty packet\n"); - return -EINVAL; - default: - break; - } - } } while (etmq->buf_len); if (err == 0) -- cgit v1.2.3 From 8224531cf5a1246bbe1d43c5db26e5348aeb77c5 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:18 -0700 Subject: perf cs-etm: Modularize auxtrace_buffer fetch function Making the auxtrace_buffer fetch function modular so that it can be called from different decoding context (timeless vs. non-timeless), avoiding to repeat code. No change in functionality is introduced by this patch. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-14-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index f607bc58bd03..110804936fc3 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1152,6 +1152,32 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq) return 0; } +/* + * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue + * if need be. + * Returns: < 0 if error + * = 0 if no more auxtrace_buffer to read + * > 0 if the current buffer isn't empty yet + */ +static int cs_etm__get_data_block(struct cs_etm_queue *etmq) +{ + int ret; + + if (!etmq->buf_len) { + ret = cs_etm__get_trace(etmq); + if (ret <= 0) + return ret; + /* + * We cannot assume consecutive blocks in the data file + * are contiguous, reset the decoder to force re-sync. + */ + ret = cs_etm_decoder__reset(etmq->decoder); + if (ret) + return ret; + } + + return etmq->buf_len; +} static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, struct cs_etm_packet *packet, @@ -1591,18 +1617,9 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) /* Go through each buffer in the queue and decode them one by one */ while (1) { - if (!etmq->buf_len) { - err = cs_etm__get_trace(etmq); - if (err <= 0) - return err; - /* - * We cannot assume consecutive blocks in the data file - * are contiguous, reset the decoder to force re-sync. - */ - err = cs_etm_decoder__reset(etmq->decoder); - if (err != 0) - return err; - } + err = cs_etm__get_data_block(etmq); + if (err <= 0) + return err; /* Run trace decoder until buffer consumed or end of trace */ do { -- cgit v1.2.3 From 5ff328836dfde0cef9f28c8b8791a90a36d7a183 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 13 Feb 2019 13:32:39 +0100 Subject: perf tools: Rename build libperf to perf Rename build libperf to perf, because it's used to build perf. The libperf build object name will be used for libperf library. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190213123246.4015-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Build | 10 +- tools/perf/arch/Build | 4 +- tools/perf/arch/arm/Build | 4 +- tools/perf/arch/arm/tests/Build | 8 +- tools/perf/arch/arm/util/Build | 8 +- tools/perf/arch/arm64/Build | 4 +- tools/perf/arch/arm64/tests/Build | 6 +- tools/perf/arch/arm64/util/Build | 12 +- tools/perf/arch/nds32/Build | 2 +- tools/perf/arch/nds32/util/Build | 2 +- tools/perf/arch/powerpc/Build | 4 +- tools/perf/arch/powerpc/tests/Build | 6 +- tools/perf/arch/powerpc/util/Build | 18 +- tools/perf/arch/s390/Build | 2 +- tools/perf/arch/s390/util/Build | 12 +- tools/perf/arch/sh/Build | 2 +- tools/perf/arch/sh/util/Build | 2 +- tools/perf/arch/sparc/Build | 2 +- tools/perf/arch/sparc/util/Build | 2 +- tools/perf/arch/x86/Build | 4 +- tools/perf/arch/x86/tests/Build | 14 +- tools/perf/arch/x86/util/Build | 30 +-- tools/perf/arch/xtensa/Build | 2 +- tools/perf/arch/xtensa/util/Build | 2 +- tools/perf/scripts/Build | 4 +- tools/perf/scripts/perl/Perf-Trace-Util/Build | 2 +- tools/perf/scripts/python/Perf-Trace-Util/Build | 2 +- tools/perf/trace/beauty/Build | 26 +-- tools/perf/ui/Build | 18 +- tools/perf/ui/browsers/Build | 10 +- tools/perf/ui/tui/Build | 8 +- tools/perf/util/Build | 276 ++++++++++++------------ tools/perf/util/c++/Build | 4 +- tools/perf/util/cs-etm-decoder/Build | 2 +- tools/perf/util/intel-pt-decoder/Build | 2 +- tools/perf/util/scripting-engines/Build | 4 +- 36 files changed, 260 insertions(+), 260 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/Build b/tools/perf/Build index e5232d567611..5f392dbb88fc 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -46,10 +46,10 @@ CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_ CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))" CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)" -libperf-y += util/ -libperf-y += arch/ -libperf-y += ui/ -libperf-y += scripts/ -libperf-$(CONFIG_TRACE) += trace/beauty/ +perf-y += util/ +perf-y += arch/ +perf-y += ui/ +perf-y += scripts/ +perf-$(CONFIG_TRACE) += trace/beauty/ gtk-y += ui/gtk/ diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build index d9b6af837c7d..688818844c11 100644 --- a/tools/perf/arch/Build +++ b/tools/perf/arch/Build @@ -1,2 +1,2 @@ -libperf-y += common.o -libperf-y += $(SRCARCH)/ +perf-y += common.o +perf-y += $(SRCARCH)/ diff --git a/tools/perf/arch/arm/Build b/tools/perf/arch/arm/Build index 41bf61da476a..36222e64bbf7 100644 --- a/tools/perf/arch/arm/Build +++ b/tools/perf/arch/arm/Build @@ -1,2 +1,2 @@ -libperf-y += util/ -libperf-$(CONFIG_DWARF_UNWIND) += tests/ +perf-y += util/ +perf-$(CONFIG_DWARF_UNWIND) += tests/ diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build index d9ae2733f9cc..bc8e97380c82 100644 --- a/tools/perf/arch/arm/tests/Build +++ b/tools/perf/arch/arm/tests/Build @@ -1,5 +1,5 @@ -libperf-y += regs_load.o -libperf-y += dwarf-unwind.o -libperf-y += vectors-page.o +perf-y += regs_load.o +perf-y += dwarf-unwind.o +perf-y += vectors-page.o -libperf-y += arch-tests.o +perf-y += arch-tests.o diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build index e64c5f216448..296f0eac5e18 100644 --- a/tools/perf/arch/arm/util/Build +++ b/tools/perf/arch/arm/util/Build @@ -1,6 +1,6 @@ -libperf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -libperf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o +perf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build index 41bf61da476a..36222e64bbf7 100644 --- a/tools/perf/arch/arm64/Build +++ b/tools/perf/arch/arm64/Build @@ -1,2 +1,2 @@ -libperf-y += util/ -libperf-$(CONFIG_DWARF_UNWIND) += tests/ +perf-y += util/ +perf-$(CONFIG_DWARF_UNWIND) += tests/ diff --git a/tools/perf/arch/arm64/tests/Build b/tools/perf/arch/arm64/tests/Build index 883c57ff0c08..41707fea74b3 100644 --- a/tools/perf/arch/arm64/tests/Build +++ b/tools/perf/arch/arm64/tests/Build @@ -1,4 +1,4 @@ -libperf-y += regs_load.o -libperf-y += dwarf-unwind.o +perf-y += regs_load.o +perf-y += dwarf-unwind.o -libperf-y += arch-tests.o +perf-y += arch-tests.o diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 68f8a8eb3ad0..3cde540d2fcf 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,10 +1,10 @@ -libperf-y += header.o -libperf-y += sym-handling.o -libperf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-y += header.o +perf-y += sym-handling.o +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ +perf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ ../../arm/util/auxtrace.o \ ../../arm/util/cs-etm.o \ arm-spe.o diff --git a/tools/perf/arch/nds32/Build b/tools/perf/arch/nds32/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/nds32/Build +++ b/tools/perf/arch/nds32/Build @@ -1 +1 @@ -libperf-y += util/ +perf-y += util/ diff --git a/tools/perf/arch/nds32/util/Build b/tools/perf/arch/nds32/util/Build index ca623bbf993c..d0bc205fe49a 100644 --- a/tools/perf/arch/nds32/util/Build +++ b/tools/perf/arch/nds32/util/Build @@ -1 +1 @@ -libperf-y += header.o +perf-y += header.o diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build index db52fa22d3a1..a7dd46a5b678 100644 --- a/tools/perf/arch/powerpc/Build +++ b/tools/perf/arch/powerpc/Build @@ -1,2 +1,2 @@ -libperf-y += util/ -libperf-y += tests/ +perf-y += util/ +perf-y += tests/ diff --git a/tools/perf/arch/powerpc/tests/Build b/tools/perf/arch/powerpc/tests/Build index d827ef384b33..3526ab0af9f9 100644 --- a/tools/perf/arch/powerpc/tests/Build +++ b/tools/perf/arch/powerpc/tests/Build @@ -1,4 +1,4 @@ -libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o -libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o +perf-$(CONFIG_DWARF_UNWIND) += regs_load.o +perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o -libperf-y += arch-tests.o +perf-y += arch-tests.o diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index ba98bd006488..7cf0b8803097 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,11 +1,11 @@ -libperf-y += header.o -libperf-y += sym-handling.o -libperf-y += kvm-stat.o -libperf-y += perf_regs.o -libperf-y += mem-events.o +perf-y += header.o +perf-y += sym-handling.o +perf-y += kvm-stat.o +perf-y += perf_regs.o +perf-y += mem-events.o -libperf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_DWARF) += skip-callchain-idx.o +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += skip-callchain-idx.o -libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/s390/Build b/tools/perf/arch/s390/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/s390/Build +++ b/tools/perf/arch/s390/Build @@ -1 +1 @@ -libperf-y += util/ +perf-y += util/ diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index 4a233683c684..22797f043b84 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build @@ -1,9 +1,9 @@ -libperf-y += header.o -libperf-y += kvm-stat.o +perf-y += header.o +perf-y += kvm-stat.o -libperf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -libperf-y += machine.o +perf-y += machine.o -libperf-$(CONFIG_AUXTRACE) += auxtrace.o +perf-$(CONFIG_AUXTRACE) += auxtrace.o diff --git a/tools/perf/arch/sh/Build b/tools/perf/arch/sh/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/sh/Build +++ b/tools/perf/arch/sh/Build @@ -1 +1 @@ -libperf-y += util/ +perf-y += util/ diff --git a/tools/perf/arch/sh/util/Build b/tools/perf/arch/sh/util/Build index 954e287bbb89..e813e618954b 100644 --- a/tools/perf/arch/sh/util/Build +++ b/tools/perf/arch/sh/util/Build @@ -1 +1 @@ -libperf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/sparc/Build b/tools/perf/arch/sparc/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/sparc/Build +++ b/tools/perf/arch/sparc/Build @@ -1 +1 @@ -libperf-y += util/ +perf-y += util/ diff --git a/tools/perf/arch/sparc/util/Build b/tools/perf/arch/sparc/util/Build index 954e287bbb89..e813e618954b 100644 --- a/tools/perf/arch/sparc/util/Build +++ b/tools/perf/arch/sparc/util/Build @@ -1 +1 @@ -libperf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build index db52fa22d3a1..a7dd46a5b678 100644 --- a/tools/perf/arch/x86/Build +++ b/tools/perf/arch/x86/Build @@ -1,2 +1,2 @@ -libperf-y += util/ -libperf-y += tests/ +perf-y += util/ +perf-y += tests/ diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 586849ff83a0..3d83d0c6982d 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -1,8 +1,8 @@ -libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o -libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o +perf-$(CONFIG_DWARF_UNWIND) += regs_load.o +perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o -libperf-y += arch-tests.o -libperf-y += rdpmc.o -libperf-y += perf-time-to-tsc.o -libperf-$(CONFIG_AUXTRACE) += insn-x86.o -libperf-$(CONFIG_X86_64) += bp-modify.o +perf-y += arch-tests.o +perf-y += rdpmc.o +perf-y += perf-time-to-tsc.o +perf-$(CONFIG_AUXTRACE) += insn-x86.o +perf-$(CONFIG_X86_64) += bp-modify.o diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 844b8f335532..7aab0be5fc5f 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -1,18 +1,18 @@ -libperf-y += header.o -libperf-y += tsc.o -libperf-y += pmu.o -libperf-y += kvm-stat.o -libperf-y += perf_regs.o -libperf-y += group.o -libperf-y += machine.o -libperf-y += event.o +perf-y += header.o +perf-y += tsc.o +perf-y += pmu.o +perf-y += kvm-stat.o +perf-y += perf_regs.o +perf-y += group.o +perf-y += machine.o +perf-y += event.o -libperf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o -libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -libperf-$(CONFIG_AUXTRACE) += auxtrace.o -libperf-$(CONFIG_AUXTRACE) += intel-pt.o -libperf-$(CONFIG_AUXTRACE) += intel-bts.o +perf-$(CONFIG_AUXTRACE) += auxtrace.o +perf-$(CONFIG_AUXTRACE) += intel-pt.o +perf-$(CONFIG_AUXTRACE) += intel-bts.o diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/xtensa/Build +++ b/tools/perf/arch/xtensa/Build @@ -1 +1 @@ -libperf-y += util/ +perf-y += util/ diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build index 954e287bbb89..e813e618954b 100644 --- a/tools/perf/arch/xtensa/util/Build +++ b/tools/perf/arch/xtensa/util/Build @@ -1 +1 @@ -libperf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/scripts/Build b/tools/perf/scripts/Build index 41efd7e368b3..68d4b54574ad 100644 --- a/tools/perf/scripts/Build +++ b/tools/perf/scripts/Build @@ -1,2 +1,2 @@ -libperf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/ -libperf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/ +perf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/ +perf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/ diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Build b/tools/perf/scripts/perl/Perf-Trace-Util/Build index 34faecf774ae..db0036129307 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/Build +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Build @@ -1,4 +1,4 @@ -libperf-y += Context.o +perf-y += Context.o CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes CFLAGS_Context.o += -Wno-unused-parameter -Wno-nested-externs -Wno-undef diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build index aefc15c9444a..7d0e33ce6aba 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Build +++ b/tools/perf/scripts/python/Perf-Trace-Util/Build @@ -1,3 +1,3 @@ -libperf-y += Context.o +perf-y += Context.o CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index 637365099b7d..85f328ddf897 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -1,15 +1,15 @@ -libperf-y += clone.o -libperf-y += fcntl.o -libperf-y += flock.o +perf-y += clone.o +perf-y += fcntl.o +perf-y += flock.o ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) -libperf-y += ioctl.o +perf-y += ioctl.o endif -libperf-y += kcmp.o -libperf-y += mount_flags.o -libperf-y += pkey_alloc.o -libperf-y += arch_prctl.o -libperf-y += prctl.o -libperf-y += renameat.o -libperf-y += sockaddr.o -libperf-y += socket.o -libperf-y += statx.o +perf-y += kcmp.o +perf-y += mount_flags.o +perf-y += pkey_alloc.o +perf-y += arch_prctl.o +perf-y += prctl.o +perf-y += renameat.o +perf-y += sockaddr.o +perf-y += socket.o +perf-y += statx.o diff --git a/tools/perf/ui/Build b/tools/perf/ui/Build index 0a73538c0441..3aff83c3275f 100644 --- a/tools/perf/ui/Build +++ b/tools/perf/ui/Build @@ -1,14 +1,14 @@ -libperf-y += setup.o -libperf-y += helpline.o -libperf-y += progress.o -libperf-y += util.o -libperf-y += hist.o -libperf-y += stdio/hist.o +perf-y += setup.o +perf-y += helpline.o +perf-y += progress.o +perf-y += util.o +perf-y += hist.o +perf-y += stdio/hist.o CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))" -libperf-$(CONFIG_SLANG) += browser.o -libperf-$(CONFIG_SLANG) += browsers/ -libperf-$(CONFIG_SLANG) += tui/ +perf-$(CONFIG_SLANG) += browser.o +perf-$(CONFIG_SLANG) += browsers/ +perf-$(CONFIG_SLANG) += tui/ CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/browsers/Build b/tools/perf/ui/browsers/Build index de223f5bed58..8fee56b46502 100644 --- a/tools/perf/ui/browsers/Build +++ b/tools/perf/ui/browsers/Build @@ -1,8 +1,8 @@ -libperf-y += annotate.o -libperf-y += hists.o -libperf-y += map.o -libperf-y += scripts.o -libperf-y += header.o +perf-y += annotate.o +perf-y += hists.o +perf-y += map.o +perf-y += scripts.o +perf-y += header.o CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/tui/Build b/tools/perf/ui/tui/Build index 9e4c6ca41a9f..f916df33a1a7 100644 --- a/tools/perf/ui/tui/Build +++ b/tools/perf/ui/tui/Build @@ -1,4 +1,4 @@ -libperf-y += setup.o -libperf-y += util.o -libperf-y += helpline.o -libperf-y += progress.o +perf-y += setup.o +perf-y += util.o +perf-y += helpline.o +perf-y += progress.o diff --git a/tools/perf/util/Build b/tools/perf/util/Build index a36e6e5a6f4f..ca0741c91903 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,162 +1,162 @@ -libperf-y += annotate.o -libperf-y += block-range.o -libperf-y += build-id.o -libperf-y += config.o -libperf-y += ctype.o -libperf-y += db-export.o -libperf-y += env.o -libperf-y += event.o -libperf-y += evlist.o -libperf-y += evsel.o -libperf-y += evsel_fprintf.o -libperf-y += find_bit.o -libperf-y += get_current_dir_name.o -libperf-y += kallsyms.o -libperf-y += levenshtein.o -libperf-y += llvm-utils.o -libperf-y += mmap.o -libperf-y += memswap.o -libperf-y += parse-events.o -libperf-y += perf_regs.o -libperf-y += path.o -libperf-y += print_binary.o -libperf-y += rbtree.o -libperf-y += libstring.o -libperf-y += bitmap.o -libperf-y += hweight.o -libperf-y += smt.o -libperf-y += strbuf.o -libperf-y += string.o -libperf-y += strlist.o -libperf-y += strfilter.o -libperf-y += top.o -libperf-y += usage.o -libperf-y += dso.o -libperf-y += symbol.o -libperf-y += symbol_fprintf.o -libperf-y += color.o -libperf-y += color_config.o -libperf-y += metricgroup.o -libperf-y += header.o -libperf-y += callchain.o -libperf-y += values.o -libperf-y += debug.o -libperf-y += machine.o -libperf-y += map.o -libperf-y += pstack.o -libperf-y += session.o -libperf-y += sample-raw.o -libperf-y += s390-sample-raw.o -libperf-$(CONFIG_TRACE) += syscalltbl.o -libperf-y += ordered-events.o -libperf-y += namespaces.o -libperf-y += comm.o -libperf-y += thread.o -libperf-y += thread_map.o -libperf-y += trace-event-parse.o -libperf-y += parse-events-flex.o -libperf-y += parse-events-bison.o -libperf-y += pmu.o -libperf-y += pmu-flex.o -libperf-y += pmu-bison.o -libperf-y += trace-event-read.o -libperf-y += trace-event-info.o -libperf-y += trace-event-scripting.o -libperf-y += trace-event.o -libperf-y += svghelper.o -libperf-y += sort.o -libperf-y += hist.o -libperf-y += util.o -libperf-y += xyarray.o -libperf-y += cpumap.o -libperf-y += cgroup.o -libperf-y += target.o -libperf-y += rblist.o -libperf-y += intlist.o -libperf-y += vdso.o -libperf-y += counts.o -libperf-y += stat.o -libperf-y += stat-shadow.o -libperf-y += stat-display.o -libperf-y += record.o -libperf-y += srcline.o -libperf-y += srccode.o -libperf-y += data.o -libperf-y += tsc.o -libperf-y += cloexec.o -libperf-y += call-path.o -libperf-y += rwsem.o -libperf-y += thread-stack.o -libperf-$(CONFIG_AUXTRACE) += auxtrace.o -libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ -libperf-$(CONFIG_AUXTRACE) += intel-pt.o -libperf-$(CONFIG_AUXTRACE) += intel-bts.o -libperf-$(CONFIG_AUXTRACE) += arm-spe.o -libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o -libperf-$(CONFIG_AUXTRACE) += s390-cpumsf.o +perf-y += annotate.o +perf-y += block-range.o +perf-y += build-id.o +perf-y += config.o +perf-y += ctype.o +perf-y += db-export.o +perf-y += env.o +perf-y += event.o +perf-y += evlist.o +perf-y += evsel.o +perf-y += evsel_fprintf.o +perf-y += find_bit.o +perf-y += get_current_dir_name.o +perf-y += kallsyms.o +perf-y += levenshtein.o +perf-y += llvm-utils.o +perf-y += mmap.o +perf-y += memswap.o +perf-y += parse-events.o +perf-y += perf_regs.o +perf-y += path.o +perf-y += print_binary.o +perf-y += rbtree.o +perf-y += libstring.o +perf-y += bitmap.o +perf-y += hweight.o +perf-y += smt.o +perf-y += strbuf.o +perf-y += string.o +perf-y += strlist.o +perf-y += strfilter.o +perf-y += top.o +perf-y += usage.o +perf-y += dso.o +perf-y += symbol.o +perf-y += symbol_fprintf.o +perf-y += color.o +perf-y += color_config.o +perf-y += metricgroup.o +perf-y += header.o +perf-y += callchain.o +perf-y += values.o +perf-y += debug.o +perf-y += machine.o +perf-y += map.o +perf-y += pstack.o +perf-y += session.o +perf-y += sample-raw.o +perf-y += s390-sample-raw.o +perf-$(CONFIG_TRACE) += syscalltbl.o +perf-y += ordered-events.o +perf-y += namespaces.o +perf-y += comm.o +perf-y += thread.o +perf-y += thread_map.o +perf-y += trace-event-parse.o +perf-y += parse-events-flex.o +perf-y += parse-events-bison.o +perf-y += pmu.o +perf-y += pmu-flex.o +perf-y += pmu-bison.o +perf-y += trace-event-read.o +perf-y += trace-event-info.o +perf-y += trace-event-scripting.o +perf-y += trace-event.o +perf-y += svghelper.o +perf-y += sort.o +perf-y += hist.o +perf-y += util.o +perf-y += xyarray.o +perf-y += cpumap.o +perf-y += cgroup.o +perf-y += target.o +perf-y += rblist.o +perf-y += intlist.o +perf-y += vdso.o +perf-y += counts.o +perf-y += stat.o +perf-y += stat-shadow.o +perf-y += stat-display.o +perf-y += record.o +perf-y += srcline.o +perf-y += srccode.o +perf-y += data.o +perf-y += tsc.o +perf-y += cloexec.o +perf-y += call-path.o +perf-y += rwsem.o +perf-y += thread-stack.o +perf-$(CONFIG_AUXTRACE) += auxtrace.o +perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ +perf-$(CONFIG_AUXTRACE) += intel-pt.o +perf-$(CONFIG_AUXTRACE) += intel-bts.o +perf-$(CONFIG_AUXTRACE) += arm-spe.o +perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o +perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o ifdef CONFIG_LIBOPENCSD -libperf-$(CONFIG_AUXTRACE) += cs-etm.o -libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder/ +perf-$(CONFIG_AUXTRACE) += cs-etm.o +perf-$(CONFIG_AUXTRACE) += cs-etm-decoder/ endif -libperf-y += parse-branch-options.o -libperf-y += dump-insn.o -libperf-y += parse-regs-options.o -libperf-y += term.o -libperf-y += help-unknown-cmd.o -libperf-y += mem-events.o -libperf-y += vsprintf.o -libperf-y += units.o -libperf-y += time-utils.o -libperf-y += expr-bison.o -libperf-y += branch.o -libperf-y += mem2node.o - -libperf-$(CONFIG_LIBBPF) += bpf-loader.o -libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o -libperf-$(CONFIG_LIBELF) += symbol-elf.o -libperf-$(CONFIG_LIBELF) += probe-file.o -libperf-$(CONFIG_LIBELF) += probe-event.o +perf-y += parse-branch-options.o +perf-y += dump-insn.o +perf-y += parse-regs-options.o +perf-y += term.o +perf-y += help-unknown-cmd.o +perf-y += mem-events.o +perf-y += vsprintf.o +perf-y += units.o +perf-y += time-utils.o +perf-y += expr-bison.o +perf-y += branch.o +perf-y += mem2node.o + +perf-$(CONFIG_LIBBPF) += bpf-loader.o +perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o +perf-$(CONFIG_LIBELF) += symbol-elf.o +perf-$(CONFIG_LIBELF) += probe-file.o +perf-$(CONFIG_LIBELF) += probe-event.o ifndef CONFIG_LIBELF -libperf-y += symbol-minimal.o +perf-y += symbol-minimal.o endif ifndef CONFIG_SETNS -libperf-y += setns.o +perf-y += setns.o endif -libperf-$(CONFIG_DWARF) += probe-finder.o -libperf-$(CONFIG_DWARF) += dwarf-aux.o -libperf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += probe-finder.o +perf-$(CONFIG_DWARF) += dwarf-aux.o +perf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o -libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o -libperf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o -libperf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o +perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o +perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o -libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o +perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o -libperf-y += scripting-engines/ +perf-y += scripting-engines/ -libperf-$(CONFIG_ZLIB) += zlib.o -libperf-$(CONFIG_LZMA) += lzma.o -libperf-y += demangle-java.o -libperf-y += demangle-rust.o +perf-$(CONFIG_ZLIB) += zlib.o +perf-$(CONFIG_LZMA) += lzma.o +perf-y += demangle-java.o +perf-y += demangle-rust.o ifdef CONFIG_JITDUMP -libperf-$(CONFIG_LIBELF) += jitdump.o -libperf-$(CONFIG_LIBELF) += genelf.o -libperf-$(CONFIG_DWARF) += genelf_debug.o +perf-$(CONFIG_LIBELF) += jitdump.o +perf-$(CONFIG_LIBELF) += genelf.o +perf-$(CONFIG_DWARF) += genelf_debug.o endif -libperf-y += perf-hooks.o +perf-y += perf-hooks.o -libperf-$(CONFIG_LIBBPF) += bpf-event.o +perf-$(CONFIG_LIBBPF) += bpf-event.o -libperf-$(CONFIG_CXX) += c++/ +perf-$(CONFIG_CXX) += c++/ CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))" diff --git a/tools/perf/util/c++/Build b/tools/perf/util/c++/Build index 988fef1b11d7..613ecfd76527 100644 --- a/tools/perf/util/c++/Build +++ b/tools/perf/util/c++/Build @@ -1,2 +1,2 @@ -libperf-$(CONFIG_CLANGLLVM) += clang.o -libperf-$(CONFIG_CLANGLLVM) += clang-test.o +perf-$(CONFIG_CLANGLLVM) += clang.o +perf-$(CONFIG_CLANGLLVM) += clang-test.o diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build index bc22c39c727f..216cb17a3322 100644 --- a/tools/perf/util/cs-etm-decoder/Build +++ b/tools/perf/util/cs-etm-decoder/Build @@ -1 +1 @@ -libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o +perf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 1b704fbea9de..23bf788f84b9 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -1,4 +1,4 @@ -libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o +perf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build index 82d28c67e0f3..7b342ce38d99 100644 --- a/tools/perf/util/scripting-engines/Build +++ b/tools/perf/util/scripting-engines/Build @@ -1,5 +1,5 @@ -libperf-$(CONFIG_LIBPERL) += trace-event-perl.o -libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o +perf-$(CONFIG_LIBPERL) += trace-event-perl.o +perf-$(CONFIG_LIBPYTHON) += trace-event-python.o CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -- cgit v1.2.3 From 714a92d83fd1e20461f53549cfbee77b20be5032 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 13 Feb 2019 13:32:40 +0100 Subject: perf tools: Fix legacy events symbol separator parsing Fixing legacy symbol events parsing. We can't support single slash separator, like 'cycles/u', because it conflicts with non empty terms, like 'cycles/period/u'. Keeping only '//' and ':' separator for these events: cycles//u cycles:k And removing '/' separator support, which is not working anymore. Also adding automated tests for above events. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190213123246.4015-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 30 ++++++++++++++++++++++++++++++ tools/perf/util/parse-events.y | 4 ++-- 2 files changed, 32 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 3b97ac018d5a..4a69c07f4101 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1330,6 +1330,26 @@ static int test__checkevent_complex_name(struct perf_evlist *evlist) return 0; } +static int test__sym_event_slash(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong type", evsel->attr.type == PERF_TYPE_HARDWARE); + TEST_ASSERT_VAL("wrong config", evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + return 0; +} + +static int test__sym_event_dc(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong type", evsel->attr.type == PERF_TYPE_HARDWARE); + TEST_ASSERT_VAL("wrong config", evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + return 0; +} + static int count_tracepoints(void) { struct dirent *events_ent; @@ -1670,6 +1690,16 @@ static struct evlist_test test__events[] = { .name = "cycles/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'/Duk", .check = test__checkevent_complex_name, .id = 53 + }, + { + .name = "cycles//u", + .check = test__sym_event_slash, + .id = 54, + }, + { + .name = "cycles:k", + .check = test__sym_event_dc, + .id = 55, } }; diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index da8fe57691b8..44819bdb037d 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -311,7 +311,7 @@ value_sym '/' event_config '/' $$ = list; } | -value_sym sep_slash_dc +value_sym sep_slash_slash_dc { struct list_head *list; int type = $1 >> 16; @@ -702,7 +702,7 @@ PE_VALUE PE_ARRAY_RANGE PE_VALUE sep_dc: ':' | -sep_slash_dc: '/' | ':' | +sep_slash_slash_dc: '/' '/' | ':' | %% -- cgit v1.2.3 From 33bbc571ed79cace481fae4031b80a51d93ae997 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 13 Feb 2019 13:32:41 +0100 Subject: perf list: Display metric expressions for --details option Display metric expression itself when --details is specified. Current list with no details: # perf list metrics ... TopDownL1: IPC [Instructions Per Cycle (per logical thread)] SLOTS [Total issue-pipeline slots] ... Detailed output with metric formula: # perf list --details metrics ... TopDownL1: IPC [Instructions Per Cycle (per logical thread)] [inst_retired.any / cpu_clk_unhalted.thread] SLOTS [Total issue-pipeline slots] [4*(( cpu_clk_unhalted.thread_any / 2 ) if #smt_on else cycles)] ... Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190213123246.4015-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-list.c | 8 ++++---- tools/perf/util/metricgroup.c | 8 +++++++- tools/perf/util/metricgroup.h | 3 ++- tools/perf/util/parse-events.c | 2 +- 4 files changed, 14 insertions(+), 7 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index ead221e49f00..c9f98d00c0e9 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -82,9 +82,9 @@ int cmd_list(int argc, const char **argv) else if (strcmp(argv[i], "sdt") == 0) print_sdt_events(NULL, NULL, raw_dump); else if (strcmp(argv[i], "metric") == 0) - metricgroup__print(true, false, NULL, raw_dump); + metricgroup__print(true, false, NULL, raw_dump, details_flag); else if (strcmp(argv[i], "metricgroup") == 0) - metricgroup__print(false, true, NULL, raw_dump); + metricgroup__print(false, true, NULL, raw_dump, details_flag); else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; @@ -102,7 +102,7 @@ int cmd_list(int argc, const char **argv) s[sep_idx] = '\0'; print_tracepoint_events(s, s + sep_idx + 1, raw_dump); print_sdt_events(s, s + sep_idx + 1, raw_dump); - metricgroup__print(true, true, s, raw_dump); + metricgroup__print(true, true, s, raw_dump, details_flag); free(s); } else { if (asprintf(&s, "*%s*", argv[i]) < 0) { @@ -119,7 +119,7 @@ int cmd_list(int argc, const char **argv) details_flag); print_tracepoint_events(NULL, s, raw_dump); print_sdt_events(NULL, s, raw_dump); - metricgroup__print(true, true, NULL, raw_dump); + metricgroup__print(true, true, NULL, raw_dump, details_flag); free(s); } } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 8529cbd3955b..b8d864ed4afe 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -270,7 +270,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw) } void metricgroup__print(bool metrics, bool metricgroups, char *filter, - bool raw) + bool raw, bool details) { struct pmu_events_map *map = perf_pmu__find_map(NULL); struct pmu_event *pe; @@ -329,6 +329,12 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, if (asprintf(&s, "%s\n%*s%s]", pe->metric_name, 8, "[", pe->desc) < 0) return; + + if (details) { + if (asprintf(&s, "%s\n%*s%s]", + s, 8, "[", pe->metric_expr) < 0) + return; + } } if (!s) diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 8a155dba0581..5c52097a5c63 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -27,6 +27,7 @@ int metricgroup__parse_groups(const struct option *opt, const char *str, struct rblist *metric_events); -void metricgroup__print(bool metrics, bool groups, char *filter, bool raw); +void metricgroup__print(bool metrics, bool groups, char *filter, + bool raw, bool details); bool metricgroup__has_metric(const char *metric); #endif diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 920e1e6551dd..4dcc01b2532c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2540,7 +2540,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_sdt_events(NULL, NULL, name_only); - metricgroup__print(true, true, NULL, name_only); + metricgroup__print(true, true, NULL, name_only, details_flag); } int parse_events__is_hardcoded_term(struct parse_events_term *term) -- cgit v1.2.3 From a9aeb87b98badb55ec28dfcae8a8ce127d6208f5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 13 Feb 2019 13:32:43 +0100 Subject: perf header: Get rid of write_it label Simplifying the code a bit. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190213123246.4015-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index dec6d218c31c..f1508adefc16 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1042,11 +1042,9 @@ static int write_cpuid(struct feat_fd *ff, int ret; ret = get_cpuid(buffer, sizeof(buffer)); - if (!ret) - goto write_it; + if (ret) + return -1; - return -1; -write_it: return do_write_string(ff, buffer); } -- cgit v1.2.3 From aa4df30db52293c146b1851b88021d0bc2dd3b89 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 13 Feb 2019 13:32:44 +0100 Subject: perf header: Remove unused 'cpu_nr' field from 'struct cpu_topo' Not used at all. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190213123246.4015-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index f1508adefc16..61ce197c5362 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -563,7 +563,6 @@ static int write_cmdline(struct feat_fd *ff, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list" struct cpu_topo { - u32 cpu_nr; u32 core_sib; u32 thread_sib; char **core_siblings; @@ -679,7 +678,6 @@ static struct cpu_topo *build_cpu_topology(void) goto out_free; tp = addr; - tp->cpu_nr = nr; addr += sizeof(*tp); tp->core_siblings = addr; addr += sz; -- cgit v1.2.3 From b00ccb27f97367d89e2d7b419ed198b0985be55d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 19 Feb 2019 10:58:12 +0100 Subject: perf header: Fix wrong node write in NUMA_TOPOLOGY feature We are currently passing the node index instead of the real node number. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Peter Zijlstra Fixes: fbe96f29ce4b ("perf tools: Make perf.data more self-descriptive (v8)" Link: http://lkml.kernel.org/r/20190219095815.15931-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 61ce197c5362..c66f26ec557a 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -879,7 +879,7 @@ static int write_numa_topology(struct feat_fd *ff, if (ret < 0) break; - ret = write_topo_node(ff, i); + ret = write_topo_node(ff, j); if (ret < 0) break; } -- cgit v1.2.3 From 5135d5efcbb439c2acb20d6197dd57af3a456e76 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 19 Feb 2019 10:58:13 +0100 Subject: perf tools: Add cpu_topology object Make struct cpu_topo global and rename it to 'struct cpu_topology', so that it can be used from the 'perf record' command in the following patches. Add the following interface functions to load/free cpu topology details: struct cpu_topology *cpu_topology__new(void); void cpu_topology__delete(struct cpu_topology *tp); Move it to a separate source file cputopo.c together with numa related object in the following patches. No functional change, the new interface will be used in upcoming changes. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190219095815.15931-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/cputopo.c | 144 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/cputopo.h | 17 ++++++ tools/perf/util/header.c | 150 ++-------------------------------------------- 4 files changed, 166 insertions(+), 146 deletions(-) create mode 100644 tools/perf/util/cputopo.c create mode 100644 tools/perf/util/cputopo.h (limited to 'tools/perf/util') diff --git a/tools/perf/util/Build b/tools/perf/util/Build index ca0741c91903..3008d49fa587 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -69,6 +69,7 @@ perf-y += hist.o perf-y += util.o perf-y += xyarray.o perf-y += cpumap.o +perf-y += cputopo.o perf-y += cgroup.o perf-y += target.o perf-y += rblist.o diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c new file mode 100644 index 000000000000..84470ed4e707 --- /dev/null +++ b/tools/perf/util/cputopo.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +#include "cputopo.h" +#include "cpumap.h" +#include "util.h" + + +#define CORE_SIB_FMT \ + "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list" +#define THRD_SIB_FMT \ + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list" + +static int build_cpu_topology(struct cpu_topology *tp, int cpu) +{ + FILE *fp; + char filename[MAXPATHLEN]; + char *buf = NULL, *p; + size_t len = 0; + ssize_t sret; + u32 i = 0; + int ret = -1; + + sprintf(filename, CORE_SIB_FMT, cpu); + fp = fopen(filename, "r"); + if (!fp) + goto try_threads; + + sret = getline(&buf, &len, fp); + fclose(fp); + if (sret <= 0) + goto try_threads; + + p = strchr(buf, '\n'); + if (p) + *p = '\0'; + + for (i = 0; i < tp->core_sib; i++) { + if (!strcmp(buf, tp->core_siblings[i])) + break; + } + if (i == tp->core_sib) { + tp->core_siblings[i] = buf; + tp->core_sib++; + buf = NULL; + len = 0; + } + ret = 0; + +try_threads: + sprintf(filename, THRD_SIB_FMT, cpu); + fp = fopen(filename, "r"); + if (!fp) + goto done; + + if (getline(&buf, &len, fp) <= 0) + goto done; + + p = strchr(buf, '\n'); + if (p) + *p = '\0'; + + for (i = 0; i < tp->thread_sib; i++) { + if (!strcmp(buf, tp->thread_siblings[i])) + break; + } + if (i == tp->thread_sib) { + tp->thread_siblings[i] = buf; + tp->thread_sib++; + buf = NULL; + } + ret = 0; +done: + if (fp) + fclose(fp); + free(buf); + return ret; +} + +void cpu_topology__delete(struct cpu_topology *tp) +{ + u32 i; + + if (!tp) + return; + + for (i = 0 ; i < tp->core_sib; i++) + zfree(&tp->core_siblings[i]); + + for (i = 0 ; i < tp->thread_sib; i++) + zfree(&tp->thread_siblings[i]); + + free(tp); +} + +struct cpu_topology *cpu_topology__new(void) +{ + struct cpu_topology *tp = NULL; + void *addr; + u32 nr, i; + size_t sz; + long ncpus; + int ret = -1; + struct cpu_map *map; + + ncpus = cpu__max_present_cpu(); + + /* build online CPU map */ + map = cpu_map__new(NULL); + if (map == NULL) { + pr_debug("failed to get system cpumap\n"); + return NULL; + } + + nr = (u32)(ncpus & UINT_MAX); + + sz = nr * sizeof(char *); + addr = calloc(1, sizeof(*tp) + 2 * sz); + if (!addr) + goto out_free; + + tp = addr; + addr += sizeof(*tp); + tp->core_siblings = addr; + addr += sz; + tp->thread_siblings = addr; + + for (i = 0; i < nr; i++) { + if (!cpu_map__has(map, i)) + continue; + + ret = build_cpu_topology(tp, i); + if (ret < 0) + break; + } + +out_free: + cpu_map__put(map); + if (ret) { + cpu_topology__delete(tp); + tp = NULL; + } + return tp; +} diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h new file mode 100644 index 000000000000..4b5f4112b6f8 --- /dev/null +++ b/tools/perf/util/cputopo.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_CPUTOPO_H +#define __PERF_CPUTOPO_H + +#include + +struct cpu_topology { + u32 core_sib; + u32 thread_sib; + char **core_siblings; + char **thread_siblings; +}; + +struct cpu_topology *cpu_topology__new(void); +void cpu_topology__delete(struct cpu_topology *tp); + +#endif /* __PERF_CPUTOPO_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index c66f26ec557a..80ac57e6d38f 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -39,6 +39,7 @@ #include "tool.h" #include "time-utils.h" #include "units.h" +#include "cputopo.h" #include "sane_ctype.h" @@ -557,158 +558,15 @@ static int write_cmdline(struct feat_fd *ff, return 0; } -#define CORE_SIB_FMT \ - "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list" -#define THRD_SIB_FMT \ - "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list" - -struct cpu_topo { - u32 core_sib; - u32 thread_sib; - char **core_siblings; - char **thread_siblings; -}; - -static int build_cpu_topo(struct cpu_topo *tp, int cpu) -{ - FILE *fp; - char filename[MAXPATHLEN]; - char *buf = NULL, *p; - size_t len = 0; - ssize_t sret; - u32 i = 0; - int ret = -1; - - sprintf(filename, CORE_SIB_FMT, cpu); - fp = fopen(filename, "r"); - if (!fp) - goto try_threads; - - sret = getline(&buf, &len, fp); - fclose(fp); - if (sret <= 0) - goto try_threads; - - p = strchr(buf, '\n'); - if (p) - *p = '\0'; - - for (i = 0; i < tp->core_sib; i++) { - if (!strcmp(buf, tp->core_siblings[i])) - break; - } - if (i == tp->core_sib) { - tp->core_siblings[i] = buf; - tp->core_sib++; - buf = NULL; - len = 0; - } - ret = 0; - -try_threads: - sprintf(filename, THRD_SIB_FMT, cpu); - fp = fopen(filename, "r"); - if (!fp) - goto done; - - if (getline(&buf, &len, fp) <= 0) - goto done; - - p = strchr(buf, '\n'); - if (p) - *p = '\0'; - - for (i = 0; i < tp->thread_sib; i++) { - if (!strcmp(buf, tp->thread_siblings[i])) - break; - } - if (i == tp->thread_sib) { - tp->thread_siblings[i] = buf; - tp->thread_sib++; - buf = NULL; - } - ret = 0; -done: - if(fp) - fclose(fp); - free(buf); - return ret; -} - -static void free_cpu_topo(struct cpu_topo *tp) -{ - u32 i; - - if (!tp) - return; - - for (i = 0 ; i < tp->core_sib; i++) - zfree(&tp->core_siblings[i]); - - for (i = 0 ; i < tp->thread_sib; i++) - zfree(&tp->thread_siblings[i]); - - free(tp); -} - -static struct cpu_topo *build_cpu_topology(void) -{ - struct cpu_topo *tp = NULL; - void *addr; - u32 nr, i; - size_t sz; - long ncpus; - int ret = -1; - struct cpu_map *map; - - ncpus = cpu__max_present_cpu(); - - /* build online CPU map */ - map = cpu_map__new(NULL); - if (map == NULL) { - pr_debug("failed to get system cpumap\n"); - return NULL; - } - - nr = (u32)(ncpus & UINT_MAX); - - sz = nr * sizeof(char *); - addr = calloc(1, sizeof(*tp) + 2 * sz); - if (!addr) - goto out_free; - - tp = addr; - addr += sizeof(*tp); - tp->core_siblings = addr; - addr += sz; - tp->thread_siblings = addr; - - for (i = 0; i < nr; i++) { - if (!cpu_map__has(map, i)) - continue; - - ret = build_cpu_topo(tp, i); - if (ret < 0) - break; - } - -out_free: - cpu_map__put(map); - if (ret) { - free_cpu_topo(tp); - tp = NULL; - } - return tp; -} static int write_cpu_topology(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { - struct cpu_topo *tp; + struct cpu_topology *tp; u32 i; int ret, j; - tp = build_cpu_topology(); + tp = cpu_topology__new(); if (!tp) return -1; @@ -746,7 +604,7 @@ static int write_cpu_topology(struct feat_fd *ff, return ret; } done: - free_cpu_topo(tp); + cpu_topology__delete(tp); return ret; } -- cgit v1.2.3 From 48e6c5acd36873ff022cd97de866bfd0a36a3b99 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 19 Feb 2019 10:58:14 +0100 Subject: perf tools: Add numa_topology object Add the numa_topology object to return the list of numa nodes together with their cpus. It will replace the numa code in header.c and will be used from 'perf record' in the following patches. Add the following interface functions to load numa details: struct numa_topology *numa_topology__new(void); void numa_topology__delete(struct numa_topology *tp); And replace the current (copied) local interface, with no functional changes. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190219095815.15931-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cputopo.c | 118 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/cputopo.h | 16 +++++++ tools/perf/util/header.c | 119 ++++++++++------------------------------------ 3 files changed, 160 insertions(+), 93 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index 84470ed4e707..83ffca2ea9ee 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -1,9 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include "cputopo.h" #include "cpumap.h" #include "util.h" +#include "env.h" #define CORE_SIB_FMT \ @@ -142,3 +144,119 @@ out_free: } return tp; } + +static int load_numa_node(struct numa_topology_node *node, int nr) +{ + char str[MAXPATHLEN]; + char field[32]; + char *buf = NULL, *p; + size_t len = 0; + int ret = -1; + FILE *fp; + u64 mem; + + node->node = (u32) nr; + + sprintf(str, "/sys/devices/system/node/node%d/meminfo", nr); + fp = fopen(str, "r"); + if (!fp) + return -1; + + while (getline(&buf, &len, fp) > 0) { + /* skip over invalid lines */ + if (!strchr(buf, ':')) + continue; + if (sscanf(buf, "%*s %*d %31s %"PRIu64, field, &mem) != 2) + goto err; + if (!strcmp(field, "MemTotal:")) + node->mem_total = mem; + if (!strcmp(field, "MemFree:")) + node->mem_free = mem; + if (node->mem_total && node->mem_free) + break; + } + + fclose(fp); + fp = NULL; + + sprintf(str, "/sys/devices/system/node/node%d/cpulist", nr); + + fp = fopen(str, "r"); + if (!fp) + return -1; + + if (getline(&buf, &len, fp) <= 0) + goto err; + + p = strchr(buf, '\n'); + if (p) + *p = '\0'; + + node->cpus = buf; + fclose(fp); + return 0; + +err: + free(buf); + if (fp) + fclose(fp); + return ret; +} + +struct numa_topology *numa_topology__new(void) +{ + struct cpu_map *node_map = NULL; + struct numa_topology *tp = NULL; + char *buf = NULL; + size_t len = 0; + u32 nr, i; + FILE *fp; + char *c; + + fp = fopen("/sys/devices/system/node/online", "r"); + if (!fp) + return NULL; + + if (getline(&buf, &len, fp) <= 0) + goto out; + + c = strchr(buf, '\n'); + if (c) + *c = '\0'; + + node_map = cpu_map__new(buf); + if (!node_map) + goto out; + + nr = (u32) node_map->nr; + + tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0])*nr); + if (!tp) + goto out; + + tp->nr = nr; + + for (i = 0; i < nr; i++) { + if (load_numa_node(&tp->nodes[i], node_map->map[i])) { + numa_topology__delete(tp); + tp = NULL; + break; + } + } + +out: + free(buf); + fclose(fp); + cpu_map__put(node_map); + return tp; +} + +void numa_topology__delete(struct numa_topology *tp) +{ + u32 i; + + for (i = 0; i < tp->nr; i++) + free(tp->nodes[i].cpus); + + free(tp); +} diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h index 4b5f4112b6f8..47a97e71acdf 100644 --- a/tools/perf/util/cputopo.h +++ b/tools/perf/util/cputopo.h @@ -3,6 +3,7 @@ #define __PERF_CPUTOPO_H #include +#include "env.h" struct cpu_topology { u32 core_sib; @@ -11,7 +12,22 @@ struct cpu_topology { char **thread_siblings; }; +struct numa_topology_node { + char *cpus; + u32 node; + u64 mem_total; + u64 mem_free; +}; + +struct numa_topology { + u32 nr; + struct numa_topology_node nodes[0]; +}; + struct cpu_topology *cpu_topology__new(void); void cpu_topology__delete(struct cpu_topology *tp); +struct numa_topology *numa_topology__new(void); +void numa_topology__delete(struct numa_topology *tp); + #endif /* __PERF_CPUTOPO_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 80ac57e6d38f..a2323d777dae 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -639,112 +639,45 @@ static int write_total_mem(struct feat_fd *ff, return ret; } -static int write_topo_node(struct feat_fd *ff, int node) -{ - char str[MAXPATHLEN]; - char field[32]; - char *buf = NULL, *p; - size_t len = 0; - FILE *fp; - u64 mem_total, mem_free, mem; - int ret = -1; - - sprintf(str, "/sys/devices/system/node/node%d/meminfo", node); - fp = fopen(str, "r"); - if (!fp) - return -1; - - while (getline(&buf, &len, fp) > 0) { - /* skip over invalid lines */ - if (!strchr(buf, ':')) - continue; - if (sscanf(buf, "%*s %*d %31s %"PRIu64, field, &mem) != 2) - goto done; - if (!strcmp(field, "MemTotal:")) - mem_total = mem; - if (!strcmp(field, "MemFree:")) - mem_free = mem; - } - - fclose(fp); - fp = NULL; - - ret = do_write(ff, &mem_total, sizeof(u64)); - if (ret) - goto done; - - ret = do_write(ff, &mem_free, sizeof(u64)); - if (ret) - goto done; - - ret = -1; - sprintf(str, "/sys/devices/system/node/node%d/cpulist", node); - - fp = fopen(str, "r"); - if (!fp) - goto done; - - if (getline(&buf, &len, fp) <= 0) - goto done; - - p = strchr(buf, '\n'); - if (p) - *p = '\0'; - - ret = do_write_string(ff, buf); -done: - free(buf); - if (fp) - fclose(fp); - return ret; -} - static int write_numa_topology(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { - char *buf = NULL; - size_t len = 0; - FILE *fp; - struct cpu_map *node_map = NULL; - char *c; - u32 nr, i, j; + struct numa_topology *tp; int ret = -1; + u32 i; - fp = fopen("/sys/devices/system/node/online", "r"); - if (!fp) - return -1; + tp = numa_topology__new(); + if (!tp) + return -ENOMEM; - if (getline(&buf, &len, fp) <= 0) - goto done; + ret = do_write(ff, &tp->nr, sizeof(u32)); + if (ret < 0) + goto err; - c = strchr(buf, '\n'); - if (c) - *c = '\0'; + for (i = 0; i < tp->nr; i++) { + struct numa_topology_node *n = &tp->nodes[i]; - node_map = cpu_map__new(buf); - if (!node_map) - goto done; - - nr = (u32)node_map->nr; + ret = do_write(ff, &n->node, sizeof(u32)); + if (ret < 0) + goto err; - ret = do_write(ff, &nr, sizeof(nr)); - if (ret < 0) - goto done; + ret = do_write(ff, &n->mem_total, sizeof(u64)); + if (ret) + goto err; - for (i = 0; i < nr; i++) { - j = (u32)node_map->map[i]; - ret = do_write(ff, &j, sizeof(j)); - if (ret < 0) - break; + ret = do_write(ff, &n->mem_free, sizeof(u64)); + if (ret) + goto err; - ret = write_topo_node(ff, j); + ret = do_write_string(ff, n->cpus); if (ret < 0) - break; + goto err; } -done: - free(buf); - fclose(fp); - cpu_map__put(node_map); + + ret = 0; + +err: + numa_topology__delete(tp); return ret; } -- cgit v1.2.3 From e19a01c1438e123d169fd09376a221d844797174 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 19 Feb 2019 10:58:15 +0100 Subject: perf tools: Use sysfs__mountpoint() when reading cpu topology Use sysfs__mountpoint() when reading sysfs files to obtain cpu/numa topologies. Also use scnprintf instead of sprintf as suggested by Namhyung. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190219095815.15931-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cputopo.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index 83ffca2ea9ee..ece0710249d4 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include "cputopo.h" #include "cpumap.h" @@ -9,9 +10,15 @@ #define CORE_SIB_FMT \ - "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list" + "%s/devices/system/cpu/cpu%d/topology/core_siblings_list" #define THRD_SIB_FMT \ - "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list" + "%s/devices/system/cpu/cpu%d/topology/thread_siblings_list" +#define NODE_ONLINE_FMT \ + "%s/devices/system/node/online" +#define NODE_MEMINFO_FMT \ + "%s/devices/system/node/node%d/meminfo" +#define NODE_CPULIST_FMT \ + "%s/devices/system/node/node%d/cpulist" static int build_cpu_topology(struct cpu_topology *tp, int cpu) { @@ -23,7 +30,8 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu) u32 i = 0; int ret = -1; - sprintf(filename, CORE_SIB_FMT, cpu); + scnprintf(filename, MAXPATHLEN, CORE_SIB_FMT, + sysfs__mountpoint(), cpu); fp = fopen(filename, "r"); if (!fp) goto try_threads; @@ -50,7 +58,8 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu) ret = 0; try_threads: - sprintf(filename, THRD_SIB_FMT, cpu); + scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT, + sysfs__mountpoint(), cpu); fp = fopen(filename, "r"); if (!fp) goto done; @@ -157,7 +166,8 @@ static int load_numa_node(struct numa_topology_node *node, int nr) node->node = (u32) nr; - sprintf(str, "/sys/devices/system/node/node%d/meminfo", nr); + scnprintf(str, MAXPATHLEN, NODE_MEMINFO_FMT, + sysfs__mountpoint(), nr); fp = fopen(str, "r"); if (!fp) return -1; @@ -179,7 +189,8 @@ static int load_numa_node(struct numa_topology_node *node, int nr) fclose(fp); fp = NULL; - sprintf(str, "/sys/devices/system/node/node%d/cpulist", nr); + scnprintf(str, MAXPATHLEN, NODE_CPULIST_FMT, + sysfs__mountpoint(), nr); fp = fopen(str, "r"); if (!fp) @@ -207,13 +218,17 @@ struct numa_topology *numa_topology__new(void) { struct cpu_map *node_map = NULL; struct numa_topology *tp = NULL; + char path[MAXPATHLEN]; char *buf = NULL; size_t len = 0; u32 nr, i; FILE *fp; char *c; - fp = fopen("/sys/devices/system/node/online", "r"); + scnprintf(path, MAXPATHLEN, NODE_ONLINE_FMT, + sysfs__mountpoint()); + + fp = fopen(path, "r"); if (!fp) return NULL; -- cgit v1.2.3 From 7346195e8643482968f547483e0d823ec1982fab Mon Sep 17 00:00:00 2001 From: He Kuang Date: Tue, 19 Feb 2019 21:05:31 +0800 Subject: perf report: Don't shadow inlined symbol with different addr range We can't assume inlined symbols with the same name are equal, because their address range may be different. This will cause the symbols with different addresses be shadowed when adding to the hist entry, and lead to ERANGE error when checking the symbol address during sample parse, the addr should be within the range of [sym.start, sym.end]. The error message is like: "0x36aea60 [0x8]: failed to process type: 68". The second parameter of symbol__new() is the length of the fake symbol for the inline frame, which is the subtraction of the end and start address of base_sym. Signed-off-by: He Kuang Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: aa441895f7b4 ("perf report: Compare symbol name for inlined frames when sorting") Link: http://lkml.kernel.org/r/20190219130531.15692-1-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 10 ++++++++-- tools/perf/util/srcline.c | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 2b6c1ccb878c..d2299e912e59 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -231,8 +231,14 @@ static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r) if (sym_l == sym_r) return 0; - if (sym_l->inlined || sym_r->inlined) - return strcmp(sym_l->name, sym_r->name); + if (sym_l->inlined || sym_r->inlined) { + int ret = strcmp(sym_l->name, sym_r->name); + + if (ret) + return ret; + if ((sym_l->start <= sym_r->end) && (sym_l->end >= sym_r->start)) + return 0; + } if (sym_l->start != sym_r->start) return (int64_t)(sym_r->start - sym_l->start); diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 00f215580b5a..10ca1533937e 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -104,7 +104,7 @@ static struct symbol *new_inline_sym(struct dso *dso, } else { /* create a fake symbol for the inline frame */ inline_sym = symbol__new(base_sym ? base_sym->start : 0, - base_sym ? base_sym->end : 0, + base_sym ? (base_sym->end - base_sym->start) : 0, base_sym ? base_sym->binding : 0, base_sym ? base_sym->type : 0, funcname); -- cgit v1.2.3 From d19f856479feef7c1383f02b87688563a0ef7a14 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 19 Feb 2019 16:11:56 -0300 Subject: perf bpf: Add bpf_map dumper At some point I'll suggest moving this to libbpf, for now I'll experiment with ways to dump BPF maps set by events in 'perf trace', starting with a very basic dumper for the current very limited needs of the augmented_raw_syscalls code: dumping booleans. Having functions that apply to the map keys and values and do table lookup in things like syscall id to string tables should come next. Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Yonghong Song Link: https://lkml.kernel.org/n/tip-lz14w0esqyt1333aon05jpwc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/bpf_map.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/bpf_map.h | 22 +++++++++++++++ 3 files changed, 95 insertions(+) create mode 100644 tools/perf/util/bpf_map.c create mode 100644 tools/perf/util/bpf_map.h (limited to 'tools/perf/util') diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 3008d49fa587..8dd3102301ea 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -115,6 +115,7 @@ perf-y += branch.o perf-y += mem2node.o perf-$(CONFIG_LIBBPF) += bpf-loader.o +perf-$(CONFIG_LIBBPF) += bpf_map.o perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o diff --git a/tools/perf/util/bpf_map.c b/tools/perf/util/bpf_map.c new file mode 100644 index 000000000000..eb853ca67cf4 --- /dev/null +++ b/tools/perf/util/bpf_map.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +#include "util/bpf_map.h" +#include +#include +#include +#include +#include +#include +#include + +static bool bpf_map_def__is_per_cpu(const struct bpf_map_def *def) +{ + return def->type == BPF_MAP_TYPE_PERCPU_HASH || + def->type == BPF_MAP_TYPE_PERCPU_ARRAY || + def->type == BPF_MAP_TYPE_LRU_PERCPU_HASH || + def->type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE; +} + +static void *bpf_map_def__alloc_value(const struct bpf_map_def *def) +{ + if (bpf_map_def__is_per_cpu(def)) + return malloc(round_up(def->value_size, 8) * sysconf(_SC_NPROCESSORS_CONF)); + + return malloc(def->value_size); +} + +int bpf_map__fprintf(struct bpf_map *map, FILE *fp) +{ + const struct bpf_map_def *def = bpf_map__def(map); + void *prev_key = NULL, *key, *value; + int fd = bpf_map__fd(map), err; + int printed = 0; + + if (fd < 0) + return fd; + + if (IS_ERR(def)) + return PTR_ERR(def); + + err = -ENOMEM; + key = malloc(def->key_size); + if (key == NULL) + goto out; + + value = bpf_map_def__alloc_value(def); + if (value == NULL) + goto out_free_key; + + while ((err = bpf_map_get_next_key(fd, prev_key, key) == 0)) { + int intkey = *(int *)key; + + if (!bpf_map_lookup_elem(fd, key, value)) { + bool boolval = *(bool *)value; + if (boolval) + printed += fprintf(fp, "[%d] = %d,\n", intkey, boolval); + } else { + printed += fprintf(fp, "[%d] = ERROR,\n", intkey); + } + + prev_key = key; + } + + if (err == ENOENT) + err = printed; + + free(value); +out_free_key: + free(key); +out: + return err; +} diff --git a/tools/perf/util/bpf_map.h b/tools/perf/util/bpf_map.h new file mode 100644 index 000000000000..d6abd5e47af8 --- /dev/null +++ b/tools/perf/util/bpf_map.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#ifndef __PERF_BPF_MAP_H +#define __PERF_BPF_MAP_H 1 + +#include +#include +struct bpf_map; + +#ifdef HAVE_LIBBPF_SUPPORT + +int bpf_map__fprintf(struct bpf_map *map, FILE *fp); + +#else + +static inline int bpf_map__fprintf(struct bpf_map *map __maybe_unused, FILE *fp __maybe_unused) +{ + return 0; +} + +#endif // HAVE_LIBBPF_SUPPORT + +#endif // __PERF_BPF_MAP_H -- cgit v1.2.3 From 529c1a9e18c3b470d2eff7879923eda40b1431db Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Feb 2019 13:27:55 +0100 Subject: perf session: Don't report zero period samples for slave events There's no reason to deliver a sample with zero period. It means there was no value for slave event since its last group leader sample. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190220122800.864-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 18fb9c8cbf9c..c764bbc91009 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1202,6 +1202,13 @@ static int deliver_sample_value(struct perf_evlist *evlist, return 0; } + /* + * There's no reason to deliver sample + * for zero period, bail out. + */ + if (!sample->period) + return 0; + return tool->sample(tool, event, sample, sid->evsel, machine); } -- cgit v1.2.3 From 6e7e8b9fec45f7bef11a5d9d2dd9febe439d4047 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Feb 2019 13:27:56 +0100 Subject: perf evsel: Force sample_type for slave events Force sample_type setup for slave events in group leader sessions. We don't get sample for slave events, we make them when delivering group leader sample. Set the slave event to follow the master sample_type to ease up report. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190220122800.864-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 684c893ca6bc..dfe2958e6287 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -956,6 +956,14 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->sample_freq = 0; attr->sample_period = 0; attr->write_backward = 0; + + /* + * We don't get sample for slave events, we make them + * when delivering group leader sample. Set the slave + * event to follow the master sample_type to ease up + * report. + */ + attr->sample_type = leader->attr.sample_type; } if (opts->no_samples) -- cgit v1.2.3 From b20fe10642f90d17c07b5e621bf4c00093c5654b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Feb 2019 13:27:58 +0100 Subject: perf bpf-event: Add missing new line into pr_debug call Add a missing new line into pr_debug call in perf_event__synthesize_bpf_events(), so that the error message does not screw the verbose output. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Link: http://lkml.kernel.org/r/20190220122800.864-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 62dda96b0096..028c8ec1f62a 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -233,7 +233,7 @@ int perf_event__synthesize_bpf_events(struct perf_tool *tool, err = 0; break; } - pr_debug("%s: can't get next program: %s%s", + pr_debug("%s: can't get next program: %s%s\n", __func__, strerror(errno), errno == EINVAL ? " -- kernel too old?" : ""); /* don't report error on old kernel or EPERM */ -- cgit v1.2.3 From deb83da16c1f3412fcb33c8944b3d854c4b265d6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Feb 2019 13:27:59 +0100 Subject: perf cpumap: Increase debug level for cpu_map__snprint verbose output So it does not screw up single -v verbose output. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190220122800.864-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 0bbc3feb0894..0b599229bc7e 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -681,7 +681,7 @@ size_t cpu_map__snprint(struct cpu_map *map, char *buf, size_t size) #undef COMMA - pr_debug("cpumask list: %s\n", buf); + pr_debug2("cpumask list: %s\n", buf); return ret; } -- cgit v1.2.3 From b4409ae112caa6315f6ee678e953b9fc93e6919c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Feb 2019 13:28:00 +0100 Subject: perf tools: Make rm_rf() remove single file Let rm_rf() remove a file if it's provided by path, not just directories. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190220122800.864-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 320b0fef249a..3ee410fc047a 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -120,16 +120,26 @@ int mkdir_p(char *path, mode_t mode) int rm_rf(const char *path) { DIR *dir; - int ret = 0; + int ret; struct dirent *d; char namebuf[PATH_MAX]; + struct stat statbuf; + /* Do not fail if there's no file. */ + ret = lstat(path, &statbuf); + if (ret) + return 0; + + /* Try to remove any file we get. */ + if (!(statbuf.st_mode & S_IFDIR)) + return unlink(path); + + /* We have directory in path. */ dir = opendir(path); if (dir == NULL) - return 0; + return -1; while ((d = readdir(dir)) != NULL && !ret) { - struct stat statbuf; if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) continue; -- cgit v1.2.3 From 11db1ad4513d6205d2519e1a30ff4cef746e3243 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Thu, 21 Feb 2019 17:57:16 +0800 Subject: perf annotate: Fix getting source line failure The output of "perf annotate -l --stdio xxx" changed since commit 425859ff0de33 ("perf annotate: No need to calculate notes->start twice") removed notes->start assignment in symbol__calc_lines(). It will get failed in find_address_in_section() from symbol__tty_annotate() subroutine as the a2l->addr is wrong. So the annotate summary doesn't report the line number of source code correctly. Before fix: liwei@euler:~/main_code/hulk_work/hulk/tools/perf$ cat common_while_1.c void hotspot_1(void) { volatile int i; for (i = 0; i < 0x10000000; i++); for (i = 0; i < 0x10000000; i++); for (i = 0; i < 0x10000000; i++); } int main(void) { hotspot_1(); return 0; } liwei@euler:~/main_code/hulk_work/hulk/tools/perf$ gcc common_while_1.c -g -o common_while_1 liwei@euler:~/main_code/hulk_work/hulk/tools/perf$ sudo ./perf record ./common_while_1 [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.488 MB perf.data (12498 samples) ] liwei@euler:~/main_code/hulk_work/hulk/tools/perf$ sudo ./perf annotate -l -s hotspot_1 --stdio Sorted summary for file /home/liwei/main_code/hulk_work/hulk/tools/perf/common_while_1 ---------------------------------------------- 19.30 common_while_1[32] 19.03 common_while_1[4e] 19.01 common_while_1[16] 5.04 common_while_1[13] 4.99 common_while_1[4b] 4.78 common_while_1[2c] 4.77 common_while_1[10] 4.66 common_while_1[2f] 4.59 common_while_1[51] 4.59 common_while_1[35] 4.52 common_while_1[19] 4.20 common_while_1[56] 0.51 common_while_1[48] Percent | Source code & Disassembly of common_while_1 for cycles:ppp (12480 samples, percent: local period) ----------------------------------------------------------------------------------------------------------------- : : : : Disassembly of section .text: : : 00000000000005fa : : hotspot_1(): : void hotspot_1(void) : { 0.00 : 5fa: push %rbp 0.00 : 5fb: mov %rsp,%rbp : volatile int i; : : for (i = 0; i < 0x10000000; i++); 0.00 : 5fe: movl $0x0,-0x4(%rbp) 0.00 : 605: jmp 610 0.00 : 607: mov -0x4(%rbp),%eax common_while_1[10] 4.77 : 60a: add $0x1,%eax common_while_1[13] 5.04 : 60d: mov %eax,-0x4(%rbp) common_while_1[16] 19.01 : 610: mov -0x4(%rbp),%eax common_while_1[19] 4.52 : 613: cmp $0xfffffff,%eax 0.00 : 618: jle 607 : for (i = 0; i < 0x10000000; i++); ... After fix: liwei@euler:~/main_code/hulk_work/hulk/tools/perf$ sudo ./perf record ./common_while_1 [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.488 MB perf.data (12500 samples) ] liwei@euler:~/main_code/hulk_work/hulk/tools/perf$ sudo ./perf annotate -l -s hotspot_1 --stdio Sorted summary for file /home/liwei/main_code/hulk_work/hulk/tools/perf/common_while_1 ---------------------------------------------- 33.34 common_while_1.c:5 33.34 common_while_1.c:6 33.32 common_while_1.c:7 Percent | Source code & Disassembly of common_while_1 for cycles:ppp (12482 samples, percent: local period) ----------------------------------------------------------------------------------------------------------------- : : : : Disassembly of section .text: : : 00000000000005fa : : hotspot_1(): : void hotspot_1(void) : { 0.00 : 5fa: push %rbp 0.00 : 5fb: mov %rsp,%rbp : volatile int i; : : for (i = 0; i < 0x10000000; i++); 0.00 : 5fe: movl $0x0,-0x4(%rbp) 0.00 : 605: jmp 610 0.00 : 607: mov -0x4(%rbp),%eax common_while_1.c:5 4.70 : 60a: add $0x1,%eax 4.89 : 60d: mov %eax,-0x4(%rbp) common_while_1.c:5 19.03 : 610: mov -0x4(%rbp),%eax common_while_1.c:5 4.72 : 613: cmp $0xfffffff,%eax 0.00 : 618: jle 607 : for (i = 0; i < 0x10000000; i++); 0.00 : 61a: movl $0x0,-0x4(%rbp) 0.00 : 621: jmp 62c 0.00 : 623: mov -0x4(%rbp),%eax common_while_1.c:6 4.54 : 626: add $0x1,%eax 4.73 : 629: mov %eax,-0x4(%rbp) common_while_1.c:6 19.54 : 62c: mov -0x4(%rbp),%eax common_while_1.c:6 4.54 : 62f: cmp $0xfffffff,%eax ... Signed-off-by: Wei Li Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 425859ff0de33 ("perf annotate: No need to calculate notes->start twice") Link: http://lkml.kernel.org/r/20190221095716.39529-1-liwei391@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 2468b8aa0b6b..11a8a447a3af 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1891,6 +1891,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, struct annotation_options *options, struct arch **parch) { + struct annotation *notes = symbol__annotation(sym); struct annotate_args args = { .privsize = privsize, .evsel = evsel, @@ -1921,6 +1922,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, args.ms.map = map; args.ms.sym = sym; + notes->start = map__rip_2objdump(map, sym->start); return symbol__disassemble(sym, &args); } @@ -2796,8 +2798,6 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *ev symbol__calc_percent(sym, evsel); - notes->start = map__rip_2objdump(map, sym->start); - annotation__set_offsets(notes, size); annotation__mark_jump_targets(notes, sym); annotation__compute_ipc(notes, size); -- cgit v1.2.3 From 1f35cd65386e02430546727bd3cc2508d052e3ec Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 9 Jan 2019 11:18:34 +0200 Subject: perf thread-stack: Improve thread_stack__no_call_return() Improve thread_stack__no_call_return() to better handle 'returns' that do not match the stack i.e. 'no call'. See code comments for details. The example below shows how retpolines are affected: Example: $ cat simple-retpoline.c __attribute__((noinline)) int bar(void) { return -1; } int foo(void) { return bar() + 1; } __attribute__((indirect_branch("thunk"))) int main() { int (*volatile fn)(void) = foo; fn(); return fn(); } $ gcc -ggdb3 -Wall -Wextra -O2 -o simple-retpoline simple-retpoline.c $ objdump -d simple-retpoline 0000000000001040
: 1040: 48 83 ec 18 sub $0x18,%rsp 1044: 48 8d 05 25 01 00 00 lea 0x125(%rip),%rax # 1170 104b: 48 89 44 24 08 mov %rax,0x8(%rsp) 1050: 48 8b 44 24 08 mov 0x8(%rsp),%rax 1055: e8 1f 01 00 00 callq 1179 <__x86_indirect_thunk_rax> 105a: 48 8b 44 24 08 mov 0x8(%rsp),%rax 105f: 48 83 c4 18 add $0x18,%rsp 1063: e9 11 01 00 00 jmpq 1179 <__x86_indirect_thunk_rax> 0000000000001160 : 1160: b8 ff ff ff ff mov $0xffffffff,%eax 1165: c3 retq 0000000000001170 : 1170: e8 eb ff ff ff callq 1160 1175: 83 c0 01 add $0x1,%eax 1178: c3 retq 0000000000001179 <__x86_indirect_thunk_rax>: 1179: e8 07 00 00 00 callq 1185 <__x86_indirect_thunk_rax+0xc> 117e: f3 90 pause 1180: 0f ae e8 lfence 1183: eb f9 jmp 117e <__x86_indirect_thunk_rax+0x5> 1185: 48 89 04 24 mov %rax,(%rsp) 1189: c3 retq $ perf record -o simple-retpoline.perf.data -e intel_pt/cyc/u ./simple-retpoline [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0,017 MB simple-retpoline.perf.data ] $ perf script -i simple-retpoline.perf.data --itrace=be -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py simple-retpoline.db branches calls 2019-01-08 14:03:37.851655 Creating database... 2019-01-08 14:03:37.863256 Writing records... 2019-01-08 14:03:38.069750 Adding indexes 2019-01-08 14:03:38.078799 Done $ ~/libexec/perf-core/scripts/python/exported-sql-viewer.py simple-retpoline.db Before: main -> __x86_indirect_thunk_rax -> __x86_indirect_thunk_rax -> __x86_indirect_thunk_rax -> bar After: main -> __x86_indirect_thunk_rax -> __x86_indirect_thunk_rax -> foo -> bar Committer testing: Chose "Reports", Then "Context-Sensitive Call Graph" and then go on expanding: Before: simple-retpolin PID:PID _start _start __libc_start_main main __x86_indirect_thunk_rax __x86_indirect_thunk_rax bar After: Remove the "simple.retpoline.db" file, run again the 'perf script' line to regenerate the .db file and run the exported-sql-viewer.py again to get the same all the way to 'main', then, from there, including 'main': main __x86_indirect_thunk_rax __x86_indirect_thunk_rax foo bar Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190109091835.5570-6-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread-stack.c | 49 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index f52c0f90915d..632c07a125ab 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -638,14 +638,57 @@ static int thread_stack__no_call_return(struct thread *thread, else parent = root; - /* This 'return' had no 'call', so push and pop top of stack */ - cp = call_path__findnew(cpr, parent, fsym, ip, ks); + if (parent->sym == from_al->sym) { + /* + * At the bottom of the stack, assume the missing 'call' was + * before the trace started. So, pop the current symbol and push + * the 'to' symbol. + */ + if (ts->cnt == 1) { + err = thread_stack__call_return(thread, ts, --ts->cnt, + tm, ref, false); + if (err) + return err; + } + + if (!ts->cnt) { + cp = call_path__findnew(cpr, root, tsym, addr, ks); + + return thread_stack__push_cp(ts, addr, tm, ref, cp, + true, false); + } + + /* + * Otherwise assume the 'return' is being used as a jump (e.g. + * retpoline) and just push the 'to' symbol. + */ + cp = call_path__findnew(cpr, parent, tsym, addr, ks); + + err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false); + if (!err) + ts->stack[ts->cnt - 1].non_call = true; + + return err; + } + + /* + * Assume 'parent' has not yet returned, so push 'to', and then push and + * pop 'from'. + */ + + cp = call_path__findnew(cpr, parent, tsym, addr, ks); err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false); if (err) return err; - return thread_stack__pop_cp(thread, ts, addr, tm, ref, tsym); + cp = call_path__findnew(cpr, cp, fsym, ip, ks); + + err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false); + if (err) + return err; + + return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false); } static int thread_stack__trace_begin(struct thread *thread, -- cgit v1.2.3 From 3c0cd952cf051903929cd57b89034cc5f71f451d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 9 Jan 2019 11:18:35 +0200 Subject: perf thread-stack: Hide x86 retpolines x86 retpoline functions pollute the call graph by showing up everywhere there is an indirect branch, but they do not really mean anything. Make changes so that the default retpoline functions will no longer appear in the call graph. Note this only affects the call graph, since all the original branches are left unchanged. This does not handle function return thunks, nor is there any improvement for the handling of inline thunks or extern thunks. Example: $ cat simple-retpoline.c __attribute__((noinline)) int bar(void) { return -1; } int foo(void) { return bar() + 1; } __attribute__((indirect_branch("thunk"))) int main() { int (*volatile fn)(void) = foo; fn(); return fn(); } $ gcc -ggdb3 -Wall -Wextra -O2 -o simple-retpoline simple-retpoline.c $ objdump -d simple-retpoline 0000000000001040
: 1040: 48 83 ec 18 sub $0x18,%rsp 1044: 48 8d 05 25 01 00 00 lea 0x125(%rip),%rax # 1170 104b: 48 89 44 24 08 mov %rax,0x8(%rsp) 1050: 48 8b 44 24 08 mov 0x8(%rsp),%rax 1055: e8 1f 01 00 00 callq 1179 <__x86_indirect_thunk_rax> 105a: 48 8b 44 24 08 mov 0x8(%rsp),%rax 105f: 48 83 c4 18 add $0x18,%rsp 1063: e9 11 01 00 00 jmpq 1179 <__x86_indirect_thunk_rax> 0000000000001160 : 1160: b8 ff ff ff ff mov $0xffffffff,%eax 1165: c3 retq 0000000000001170 : 1170: e8 eb ff ff ff callq 1160 1175: 83 c0 01 add $0x1,%eax 1178: c3 retq 0000000000001179 <__x86_indirect_thunk_rax>: 1179: e8 07 00 00 00 callq 1185 <__x86_indirect_thunk_rax+0xc> 117e: f3 90 pause 1180: 0f ae e8 lfence 1183: eb f9 jmp 117e <__x86_indirect_thunk_rax+0x5> 1185: 48 89 04 24 mov %rax,(%rsp) 1189: c3 retq $ perf record -o simple-retpoline.perf.data -e intel_pt/cyc/u ./simple-retpoline [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0,017 MB simple-retpoline.perf.data ] $ perf script -i simple-retpoline.perf.data --itrace=be -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py simple-retpoline.db branches calls 2019-01-08 14:03:37.851655 Creating database... 2019-01-08 14:03:37.863256 Writing records... 2019-01-08 14:03:38.069750 Adding indexes 2019-01-08 14:03:38.078799 Done $ ~/libexec/perf-core/scripts/python/exported-sql-viewer.py simple-retpoline.db Before: main -> __x86_indirect_thunk_rax -> __x86_indirect_thunk_rax -> foo -> bar After: main -> foo -> bar Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190109091835.5570-7-adrian.hunter@intel.com [ Remove (sym->name != NULL) test, this is not a pointer and breaks the build with clang version 7.0.1 (Fedora 7.0.1-2.fc30) ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread-stack.c | 112 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 109 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 632c07a125ab..a8b45168513c 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -20,6 +20,7 @@ #include "thread.h" #include "event.h" #include "machine.h" +#include "env.h" #include "util.h" #include "debug.h" #include "symbol.h" @@ -29,6 +30,19 @@ #define STACK_GROWTH 2048 +/* + * State of retpoline detection. + * + * RETPOLINE_NONE: no retpoline detection + * X86_RETPOLINE_POSSIBLE: x86 retpoline possible + * X86_RETPOLINE_DETECTED: x86 retpoline detected + */ +enum retpoline_state_t { + RETPOLINE_NONE, + X86_RETPOLINE_POSSIBLE, + X86_RETPOLINE_DETECTED, +}; + /** * struct thread_stack_entry - thread stack entry. * @ret_addr: return address @@ -64,6 +78,7 @@ struct thread_stack_entry { * @crp: call/return processor * @comm: current comm * @arr_sz: size of array if this is the first element of an array + * @rstate: used to detect retpolines */ struct thread_stack { struct thread_stack_entry *stack; @@ -76,6 +91,7 @@ struct thread_stack { struct call_return_processor *crp; struct comm *comm; unsigned int arr_sz; + enum retpoline_state_t rstate; }; /* @@ -115,10 +131,16 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread, if (err) return err; - if (thread->mg && thread->mg->machine) - ts->kernel_start = machine__kernel_start(thread->mg->machine); - else + if (thread->mg && thread->mg->machine) { + struct machine *machine = thread->mg->machine; + const char *arch = perf_env__arch(machine->env); + + ts->kernel_start = machine__kernel_start(machine); + if (!strcmp(arch, "x86")) + ts->rstate = X86_RETPOLINE_POSSIBLE; + } else { ts->kernel_start = 1ULL << 63; + } ts->crp = crp; return 0; @@ -733,6 +755,70 @@ static int thread_stack__trace_end(struct thread_stack *ts, false, true); } +static bool is_x86_retpoline(const char *name) +{ + const char *p = strstr(name, "__x86_indirect_thunk_"); + + return p == name || !strcmp(name, "__indirect_thunk_start"); +} + +/* + * x86 retpoline functions pollute the call graph. This function removes them. + * This does not handle function return thunks, nor is there any improvement + * for the handling of inline thunks or extern thunks. + */ +static int thread_stack__x86_retpoline(struct thread_stack *ts, + struct perf_sample *sample, + struct addr_location *to_al) +{ + struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1]; + struct call_path_root *cpr = ts->crp->cpr; + struct symbol *sym = tse->cp->sym; + struct symbol *tsym = to_al->sym; + struct call_path *cp; + + if (sym && is_x86_retpoline(sym->name)) { + /* + * This is a x86 retpoline fn. It pollutes the call graph by + * showing up everywhere there is an indirect branch, but does + * not itself mean anything. Here the top-of-stack is removed, + * by decrementing the stack count, and then further down, the + * resulting top-of-stack is replaced with the actual target. + * The result is that the retpoline functions will no longer + * appear in the call graph. Note this only affects the call + * graph, since all the original branches are left unchanged. + */ + ts->cnt -= 1; + sym = ts->stack[ts->cnt - 2].cp->sym; + if (sym && sym == tsym && to_al->addr != tsym->start) { + /* + * Target is back to the middle of the symbol we came + * from so assume it is an indirect jmp and forget it + * altogether. + */ + ts->cnt -= 1; + return 0; + } + } else if (sym && sym == tsym) { + /* + * Target is back to the symbol we came from so assume it is an + * indirect jmp and forget it altogether. + */ + ts->cnt -= 1; + return 0; + } + + cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym, + sample->addr, ts->kernel_start); + if (!cp) + return -ENOMEM; + + /* Replace the top-of-stack with the actual target */ + ts->stack[ts->cnt - 1].cp = cp; + + return 0; +} + int thread_stack__process(struct thread *thread, struct comm *comm, struct perf_sample *sample, struct addr_location *from_al, @@ -740,6 +826,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm, struct call_return_processor *crp) { struct thread_stack *ts = thread__stack(thread, sample->cpu); + enum retpoline_state_t rstate; int err = 0; if (ts && !ts->crp) { @@ -755,6 +842,10 @@ int thread_stack__process(struct thread *thread, struct comm *comm, ts->comm = comm; } + rstate = ts->rstate; + if (rstate == X86_RETPOLINE_DETECTED) + ts->rstate = X86_RETPOLINE_POSSIBLE; + /* Flush stack on exec */ if (ts->comm != comm && thread->pid_ == thread->tid) { err = __thread_stack__flush(thread, ts); @@ -791,10 +882,25 @@ int thread_stack__process(struct thread *thread, struct comm *comm, ts->kernel_start); err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp, false, trace_end); + + /* + * A call to the same symbol but not the start of the symbol, + * may be the start of a x86 retpoline. + */ + if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym && + from_al->sym == to_al->sym && + to_al->addr != to_al->sym->start) + ts->rstate = X86_RETPOLINE_DETECTED; + } else if (sample->flags & PERF_IP_FLAG_RETURN) { if (!sample->ip || !sample->addr) return 0; + /* x86 retpoline 'return' doesn't match the stack */ + if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 && + ts->stack[ts->cnt - 1].ret_addr != sample->addr) + return thread_stack__x86_retpoline(ts, sample, to_al); + err = thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref, from_al->sym); if (err) { -- cgit v1.2.3 From 45112e89a8b2b4c9a004147cbfb448b1200cfbf7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 21 Feb 2019 10:41:29 +0100 Subject: perf data: Move size to struct perf_data_file We are about to add support for multiple files, so we need each file to keep its size. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190221094145.9151-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 5 ++--- tools/perf/util/data.c | 2 +- tools/perf/util/data.h | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 6c3719ac901d..e5e9900c9039 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -660,10 +660,9 @@ static int process_sample_event(struct perf_tool *tool, static int process_buildids(struct record *rec) { - struct perf_data *data = &rec->data; struct perf_session *session = rec->session; - if (data->size == 0) + if (perf_data__size(&rec->data) == 0) return 0; /* @@ -851,7 +850,7 @@ record__finish_output(struct record *rec) return; rec->session->header.data_size += rec->bytes_written; - data->size = lseek(perf_data__fd(data), 0, SEEK_CUR); + data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); if (!rec->no_buildid) { process_buildids(rec); diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index d8cfc19ddb10..09eceda17fc2 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -82,7 +82,7 @@ static int open_file_read(struct perf_data *data) goto out_close; } - data->size = st.st_size; + data->file.size = st.st_size; return fd; out_close: diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 4828f7feea89..85f9c0dbf982 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -12,13 +12,13 @@ enum perf_data_mode { struct perf_data_file { const char *path; int fd; + unsigned long size; }; struct perf_data { struct perf_data_file file; bool is_pipe; bool force; - unsigned long size; enum perf_data_mode mode; }; @@ -44,7 +44,7 @@ static inline int perf_data__fd(struct perf_data *data) static inline unsigned long perf_data__size(struct perf_data *data) { - return data->size; + return data->file.size; } int perf_data__open(struct perf_data *data); -- cgit v1.2.3 From 2d4f27999b8877409f326682fd8cc40c52f47cea Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 21 Feb 2019 10:41:30 +0100 Subject: perf data: Add global path holder Add a 'path' member to 'struct perf_data'. It will keep the configured path for the data (const char *). The path in struct perf_data_file is now dynamically allocated (duped) from it. This scheme is useful/used in following patches where struct perf_data::path holds the 'configure' directory path and struct perf_data_file::path holds the allocated path for specific files. Also it actually makes the code little simpler. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190221094145.9151-3-jolsa@kernel.org [ Fixup data-convert-bt.c missing conversion ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 4 ++-- tools/perf/builtin-buildid-cache.c | 4 ++-- tools/perf/builtin-buildid-list.c | 8 +++----- tools/perf/builtin-c2c.c | 4 ++-- tools/perf/builtin-diff.c | 12 ++++++------ tools/perf/builtin-evlist.c | 4 +--- tools/perf/builtin-inject.c | 10 ++++------ tools/perf/builtin-kmem.c | 2 +- tools/perf/builtin-kvm.c | 8 +++----- tools/perf/builtin-lock.c | 8 +++----- tools/perf/builtin-mem.c | 8 +++----- tools/perf/builtin-record.c | 6 +++--- tools/perf/builtin-report.c | 6 +++--- tools/perf/builtin-sched.c | 16 ++++++---------- tools/perf/builtin-script.c | 12 +++++------- tools/perf/builtin-stat.c | 6 +++--- tools/perf/builtin-timechart.c | 8 +++----- tools/perf/builtin-trace.c | 8 +++----- tools/perf/util/data-convert-bt.c | 4 ++-- tools/perf/util/data.c | 39 ++++++++++++++++++++++++++------------ tools/perf/util/data.h | 3 ++- 21 files changed, 87 insertions(+), 93 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 7f3c3fea67b4..67f9d9ffacfb 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -441,7 +441,7 @@ static int __cmd_annotate(struct perf_annotate *ann) } if (total_nr_samples == 0) { - ui__error("The %s file has no samples!\n", session->data->file.path); + ui__error("The %s data has no samples!\n", session->data->path); goto out; } @@ -578,7 +578,7 @@ int cmd_annotate(int argc, const char **argv) if (quiet) perf_quiet_option(); - data.file.path = input_name; + data.path = input_name; annotate.session = perf_session__new(&data, false, &annotate.tool); if (annotate.session == NULL) diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 115110a4796a..10457b10e568 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -416,8 +416,8 @@ int cmd_buildid_cache(int argc, const char **argv) nsi = nsinfo__new(ns_id); if (missing_filename) { - data.file.path = missing_filename; - data.force = force; + data.path = missing_filename; + data.force = force; session = perf_session__new(&data, false, NULL); if (session == NULL) diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 78abbe8d9d5f..f403e19488b5 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -52,11 +52,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits) { struct perf_session *session; struct perf_data data = { - .file = { - .path = input_name, - }, - .mode = PERF_DATA_MODE_READ, - .force = force, + .path = input_name, + .mode = PERF_DATA_MODE_READ, + .force = force, }; symbol__elf_init(); diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index efaaab23c6fd..4272763a5e96 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2750,8 +2750,8 @@ static int perf_c2c__report(int argc, const char **argv) if (!input_name || !strlen(input_name)) input_name = "perf.data"; - data.file.path = input_name; - data.force = symbol_conf.force; + data.path = input_name; + data.force = symbol_conf.force; err = setup_display(display); if (err) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 751e1971456b..58fe0e88215c 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -708,7 +708,7 @@ static void data__fprintf(void) data__for_each_file(i, d) fprintf(stdout, "# [%d] %s %s\n", - d->idx, d->data.file.path, + d->idx, d->data.path, !d->idx ? "(Baseline)" : ""); fprintf(stdout, "#\n"); @@ -779,14 +779,14 @@ static int __cmd_diff(void) data__for_each_file(i, d) { d->session = perf_session__new(&d->data, false, &tool); if (!d->session) { - pr_err("Failed to open %s\n", d->data.file.path); + pr_err("Failed to open %s\n", d->data.path); ret = -1; goto out_delete; } ret = perf_session__process_events(d->session); if (ret) { - pr_err("Failed to process %s\n", d->data.file.path); + pr_err("Failed to process %s\n", d->data.path); goto out_delete; } @@ -1289,9 +1289,9 @@ static int data_init(int argc, const char **argv) data__for_each_file(i, d) { struct perf_data *data = &d->data; - data->file.path = use_default ? defaults[i] : argv[i]; - data->mode = PERF_DATA_MODE_READ, - data->force = force, + data->path = use_default ? defaults[i] : argv[i]; + data->mode = PERF_DATA_MODE_READ, + data->force = force, d->idx = i; } diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index e06e822ce634..6e4f63b0da4a 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -23,9 +23,7 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details struct perf_session *session; struct perf_evsel *pos; struct perf_data data = { - .file = { - .path = file_name, - }, + .path = file_name, .mode = PERF_DATA_MODE_READ, .force = details->force, }; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 9bb1f35d5cb7..24086b7f1b14 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -770,10 +770,8 @@ int cmd_inject(int argc, const char **argv) .input_name = "-", .samples = LIST_HEAD_INIT(inject.samples), .output = { - .file = { - .path = "-", - }, - .mode = PERF_DATA_MODE_WRITE, + .path = "-", + .mode = PERF_DATA_MODE_WRITE, }, }; struct perf_data data = { @@ -786,7 +784,7 @@ int cmd_inject(int argc, const char **argv) "Inject build-ids into the output stream"), OPT_STRING('i', "input", &inject.input_name, "file", "input file name"), - OPT_STRING('o', "output", &inject.output.file.path, "file", + OPT_STRING('o', "output", &inject.output.path, "file", "output file name"), OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, "Merge sched-stat and sched-switch for getting events " @@ -834,7 +832,7 @@ int cmd_inject(int argc, const char **argv) inject.tool.ordered_events = inject.sched_stat; - data.file.path = inject.input_name; + data.path = inject.input_name; inject.session = perf_session__new(&data, true, &inject.tool); if (inject.session == NULL) return -1; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index b80ec0883537..fa520f4b8095 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1949,7 +1949,7 @@ int cmd_kmem(int argc, const char **argv) return __cmd_record(argc, argv); } - data.file.path = input_name; + data.path = input_name; kmem_session = session = perf_session__new(&data, false, &perf_kmem); if (session == NULL) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 3d4cbc4e87c7..dbb6f737a3e2 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1080,11 +1080,9 @@ static int read_events(struct perf_kvm_stat *kvm) .ordered_events = true, }; struct perf_data file = { - .file = { - .path = kvm->file_name, - }, - .mode = PERF_DATA_MODE_READ, - .force = kvm->force, + .path = kvm->file_name, + .mode = PERF_DATA_MODE_READ, + .force = kvm->force, }; kvm->tool = eops; diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 6e0189df2b3b..b9810a8d350a 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -866,11 +866,9 @@ static int __cmd_report(bool display_info) .ordered_events = true, }; struct perf_data data = { - .file = { - .path = input_name, - }, - .mode = PERF_DATA_MODE_READ, - .force = force, + .path = input_name, + .mode = PERF_DATA_MODE_READ, + .force = force, }; session = perf_session__new(&data, false, &eops); diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index ba7e8d87dec3..f45c8b502f63 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -239,11 +239,9 @@ static int process_sample_event(struct perf_tool *tool, static int report_raw_events(struct perf_mem *mem) { struct perf_data data = { - .file = { - .path = input_name, - }, - .mode = PERF_DATA_MODE_READ, - .force = mem->force, + .path = input_name, + .mode = PERF_DATA_MODE_READ, + .force = mem->force, }; int ret; struct perf_session *session = perf_session__new(&data, false, diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e5e9900c9039..f3f7f3100336 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -918,7 +918,7 @@ record__switch_output(struct record *rec, bool at_exit) if (!quiet) fprintf(stderr, "[ perf record: Dump %s.%s ]\n", - data->file.path, timestamp); + data->path, timestamp); /* Output tracking events */ if (!at_exit) { @@ -1461,7 +1461,7 @@ out_child: fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", perf_data__size(data) / 1024.0 / 1024.0, - data->file.path, postfix, samples); + data->path, postfix, samples); } out_delete_session: @@ -1862,7 +1862,7 @@ static struct option __record_options[] = { OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", "list of cpus to monitor"), OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), - OPT_STRING('o', "output", &record.data.file.path, "file", + OPT_STRING('o', "output", &record.data.path, "file", "output file name"), OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, &record.opts.no_inherit_set, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2e8c74d6430c..1532ebde6c4b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -899,7 +899,7 @@ static int __cmd_report(struct report *rep) rep->nr_entries += evsel__hists(pos)->nr_entries; if (rep->nr_entries == 0) { - ui__error("The %s file has no samples!\n", data->file.path); + ui__error("The %s data has no samples!\n", data->path); return 0; } @@ -1207,8 +1207,8 @@ int cmd_report(int argc, const char **argv) input_name = "perf.data"; } - data.file.path = input_name; - data.force = symbol_conf.force; + data.path = input_name; + data.force = symbol_conf.force; repeat: session = perf_session__new(&data, false, &report.tool); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 640558e9352e..275f2d92a7bf 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1785,11 +1785,9 @@ static int perf_sched__read_events(struct perf_sched *sched) }; struct perf_session *session; struct perf_data data = { - .file = { - .path = input_name, - }, - .mode = PERF_DATA_MODE_READ, - .force = sched->force, + .path = input_name, + .mode = PERF_DATA_MODE_READ, + .force = sched->force, }; int rc = -1; @@ -2958,11 +2956,9 @@ static int perf_sched__timehist(struct perf_sched *sched) { "sched:sched_migrate_task", timehist_migrate_task_event, }, }; struct perf_data data = { - .file = { - .path = input_name, - }, - .mode = PERF_DATA_MODE_READ, - .force = sched->force, + .path = input_name, + .mode = PERF_DATA_MODE_READ, + .force = sched->force, }; struct perf_session *session; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 373ea151dc60..5b1543f42290 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2951,10 +2951,8 @@ int find_scripts(char **scripts_array, char **scripts_path_array) DIR *scripts_dir, *lang_dir; struct perf_session *session; struct perf_data data = { - .file = { - .path = input_name, - }, - .mode = PERF_DATA_MODE_READ, + .path = input_name, + .mode = PERF_DATA_MODE_READ, }; char *temp; int i = 0; @@ -3427,8 +3425,8 @@ int cmd_script(int argc, const char **argv) argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, PARSE_OPT_STOP_AT_NON_OPTION); - data.file.path = input_name; - data.force = symbol_conf.force; + data.path = input_name; + data.force = symbol_conf.force; if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { rec_script_path = get_script_path(argv[1], RECORD_SUFFIX); @@ -3654,7 +3652,7 @@ int cmd_script(int argc, const char **argv) goto out_delete; } - input = open(data.file.path, O_RDONLY); /* input_name */ + input = open(data.path, O_RDONLY); /* input_name */ if (input < 0) { err = -errno; perror("failed to open file"); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index bb24f9c17f9a..7b8f09b0b8bf 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1322,7 +1322,7 @@ static int __cmd_record(int argc, const char **argv) PARSE_OPT_STOP_AT_NON_OPTION); if (output_name) - data->file.path = output_name; + data->path = output_name; if (stat_config.run_count != 1 || forever) { pr_err("Cannot use -r option with perf stat record.\n"); @@ -1523,8 +1523,8 @@ static int __cmd_report(int argc, const char **argv) input_name = "perf.data"; } - perf_stat.data.file.path = input_name; - perf_stat.data.mode = PERF_DATA_MODE_READ; + perf_stat.data.path = input_name; + perf_stat.data.mode = PERF_DATA_MODE_READ; session = perf_session__new(&perf_stat.data, false, &perf_stat.tool); if (session == NULL) diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 775b99833e51..9b98687a27b9 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1602,11 +1602,9 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) { "syscalls:sys_exit_select", process_exit_poll }, }; struct perf_data data = { - .file = { - .path = input_name, - }, - .mode = PERF_DATA_MODE_READ, - .force = tchart->force, + .path = input_name, + .mode = PERF_DATA_MODE_READ, + .force = tchart->force, }; struct perf_session *session = perf_session__new(&data, false, diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 1a11fe656afc..f5b3a1e9c1dd 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3154,11 +3154,9 @@ static int trace__replay(struct trace *trace) { "probe:vfs_getname", trace__vfs_getname, }, }; struct perf_data data = { - .file = { - .path = input_name, - }, - .mode = PERF_DATA_MODE_READ, - .force = trace->force, + .path = input_name, + .mode = PERF_DATA_MODE_READ, + .force = trace->force, }; struct perf_session *session; struct perf_evsel *evsel; diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 2a36fab76994..26af43ad9ddd 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1578,7 +1578,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, { struct perf_session *session; struct perf_data data = { - .file = { .path = input, .fd = -1 }, + .path = input, .mode = PERF_DATA_MODE_READ, .force = opts->force, }; @@ -1650,7 +1650,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, fprintf(stderr, "[ perf data convert: Converted '%s' into CTF data '%s' ]\n", - data.file.path, path); + data.path, path); fprintf(stderr, "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples", diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 09eceda17fc2..e16d06ed1100 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -19,11 +19,11 @@ static bool check_pipe(struct perf_data *data) int fd = perf_data__is_read(data) ? STDIN_FILENO : STDOUT_FILENO; - if (!data->file.path) { + if (!data->path) { if (!fstat(fd, &st) && S_ISFIFO(st.st_mode)) is_pipe = true; } else { - if (!strcmp(data->file.path, "-")) + if (!strcmp(data->path, "-")) is_pipe = true; } @@ -37,13 +37,13 @@ static int check_backup(struct perf_data *data) { struct stat st; - if (!stat(data->file.path, &st) && st.st_size) { + if (!stat(data->path, &st) && st.st_size) { /* TODO check errors properly */ char oldname[PATH_MAX]; snprintf(oldname, sizeof(oldname), "%s.old", - data->file.path); + data->path); unlink(oldname); - rename(data->file.path, oldname); + rename(data->path, oldname); } return 0; @@ -115,8 +115,22 @@ static int open_file(struct perf_data *data) fd = perf_data__is_read(data) ? open_file_read(data) : open_file_write(data); + if (fd < 0) { + free(data->file.path); + return -1; + } + data->file.fd = fd; - return fd < 0 ? -1 : 0; + return 0; +} + +static int open_file_dup(struct perf_data *data) +{ + data->file.path = strdup(data->path); + if (!data->file.path) + return -ENOMEM; + + return open_file(data); } int perf_data__open(struct perf_data *data) @@ -124,14 +138,15 @@ int perf_data__open(struct perf_data *data) if (check_pipe(data)) return 0; - if (!data->file.path) - data->file.path = "perf.data"; + if (!data->path) + data->path = "perf.data"; - return open_file(data); + return open_file_dup(data); } void perf_data__close(struct perf_data *data) { + free(data->file.path); close(data->file.fd); } @@ -159,15 +174,15 @@ int perf_data__switch(struct perf_data *data, if (perf_data__is_read(data)) return -EINVAL; - if (asprintf(&new_filepath, "%s.%s", data->file.path, postfix) < 0) + if (asprintf(&new_filepath, "%s.%s", data->path, postfix) < 0) return -ENOMEM; /* * Only fire a warning, don't return error, continue fill * original file. */ - if (rename(data->file.path, new_filepath)) - pr_warning("Failed to rename %s to %s\n", data->file.path, new_filepath); + if (rename(data->path, new_filepath)) + pr_warning("Failed to rename %s to %s\n", data->path, new_filepath); if (!at_exit) { close(data->file.fd); diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 85f9c0dbf982..2bce28117ccf 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -10,12 +10,13 @@ enum perf_data_mode { }; struct perf_data_file { - const char *path; + char *path; int fd; unsigned long size; }; struct perf_data { + const char *path; struct perf_data_file file; bool is_pipe; bool force; -- cgit v1.2.3 From 05a486593977bfcf71de6bf5cad6d045c18829c6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 24 Feb 2019 20:06:37 +0100 Subject: perf tools: Add depth checking to rm_rf Adding depth argument to rm_rf (and renaming it to rm_rf_depth) to specify the depth we will go searching for files to remove. It will be used to specify single depth for perf.data directory removal in following patch. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190224190656.30163-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 3ee410fc047a..bcf436892155 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -117,7 +117,12 @@ int mkdir_p(char *path, mode_t mode) return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0; } -int rm_rf(const char *path) +/* + * The depth specify how deep the removal will go. + * 0 - will remove only files under the 'path' directory + * 1 .. x - will dive in x-level deep under the 'path' directory + */ +static int rm_rf_depth(const char *path, int depth) { DIR *dir; int ret; @@ -155,7 +160,7 @@ int rm_rf(const char *path) } if (S_ISDIR(statbuf.st_mode)) - ret = rm_rf(namebuf); + ret = depth ? rm_rf_depth(namebuf, depth - 1) : 0; else ret = unlink(namebuf); } @@ -167,6 +172,11 @@ int rm_rf(const char *path) return rmdir(path); } +int rm_rf(const char *path) +{ + return rm_rf_depth(path, INT_MAX); +} + /* A filter which removes dot files */ bool lsdir_no_dot_filter(const char *name __maybe_unused, struct dirent *d) { -- cgit v1.2.3 From cdb6b0235f170a5ffcd74731178efc064bd4d24a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 24 Feb 2019 20:06:38 +0100 Subject: perf tools: Add pattern name checking to rm_rf Add pattern argument to rm_rf_depth() (and rename it to rm_rf_depth_pat()) to specify the name pattern files need to match inside the directory. The function fails if we find different file to remove. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190224190656.30163-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.c | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index bcf436892155..02b7a38f98ce 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -21,6 +21,7 @@ #include #include #include "strlist.h" +#include "string2.h" /* * XXX We need to find a better place for these things... @@ -117,12 +118,38 @@ int mkdir_p(char *path, mode_t mode) return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0; } +static bool match_pat(char *file, const char **pat) +{ + int i = 0; + + if (!pat) + return true; + + while (pat[i]) { + if (strglobmatch(file, pat[i])) + return true; + + i++; + } + + return false; +} + /* * The depth specify how deep the removal will go. * 0 - will remove only files under the 'path' directory * 1 .. x - will dive in x-level deep under the 'path' directory + * + * If specified the pat is array of string patterns ended with NULL, + * which are checked upon every file/directory found. Only matching + * ones are removed. + * + * The function returns: + * 0 on success + * -1 on removal failure with errno set + * -2 on pattern failure */ -static int rm_rf_depth(const char *path, int depth) +static int rm_rf_depth_pat(const char *path, int depth, const char **pat) { DIR *dir; int ret; @@ -149,6 +176,9 @@ static int rm_rf_depth(const char *path, int depth) if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) continue; + if (!match_pat(d->d_name, pat)) + return -2; + scnprintf(namebuf, sizeof(namebuf), "%s/%s", path, d->d_name); @@ -160,7 +190,7 @@ static int rm_rf_depth(const char *path, int depth) } if (S_ISDIR(statbuf.st_mode)) - ret = depth ? rm_rf_depth(namebuf, depth - 1) : 0; + ret = depth ? rm_rf_depth_pat(namebuf, depth - 1, pat) : 0; else ret = unlink(namebuf); } @@ -174,7 +204,7 @@ static int rm_rf_depth(const char *path, int depth) int rm_rf(const char *path) { - return rm_rf_depth(path, INT_MAX); + return rm_rf_depth_pat(path, INT_MAX, NULL); } /* A filter which removes dot files */ -- cgit v1.2.3 From c69e4c37b37c816c7dbd307a6e7d2d2a5cfe8788 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 24 Feb 2019 20:06:39 +0100 Subject: perf tools: Add rm_rf_perf_data function To remove perf.data including the directory, with checking on expected files and no other directories inside. Signed-off-by: Jiri Olsa Suggested-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190224190656.30163-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.c | 11 +++++++++++ tools/perf/util/util.h | 1 + 2 files changed, 12 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 02b7a38f98ce..706818693086 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -202,6 +202,17 @@ static int rm_rf_depth_pat(const char *path, int depth, const char **pat) return rmdir(path); } +int rm_rf_perf_data(const char *path) +{ + const char *pat[] = { + "header", + "data.*", + NULL, + }; + + return rm_rf_depth_pat(path, 0, pat); +} + int rm_rf(const char *path) { return rm_rf_depth_pat(path, INT_MAX, NULL); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index ece040b799f6..01c538027c6f 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -31,6 +31,7 @@ struct strlist; int mkdir_p(char *path, mode_t mode); int rm_rf(const char *path); +int rm_rf_perf_data(const char *path); struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *)); bool lsdir_no_dot_filter(const char *name, struct dirent *d); int copyfile(const char *from, const char *to); -- cgit v1.2.3 From 5021fc4e8c7c3be6f8735eff76fb2520485fcec4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 24 Feb 2019 20:06:42 +0100 Subject: perf data: Make check_backup work over directories Change check_backup() to call rm_rf_perf_data() instead of unlink() to work over directory paths. Also move the call earlier in the code, before we fork for file/dir, so it can backup also directory data. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190224190656.30163-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index e16d06ed1100..bbf9a299615e 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -37,12 +37,15 @@ static int check_backup(struct perf_data *data) { struct stat st; + if (perf_data__is_read(data)) + return 0; + if (!stat(data->path, &st) && st.st_size) { /* TODO check errors properly */ char oldname[PATH_MAX]; snprintf(oldname, sizeof(oldname), "%s.old", data->path); - unlink(oldname); + rm_rf_perf_data(oldname); rename(data->path, oldname); } @@ -95,9 +98,6 @@ static int open_file_write(struct perf_data *data) int fd; char sbuf[STRERR_BUFSIZE]; - if (check_backup(data)) - return -1; - fd = open(data->file.path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC, S_IRUSR|S_IWUSR); @@ -141,6 +141,9 @@ int perf_data__open(struct perf_data *data) if (!data->path) data->path = "perf.data"; + if (check_backup(data)) + return -1; + return open_file_dup(data); } -- cgit v1.2.3 From ccb7a71dcea071c7fd68192909c4e54561c88043 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 24 Feb 2019 20:06:43 +0100 Subject: perf data: Fail check_backup in case of error And display the error message from removing the old data file: $ perf record ls Can't remove old data: Permission denied (perf.data.old) Perf session creation failed. $ perf record ls Can't remove old data: Unknown file found (perf.data.old) Perf session creation failed. Not sure how to make fail the rename (after we successfully remove the destination file/dir) to show the message, anyway let's have it there. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190224190656.30163-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index bbf9a299615e..d008c3973012 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -41,12 +41,27 @@ static int check_backup(struct perf_data *data) return 0; if (!stat(data->path, &st) && st.st_size) { - /* TODO check errors properly */ char oldname[PATH_MAX]; + int ret; + snprintf(oldname, sizeof(oldname), "%s.old", data->path); - rm_rf_perf_data(oldname); - rename(data->path, oldname); + + ret = rm_rf_perf_data(oldname); + if (ret) { + pr_err("Can't remove old data: %s (%s)\n", + ret == -2 ? + "Unknown file found" : strerror(errno), + oldname); + return -1; + } + + if (rename(data->path, oldname)) { + pr_err("Can't move data: %s (%s to %s)\n", + strerror(errno), + data->path, oldname); + return -1; + } } return 0; -- cgit v1.2.3 From 145520631130bd64820b591775733256473eac62 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 24 Feb 2019 20:06:44 +0100 Subject: perf data: Add perf_data__(create_dir|close_dir) functions Add perf_data__create_dir() to create nr files inside 'struct perf_data' path directory: int perf_data__create_dir(struct perf_data *data, int nr); and function to close that data: void perf_data__close_dir(struct perf_data *data); Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190224190656.30163-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/data.h | 8 ++++++++ 2 files changed, 55 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index d008c3973012..005b3909d1dc 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -7,11 +7,58 @@ #include #include #include +#include #include "data.h" #include "util.h" #include "debug.h" +static void close_dir(struct perf_data_file *files, int nr) +{ + while (--nr >= 1) { + close(files[nr].fd); + free(files[nr].path); + } + free(files); +} + +void perf_data__close_dir(struct perf_data *data) +{ + close_dir(data->dir.files, data->dir.nr); +} + +int perf_data__create_dir(struct perf_data *data, int nr) +{ + struct perf_data_file *files = NULL; + int i, ret = -1; + + files = zalloc(nr * sizeof(*files)); + if (!files) + return -ENOMEM; + + data->dir.files = files; + data->dir.nr = nr; + + for (i = 0; i < nr; i++) { + struct perf_data_file *file = &files[i]; + + if (asprintf(&file->path, "%s/data.%d", data->path, i) < 0) + goto out_err; + + ret = open(file->path, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR); + if (ret < 0) + goto out_err; + + file->fd = ret; + } + + return 0; + +out_err: + close_dir(files, i); + return ret; +} + static bool check_pipe(struct perf_data *data) { struct stat st; diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 2bce28117ccf..2d0d015a7d4d 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -21,6 +21,11 @@ struct perf_data { bool is_pipe; bool force; enum perf_data_mode mode; + + struct { + struct perf_data_file *files; + int nr; + } dir; }; static inline bool perf_data__is_read(struct perf_data *data) @@ -64,4 +69,7 @@ ssize_t perf_data_file__write(struct perf_data_file *file, int perf_data__switch(struct perf_data *data, const char *postfix, size_t pos, bool at_exit); + +int perf_data__create_dir(struct perf_data *data, int nr); +void perf_data__close_dir(struct perf_data *data); #endif /* __PERF_DATA_H */ -- cgit v1.2.3 From eb6176709b235b96511c7b4745c30412568395c7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 24 Feb 2019 20:06:45 +0100 Subject: perf data: Add perf_data__open_dir_data function Add perf_data__open_dir_data to open files inside 'struct perf_data' path directory: static int perf_data__open_dir(struct perf_data *data); Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190224190656.30163-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/data.h | 1 + 2 files changed, 60 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 005b3909d1dc..7bd5ddeb7a41 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include "data.h" #include "util.h" @@ -59,6 +61,63 @@ out_err: return ret; } +int perf_data__open_dir(struct perf_data *data) +{ + struct perf_data_file *files = NULL; + struct dirent *dent; + int ret = -1; + DIR *dir; + int nr = 0; + + dir = opendir(data->path); + if (!dir) + return -EINVAL; + + while ((dent = readdir(dir)) != NULL) { + struct perf_data_file *file; + char path[PATH_MAX]; + struct stat st; + + snprintf(path, sizeof(path), "%s/%s", data->path, dent->d_name); + if (stat(path, &st)) + continue; + + if (!S_ISREG(st.st_mode) || strncmp(dent->d_name, "data", 4)) + continue; + + ret = -ENOMEM; + + file = realloc(files, (nr + 1) * sizeof(*files)); + if (!file) + goto out_err; + + files = file; + file = &files[nr++]; + + file->path = strdup(path); + if (!file->path) + goto out_err; + + ret = open(file->path, O_RDONLY); + if (ret < 0) + goto out_err; + + file->fd = ret; + file->size = st.st_size; + } + + if (!files) + return -EINVAL; + + data->dir.files = files; + data->dir.nr = nr; + return 0; + +out_err: + close_dir(files, nr); + return ret; +} + static bool check_pipe(struct perf_data *data) { struct stat st; diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 2d0d015a7d4d..14b47be2bd69 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -71,5 +71,6 @@ int perf_data__switch(struct perf_data *data, size_t pos, bool at_exit); int perf_data__create_dir(struct perf_data *data, int nr); +int perf_data__open_dir(struct perf_data *data); void perf_data__close_dir(struct perf_data *data); #endif /* __PERF_DATA_H */ -- cgit v1.2.3 From 94816add0005595ea33fc8456519be582330401e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 24 Feb 2019 07:37:19 -0800 Subject: perf tools: Add perf_exe() helper to find perf binary Also convert one existing user. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190224153722.27020-9-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 12 +++--------- tools/perf/util/util.c | 10 ++++++++++ tools/perf/util/util.h | 2 ++ 3 files changed, 15 insertions(+), 9 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a2323d777dae..01b324c275b9 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -527,17 +527,11 @@ static int write_event_desc(struct feat_fd *ff, static int write_cmdline(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { - char buf[MAXPATHLEN]; - u32 n; - int i, ret; + char pbuf[MAXPATHLEN], *buf; + int i, ret, n; /* actual path to perf binary */ - ret = readlink("/proc/self/exe", buf, sizeof(buf) - 1); - if (ret <= 0) - return -1; - - /* readlink() does not add null termination */ - buf[ret] = '\0'; + buf = perf_exe(pbuf, MAXPATHLEN); /* account for binary path */ n = perf_env.nr_cmdline + 1; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 706818693086..d388f80d8703 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -568,3 +568,13 @@ out: return tip; } + +char *perf_exe(char *buf, int len) +{ + int n = readlink("/proc/self/exe", buf, len); + if (n > 0) { + buf[n] = 0; + return buf; + } + return strcpy(buf, "perf"); +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 01c538027c6f..09c1b0f91f65 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -77,6 +77,8 @@ extern bool perf_singlethreaded; void perf_set_singlethreaded(void); void perf_set_multithreaded(void); +char *perf_exe(char *buf, int len); + #ifndef O_CLOEXEC #ifdef __sparc__ #define O_CLOEXEC 0x400000 -- cgit v1.2.3