From 4cabc3d1cb6a46f581a2628d1d11c483d5f300e5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 21 Aug 2013 16:47:26 -0700 Subject: tools/perf/stat: Add perf stat --transaction Add support to perf stat to print the basic transactional execution statistics: Total cycles, Cycles in Transaction, Cycles in aborted transsactions using the in_tx and in_tx_checkpoint qualifiers. Transaction Starts and Elision Starts, to compute the average transaction length. This is a reasonable overview over the success of the transactions. Also support architectures that have a transaction aborted cycles counter like POWER8. Since that is awkward to handle in the kernel abstract handle both cases here. Enable with a new --transaction / -T option. This requires measuring these events in a group, since they depend on each other. This is implemented by using TM sysfs events exported by the kernel Signed-off-by: Andi Kleen Acked-by: Arnaldo Carvalho de Melo Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1377128846-977-5-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-stat.txt | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 73c9759005a3..80c7da6732f2 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -137,6 +137,11 @@ core number and the number of online logical processors on that physical process After starting the program, wait msecs before measuring. This is useful to filter out the startup phase of the program, which is often very different. +-T:: +--transaction:: + +Print statistics of transactional execution if supported. + EXAMPLES -------- -- cgit v1.2.3 From f5d05bcec409aec2c41727077ad818f7c4db005b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Sep 2013 07:40:41 -0700 Subject: tools/perf: Support sorting by in_tx or abort branch flags Extend the perf branch sorting code to support sorting by in_tx or abort_tx qualifiers. Also print out those qualifiers. This also fixes up some of the existing sort key documentation. We do not support no_tx here, because it's simply not showing the in_tx flag. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379688044-14173-4-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 4 ++- tools/perf/Documentation/perf-top.txt | 3 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-top.c | 3 +- tools/perf/perf.h | 4 ++- tools/perf/util/hist.h | 2 ++ tools/perf/util/sort.c | 51 ++++++++++++++++++++++++++++++++ tools/perf/util/sort.h | 2 ++ 8 files changed, 66 insertions(+), 5 deletions(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 2b8097ee39d8..ae337e38cff5 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -71,7 +71,7 @@ OPTIONS entries are displayed as "[other]". - cpu: cpu number the task ran at the time of sample - srcline: filename and line number executed at the time of sample. The - DWARF debuggin info must be provided. + DWARF debugging info must be provided. By default, comm, dso and symbol keys are used. (i.e. --sort comm,dso,symbol) @@ -85,6 +85,8 @@ OPTIONS - symbol_from: name of function branched from - symbol_to: name of function branched to - mispredict: "N" for predicted branch, "Y" for mispredicted branch + - in_tx: branch in TSX transaction + - abort: TSX transaction abort. And default sort keys are changed to comm, dso_from, symbol_from, dso_to and symbol_to, see '--branch-stack'. diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 58d6598a9686..f852eb5d988d 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -112,7 +112,8 @@ Default is to monitor all CPUS. -s:: --sort:: - Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight, local_weight. + Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight, + local_weight, abort, in_tx -n:: --show-nr-samples:: diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 72eae7498c09..89b188d64ea9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -787,7 +787,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline," " dso_to, dso_from, symbol_to, symbol_from, mispredict," " weight, local_weight, mem, symbol_daddr, dso_daddr, tlb, " - "snoop, locked"), + "snoop, locked, abort, in_tx"), OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, "Show sample percentage for different cpu modes"), OPT_STRING('p', "parent", &parent_pattern, "regex", diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 212214162bb2..6534a37d66fe 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1103,7 +1103,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - "sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight"), + "sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight," + " abort, in_tx"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, "Show a column with the number of samples"), OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts, diff --git a/tools/perf/perf.h b/tools/perf/perf.h index cf20187eee0a..acf3d665474d 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -182,7 +182,9 @@ struct ip_callchain { struct branch_flags { u64 mispred:1; u64 predicted:1; - u64 reserved:62; + u64 in_tx:1; + u64 abort:1; + u64 reserved:60; }; struct branch_entry { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 1329b6b6ffe6..f743e96005f2 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -45,6 +45,8 @@ enum hist_column { HISTC_CPU, HISTC_SRCLINE, HISTC_MISPREDICT, + HISTC_IN_TX, + HISTC_ABORT, HISTC_SYMBOL_FROM, HISTC_SYMBOL_TO, HISTC_DSO_FROM, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 5f118a089519..1771566858cc 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -858,6 +858,55 @@ struct sort_entry sort_mem_snoop = { .se_width_idx = HISTC_MEM_SNOOP, }; +static int64_t +sort__abort_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return left->branch_info->flags.abort != + right->branch_info->flags.abort; +} + +static int hist_entry__abort_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width) +{ + static const char *out = "."; + + if (self->branch_info->flags.abort) + out = "A"; + return repsep_snprintf(bf, size, "%-*s", width, out); +} + +struct sort_entry sort_abort = { + .se_header = "Transaction abort", + .se_cmp = sort__abort_cmp, + .se_snprintf = hist_entry__abort_snprintf, + .se_width_idx = HISTC_ABORT, +}; + +static int64_t +sort__in_tx_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return left->branch_info->flags.in_tx != + right->branch_info->flags.in_tx; +} + +static int hist_entry__in_tx_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width) +{ + static const char *out = "."; + + if (self->branch_info->flags.in_tx) + out = "T"; + + return repsep_snprintf(bf, size, "%-*s", width, out); +} + +struct sort_entry sort_in_tx = { + .se_header = "Branch in transaction", + .se_cmp = sort__in_tx_cmp, + .se_snprintf = hist_entry__in_tx_snprintf, + .se_width_idx = HISTC_IN_TX, +}; + struct sort_dimension { const char *name; struct sort_entry *entry; @@ -888,6 +937,8 @@ static struct sort_dimension bstack_sort_dimensions[] = { DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from), DIM(SORT_SYM_TO, "symbol_to", sort_sym_to), DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), + DIM(SORT_IN_TX, "in_tx", sort_in_tx), + DIM(SORT_ABORT, "abort", sort_abort), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 4e80dbd271e7..9dad3a0440dc 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -153,6 +153,8 @@ enum sort_type { SORT_SYM_FROM, SORT_SYM_TO, SORT_MISPREDICT, + SORT_ABORT, + SORT_IN_TX, /* memory mode specific sort keys */ __SORT_MEMORY_MODE, -- cgit v1.2.3 From 0126d493b62e1306db09e1019c05e0bfe84ae8e7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Sep 2013 07:40:42 -0700 Subject: tools/perf/record: Add abort_tx,no_tx,in_tx branch filter options to perf record -j Make perf record -j aware of the new in_tx,no_tx,abort_tx branch qualifiers. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379688044-14173-5-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-record.txt | 3 +++ tools/perf/builtin-record.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index e297b74471b8..6bec1c944849 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -166,6 +166,9 @@ following filters are defined: - u: only when the branch target is at the user level - k: only when the branch target is in the kernel - hv: only when the target is at the hypervisor level + - in_tx: only when the target is in a hardware transaction + - no_tx: only when the target is not in a hardware transaction + - abort_tx: only when the target is a hardware transaction abort + The option requires at least one branch type among any, any_call, any_ret, ind_call. diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a41ac41546c9..8384b54e1eba 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -618,6 +618,9 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL), BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN), BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL), + BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX), + BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX), + BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX), BRANCH_END }; -- cgit v1.2.3 From 475eeab9f3c1579c8da89667496084db4867bf7c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Sep 2013 07:40:43 -0700 Subject: tools/perf: Add support for record transaction flags Add support for recording and displaying the transaction flags. They are essentially a new sort key. Also display them in a nice way to the user. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379688044-14173-6-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-record.txt | 4 +- tools/perf/Documentation/perf-report.txt | 4 ++ tools/perf/Documentation/perf-top.txt | 2 +- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-diff.c | 8 ++-- tools/perf/builtin-record.c | 2 + tools/perf/builtin-report.c | 4 +- tools/perf/builtin-top.c | 5 +-- tools/perf/perf.h | 1 + tools/perf/tests/hists_link.c | 6 ++- tools/perf/util/event.h | 1 + tools/perf/util/evsel.c | 9 ++++ tools/perf/util/hist.c | 7 ++- tools/perf/util/hist.h | 4 +- tools/perf/util/session.c | 3 ++ tools/perf/util/sort.c | 73 ++++++++++++++++++++++++++++++++ tools/perf/util/sort.h | 2 + 17 files changed, 122 insertions(+), 15 deletions(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 6bec1c944849..f732eaa6a500 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -179,12 +179,14 @@ is enabled for all the sampling events. The sampled branch type is the same for The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k Note that this feature may not be available on all processors. --W:: --weight:: Enable weightened sampling. An additional weight is recorded per sample and can be displayed with the weight and local_weight sort keys. This currently works for TSX abort events and some memory events in precise mode on modern Intel CPUs. +--transaction:: +Record transaction flags for transaction related events. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index ae337e38cff5..be5ad87b6c3d 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -72,6 +72,10 @@ OPTIONS - cpu: cpu number the task ran at the time of sample - srcline: filename and line number executed at the time of sample. The DWARF debugging info must be provided. + - weight: Event specific weight, e.g. memory latency or transaction + abort cost. This is the global weight. + - local_weight: Local weight version of the weight above. + - transaction: Transaction abort flags. By default, comm, dso and symbol keys are used. (i.e. --sort comm,dso,symbol) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index f852eb5d988d..6d70fbfe28a2 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -113,7 +113,7 @@ Default is to monitor all CPUS. -s:: --sort:: Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight, - local_weight, abort, in_tx + local_weight, abort, in_tx, transaction -n:: --show-nr-samples:: diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 5ebd0c3b71b6..0393d9800516 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -63,7 +63,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, return 0; } - he = __hists__add_entry(&evsel->hists, al, NULL, 1, 1); + he = __hists__add_entry(&evsel->hists, al, NULL, 1, 1, 0); if (he == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index f28799e94f2a..2a78dc806c39 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -304,9 +304,10 @@ static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair, static int hists__add_entry(struct hists *self, struct addr_location *al, u64 period, - u64 weight) + u64 weight, u64 transaction) { - if (__hists__add_entry(self, al, NULL, period, weight) != NULL) + if (__hists__add_entry(self, al, NULL, period, weight, transaction) + != NULL) return 0; return -ENOMEM; } @@ -328,7 +329,8 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused, if (al.filtered) return 0; - if (hists__add_entry(&evsel->hists, &al, sample->period, sample->weight)) { + if (hists__add_entry(&evsel->hists, &al, sample->period, + sample->weight, sample->transaction)) { pr_warning("problem incrementing symbol period, skipping event\n"); return -1; } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 8384b54e1eba..a78db3f31b25 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -894,6 +894,8 @@ const struct option record_options[] = { parse_branch_stack), OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, "sample by weight (on special events only)"), + OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, + "sample transaction flags (special events only)"), OPT_END() }; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 89b188d64ea9..06e1abe351dd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -259,7 +259,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, } he = __hists__add_entry(&evsel->hists, al, parent, sample->period, - sample->weight); + sample->weight, sample->transaction); if (he == NULL) return -ENOMEM; @@ -787,7 +787,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline," " dso_to, dso_from, symbol_to, symbol_from, mispredict," " weight, local_weight, mem, symbol_daddr, dso_daddr, tlb, " - "snoop, locked, abort, in_tx"), + "snoop, locked, abort, in_tx, transaction"), OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, "Show sample percentage for different cpu modes"), OPT_STRING('p', "parent", &parent_pattern, "regex", diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 6534a37d66fe..b3e0229ee38f 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -247,9 +247,8 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel, pthread_mutex_lock(&evsel->hists.lock); he = __hists__add_entry(&evsel->hists, al, NULL, sample->period, - sample->weight); + sample->weight, sample->transaction); pthread_mutex_unlock(&evsel->hists.lock); - if (he == NULL) return NULL; @@ -1104,7 +1103,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "be more verbose (show counter open errors, etc)"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", "sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight," - " abort, in_tx"), + " abort, in_tx, transaction"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, "Show a column with the number of samples"), OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts, diff --git a/tools/perf/perf.h b/tools/perf/perf.h index acf3d665474d..84502e88488b 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -233,6 +233,7 @@ struct perf_record_opts { u64 default_interval; u64 user_interval; u16 stack_dump_size; + bool sample_transaction; }; #endif diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 4228ffc0d968..025503a22ff7 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -222,7 +222,8 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) &sample) < 0) goto out; - he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1); + he = __hists__add_entry(&evsel->hists, &al, NULL, + 1, 1, 0); if (he == NULL) goto out; @@ -244,7 +245,8 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) &sample) < 0) goto out; - he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1); + he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1, + 0); if (he == NULL) goto out; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index c67ecc457d29..17d9e167a7b9 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -111,6 +111,7 @@ struct perf_sample { u64 stream_id; u64 period; u64 weight; + u64 transaction; u32 cpu; u32 raw_size; u64 data_src; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 0ce9febf1ba0..abe69af58b62 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -681,6 +681,9 @@ void perf_evsel__config(struct perf_evsel *evsel, attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; + if (opts->sample_transaction) + attr->sample_type |= PERF_SAMPLE_TRANSACTION; + /* * XXX see the function comment above * @@ -1470,6 +1473,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array++; } + data->transaction = 0; + if (type & PERF_SAMPLE_TRANSACTION) { + data->transaction = *array; + array++; + } + return 0; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 97dc2808885d..f3278a388e9a 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -160,6 +160,10 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_LVL, 21 + 3); hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12); hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12); + + if (h->transaction) + hists__new_col_len(hists, HISTC_TRANSACTION, + hist_entry__transaction_len()); } void hists__output_recalc_col_len(struct hists *hists, int max_rows) @@ -466,7 +470,7 @@ struct hist_entry *__hists__add_branch_entry(struct hists *self, struct hist_entry *__hists__add_entry(struct hists *self, struct addr_location *al, struct symbol *sym_parent, u64 period, - u64 weight) + u64 weight, u64 transaction) { struct hist_entry entry = { .thread = al->thread, @@ -487,6 +491,7 @@ struct hist_entry *__hists__add_entry(struct hists *self, .hists = self, .branch_info = NULL, .mem_info = NULL, + .transaction = transaction, }; return add_hist_entry(self, &entry, al, period, weight); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index f743e96005f2..6a048c09cd64 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -59,6 +59,7 @@ enum hist_column { HISTC_MEM_TLB, HISTC_MEM_LVL, HISTC_MEM_SNOOP, + HISTC_TRANSACTION, HISTC_NR_COLS, /* Last entry */ }; @@ -84,9 +85,10 @@ struct hists { struct hist_entry *__hists__add_entry(struct hists *self, struct addr_location *al, struct symbol *parent, u64 period, - u64 weight); + u64 weight, u64 transaction); int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); +int hist_entry__transaction_len(void); int hist_entry__sort_snprintf(struct hist_entry *self, char *bf, size_t size, struct hists *hists); void hist_entry__free(struct hist_entry *); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 70ffa41518f3..211b325791ad 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -858,6 +858,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_DATA_SRC) printf(" . data_src: 0x%"PRIx64"\n", sample->data_src); + if (sample_type & PERF_SAMPLE_TRANSACTION) + printf("... transaction: %" PRIx64 "\n", sample->transaction); + if (sample_type & PERF_SAMPLE_READ) sample_read__printf(sample, evsel->attr.read_format); } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 1771566858cc..b4ecc0e4c908 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -907,6 +907,78 @@ struct sort_entry sort_in_tx = { .se_width_idx = HISTC_IN_TX, }; +static int64_t +sort__transaction_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return left->transaction - right->transaction; +} + +static inline char *add_str(char *p, const char *str) +{ + strcpy(p, str); + return p + strlen(str); +} + +static struct txbit { + unsigned flag; + const char *name; + int skip_for_len; +} txbits[] = { + { PERF_TXN_ELISION, "EL ", 0 }, + { PERF_TXN_TRANSACTION, "TX ", 1 }, + { PERF_TXN_SYNC, "SYNC ", 1 }, + { PERF_TXN_ASYNC, "ASYNC ", 0 }, + { PERF_TXN_RETRY, "RETRY ", 0 }, + { PERF_TXN_CONFLICT, "CON ", 0 }, + { PERF_TXN_CAPACITY_WRITE, "CAP-WRITE ", 1 }, + { PERF_TXN_CAPACITY_READ, "CAP-READ ", 0 }, + { 0, NULL, 0 } +}; + +int hist_entry__transaction_len(void) +{ + int i; + int len = 0; + + for (i = 0; txbits[i].name; i++) { + if (!txbits[i].skip_for_len) + len += strlen(txbits[i].name); + } + len += 4; /* :XX */ + return len; +} + +static int hist_entry__transaction_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width) +{ + u64 t = self->transaction; + char buf[128]; + char *p = buf; + int i; + + buf[0] = 0; + for (i = 0; txbits[i].name; i++) + if (txbits[i].flag & t) + p = add_str(p, txbits[i].name); + if (t && !(t & (PERF_TXN_SYNC|PERF_TXN_ASYNC))) + p = add_str(p, "NEITHER "); + if (t & PERF_TXN_ABORT_MASK) { + sprintf(p, ":%" PRIx64, + (t & PERF_TXN_ABORT_MASK) >> + PERF_TXN_ABORT_SHIFT); + p += strlen(p); + } + + return repsep_snprintf(bf, size, "%-*s", width, buf); +} + +struct sort_entry sort_transaction = { + .se_header = "Transaction ", + .se_cmp = sort__transaction_cmp, + .se_snprintf = hist_entry__transaction_snprintf, + .se_width_idx = HISTC_TRANSACTION, +}; + struct sort_dimension { const char *name; struct sort_entry *entry; @@ -925,6 +997,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_SRCLINE, "srcline", sort_srcline), DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight), DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight), + DIM(SORT_TRANSACTION, "transaction", sort_transaction), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 9dad3a0440dc..bf4333694d3a 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -85,6 +85,7 @@ struct hist_entry { struct map_symbol ms; struct thread *thread; u64 ip; + u64 transaction; s32 cpu; struct hist_entry_diff diff; @@ -145,6 +146,7 @@ enum sort_type { SORT_SRCLINE, SORT_LOCAL_WEIGHT, SORT_GLOBAL_WEIGHT, + SORT_TRANSACTION, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, -- cgit v1.2.3 From 31f6be65e08d2cbb876b3b5fc9ae9281b7417c85 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 Oct 2013 06:51:27 +0200 Subject: tools/perf: Fix double/triple-build of the feature detection logic during 'make install' et al Linus reported the following perf build system bug: 'Another annoyance during that make was that "make install" seems to want to re-make the thing I just built. That's absolutely horrible, [...]' The thing that got re-built were 'only' the (numerous) feature checks, not the whole project - but still it was mighty annoying as the feature checks took 9+ seconds even on reasonably fast boxes. Even with the autodep patches where feature detection is much faster it wastes resources, wastes screen real estate and confuses users if we execute feature detection twice. There were two sources for these unnecessary re-builds of the feature checks: - Unnecessary nested invocations of $(MAKE), apparently to be able to do conditional compilation dependent on documentation tools presence. Use straight dependencies instead, with no nesting. - A direct invocation of $(MAKE) to rebuild the PERF-VERSION-FILE. This is apparently done to be able to include it into the Makefile: -include $(OUTPUT)PERF-VERSION-FILE but that's entirely pointless for two reasons: 1) the version file gets regenerated by the initial build pass anyway, 2) including it is futile, given its contents: #define PERF_VERSION "3.12.rc3.g8510c7" 'make' will interpret that as a comment line... So just remove this part of the doc-generation logic. With these things fixed a 'make install' now rebuilds only what is needed. A repeated 'make install' on an already built tree is super fast now, it finishes in under 0.3 seconds: # # After the patch: # $ time make install ... real 0m0.280s user 0m0.162s sys 0m0.054s Prior all the autodep changes and prior this fix, a repeat 'make install' took 24.1 seconds (!) on the same system: # # Before the patches: # $ time make install ... real 0m24.109s user 0m21.171s sys 0m2.449s Which almost entirely was caused by fixable build system fat. We are now literally ~86 times faster. A fresh rebuild and install now takes just 11.4 seconds: # # After the patch: # $ make clean $ time make -j16 install ... real 0m11.457s user 1m43.411s sys 0m7.610s Without the patches it took 27.8 seconds: # # Before the patches: # $ make clean $ time make -j16 install ... real 0m27.801s user 1m59.242s sys 0m9.749s So even in the complete rebuild case we are now ~2.5 times faster. Reported-by: Linus Torvalds Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-x4qjnxjGrgxpribq8sdakfTp@git.kernel.org Signed-off-by: Ingo Molnar --- tools/perf/Documentation/Makefile | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 5a37a7c84e69..eaa477f00673 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -192,13 +192,14 @@ do-install-man: man install-man: check-man-tools man -try-install-man: ifdef missing_tools - $(warning Please install $(missing_tools) to have the man pages installed) + DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed) else - $(MAKE) do-install-man + DO_INSTALL_MAN = do-install-man endif +try-install-man: $(DO_INSTALL_MAN) + install-info: info $(INSTALL) -d -m 755 $(DESTDIR)$(infodir) $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir) @@ -216,14 +217,6 @@ install-pdf: pdf #install-html: html # '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) -ifneq ($(MAKECMDGOALS),clean) -ifneq ($(MAKECMDGOALS),tags) -$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE - $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) $(OUTPUT)PERF-VERSION-FILE - --include $(OUTPUT)PERF-VERSION-FILE -endif -endif # # Determine "include::" file references in asciidoc files. @@ -342,5 +335,3 @@ $(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt #quick-install-html: # '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir) - -.PHONY: .FORCE-PERF-VERSION-FILE -- cgit v1.2.3 From 4bb09192d38ef08f0619667527cabb26354fff89 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Sep 2013 12:37:43 -0600 Subject: perf trace: Add option to show full timestamp Current timestamp shown for output is time relative to firt sample. This patch adds an option to show the absolute perf_clock timestamp which is useful when comparing output across commands (e.g., perf-trace to perf-script). Signed-off-by: David Ahern Cc: Adrian Hunter Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1378319865-55695-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 4 ++++ tools/perf/builtin-trace.c | 7 +++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index daccd2c0a48f..a93e91aad583 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -78,6 +78,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --input Process events from a given perf data file. +-T +--time + Print full timestamp rather time relative to first sample. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7ce036ee863b..bc21140ac782 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -624,6 +624,7 @@ struct trace { struct perf_record_opts opts; struct machine host; u64 base_time; + bool full_time; FILE *output; unsigned long nr_events; struct strlist *ev_qualifier; @@ -1066,7 +1067,7 @@ static int trace__process_sample(struct perf_tool *tool, if (skip_sample(trace, sample)) return 0; - if (trace->base_time == 0) + if (!trace->full_time && trace->base_time == 0) trace->base_time = sample->time; if (handler) @@ -1195,7 +1196,7 @@ again: continue; } - if (trace->base_time == 0) + if (!trace->full_time && trace->base_time == 0) trace->base_time = sample.time; if (type != PERF_RECORD_SAMPLE) { @@ -1433,6 +1434,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) trace__set_duration), OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), OPT_INCR('v', "verbose", &verbose, "be more verbose"), + OPT_BOOLEAN('T', "time", &trace.full_time, + "Show full timestamp, not time relative to first start"), OPT_END() }; int err; -- cgit v1.2.3 From 50c95cbd70808aa2e5ba8d79e503456f1da37aeb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Sep 2013 12:35:21 -0300 Subject: perf trace: Add option to show process COMM Enabled by default, disable with --no-comm, e.g.: 181.821 (0.001 ms): deja-dup-monit/10784 recvmsg(fd: 8, msg: 0x7fff4342baf0, flags: PEEK|TRUNC|CMSG_CLOEXEC ) = 20 181.824 (0.001 ms): deja-dup-monit/10784 geteuid( ) = 1000 181.825 (0.001 ms): deja-dup-monit/10784 getegid( ) = 1000 181.834 (0.002 ms): deja-dup-monit/10784 recvmsg(fd: 8, msg: 0x7fff4342baf0, flags: CMSG_CLOEXEC ) = 20 181.836 (0.001 ms): deja-dup-monit/10784 geteuid( ) = 1000 181.838 (0.001 ms): deja-dup-monit/10784 getegid( ) = 1000 181.705 (0.003 ms): evolution-addr/10924 recvmsg(fd: 10, msg: 0x7fff17dc6990, flags: PEEK|TRUNC|CMSG_CLOEXEC) = 1256 181.710 (0.002 ms): evolution-addr/10924 geteuid( ) = 1000 181.712 (0.001 ms): evolution-addr/10924 getegid( ) = 1000 181.727 (0.003 ms): evolution-addr/10924 recvmsg(fd: 10, msg: 0x7fff17dc6990, flags: CMSG_CLOEXEC ) = 1256 181.731 (0.001 ms): evolution-addr/10924 geteuid( ) = 1000 181.734 (0.001 ms): evolution-addr/10924 getegid( ) = 1000 181.908 (0.002 ms): evolution-addr/10924 recvmsg(fd: 10, msg: 0x7fff17dc6990, flags: PEEK|TRUNC|CMSG_CLOEXEC) = 20 181.913 (0.001 ms): evolution-addr/10924 geteuid( ) = 1000 181.915 (0.001 ms): evolution-addr/10924 getegid( ) = 1000 181.930 (0.003 ms): evolution-addr/10924 recvmsg(fd: 10, msg: 0x7fff17dc6990, flags: CMSG_CLOEXEC ) = 20 181.934 (0.001 ms): evolution-addr/10924 geteuid( ) = 1000 181.937 (0.001 ms): evolution-addr/10924 getegid( ) = 1000 220.718 (0.010 ms): at-spi2-regist/10715 sendmsg(fd: 3, msg: 0x7fffdb8756c0, flags: NOSIGNAL ) = 200 220.741 (0.000 ms): dbus-daemon/10711 ... [continued]: epoll_wait()) = 1 220.759 (0.004 ms): dbus-daemon/10711 recvmsg(fd: 11, msg: 0x7ffff94594d0, flags: CMSG_CLOEXEC ) = 200 220.780 (0.002 ms): dbus-daemon/10711 recvmsg(fd: 11, msg: 0x7ffff94594d0, flags: CMSG_CLOEXEC ) = 200 220.788 (0.001 ms): dbus-daemon/10711 recvmsg(fd: 11, msg: 0x7ffff94594d0, flags: CMSG_CLOEXEC ) = -1 EAGAIN Resource temporarily unavailable 220.760 (0.004 ms): at-spi2-regist/10715 sendmsg(fd: 3, msg: 0x7fffdb8756c0, flags: NOSIGNAL ) = 200 220.771 (0.023 ms): perf/26347 open(filename: 0xf2e780, mode: 15918976 ) = 19 220.850 (0.002 ms): perf/26347 close(fd: 19 ) = 0 Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-6be5jvnkdzjptdrebfn5263n@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 3 +++ tools/perf/builtin-trace.c | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index a93e91aad583..37773854d5c4 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -82,6 +82,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --time Print full timestamp rather time relative to first sample. +--comm:: + Show process COMM right beside its ID, on by default, disable with --no-comm. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 8a09ba3dcd97..be658433d3bb 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -726,6 +726,7 @@ struct trace { struct intlist *pid_list; bool sched; bool multiple_threads; + bool show_comm; double duration_filter; double runtime_ms; }; @@ -755,8 +756,11 @@ static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thre size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); printed += fprintf_duration(duration, fp); - if (trace->multiple_threads) + if (trace->multiple_threads) { + if (trace->show_comm) + printed += fprintf(fp, "%.14s/", thread->comm); printed += fprintf(fp, "%d ", thread->tid); + } return printed; } @@ -1503,10 +1507,13 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) .mmap_pages = 1024, }, .output = stdout, + .show_comm = true, }; const char *output_name = NULL; const char *ev_qualifier_str = NULL; const struct option trace_options[] = { + OPT_BOOLEAN(0, "comm", &trace.show_comm, + "show the thread COMM next to its id"), OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of events to trace"), OPT_STRING('o', "output", &output_name, "file", "output file name"), -- cgit v1.2.3 From f37376cd721a539ac398cbb7718b72fce83cd49b Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 8 Sep 2013 19:19:19 -0700 Subject: perf lock: Account for lock average wait time While perf-lock currently reports both the total wait time and the number of contentions, it doesn't explicitly show the average wait time. Having this value immediately in the report can be quite useful when looking into performance issues. Furthermore, allowing report to sort by averages is another handy feature to have - and thus do not only print the value, but add it to the lock_stat structure. Signed-off-by: Davidlohr Bueso Cc: Aswin Chandramouleeswaran Cc: Frederic Weisbecker Cc: Hitoshi Mitake Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378693159-8747-8-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-lock.txt | 2 +- tools/perf/builtin-lock.c | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index c7f5f55634ac..ab25be28c9dc 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -48,7 +48,7 @@ REPORT OPTIONS -k:: --key=:: Sorting key. Possible values: acquired (default), contended, - wait_total, wait_max, wait_min. + avg_wait, wait_total, wait_max, wait_min. INFO OPTIONS ------------ diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 972310cbeb63..6a9076f165f4 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -56,7 +56,9 @@ struct lock_stat { unsigned int nr_readlock; unsigned int nr_trylock; + /* these times are in nano sec. */ + u64 avg_wait_time; u64 wait_time_total; u64 wait_time_min; u64 wait_time_max; @@ -208,6 +210,7 @@ static struct thread_stat *thread_stat_findnew_first(u32 tid) SINGLE_KEY(nr_acquired) SINGLE_KEY(nr_contended) +SINGLE_KEY(avg_wait_time) SINGLE_KEY(wait_time_total) SINGLE_KEY(wait_time_max) @@ -244,6 +247,7 @@ static struct rb_root result; /* place to store sorted data */ struct lock_key keys[] = { DEF_KEY_LOCK(acquired, nr_acquired), DEF_KEY_LOCK(contended, nr_contended), + DEF_KEY_LOCK(avg_wait, avg_wait_time), DEF_KEY_LOCK(wait_total, wait_time_total), DEF_KEY_LOCK(wait_min, wait_time_min), DEF_KEY_LOCK(wait_max, wait_time_max), @@ -516,6 +520,7 @@ static int report_lock_acquired_event(struct perf_evsel *evsel, seq->state = SEQ_STATE_ACQUIRED; ls->nr_acquired++; + ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0; seq->prev_event_time = sample->time; end: return 0; @@ -570,6 +575,7 @@ static int report_lock_contended_event(struct perf_evsel *evsel, seq->state = SEQ_STATE_CONTENDED; ls->nr_contended++; + ls->avg_wait_time = ls->wait_time_total/ls->nr_contended; seq->prev_event_time = sample->time; end: return 0; @@ -703,6 +709,7 @@ static void print_result(void) pr_info("%10s ", "acquired"); pr_info("%10s ", "contended"); + pr_info("%15s ", "avg wait (ns)"); pr_info("%15s ", "total wait (ns)"); pr_info("%15s ", "max wait (ns)"); pr_info("%15s ", "min wait (ns)"); @@ -734,6 +741,7 @@ static void print_result(void) pr_info("%10u ", st->nr_acquired); pr_info("%10u ", st->nr_contended); + pr_info("%15" PRIu64 " ", st->avg_wait_time); pr_info("%15" PRIu64 " ", st->wait_time_total); pr_info("%15" PRIu64 " ", st->wait_time_max); pr_info("%15" PRIu64 " ", st->wait_time_min == ULLONG_MAX ? @@ -940,7 +948,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) }; const struct option report_options[] = { OPT_STRING('k', "key", &sort_key, "acquired", - "key for sorting (acquired / contended / wait_total / wait_max / wait_min)"), + "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), /* TODO: type */ OPT_END() }; -- cgit v1.2.3 From 27050f530dc4fd88dc93d85c177e000efe970d12 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 1 Sep 2013 12:36:13 +0200 Subject: perf tools: Add possibility to specify mmap size Adding possibility to specify mmap size via -m/--mmap-pages by appending unit size character (B/K/M/G) to the number, like: $ perf record -m 8K ls $ perf record -m 2M ls The size is rounded up appropriately to follow perf mmap restrictions. If no unit is specified the number provides pages as of now, like: $ perf record -m 8 ls Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378031796-17892-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-kvm.txt | 4 +++- tools/perf/Documentation/perf-record.txt | 4 +++- tools/perf/Documentation/perf-top.txt | 4 +++- tools/perf/Documentation/perf-trace.txt | 4 +++- tools/perf/util/evlist.c | 32 ++++++++++++++++++++++++++------ tools/perf/util/util.c | 25 +++++++++++++++++++++++++ tools/perf/util/util.h | 14 ++++++++++++++ 7 files changed, 77 insertions(+), 10 deletions(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt index ac84db2d2334..6a06cefe9642 100644 --- a/tools/perf/Documentation/perf-kvm.txt +++ b/tools/perf/Documentation/perf-kvm.txt @@ -109,7 +109,9 @@ STAT LIVE OPTIONS -m:: --mmap-pages=:: - Number of mmap data pages. Must be a power of two. + Number of mmap data pages (must be a power of two) or size + specification with appended unit character - B/K/M/G. The + size is rounded up to have nearest pages power of two value. -a:: --all-cpus:: diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index f732eaa6a500..f10ab63576d7 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -87,7 +87,9 @@ OPTIONS -m:: --mmap-pages=:: - Number of mmap data pages. Must be a power of two. + Number of mmap data pages (must be a power of two) or size + specification with appended unit character - B/K/M/G. The + size is rounded up to have nearest pages power of two value. -g:: --call-graph:: diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 6d70fbfe28a2..f65777c1f723 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -68,7 +68,9 @@ Default is to monitor all CPUS. -m :: --mmap-pages=:: - Number of mmapped data pages. + Number of mmap data pages (must be a power of two) or size + specification with appended unit character - B/K/M/G. The + size is rounded up to have nearest pages power of two value. -p :: --pid=:: diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 37773854d5c4..7f70d3640edd 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -59,7 +59,9 @@ OPTIONS -m:: --mmap-pages=:: - Number of mmap data pages. Must be a power of two. + Number of mmap data pages (must be a power of two) or size + specification with appended unit character - B/K/M/G. The + size is rounded up to have nearest pages power of two value. -C:: --cpu:: diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 4283f49ca0ab..d21ab0812926 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -687,14 +687,33 @@ static size_t perf_evlist__mmap_size(unsigned long pages) int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset __maybe_unused) { - unsigned int pages, *mmap_pages = opt->value; + unsigned int pages, val, *mmap_pages = opt->value; size_t size; - char *eptr; + static struct parse_tag tags[] = { + { .tag = 'B', .mult = 1 }, + { .tag = 'K', .mult = 1 << 10 }, + { .tag = 'M', .mult = 1 << 20 }, + { .tag = 'G', .mult = 1 << 30 }, + { .tag = 0 }, + }; - pages = strtoul(str, &eptr, 10); - if (*eptr != '\0') { - pr_err("failed to parse --mmap_pages/-m value\n"); - return -1; + val = parse_tag_value(str, tags); + if (val != (unsigned int) -1) { + /* we got file size value */ + pages = PERF_ALIGN(val, page_size) / page_size; + if (!is_power_of_2(pages)) { + pages = next_pow2(pages); + pr_info("rounding mmap pages size to %u (%u pages)\n", + pages * page_size, pages); + } + } else { + /* we got pages count value */ + char *eptr; + pages = strtoul(str, &eptr, 10); + if (*eptr != '\0') { + pr_err("failed to parse --mmap_pages/-m value\n"); + return -1; + } } size = perf_evlist__mmap_size(pages); @@ -738,6 +757,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, evlist->overwrite = overwrite; evlist->mmap_len = perf_evlist__mmap_size(pages); + pr_debug("mmap size %luB\n", evlist->mmap_len); mask = evlist->mmap_len - page_size - 1; list_for_each_entry(evsel, &evlist->entries, node) { diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index ccfdeb62f576..ab71d6216803 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -361,3 +361,28 @@ int parse_nsec_time(const char *str, u64 *ptime) *ptime = time_sec * NSEC_PER_SEC + time_nsec; return 0; } + +unsigned long parse_tag_value(const char *str, struct parse_tag *tags) +{ + struct parse_tag *i = tags; + + while (i->tag) { + char *s; + + s = strchr(str, i->tag); + if (s) { + unsigned long int value; + char *endptr; + + value = strtoul(str, &endptr, 10); + if (s != endptr) + break; + + value *= i->mult; + return value; + } + i++; + } + + return (unsigned long) -1; +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index a53535949043..c29ecaabf461 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -270,6 +270,13 @@ bool is_power_of_2(unsigned long n) return (n != 0 && ((n & (n - 1)) == 0)); } +static inline unsigned next_pow2(unsigned x) +{ + if (!x) + return 1; + return 1ULL << (32 - __builtin_clz(x - 1)); +} + size_t hex_width(u64 v); int hex2u64(const char *ptr, u64 *val); @@ -281,4 +288,11 @@ void dump_stack(void); extern unsigned int page_size; void get_term_dimensions(struct winsize *ws); + +struct parse_tag { + char tag; + int mult; +}; + +unsigned long parse_tag_value(const char *str, struct parse_tag *tags); #endif /* GIT_COMPAT_UTIL_H */ -- cgit v1.2.3 From 5e2485b1a2813faa6b80007c653f8bbbed9457ee Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 28 Sep 2013 13:13:01 -0600 Subject: perf trace: Add record option The record option is a convience alias to include the -e raw_syscalls:* argument to perf-record. All other options are passed to perf-record's handler. Resulting data file can be analyzed by perf-trace -i. Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1380395584-9025-5-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 12 +++++++++--- tools/perf/builtin-trace.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 7f70d3640edd..1a224862118f 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -9,6 +9,7 @@ SYNOPSIS -------- [verse] 'perf trace' +'perf trace record' DESCRIPTION ----------- @@ -16,9 +17,14 @@ This command will show the events associated with the target, initially syscalls, but other system events like pagefaults, task lifetime events, scheduling events, etc. -Initially this is a live mode only tool, but eventually will work with -perf.data files like the other tools, allowing a detached 'record' from -analysis phases. +This is a live mode tool in addition to working with perf.data files like +the other perf tools. Files can be generated using the 'perf record' command +but the session needs to include the raw_syscalls events (-e 'raw_syscalls:*'). +Alernatively, the 'perf trace record' can be used as a shortcut to +automatically include the raw_syscalls events when writing events to a file. + +The following options apply to perf trace; options to perf trace record are +found in the perf record man page. OPTIONS ------- diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5776b5f829e1..1e2368f56a74 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1501,6 +1501,33 @@ static int parse_target_str(struct trace *trace) return 0; } +static int trace__record(int argc, const char **argv) +{ + unsigned int rec_argc, i, j; + const char **rec_argv; + const char * const record_args[] = { + "record", + "-R", + "-m", "1024", + "-c", "1", + "-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit", + }; + + rec_argc = ARRAY_SIZE(record_args) + argc; + rec_argv = calloc(rec_argc + 1, sizeof(char *)); + + if (rec_argv == NULL) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(record_args); i++) + rec_argv[i] = record_args[i]; + + for (j = 0; j < (unsigned int)argc; j++, i++) + rec_argv[i] = argv[j]; + + return cmd_record(i, rec_argv, NULL); +} + static int trace__run(struct trace *trace, int argc, const char **argv) { struct perf_evlist *evlist = perf_evlist__new(); @@ -1788,6 +1815,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) const char * const trace_usage[] = { "perf trace [] []", "perf trace [] -- []", + "perf trace record [] []", + "perf trace record [] -- []", NULL }; struct trace trace = { @@ -1844,6 +1873,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) int err; char bf[BUFSIZ]; + if ((argc > 1) && (strcmp(argv[1], "record") == 0)) + return trace__record(argc-2, &argv[2]); + argc = parse_options(argc, argv, trace_options, trace_usage, 0); if (output_name != NULL) { -- cgit v1.2.3 From 8ec19c0eba73d7221e93e993c1c8bd62484354e9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Oct 2013 11:49:26 +0200 Subject: perf tools: Implement summary output for 'make clean' 'make clean' used to show all the rm lines, which isn't really informative in any way and spams the console. Implement summary output: comet:~/tip/tools/perf> make clean CLEAN libtraceevent CLEAN liblk CLEAN config CLEAN core-objs CLEAN core-progs CLEAN core-gen CLEAN Documentation CLEAN python 'make clean V=1' will still show the old, detailed output. Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1381312169-17354-2-git-send-email-mingo@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/Makefile | 18 ++++++++++-------- tools/perf/Makefile.perf | 24 ++++++++++++------------ tools/perf/config/utilities.mak | 6 ++++++ tools/scripts/Makefile.include | 2 +- 4 files changed, 29 insertions(+), 21 deletions(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index eaa477f00673..be5adb1e0290 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -246,15 +246,17 @@ $(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \ date >$@ +CLEAN_FILES = \ + $(MAN_XML) $(addsuffix +,$(MAN_XML)) \ + $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \ + $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7) \ + $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++ \ + $(OUTPUT)perf.info $(OUTPUT)perfman.info \ + $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep \ + $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt \ + $(cmds_txt) $(OUTPUT)*.made clean: - $(RM) $(MAN_XML) $(addsuffix +,$(MAN_XML)) - $(RM) $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) - $(RM) $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7) - $(RM) $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++ - $(RM) $(OUTPUT)perf.info $(OUTPUT)perfman.info - $(RM) $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep - $(RM) $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt - $(RM) $(cmds_txt) $(OUTPUT)*.made + $(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES) $(MAN_HTML): $(OUTPUT)%.html : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 8bc6d0c4e776..250b276c136f 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -148,7 +148,7 @@ PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/ PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP -python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so +python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBLK) @@ -708,7 +708,8 @@ $(LIBTRACEEVENT): $(TE_SOURCES) $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libtraceevent.a $(LIBTRACEEVENT)-clean: - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean + $(call QUIET_CLEAN, libtraceevent) + @$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null LIBLK_SOURCES = $(wildcard $(LK_PATH)*.[ch]) @@ -721,7 +722,8 @@ endif $(LIBLK)-clean: ifeq ($(subdir),) - $(QUIET_SUBDIR0)$(LK_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean + $(call QUIET_CLEAN, liblk) + @$(MAKE) -C $(LK_DIR) O=$(OUTPUT) clean >/dev/null endif help: @@ -850,17 +852,15 @@ $(INSTALL_DOC_TARGETS): # not get included for the clean target: # config-clean: - @$(MAKE) -C config/feature-checks clean + $(call QUIET_CLEAN, config) + @$(MAKE) -C config/feature-checks clean >/dev/null clean: $(LIBTRACEEVENT)-clean $(LIBLK)-clean config-clean - $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(GTK_OBJS) - $(RM) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS) - $(RM) $(ALL_PROGRAMS) perf - $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* - $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean - $(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS - $(RM) $(OUTPUT)util/*-bison* - $(RM) $(OUTPUT)util/*-flex* + $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS) $(GTK_OBJS) + $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf + $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* + $(call QUIET_CLEAN, Documentation) + @$(MAKE) -C Documentation O=$(OUTPUT) clean >/dev/null $(python-clean) # diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak index 4d985e0f03f5..94908a5fbea7 100644 --- a/tools/perf/config/utilities.mak +++ b/tools/perf/config/utilities.mak @@ -178,3 +178,9 @@ endef _ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2))) _gea_warn = $(warning The path '$(1)' is not executable.) _gea_err = $(if $(1),$(error Please set '$(1)' appropriately)) + +ifneq ($(findstring $(MAKEFLAGS),s),s) + ifneq ($(V),1) + QUIET_CLEAN = @printf ' CLEAN %s\n' $(1); + endif +endif diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 0d0506d55c71..1853736c8106 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -66,7 +66,7 @@ ifneq ($(V),1) QUIET_MKDIR = @echo ' ' MKDIR $@; QUIET_GEN = @echo ' ' GEN $@; QUIET_SUBDIR0 = +@subdir= - QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ + QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ $(MAKE) $(PRINT_DIR) -C $$subdir QUIET_FLEX = @echo ' ' FLEX $@; QUIET_BISON = @echo ' ' BISON $@; -- cgit v1.2.3 From 65fb09922d4ca5da54fe18d7b44e5961caf169ad Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Oct 2013 11:49:27 +0200 Subject: tools: Harmonize the various build messages in perf, lib-traceevent, lib-lk The various build lines from libtraceevent and perf mix up during a parallel build and produce unaligned output like: CC builtin-buildid-list.o CC builtin-buildid-cache.o CC builtin-list.o CC FPIC trace-seq.o CC builtin-record.o CC FPIC parse-filter.o CC builtin-report.o CC builtin-stat.o CC FPIC parse-utils.o CC FPIC kbuffer-parse.o CC builtin-timechart.o CC builtin-top.o CC builtin-script.o BUILD STATIC LIB libtraceevent.a CC builtin-probe.o CC builtin-kmem.o CC builtin-lock.o To solve this, harmonize all the build message alignments to be similar to the kernel's kbuild output: prefixed by two spaces and 11-char wide. After the patch the output looks pretty tidy, even if output lines get mixed up: CC builtin-annotate.o FLAGS: * new build flags or cross compiler CC builtin-bench.o AR liblk.a CC bench/sched-messaging.o CC FPIC event-parse.o CC bench/sched-pipe.o CC FPIC trace-seq.o CC bench/mem-memcpy.o CC bench/mem-memset.o CC FPIC parse-filter.o CC builtin-diff.o CC builtin-evlist.o CC builtin-help.o Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1381312169-17354-3-git-send-email-mingo@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/Makefile | 18 +++++++++--------- tools/perf/Documentation/Makefile | 17 +++++++++-------- tools/perf/Makefile | 2 +- tools/perf/Makefile.perf | 2 +- tools/scripts/Makefile.include | 23 ++++++++++++----------- 5 files changed, 32 insertions(+), 30 deletions(-) (limited to 'tools/perf/Documentation') diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index ca6cb779876a..fc1502098595 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -134,14 +134,14 @@ ifeq ($(VERBOSE),1) print_install = else Q = @ - print_compile = echo ' CC '$(OBJ); - print_app_build = echo ' BUILD '$(OBJ); - print_fpic_compile = echo ' CC FPIC '$(OBJ); - print_shared_lib_compile = echo ' BUILD SHARED LIB '$(OBJ); - print_plugin_obj_compile = echo ' CC PLUGIN OBJ '$(OBJ); - print_plugin_build = echo ' CC PLUGI '$(OBJ); - print_static_lib_build = echo ' BUILD STATIC LIB '$(OBJ); - print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; + print_compile = echo ' CC '$(OBJ); + print_app_build = echo ' BUILD '$(OBJ); + print_fpic_compile = echo ' CC FPIC '$(OBJ); + print_shared_lib_compile = echo ' BUILD SHARED LIB '$(OBJ); + print_plugin_obj_compile = echo ' BUILD PLUGIN OBJ '$(OBJ); + print_plugin_build = echo ' BUILD PLUGIN '$(OBJ); + print_static_lib_build = echo ' BUILD STATIC LIB '$(OBJ); + print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; endif do_fpic_compile = \ @@ -268,7 +268,7 @@ TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE) TRACEEVENT-CFLAGS: force @FLAGS='$(TRACK_CFLAGS)'; \ if test x"$$FLAGS" != x"`cat TRACEEVENT-CFLAGS 2>/dev/null`" ; then \ - echo 1>&2 " * new build flags or cross compiler"; \ + echo 1>&2 " FLAGS: * new build flags or cross compiler"; \ echo "$$FLAGS" >TRACEEVENT-CFLAGS; \ fi diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index be5adb1e0290..c4c300cf155f 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -145,16 +145,17 @@ endif ifneq ($(findstring $(MAKEFLAGS),s),s) ifneq ($(V),1) - QUIET_ASCIIDOC = @echo ' ' ASCIIDOC $@; - QUIET_XMLTO = @echo ' ' XMLTO $@; - QUIET_DB2TEXI = @echo ' ' DB2TEXI $@; - QUIET_MAKEINFO = @echo ' ' MAKEINFO $@; - QUIET_DBLATEX = @echo ' ' DBLATEX $@; - QUIET_XSLTPROC = @echo ' ' XSLTPROC $@; - QUIET_GEN = @echo ' ' GEN $@; + QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@; + QUIET_XMLTO = @echo ' XMLTO '$@; + QUIET_DB2TEXI = @echo ' DB2TEXI '$@; + QUIET_MAKEINFO = @echo ' MAKEINFO '$@; + QUIET_DBLATEX = @echo ' DBLATEX '$@; + QUIET_XSLTPROC = @echo ' XSLTPROC '$@; + QUIET_GEN = @echo ' GEN '$@; QUIET_STDERR = 2> /dev/null QUIET_SUBDIR0 = +@subdir= - QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ + QUIET_SUBDIR1 = ;$(NO_SUBDIR) \ + echo ' SUBDIR ' $$subdir; \ $(MAKE) $(PRINT_DIR) -C $$subdir export V endif diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 9580ebe260e0..5aa3d040bbf3 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -38,7 +38,7 @@ ifneq ($(O),) endif define print_msg - @printf ' BUILD: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build\n' + @printf ' BUILD: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build\n' endef define make diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 250b276c136f..f91bd5a9b50a 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -784,7 +784,7 @@ TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\ $(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS @FLAGS='$(TRACK_CFLAGS)'; \ if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \ - echo 1>&2 " * new build flags or prefix"; \ + echo 1>&2 " FLAGS: * new build flags or prefix"; \ echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \ fi diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 1853736c8106..ee76544deecb 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -59,21 +59,22 @@ QUIET_SUBDIR0 = +$(MAKE) $(COMMAND_O) -C # space to separate -C and subdir QUIET_SUBDIR1 = ifneq ($(findstring $(MAKEFLAGS),s),s) -ifneq ($(V),1) - QUIET_CC = @echo ' ' CC $@; - QUIET_AR = @echo ' ' AR $@; - QUIET_LINK = @echo ' ' LINK $@; - QUIET_MKDIR = @echo ' ' MKDIR $@; - QUIET_GEN = @echo ' ' GEN $@; + ifneq ($(V),1) + QUIET_CC = @echo ' CC '$@; + QUIET_AR = @echo ' AR '$@; + QUIET_LINK = @echo ' LINK '$@; + QUIET_MKDIR = @echo ' MKDIR '$@; + QUIET_GEN = @echo ' GEN '$@; QUIET_SUBDIR0 = +@subdir= - QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ + QUIET_SUBDIR1 = ;$(NO_SUBDIR) \ + echo ' SUBDIR '$$subdir; \ $(MAKE) $(PRINT_DIR) -C $$subdir - QUIET_FLEX = @echo ' ' FLEX $@; - QUIET_BISON = @echo ' ' BISON $@; + QUIET_FLEX = @echo ' FLEX '$@; + QUIET_BISON = @echo ' BISON '$@; descend = \ - +@echo ' ' DESCEND $(1); \ + +@echo ' DESCEND '$(1); \ mkdir -p $(OUTPUT)$(1) && \ $(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) $(2) -endif + endif endif -- cgit v1.2.3 From 8a5411e9a3e65ab70b094022e5aa6deaec82ae7c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Oct 2013 11:49:29 +0200 Subject: perf tools: Implement summary output for 'make install' 'make install' used to show all the install lines, which is way too verbose to be really informative to the user. Implement summary output instead: comet:~/tip/tools/perf> make install BUILD: Doing 'make -j12' parallel build SUBDIR Documentation INSTALL Documentation-man INSTALL binaries INSTALL libexec INSTALL perf-archive INSTALL perl-scripts INSTALL python-scripts INSTALL bash_completion-script INSTALL tests 'make install V=1' will still show the old, detailed output. Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Ingo Molnar Link: http://lkml.kernel.org/r/1381312169-17354-5-git-send-email-mingo@kernel.org [ Fixed conflict with libperf-gtk patches in acme/perf/core, cope with 'trace' alias ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/Makefile | 27 +++++++++++--------- tools/perf/Makefile.perf | 54 ++++++++++++++++++++++----------------- tools/perf/config/utilities.mak | 3 ++- 3 files changed, 48 insertions(+), 36 deletions(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index c4c300cf155f..3ba1c0b09908 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -184,12 +184,13 @@ ifdef missing_tools endif do-install-man: man - $(INSTALL) -d -m 755 $(DESTDIR)$(man1dir) -# $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir) -# $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir) - $(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir) -# $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir) -# $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir) + $(call QUIET_INSTALL, Documentation-man) \ + $(INSTALL) -d -m 755 $(DESTDIR)$(man1dir); \ +# $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir); \ +# $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \ + $(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir); \ +# $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir); \ +# $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir) install-man: check-man-tools man @@ -202,18 +203,20 @@ endif try-install-man: $(DO_INSTALL_MAN) install-info: info - $(INSTALL) -d -m 755 $(DESTDIR)$(infodir) - $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir) + $(call QUIET_INSTALL, Documentation-info) \ + $(INSTALL) -d -m 755 $(DESTDIR)$(infodir); \ + $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir); \ if test -r $(DESTDIR)$(infodir)/dir; then \ - $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\ - $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\ + $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\ + $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\ else \ echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \ fi install-pdf: pdf - $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir) - $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir) + $(call QUIET_INSTALL, Documentation-pdf) \ + $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir); \ + $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir) #install-html: html # '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index f91bd5a9b50a..c873e039aafb 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -504,8 +504,9 @@ ifndef NO_GTK2 GTK_OBJS += $(OUTPUT)ui/gtk/annotate.o install-gtk: $(OUTPUT)libperf-gtk.so - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)' - $(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)' + $(call QUIET_INSTALL, 'GTK UI') \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \ + $(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)' endif ifndef NO_LIBPERL @@ -810,31 +811,38 @@ check: $(OUTPUT)common-cmds.h install-gtk: install-bin: all install-gtk - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' - $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)' - $(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace' - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' - $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' + $(call QUIET_INSTALL, binaries) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \ + $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \ + $(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace' + $(call QUIET_INSTALL, libexec) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' + $(call QUIET_INSTALL, perf-archive) \ + $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' ifndef NO_LIBPERL - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' - $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' - $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl' - $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' + $(call QUIET_INSTALL, perl-scripts) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \ + $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \ + $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'; \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'; \ + $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' endif ifndef NO_LIBPYTHON - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' - $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' - $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python' - $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' + $(call QUIET_INSTALL, python-scripts) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'; \ + $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \ + $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \ + $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' endif - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d' - $(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf' - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests' - $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests' - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' - $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' + $(call QUIET_INSTALL, bash_completion-script) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \ + $(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf' + $(call QUIET_INSTALL, tests) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ + $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ + $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' install: install-bin try-install-man diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak index 94908a5fbea7..f168debc5be2 100644 --- a/tools/perf/config/utilities.mak +++ b/tools/perf/config/utilities.mak @@ -181,6 +181,7 @@ _gea_err = $(if $(1),$(error Please set '$(1)' appropriately)) ifneq ($(findstring $(MAKEFLAGS),s),s) ifneq ($(V),1) - QUIET_CLEAN = @printf ' CLEAN %s\n' $(1); + QUIET_CLEAN = @printf ' CLEAN %s\n' $1; + QUIET_INSTALL = @printf ' INSTALL %s\n' $1; endif endif -- cgit v1.2.3 From d366c53e1d4fc9d7a5826fd82010b3cffaabe5f1 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 3 Oct 2013 14:45:16 +0530 Subject: perf timechart: Add example in the documentation While at it, update the synopsis to show both forms. Signed-off-by: Ramkumar Ramachandra Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1380791716-10325-1-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-timechart.txt | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt index 1632b0efc757..3ff8bd4f0b4d 100644 --- a/tools/perf/Documentation/perf-timechart.txt +++ b/tools/perf/Documentation/perf-timechart.txt @@ -8,7 +8,8 @@ perf-timechart - Tool to visualize total system behavior during a workload SYNOPSIS -------- [verse] -'perf timechart' {record} +'perf timechart' record +'perf timechart' [] DESCRIPTION ----------- @@ -41,6 +42,18 @@ OPTIONS --symfs=:: Look for files with symbols relative to this directory. +EXAMPLES +-------- + +$ perf timechart record git pull + + [ perf record: Woken up 13 times to write data ] + [ perf record: Captured and wrote 4.253 MB perf.data (~185801 samples) ] + +$ perf timechart + + Written 10.2 seconds of trace to output.svg. + SEE ALSO -------- linkperf:perf-record[1] -- cgit v1.2.3 From bf2575c121ca11247ef07fd02b43f7430834f7b1 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 8 Oct 2013 21:26:53 -0600 Subject: perf trace: Add summary option to dump syscall statistics When enabled dumps a summary of all syscalls by task with the usual statistics -- min, max, average and relative stddev. For example, make - 26341 : 3344 [ 17.4% ] 0.000 ms read : 52 0.000 4.802 0.644 30.08 write : 20 0.004 0.036 0.010 21.72 open : 24 0.003 0.046 0.014 23.68 close : 64 0.002 0.055 0.008 22.53 stat : 2714 0.002 0.222 0.004 4.47 fstat : 18 0.001 0.041 0.006 46.26 mmap : 30 0.003 0.009 0.006 5.71 mprotect : 8 0.006 0.039 0.016 32.16 munmap : 12 0.007 0.077 0.020 38.25 brk : 48 0.002 0.014 0.004 10.18 rt_sigaction : 18 0.002 0.002 0.002 2.11 rt_sigprocmask : 60 0.002 0.128 0.010 32.88 access : 2 0.006 0.006 0.006 0.00 pipe : 12 0.004 0.048 0.013 35.98 vfork : 34 0.448 0.980 0.692 3.04 execve : 20 0.000 0.387 0.046 56.66 wait4 : 34 0.017 9923.287 593.221 68.45 fcntl : 8 0.001 0.041 0.013 48.79 getdents : 48 0.002 0.079 0.013 19.62 getcwd : 2 0.005 0.005 0.005 0.00 chdir : 2 0.070 0.070 0.070 0.00 getrlimit : 2 0.045 0.045 0.045 0.00 arch_prctl : 2 0.002 0.002 0.002 0.00 setrlimit : 2 0.002 0.002 0.002 0.00 openat : 94 0.003 0.005 0.003 2.11 Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1381289214-24885-3-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 4 ++ tools/perf/builtin-trace.c | 110 ++++++++++++++++++++++++++++---- 2 files changed, 102 insertions(+), 12 deletions(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 1a224862118f..54139c6457f8 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -93,6 +93,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --comm:: Show process COMM right beside its ID, on by default, disable with --no-comm. +--summary:: + Show a summary of syscalls by thread with min, max, and average times (in + msec) and relative stddev. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5496546b7c5c..d0f91fe755a3 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -10,6 +10,7 @@ #include "util/strlist.h" #include "util/intlist.h" #include "util/thread_map.h" +#include "util/stat.h" #include #include @@ -909,6 +910,8 @@ struct thread_trace { int max; char **table; } paths; + + struct intlist *syscall_stats; }; static struct thread_trace *thread_trace__new(void) @@ -918,6 +921,8 @@ static struct thread_trace *thread_trace__new(void) if (ttrace) ttrace->paths.max = -1; + ttrace->syscall_stats = intlist__new(NULL); + return ttrace; } @@ -964,6 +969,7 @@ struct trace { struct intlist *pid_list; bool sched; bool multiple_threads; + bool summary; bool show_comm; double duration_filter; double runtime_ms; @@ -1291,10 +1297,8 @@ typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel, struct perf_sample *sample); static struct syscall *trace__syscall_info(struct trace *trace, - struct perf_evsel *evsel, - struct perf_sample *sample) + struct perf_evsel *evsel, int id) { - int id = perf_evsel__intval(evsel, sample, "id"); if (id < 0) { @@ -1335,6 +1339,32 @@ out_cant_read: return NULL; } +static void thread__update_stats(struct thread_trace *ttrace, + int id, struct perf_sample *sample) +{ + struct int_node *inode; + struct stats *stats; + u64 duration = 0; + + inode = intlist__findnew(ttrace->syscall_stats, id); + if (inode == NULL) + return; + + stats = inode->priv; + if (stats == NULL) { + stats = malloc(sizeof(struct stats)); + if (stats == NULL) + return; + init_stats(stats); + inode->priv = stats; + } + + if (ttrace->entry_time && sample->time > ttrace->entry_time) + duration = sample->time - ttrace->entry_time; + + update_stats(stats, duration); +} + static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, struct perf_sample *sample) { @@ -1342,7 +1372,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, void *args; size_t printed = 0; struct thread *thread; - struct syscall *sc = trace__syscall_info(trace, evsel, sample); + int id = perf_evsel__intval(evsel, sample, "id"); + struct syscall *sc = trace__syscall_info(trace, evsel, id); struct thread_trace *ttrace; if (sc == NULL) @@ -1394,7 +1425,8 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, int ret; u64 duration = 0; struct thread *thread; - struct syscall *sc = trace__syscall_info(trace, evsel, sample); + int id = perf_evsel__intval(evsel, sample, "id"); + struct syscall *sc = trace__syscall_info(trace, evsel, id); struct thread_trace *ttrace; if (sc == NULL) @@ -1408,6 +1440,9 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, if (ttrace == NULL) return -1; + if (trace->summary) + thread__update_stats(ttrace, id, sample); + ret = perf_evsel__intval(evsel, sample, "ret"); ttrace = thread->priv; @@ -1574,6 +1609,8 @@ static int trace__record(int argc, const char **argv) return cmd_record(i, rec_argv, NULL); } +static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); + static int trace__run(struct trace *trace, int argc, const char **argv) { struct perf_evlist *evlist = perf_evlist__new(); @@ -1703,6 +1740,9 @@ again: goto again; out_unmap_evlist: + if (!err && trace->summary) + trace__fprintf_thread_summary(trace, trace->output); + perf_evlist__munmap(evlist); out_close_evlist: perf_evlist__close(evlist); @@ -1798,6 +1838,9 @@ static int trace__replay(struct trace *trace) if (err) pr_err("Failed to process events, error %d", err); + else if (trace->summary) + trace__fprintf_thread_summary(trace, trace->output); + out: perf_session__delete(session); @@ -1808,10 +1851,53 @@ static size_t trace__fprintf_threads_header(FILE *fp) { size_t printed; - printed = fprintf(fp, "\n _____________________________________________________________________\n"); - printed += fprintf(fp," __) Summary of events (__\n\n"); - printed += fprintf(fp," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n"); - printed += fprintf(fp," _____________________________________________________________________\n\n"); + printed = fprintf(fp, "\n _____________________________________________________________________________\n"); + printed += fprintf(fp, " __) Summary of events (__\n\n"); + printed += fprintf(fp, " [ task - pid ] [ events ] [ ratio ] [ runtime ]\n"); + printed += fprintf(fp, " syscall count min max avg stddev\n"); + printed += fprintf(fp, " msec msec msec %%\n"); + printed += fprintf(fp, " _____________________________________________________________________________\n\n"); + + return printed; +} + +static size_t thread__dump_stats(struct thread_trace *ttrace, + struct trace *trace, FILE *fp) +{ + struct stats *stats; + size_t printed = 0; + struct syscall *sc; + struct int_node *inode = intlist__first(ttrace->syscall_stats); + + if (inode == NULL) + return 0; + + printed += fprintf(fp, "\n"); + + /* each int_node is a syscall */ + while (inode) { + stats = inode->priv; + if (stats) { + double min = (double)(stats->min) / NSEC_PER_MSEC; + double max = (double)(stats->max) / NSEC_PER_MSEC; + double avg = avg_stats(stats); + double pct; + u64 n = (u64) stats->n; + + pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0; + avg /= NSEC_PER_MSEC; + + sc = &trace->syscalls.table[inode->i]; + printed += fprintf(fp, "%24s %14s : ", "", sc->name); + printed += fprintf(fp, "%5" PRIu64 " %8.3f %8.3f", + n, min, max); + printed += fprintf(fp, " %8.3f %6.2f\n", avg, pct); + } + + inode = intlist__next(inode); + } + + printed += fprintf(fp, "\n\n"); return printed; } @@ -1850,6 +1936,7 @@ static int trace__fprintf_one_thread(struct thread *thread, void *priv) printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events); printed += color_fprintf(fp, color, "%5.1f%%", ratio); printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms); + printed += thread__dump_stats(ttrace, trace, fp); data->printed += printed; @@ -1953,6 +2040,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_BOOLEAN('T', "time", &trace.full_time, "Show full timestamp, not time relative to first start"), + OPT_BOOLEAN(0, "summary", &trace.summary, + "Show syscall summary with statistics"), OPT_END() }; int err; @@ -2008,9 +2097,6 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) else err = trace__run(&trace, argc, argv); - if (trace.sched && !err) - trace__fprintf_thread_summary(&trace, trace.output); - out_close: if (output_name != NULL) fclose(trace.output); -- cgit v1.2.3 From fc1b691d7651d9496e912de7e0fc73a5be3294af Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 14 Oct 2013 16:57:29 +0300 Subject: perf buildid-cache: Add ability to add kcore to the cache kcore can be used to view the running kernel object code. However, kcore changes as modules are loaded and unloaded, and when the kernel decides to modify its own code. Consequently it is useful to create a copy of kcore at a particular time. Unlike vmlinux, kcore is not unique for a given build-id. And in addition, the kallsyms and modules files are also needed. The tool therefore creates a directory: ~/.debug/[kernel.kcore]// which contains: kcore, kallsyms and modules. Note that the copied kcore contains only code sections. See the kcore_copy() function for how that is determined. The tool will not make additional copies of kcore if there is already one with the same modules at the same addresses. Currently, perf tools will not look for kcore in the cache. That is addressed in another patch. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/525BF849.5030405@intel.com [ renamed 'index' to 'idx' to avoid shadowing string.h symbol in f12, use at least one member initializer when initializing a struct to zeros, also to fix the build on f12 ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-buildid-cache.txt | 13 + tools/perf/builtin-buildid-cache.c | 148 +++++++++- tools/perf/util/symbol-elf.c | 366 ++++++++++++++++++++++++ tools/perf/util/symbol-minimal.c | 6 + tools/perf/util/symbol.c | 41 +++ tools/perf/util/symbol.h | 3 + 6 files changed, 576 insertions(+), 1 deletion(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index e9a8349a7172..fd77d81ea748 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -21,6 +21,19 @@ OPTIONS -a:: --add=:: Add specified file to the cache. +-k:: +--kcore:: + Add specified kcore file to the cache. For the current host that is + /proc/kcore which requires root permissions to read. Be aware that + running 'perf buildid-cache' as root may update root's build-id cache + not the user's. Use the -v option to see where the file is created. + Note that the copied file contains only code sections not the whole core + image. Note also that files "kallsyms" and "modules" must also be in the + same directory and are also copied. All 3 files are created with read + permissions for root only. kcore will not be added if there is already a + kcore in the cache (with the same build-id) that has the same modules at + the same addresses. Use the -v option to see if a copy of kcore is + actually made. -r:: --remove=:: Remove specified file from the cache. diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index c96c8fa38243..8140b7b249fa 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -6,6 +6,11 @@ * Copyright (C) 2010, Red Hat Inc. * Copyright (C) 2010, Arnaldo Carvalho de Melo */ +#include +#include +#include +#include +#include #include "builtin.h" #include "perf.h" #include "util/cache.h" @@ -17,6 +22,140 @@ #include "util/session.h" #include "util/symbol.h" +static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) +{ + char root_dir[PATH_MAX]; + char notes[PATH_MAX]; + u8 build_id[BUILD_ID_SIZE]; + char *p; + + strlcpy(root_dir, proc_dir, sizeof(root_dir)); + + p = strrchr(root_dir, '/'); + if (!p) + return -1; + *p = '\0'; + + scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir); + + if (sysfs__read_build_id(notes, build_id, sizeof(build_id))) + return -1; + + build_id__sprintf(build_id, sizeof(build_id), sbuildid); + + return 0; +} + +static int build_id_cache__kcore_dir(char *dir, size_t sz) +{ + struct timeval tv; + struct tm tm; + char dt[32]; + + if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm)) + return -1; + + if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm)) + return -1; + + scnprintf(dir, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000); + + return 0; +} + +static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir, + size_t to_dir_sz) +{ + char from[PATH_MAX]; + char to[PATH_MAX]; + struct dirent *dent; + int ret = -1; + DIR *d; + + d = opendir(to_dir); + if (!d) + return -1; + + scnprintf(from, sizeof(from), "%s/modules", from_dir); + + while (1) { + dent = readdir(d); + if (!dent) + break; + if (dent->d_type != DT_DIR) + continue; + scnprintf(to, sizeof(to), "%s/%s/modules", to_dir, + dent->d_name); + if (!compare_proc_modules(from, to)) { + scnprintf(to, sizeof(to), "%s/%s", to_dir, + dent->d_name); + strlcpy(to_dir, to, to_dir_sz); + ret = 0; + break; + } + } + + closedir(d); + + return ret; +} + +static int build_id_cache__add_kcore(const char *filename, const char *debugdir) +{ + char dir[32], sbuildid[BUILD_ID_SIZE * 2 + 1]; + char from_dir[PATH_MAX], to_dir[PATH_MAX]; + char *p; + + strlcpy(from_dir, filename, sizeof(from_dir)); + + p = strrchr(from_dir, '/'); + if (!p || strcmp(p + 1, "kcore")) + return -1; + *p = '\0'; + + if (build_id_cache__kcore_buildid(from_dir, sbuildid)) + return -1; + + scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s", + debugdir, sbuildid); + + if (!build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) { + pr_debug("same kcore found in %s\n", to_dir); + return 0; + } + + if (build_id_cache__kcore_dir(dir, sizeof(dir))) + return -1; + + scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s/%s", + debugdir, sbuildid, dir); + + if (mkdir_p(to_dir, 0755)) + return -1; + + if (kcore_copy(from_dir, to_dir)) { + /* Remove YYYYmmddHHMMSShh directory */ + if (!rmdir(to_dir)) { + p = strrchr(to_dir, '/'); + if (p) + *p = '\0'; + /* Try to remove buildid directory */ + if (!rmdir(to_dir)) { + p = strrchr(to_dir, '/'); + if (p) + *p = '\0'; + /* Try to remove [kernel.kcore] directory */ + rmdir(to_dir); + } + } + return -1; + } + + pr_debug("kcore added to build-id cache directory %s\n", to_dir); + + return 0; +} + static int build_id_cache__add_file(const char *filename, const char *debugdir) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; @@ -130,11 +269,14 @@ int cmd_buildid_cache(int argc, const char **argv, char const *add_name_list_str = NULL, *remove_name_list_str = NULL, *missing_filename = NULL, - *update_name_list_str = NULL; + *update_name_list_str = NULL, + *kcore_filename; const struct option buildid_cache_options[] = { OPT_STRING('a', "add", &add_name_list_str, "file list", "file(s) to add"), + OPT_STRING('k', "kcore", &kcore_filename, + "file", "kcore file to add"), OPT_STRING('r', "remove", &remove_name_list_str, "file list", "file(s) to remove"), OPT_STRING('M', "missing", &missing_filename, "file", @@ -217,5 +359,9 @@ int cmd_buildid_cache(int argc, const char **argv, } } + if (kcore_filename && + build_id_cache__add_kcore(kcore_filename, debugdir)) + pr_warning("Couldn't add %s\n", kcore_filename); + return ret; } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 499c71d30225..d6b8af3ce344 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1202,6 +1202,372 @@ static off_t kcore__write(struct kcore *kcore) return elf_update(kcore->elf, ELF_C_WRITE); } +struct phdr_data { + off_t offset; + u64 addr; + u64 len; +}; + +struct kcore_copy_info { + u64 stext; + u64 etext; + u64 first_symbol; + u64 last_symbol; + u64 first_module; + u64 last_module_symbol; + struct phdr_data kernel_map; + struct phdr_data modules_map; +}; + +static int kcore_copy__process_kallsyms(void *arg, const char *name, char type, + u64 start) +{ + struct kcore_copy_info *kci = arg; + + if (!symbol_type__is_a(type, MAP__FUNCTION)) + return 0; + + if (strchr(name, '[')) { + if (start > kci->last_module_symbol) + kci->last_module_symbol = start; + return 0; + } + + if (!kci->first_symbol || start < kci->first_symbol) + kci->first_symbol = start; + + if (!kci->last_symbol || start > kci->last_symbol) + kci->last_symbol = start; + + if (!strcmp(name, "_stext")) { + kci->stext = start; + return 0; + } + + if (!strcmp(name, "_etext")) { + kci->etext = start; + return 0; + } + + return 0; +} + +static int kcore_copy__parse_kallsyms(struct kcore_copy_info *kci, + const char *dir) +{ + char kallsyms_filename[PATH_MAX]; + + scnprintf(kallsyms_filename, PATH_MAX, "%s/kallsyms", dir); + + if (symbol__restricted_filename(kallsyms_filename, "/proc/kallsyms")) + return -1; + + if (kallsyms__parse(kallsyms_filename, kci, + kcore_copy__process_kallsyms) < 0) + return -1; + + return 0; +} + +static int kcore_copy__process_modules(void *arg, + const char *name __maybe_unused, + u64 start) +{ + struct kcore_copy_info *kci = arg; + + if (!kci->first_module || start < kci->first_module) + kci->first_module = start; + + return 0; +} + +static int kcore_copy__parse_modules(struct kcore_copy_info *kci, + const char *dir) +{ + char modules_filename[PATH_MAX]; + + scnprintf(modules_filename, PATH_MAX, "%s/modules", dir); + + if (symbol__restricted_filename(modules_filename, "/proc/modules")) + return -1; + + if (modules__parse(modules_filename, kci, + kcore_copy__process_modules) < 0) + return -1; + + return 0; +} + +static void kcore_copy__map(struct phdr_data *p, u64 start, u64 end, u64 pgoff, + u64 s, u64 e) +{ + if (p->addr || s < start || s >= end) + return; + + p->addr = s; + p->offset = (s - start) + pgoff; + p->len = e < end ? e - s : end - s; +} + +static int kcore_copy__read_map(u64 start, u64 len, u64 pgoff, void *data) +{ + struct kcore_copy_info *kci = data; + u64 end = start + len; + + kcore_copy__map(&kci->kernel_map, start, end, pgoff, kci->stext, + kci->etext); + + kcore_copy__map(&kci->modules_map, start, end, pgoff, kci->first_module, + kci->last_module_symbol); + + return 0; +} + +static int kcore_copy__read_maps(struct kcore_copy_info *kci, Elf *elf) +{ + if (elf_read_maps(elf, true, kcore_copy__read_map, kci) < 0) + return -1; + + return 0; +} + +static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir, + Elf *elf) +{ + if (kcore_copy__parse_kallsyms(kci, dir)) + return -1; + + if (kcore_copy__parse_modules(kci, dir)) + return -1; + + if (kci->stext) + kci->stext = round_down(kci->stext, page_size); + else + kci->stext = round_down(kci->first_symbol, page_size); + + if (kci->etext) { + kci->etext = round_up(kci->etext, page_size); + } else if (kci->last_symbol) { + kci->etext = round_up(kci->last_symbol, page_size); + kci->etext += page_size; + } + + kci->first_module = round_down(kci->first_module, page_size); + + if (kci->last_module_symbol) { + kci->last_module_symbol = round_up(kci->last_module_symbol, + page_size); + kci->last_module_symbol += page_size; + } + + if (!kci->stext || !kci->etext) + return -1; + + if (kci->first_module && !kci->last_module_symbol) + return -1; + + return kcore_copy__read_maps(kci, elf); +} + +static int kcore_copy__copy_file(const char *from_dir, const char *to_dir, + const char *name) +{ + char from_filename[PATH_MAX]; + char to_filename[PATH_MAX]; + + scnprintf(from_filename, PATH_MAX, "%s/%s", from_dir, name); + scnprintf(to_filename, PATH_MAX, "%s/%s", to_dir, name); + + return copyfile_mode(from_filename, to_filename, 0400); +} + +static int kcore_copy__unlink(const char *dir, const char *name) +{ + char filename[PATH_MAX]; + + scnprintf(filename, PATH_MAX, "%s/%s", dir, name); + + return unlink(filename); +} + +static int kcore_copy__compare_fds(int from, int to) +{ + char *buf_from; + char *buf_to; + ssize_t ret; + size_t len; + int err = -1; + + buf_from = malloc(page_size); + buf_to = malloc(page_size); + if (!buf_from || !buf_to) + goto out; + + while (1) { + /* Use read because mmap won't work on proc files */ + ret = read(from, buf_from, page_size); + if (ret < 0) + goto out; + + if (!ret) + break; + + len = ret; + + if (readn(to, buf_to, len) != (int)len) + goto out; + + if (memcmp(buf_from, buf_to, len)) + goto out; + } + + err = 0; +out: + free(buf_to); + free(buf_from); + return err; +} + +static int kcore_copy__compare_files(const char *from_filename, + const char *to_filename) +{ + int from, to, err = -1; + + from = open(from_filename, O_RDONLY); + if (from < 0) + return -1; + + to = open(to_filename, O_RDONLY); + if (to < 0) + goto out_close_from; + + err = kcore_copy__compare_fds(from, to); + + close(to); +out_close_from: + close(from); + return err; +} + +static int kcore_copy__compare_file(const char *from_dir, const char *to_dir, + const char *name) +{ + char from_filename[PATH_MAX]; + char to_filename[PATH_MAX]; + + scnprintf(from_filename, PATH_MAX, "%s/%s", from_dir, name); + scnprintf(to_filename, PATH_MAX, "%s/%s", to_dir, name); + + return kcore_copy__compare_files(from_filename, to_filename); +} + +/** + * kcore_copy - copy kallsyms, modules and kcore from one directory to another. + * @from_dir: from directory + * @to_dir: to directory + * + * This function copies kallsyms, modules and kcore files from one directory to + * another. kallsyms and modules are copied entirely. Only code segments are + * copied from kcore. It is assumed that two segments suffice: one for the + * kernel proper and one for all the modules. The code segments are determined + * from kallsyms and modules files. The kernel map starts at _stext or the + * lowest function symbol, and ends at _etext or the highest function symbol. + * The module map starts at the lowest module address and ends at the highest + * module symbol. Start addresses are rounded down to the nearest page. End + * addresses are rounded up to the nearest page. An extra page is added to the + * highest kernel symbol and highest module symbol to, hopefully, encompass that + * symbol too. Because it contains only code sections, the resulting kcore is + * unusual. One significant peculiarity is that the mapping (start -> pgoff) + * is not the same for the kernel map and the modules map. That happens because + * the data is copied adjacently whereas the original kcore has gaps. Finally, + * kallsyms and modules files are compared with their copies to check that + * modules have not been loaded or unloaded while the copies were taking place. + * + * Return: %0 on success, %-1 on failure. + */ +int kcore_copy(const char *from_dir, const char *to_dir) +{ + struct kcore kcore; + struct kcore extract; + size_t count = 2; + int idx = 0, err = -1; + off_t offset = page_size, sz, modules_offset = 0; + struct kcore_copy_info kci = { .stext = 0, }; + char kcore_filename[PATH_MAX]; + char extract_filename[PATH_MAX]; + + if (kcore_copy__copy_file(from_dir, to_dir, "kallsyms")) + return -1; + + if (kcore_copy__copy_file(from_dir, to_dir, "modules")) + goto out_unlink_kallsyms; + + scnprintf(kcore_filename, PATH_MAX, "%s/kcore", from_dir); + scnprintf(extract_filename, PATH_MAX, "%s/kcore", to_dir); + + if (kcore__open(&kcore, kcore_filename)) + goto out_unlink_modules; + + if (kcore_copy__calc_maps(&kci, from_dir, kcore.elf)) + goto out_kcore_close; + + if (kcore__init(&extract, extract_filename, kcore.elfclass, false)) + goto out_kcore_close; + + if (!kci.modules_map.addr) + count -= 1; + + if (kcore__copy_hdr(&kcore, &extract, count)) + goto out_extract_close; + + if (kcore__add_phdr(&extract, idx++, offset, kci.kernel_map.addr, + kci.kernel_map.len)) + goto out_extract_close; + + if (kci.modules_map.addr) { + modules_offset = offset + kci.kernel_map.len; + if (kcore__add_phdr(&extract, idx, modules_offset, + kci.modules_map.addr, kci.modules_map.len)) + goto out_extract_close; + } + + sz = kcore__write(&extract); + if (sz < 0 || sz > offset) + goto out_extract_close; + + if (copy_bytes(kcore.fd, kci.kernel_map.offset, extract.fd, offset, + kci.kernel_map.len)) + goto out_extract_close; + + if (modules_offset && copy_bytes(kcore.fd, kci.modules_map.offset, + extract.fd, modules_offset, + kci.modules_map.len)) + goto out_extract_close; + + if (kcore_copy__compare_file(from_dir, to_dir, "modules")) + goto out_extract_close; + + if (kcore_copy__compare_file(from_dir, to_dir, "kallsyms")) + goto out_extract_close; + + err = 0; + +out_extract_close: + kcore__close(&extract); + if (err) + unlink(extract_filename); +out_kcore_close: + kcore__close(&kcore); +out_unlink_modules: + if (err) + kcore_copy__unlink(to_dir, "modules"); +out_unlink_kallsyms: + if (err) + kcore_copy__unlink(to_dir, "kallsyms"); + + return err; +} + int kcore_extract__create(struct kcore_extract *kce) { struct kcore kcore; diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 928556d2508f..2d2dd0532b5a 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -317,6 +317,12 @@ void kcore_extract__delete(struct kcore_extract *kce __maybe_unused) { } +int kcore_copy(const char *from_dir __maybe_unused, + const char *to_dir __maybe_unused) +{ + return -1; +} + void symbol__elf_init(void) { } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 76a9e933a7a5..b66c1eefc313 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -893,6 +893,47 @@ static int read_proc_modules(const char *filename, struct rb_root *modules) return 0; } +int compare_proc_modules(const char *from, const char *to) +{ + struct rb_root from_modules = RB_ROOT; + struct rb_root to_modules = RB_ROOT; + struct rb_node *from_node, *to_node; + struct module_info *from_m, *to_m; + int ret = -1; + + if (read_proc_modules(from, &from_modules)) + return -1; + + if (read_proc_modules(to, &to_modules)) + goto out_delete_from; + + from_node = rb_first(&from_modules); + to_node = rb_first(&to_modules); + while (from_node) { + if (!to_node) + break; + + from_m = rb_entry(from_node, struct module_info, rb_node); + to_m = rb_entry(to_node, struct module_info, rb_node); + + if (from_m->start != to_m->start || + strcmp(from_m->name, to_m->name)) + break; + + from_node = rb_next(from_node); + to_node = rb_next(to_node); + } + + if (!from_node && !to_node) + ret = 0; + + delete_modules(&to_modules); +out_delete_from: + delete_modules(&from_modules); + + return ret; +} + static int do_validate_kcore_modules(const char *filename, struct map *map, struct map_groups *kmaps) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index fb107e13e925..07de8fea2f48 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -270,4 +270,7 @@ struct kcore_extract { int kcore_extract__create(struct kcore_extract *kce); void kcore_extract__delete(struct kcore_extract *kce); +int kcore_copy(const char *from_dir, const char *to_dir); +int compare_proc_modules(const char *from, const char *to); + #endif /* __PERF_SYMBOL */ -- cgit v1.2.3 From c522739d72a341a3e74a369ce6298b9412813d3f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Sep 2013 18:06:19 -0300 Subject: perf trace: Use vfs_getname hook if available Initially it tries to find a probe:vfs_getname that should be setup with: perf probe 'vfs_getname=getname_flags:65 pathname=result->name:string' or with slight changes to cope with code flux in the getname_flags code. In the future, if a "vfs:getname" tracepoint becomes available, then it will be preferred. This is not strictly required and more expensive method of reading the /proc/pid/fd/ symlink will be used when the fd->path array entry is not populated by a previous vfs_getname + open syscall ret sequence. As with any other 'perf probe' probe the setup must be done just once and the probe will be left inactive, waiting for users, be it 'perf trace' of any other tool. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ujg8se8glq5izmu8cdkq15po@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 4 ++ tools/perf/builtin-trace.c | 83 ++++++++++++++++++++++++++++----- 2 files changed, 75 insertions(+), 12 deletions(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 54139c6457f8..7b0497f95a75 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -97,6 +97,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. Show a summary of syscalls by thread with min, max, and average times (in msec) and relative stddev. +--tool_stats:: + Show tool stats such as number of times fd->pathname was discovered thru + hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 763bef45afad..86d2b1f2399c 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -951,7 +951,10 @@ fail: struct trace { struct perf_tool tool; - int audit_machine; + struct { + int machine; + int open_id; + } audit; struct { int max; struct syscall *table; @@ -965,14 +968,19 @@ struct trace { struct strlist *ev_qualifier; bool not_ev_qualifier; bool live; + const char *last_vfs_getname; struct intlist *tid_list; struct intlist *pid_list; bool sched; bool multiple_threads; bool summary; bool show_comm; + bool show_tool_stats; double duration_filter; double runtime_ms; + struct { + u64 vfs_getname, proc_getname; + } stats; }; static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) @@ -1027,7 +1035,8 @@ static int thread__read_fd_path(struct thread *thread, int fd) return trace__set_fd_pathname(thread, fd, pathname); } -static const char *thread__fd_path(struct thread *thread, int fd, bool live) +static const char *thread__fd_path(struct thread *thread, int fd, + struct trace *trace) { struct thread_trace *ttrace = thread->priv; @@ -1037,9 +1046,13 @@ static const char *thread__fd_path(struct thread *thread, int fd, bool live) if (fd < 0) return NULL; - if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL) && - (!live || thread__read_fd_path(thread, fd))) - return NULL; + if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) + if (!trace->live) + return NULL; + ++trace->stats.proc_getname; + if (thread__read_fd_path(thread, fd)) { + return NULL; + } return ttrace->paths.table[fd]; } @@ -1049,7 +1062,7 @@ static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, { int fd = arg->val; size_t printed = scnprintf(bf, size, "%d", fd); - const char *path = thread__fd_path(arg->thread, fd, arg->trace->live); + const char *path = thread__fd_path(arg->thread, fd, arg->trace); if (path) printed += scnprintf(bf + printed, size - printed, "<%s>", path); @@ -1186,7 +1199,7 @@ static int trace__read_syscall_info(struct trace *trace, int id) { char tp_name[128]; struct syscall *sc; - const char *name = audit_syscall_to_name(id, trace->audit_machine); + const char *name = audit_syscall_to_name(id, trace->audit.machine); if (name == NULL) return -1; @@ -1450,6 +1463,12 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, ret = perf_evsel__intval(evsel, sample, "ret"); + if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) { + trace__set_fd_pathname(thread, ret, trace->last_vfs_getname); + trace->last_vfs_getname = NULL; + ++trace->stats.vfs_getname; + } + ttrace = thread->priv; ttrace->exit_time = sample->time; @@ -1494,6 +1513,13 @@ out: return 0; } +static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, + struct perf_sample *sample) +{ + trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname"); + return 0; +} + static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, struct perf_sample *sample) { @@ -1616,6 +1642,22 @@ static int trace__record(int argc, const char **argv) static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); +static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname", + evlist->nr_entries); + if (evsel == NULL) + return; + + if (perf_evsel__field(evsel, "pathname") == NULL) { + perf_evsel__delete(evsel); + return; + } + + evsel->handler.func = trace__vfs_getname; + perf_evlist__add(evlist, evsel); +} + static int trace__run(struct trace *trace, int argc, const char **argv) { struct perf_evlist *evlist = perf_evlist__new(); @@ -1635,6 +1677,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv) perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) goto out_error_tp; + perf_evlist__add_vfs_getname(evlist); + if (trace->sched && perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime)) @@ -1741,12 +1785,22 @@ again: if (done) perf_evlist__disable(evlist); - - goto again; + else + goto again; out_unmap_evlist: - if (!err && trace->summary) - trace__fprintf_thread_summary(trace, trace->output); + if (!err) { + if (trace->summary) + trace__fprintf_thread_summary(trace, trace->output); + + if (trace->show_tool_stats) { + fprintf(trace->output, "Stats:\n " + " vfs_getname : %" PRIu64 "\n" + " proc_getname: %" PRIu64 "\n", + trace->stats.vfs_getname, + trace->stats.proc_getname); + } + } perf_evlist__munmap(evlist); out_close_evlist: @@ -1788,6 +1842,7 @@ static int trace__replay(struct trace *trace) const struct perf_evsel_str_handler handlers[] = { { "raw_syscalls:sys_enter", trace__sys_enter, }, { "raw_syscalls:sys_exit", trace__sys_exit, }, + { "probe:vfs_getname", trace__vfs_getname, }, }; struct perf_session *session; @@ -1997,7 +2052,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; struct trace trace = { - .audit_machine = audit_detect_machine(), + .audit = { + .machine = audit_detect_machine(), + .open_id = audit_name_to_syscall("open", trace.audit.machine), + }, .syscalls = { . max = -1, }, @@ -2019,6 +2077,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) const struct option trace_options[] = { OPT_BOOLEAN(0, "comm", &trace.show_comm, "show the thread COMM next to its id"), + OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of events to trace"), OPT_STRING('o', "output", &output_name, "file", "output file name"), -- cgit v1.2.3 From 91e95617429cb272fd908b1928a1915b37b9655f Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 18 Oct 2013 10:38:48 -0400 Subject: perf report: Add --max-stack option to limit callchain stack scan When callgraph data was included in the perf data file, it may take a long time to scan all those data and merge them together especially if the stored callchains are long and the perf data file itself is large, like a Gbyte or so. The callchain stack is currently limited to PERF_MAX_STACK_DEPTH (127). This is a large value. Usually the callgraph data that developers are most interested in are the first few levels, the rests are usually not looked at. This patch adds a new --max-stack option to perf-report to limit the depth of callchain stack data to look at to reduce the time it takes for perf-report to finish its processing. It trades the presence of trailing stack information with faster speed. The following table shows the elapsed time of doing perf-report on a perf.data file of size 985,531,828 bytes. --max_stack Elapsed Time Output data size ----------- ------------ ---------------- not set 88.0s 124,422,651 64 87.5s 116,303,213 32 87.2s 112,023,804 16 86.6s 94,326,380 8 59.9s 33,697,248 4 40.7s 10,116,637 -g none 27.1s 2,555,810 Signed-off-by: Waiman Long Acked-by: David Ahern Cc: Adrian Hunter Cc: Aswin Chandramouleeswaran Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Scott J Norton Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382107129-2010-4-git-send-email-Waiman.Long@hp.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 8 ++++++++ tools/perf/builtin-report.c | 22 +++++++++++++++++----- tools/perf/builtin-top.c | 3 ++- tools/perf/util/machine.c | 14 +++++++++----- tools/perf/util/machine.h | 3 ++- tools/perf/util/session.c | 3 ++- 6 files changed, 40 insertions(+), 13 deletions(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index be5ad87b6c3d..10a279871251 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -141,6 +141,14 @@ OPTIONS Default: fractal,0.5,callee,function. +--max-stack:: + Set the stack depth limit when parsing the callchain, anything + beyond the specified depth will be ignored. This is a trade-off + between information loss and faster processing especially for + workloads that can have a very long callchain stack. + + Default: 127 + -G:: --inverted:: alias for inverted caller based call graph. diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fa68a36bc461..81addcabb356 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -49,6 +49,7 @@ struct perf_report { bool show_threads; bool inverted_callchain; bool mem_mode; + int max_stack; struct perf_read_values show_threads_values; const char *pretty_printing_style; const char *cpu_list; @@ -90,7 +91,8 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool, if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { err = machine__resolve_callchain(machine, evsel, al->thread, - sample, &parent, al); + sample, &parent, al, + rep->max_stack); if (err) return err; } @@ -181,7 +183,8 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { err = machine__resolve_callchain(machine, evsel, al->thread, - sample, &parent, al); + sample, &parent, al, + rep->max_stack); if (err) return err; } @@ -244,18 +247,21 @@ out: return err; } -static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, +static int perf_evsel__add_hist_entry(struct perf_tool *tool, + struct perf_evsel *evsel, struct addr_location *al, struct perf_sample *sample, struct machine *machine) { + struct perf_report *rep = container_of(tool, struct perf_report, tool); struct symbol *parent = NULL; int err = 0; struct hist_entry *he; if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { err = machine__resolve_callchain(machine, evsel, al->thread, - sample, &parent, al); + sample, &parent, al, + rep->max_stack); if (err) return err; } @@ -332,7 +338,8 @@ static int process_sample_event(struct perf_tool *tool, if (al.map != NULL) al.map->dso->hit = 1; - ret = perf_evsel__add_hist_entry(evsel, &al, sample, machine); + ret = perf_evsel__add_hist_entry(tool, evsel, &al, sample, + machine); if (ret < 0) pr_debug("problem incrementing symbol period, skipping event\n"); } @@ -772,6 +779,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) .ordered_samples = true, .ordering_requires_timestamps = true, }, + .max_stack = PERF_MAX_STACK_DEPTH, .pretty_printing_style = "normal", }; const struct option options[] = { @@ -812,6 +820,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " "Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt), + OPT_INTEGER(0, "max-stack", &report.max_stack, + "Set the maximum stack depth when parsing the callchain, " + "anything beyond the specified depth will be ignored. " + "Default: " __stringify(PERF_MAX_STACK_DEPTH)), OPT_BOOLEAN('G', "inverted", &report.inverted_callchain, "alias for inverted call graph"), OPT_CALLBACK(0, "ignore-callees", NULL, "regex", diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index d934f707ee74..112cb7d68e64 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -770,7 +770,8 @@ static void perf_event__process_sample(struct perf_tool *tool, sample->callchain) { err = machine__resolve_callchain(machine, evsel, al.thread, sample, - &parent, &al); + &parent, &al, + PERF_MAX_STACK_DEPTH); if (err) return; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 6b861aefd99a..ea93425cce95 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1253,10 +1253,12 @@ static int machine__resolve_callchain_sample(struct machine *machine, struct thread *thread, struct ip_callchain *chain, struct symbol **parent, - struct addr_location *root_al) + struct addr_location *root_al, + int max_stack) { u8 cpumode = PERF_RECORD_MISC_USER; - unsigned int i; + int chain_nr = min(max_stack, (int)chain->nr); + int i; int err; callchain_cursor_reset(&callchain_cursor); @@ -1266,7 +1268,7 @@ static int machine__resolve_callchain_sample(struct machine *machine, return 0; } - for (i = 0; i < chain->nr; i++) { + for (i = 0; i < chain_nr; i++) { u64 ip; struct addr_location al; @@ -1338,12 +1340,14 @@ int machine__resolve_callchain(struct machine *machine, struct thread *thread, struct perf_sample *sample, struct symbol **parent, - struct addr_location *root_al) + struct addr_location *root_al, + int max_stack) { int ret; ret = machine__resolve_callchain_sample(machine, thread, - sample->callchain, parent, root_al); + sample->callchain, parent, + root_al, max_stack); if (ret) return ret; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index d44c09bdc45e..4c1f5d567f54 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -92,7 +92,8 @@ int machine__resolve_callchain(struct machine *machine, struct thread *thread, struct perf_sample *sample, struct symbol **parent, - struct addr_location *root_al); + struct addr_location *root_al, + int max_stack); /* * Default guest kernel is defined by parameter --guestkallsyms diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 19fc71678c8e..854c5aa4db0d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1512,7 +1512,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event, if (symbol_conf.use_callchain && sample->callchain) { if (machine__resolve_callchain(machine, evsel, al.thread, - sample, NULL, NULL) != 0) { + sample, NULL, NULL, + PERF_MAX_STACK_DEPTH) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); return; -- cgit v1.2.3 From 5dbb6e81d85e55ee2b4cf523c1738e16f63e5400 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 18 Oct 2013 10:38:49 -0400 Subject: perf top: Add --max-stack option to limit callchain stack scan When the callgraph function is enabled (-G), it may take a long time to scan all the stack data and merge them accordingly. This patch adds a new --max-stack option to perf-top to limit the depth of callchain stack data to look at to reduce the time it takes for perf-top to finish its processing. It reduces the amount of information provided to the user in exchange for faster speed. Signed-off-by: Waiman Long Acked-by: David Ahern Tested-by: Davidlohr Bueso Cc: Adrian Hunter Cc: Aswin Chandramouleeswaran Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Scott J Norton Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382107129-2010-5-git-send-email-Waiman.Long@hp.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 8 ++++++++ tools/perf/builtin-top.c | 8 ++++++-- tools/perf/util/top.h | 1 + 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index f65777c1f723..c16a09e2f182 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -158,6 +158,14 @@ Default is to monitor all CPUS. Default: fractal,0.5,callee. +--max-stack:: + Set the stack depth limit when parsing the callchain, anything + beyond the specified depth will be ignored. This is a trade-off + between information loss and faster processing especially for + workloads that can have a very long callchain stack. + + Default: 127 + --ignore-callees=:: Ignore callees of the function(s) matching the given regex. This has the effect of collecting the callers of each such diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 112cb7d68e64..386d83324a8d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -771,7 +771,7 @@ static void perf_event__process_sample(struct perf_tool *tool, err = machine__resolve_callchain(machine, evsel, al.thread, sample, &parent, &al, - PERF_MAX_STACK_DEPTH); + top->max_stack); if (err) return; } @@ -1048,10 +1048,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) .user_freq = UINT_MAX, .user_interval = ULLONG_MAX, .freq = 4000, /* 4 KHz */ - .target = { + .target = { .uses_mmap = true, }, }, + .max_stack = PERF_MAX_STACK_DEPTH, .sym_pcnt_filter = 5, }; struct perf_record_opts *opts = &top.record_opts; @@ -1110,6 +1111,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts, "mode[,dump_size]", record_callchain_help, &parse_callchain_opt, "fp"), + OPT_INTEGER(0, "max-stack", &top.max_stack, + "Set the maximum stack depth when parsing the callchain. " + "Default: " __stringify(PERF_MAX_STACK_DEPTH)), OPT_CALLBACK(0, "ignore-callees", NULL, "regex", "ignore callees of these functions in call graphs", report_parse_ignore_callees_opt), diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index b554ffc462b6..88cfeaff600b 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -24,6 +24,7 @@ struct perf_top { u64 exact_samples; u64 guest_us_samples, guest_kernel_samples; int print_entries, count_filter, delay_secs; + int max_stack; bool hide_kernel_symbols, hide_user_symbols, zero; bool use_tui, use_stdio; bool kptr_restrict_warned; -- cgit v1.2.3