From ab81f3fd350c510730adb1ca40ef55c2b2952121 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Oct 2011 19:16:15 -0300 Subject: perf top: Reuse the 'report' hist_entry/hists classes This actually fixes several problems we had in the old 'perf top': 1. Unresolved symbols not show, limitation that came from the old "KernelTop" codebase, to solve it we would need to do changes that would make sym_entry have most of the hist_entry fields. 2. It was using the number of samples, not the sum of sample->period. And brings the --sort code that allows us to have all the views in 'perf report', for instance: [root@emilia ~]# perf top --sort dso PerfTop: 5903 irqs/sec kernel:77.5% exact: 0.0% [1000Hz cycles], (all, 8 CPUs) ------------------------------------------------------------------------------ 31.59% libcrypto.so.1.0.0 21.55% [kernel] 18.57% libpython2.6.so.1.0 7.04% libc-2.12.so 6.99% _backend_agg.so 4.72% sshd 1.48% multiarray.so 1.39% libfreetype.so.6.3.22 1.37% perf 0.71% libgobject-2.0.so.0.2200.5 0.53% [tg3] 0.48% libglib-2.0.so.0.2200.5 0.44% libstdc++.so.6.0.13 0.40% libcairo.so.2.10800.8 0.38% libm-2.12.so 0.34% umath.so 0.30% libgdk-x11-2.0.so.0.1800.9 0.22% libpthread-2.12.so 0.20% libgtk-x11-2.0.so.0.1800.9 0.20% librt-2.12.so 0.15% _path.so 0.13% libpango-1.0.so.0.2800.1 0.11% libatlas.so.3.0 0.09% ft2font.so 0.09% libpangoft2-1.0.so.0.2800.1 0.08% libX11.so.6.3.0 0.07% [vdso] 0.06% cyclictest ^C All the filter lists can be used as well: --dsos, --comms, --symbols, etc. The 'perf report' TUI is also reused, being possible to apply all the zoom operations, do annotation, etc. This change will allow multiple simplifications in the symbol system as well, that will be detailed in upcoming changesets. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-xzaaldxq7zhqrrxdxjifk1mh@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'tools/perf/Documentation/perf-top.txt') diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index f6eb1cdafb77..d146ba33d691 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -106,6 +106,26 @@ Default is to monitor all CPUS. --zero:: Zero history across display updates. +-s:: +--sort:: + Sort by key(s): pid, comm, dso, symbol, parent + +-n:: +--show-nr-samples:: + Show a column with the number of samples. + +--show-total-period:: + Show a column with the sum of periods. + +--dsos:: + Only consider symbols in these dsos. + +--comms:: + Only consider symbols in these comms. + +--symbols:: + Only consider these symbols. + INTERACTIVE PROMPTING KEYS -------------------------- @@ -130,9 +150,6 @@ INTERACTIVE PROMPTING KEYS [S]:: Stop annotation, return to full profile display. -[w]:: - Toggle between weighted sum and individual count[E]r profile. - [z]:: Toggle event count zeroing across display updates. -- cgit v1.2.3 From 19d4ac3c1039fb952f4c073fd0898e9295a836c8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Oct 2011 19:30:22 -0300 Subject: perf top: Add callgraph support Just like in 'perf report', but live. Still needs to decay the callchains, but already somewhat useful as-is. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-cj3rmaf5jpsvi3v0tf7t4uvp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 15 ++++ tools/perf/builtin-top.c | 146 +++++++++++++++++++++++++++++++++- 2 files changed, 159 insertions(+), 2 deletions(-) (limited to 'tools/perf/Documentation/perf-top.txt') diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index d146ba33d691..e2e5cd0023cd 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -126,6 +126,21 @@ Default is to monitor all CPUS. --symbols:: Only consider these symbols. +-G [type,min,order]:: +--call-graph:: + Display call chains using type, min percent threshold and order. + type can be either: + - flat: single column, linear exposure of call chains. + - graph: use a graph tree, displaying absolute overhead rates. + - fractal: like graph, but displays relative rates. Each branch of + the tree is considered as a new profiled object. + + order can be either: + - callee: callee based call graph. + - caller: inverted caller based call graph. + + Default: fractal,0.5,callee. + INTERACTIVE PROMPTING KEYS -------------------------- diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 2cf5e50a6997..b9b7fe085895 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -76,6 +76,12 @@ static bool system_wide = false; static bool use_tui, use_stdio; +static bool sort_has_symbols; + +static bool dont_use_callchains; +static char callchain_default_opt[] = "fractal,0.5,callee"; + + static int default_interval = 0; static bool kptr_restrict_warned; @@ -648,9 +654,11 @@ static void perf_event__process_sample(const union perf_event *event, struct perf_sample *sample, struct perf_session *session) { + struct symbol *parent = NULL; u64 ip = event->ip.ip; struct addr_location al; struct machine *machine; + int err; u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; ++top.samples; @@ -748,13 +756,29 @@ static void perf_event__process_sample(const union perf_event *event, evsel = perf_evlist__id2evsel(top.evlist, sample->id); assert(evsel != NULL); + if ((sort__has_parent || symbol_conf.use_callchain) && + sample->callchain) { + err = perf_session__resolve_callchain(session, al.thread, + sample->callchain, &parent); + if (err) + return; + } + he = perf_session__add_hist_entry(session, &al, sample, evsel); if (he == NULL) { pr_err("Problem incrementing symbol period, skipping event\n"); return; } - record_precise_ip(he, evsel->idx, ip); + if (symbol_conf.use_callchain) { + err = callchain_append(he->callchain, &session->callchain_cursor, + sample->period); + if (err) + return; + } + + if (sort_has_symbols) + record_precise_ip(he, evsel->idx, ip); } return; @@ -808,6 +832,9 @@ static void start_counters(struct perf_evlist *evlist) attr->read_format |= PERF_FORMAT_ID; } + if (symbol_conf.use_callchain) + attr->sample_type |= PERF_SAMPLE_CALLCHAIN; + attr->mmap = 1; attr->comm = 1; attr->inherit = inherit; @@ -864,10 +891,27 @@ out_err: exit(0); } +static int setup_sample_type(void) +{ + if (!sort_has_symbols) { + if (symbol_conf.use_callchain) { + ui__warning("Selected -g but \"sym\" not present in --sort/-s."); + return -EINVAL; + } + } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE) { + if (callchain_register_param(&callchain_param) < 0) { + ui__warning("Can't register callchain params.\n"); + return -EINVAL; + } + } + + return 0; +} + static int __cmd_top(void) { pthread_t thread; - int ret __used; + int ret; /* * FIXME: perf_session__new should allow passing a O_MMAP, so that all this * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. @@ -876,6 +920,10 @@ static int __cmd_top(void) if (top.session == NULL) return -ENOMEM; + ret = setup_sample_type(); + if (ret) + goto out_delete; + if (top.target_tid != -1) perf_event__synthesize_thread_map(top.evlist->threads, perf_event__process, top.session); @@ -916,6 +964,90 @@ static int __cmd_top(void) ret = poll(top.evlist->pollfd, top.evlist->nr_fds, 100); } +out_delete: + perf_session__delete(top.session); + top.session = NULL; + + return 0; +} + +static int +parse_callchain_opt(const struct option *opt __used, const char *arg, + int unset) +{ + char *tok, *tok2; + char *endptr; + + /* + * --no-call-graph + */ + if (unset) { + dont_use_callchains = true; + return 0; + } + + symbol_conf.use_callchain = true; + + if (!arg) + return 0; + + tok = strtok((char *)arg, ","); + if (!tok) + return -1; + + /* get the output mode */ + if (!strncmp(tok, "graph", strlen(arg))) + callchain_param.mode = CHAIN_GRAPH_ABS; + + else if (!strncmp(tok, "flat", strlen(arg))) + callchain_param.mode = CHAIN_FLAT; + + else if (!strncmp(tok, "fractal", strlen(arg))) + callchain_param.mode = CHAIN_GRAPH_REL; + + else if (!strncmp(tok, "none", strlen(arg))) { + callchain_param.mode = CHAIN_NONE; + symbol_conf.use_callchain = false; + + return 0; + } + + else + return -1; + + /* get the min percentage */ + tok = strtok(NULL, ","); + if (!tok) + goto setup; + + callchain_param.min_percent = strtod(tok, &endptr); + if (tok == endptr) + return -1; + + /* get the print limit */ + tok2 = strtok(NULL, ","); + if (!tok2) + goto setup; + + if (tok2[0] != 'c') { + callchain_param.print_limit = strtod(tok2, &endptr); + tok2 = strtok(NULL, ","); + if (!tok2) + goto setup; + } + + /* get the call chain order */ + if (!strcmp(tok2, "caller")) + callchain_param.order = ORDER_CALLER; + else if (!strcmp(tok2, "callee")) + callchain_param.order = ORDER_CALLEE; + else + return -1; +setup: + if (callchain_register_param(&callchain_param) < 0) { + fprintf(stderr, "Can't register callchain params\n"); + return -1; + } return 0; } @@ -973,6 +1105,10 @@ static const struct option options[] = { "sort by key(s): pid, comm, dso, symbol, parent"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, "Show a column with the number of samples"), + OPT_CALLBACK_DEFAULT('G', "call-graph", NULL, "output_type,min_percent, call_order", + "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. " + "Default: fractal,0.5,callee", &parse_callchain_opt, + callchain_default_opt), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", @@ -1082,6 +1218,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); + /* + * Avoid annotation data structures overhead when symbols aren't on the + * sort list. + */ + sort_has_symbols = sort_sym.list.next != NULL; + get_term_dimensions(&winsize); if (top.print_entries == 0) { update_print_entries(&winsize); -- cgit v1.2.3 From 64c6f0c7f8db449e05ee16e35a7083df69addd1d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Oct 2011 12:48:31 -0300 Subject: perf tools: Make --no-asm-raw the default And add the annotation output knobs to all the tools that have integrated annotation (top, report). Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-gnlob67mke6sji2kf4nstp7m@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-annotate.txt | 3 +-- tools/perf/Documentation/perf-report.txt | 9 ++++++++- tools/perf/Documentation/perf-top.txt | 10 ++++++++++ tools/perf/builtin-annotate.c | 4 ++-- tools/perf/builtin-report.c | 4 ++++ tools/perf/builtin-top.c | 6 ++++++ tools/perf/util/symbol.c | 1 - 7 files changed, 31 insertions(+), 6 deletions(-) (limited to 'tools/perf/Documentation/perf-top.txt') diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index 0102d83600db..fe6762ed56bd 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -73,8 +73,7 @@ OPTIONS CPUs. --asm-raw:: - Show raw instruction encoding of assembly instructions. They - are displayed by default, disable with --no-asm-raw. + Show raw instruction encoding of assembly instructions. --source:: Interleave source code with assembly code. Enabled by default, diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 4ed17072ea88..212f24d672e1 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -137,6 +137,13 @@ OPTIONS -M:: --disassembler-style=:: Set disassembler style for objdump. +--source:: + Interleave source code with assembly code. Enabled by default, + disable with --no-source. + +--asm-raw:: + Show raw instruction encoding of assembly instructions. + --show-total-period:: Show a column with the sum of periods. -I:: @@ -147,4 +154,4 @@ OPTIONS SEE ALSO -------- -linkperf:perf-stat[1] +linkperf:perf-stat[1], linkperf:perf-annotate[1] diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index e2e5cd0023cd..b1a5bbbfebef 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -126,6 +126,16 @@ Default is to monitor all CPUS. --symbols:: Only consider these symbols. +-M:: +--disassembler-style=:: Set disassembler style for objdump. + +--source:: + Interleave source code with assembly code. Enabled by default, + disable with --no-source. + +--asm-raw:: + Show raw instruction encoding of assembly instructions. + -G [type,min,order]:: --call-graph:: Display call chains using type, min percent threshold and order. diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 24db9d2db7b4..3ea764a78053 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -272,9 +272,9 @@ static const struct option options[] = { OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", "Look for files with symbols relative to this directory"), - OPT_BOOLEAN('0', "source", &symbol_conf.annotate_src, + OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, "Interleave source code with assembly code (default)"), - OPT_BOOLEAN('0', "asm-raw", &symbol_conf.annotate_asm_raw, + OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 66fe822b181d..4d7c8340c326 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -491,6 +491,10 @@ static const struct option options[] = { OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), OPT_BOOLEAN('I', "show-info", &show_full_info, "Display extended information about perf.data file"), + OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, + "Interleave source code with assembly code (default)"), + OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, + "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index cc877bc83ebd..c5aebf6eb746 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1117,6 +1117,12 @@ static const struct option options[] = { "only consider symbols in these comms"), OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", "only consider these symbols"), + OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, + "Interleave source code with assembly code (default)"), + OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, + "Display raw encoding of assembly instructions (default)"), + OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", + "Specify disassembler style (e.g. -M intel for intel syntax)"), OPT_END() }; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 077df15ee705..3f09a23f71b4 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -46,7 +46,6 @@ struct symbol_conf symbol_conf = { .exclude_other = true, .use_modules = true, .try_vmlinux_path = true, - .annotate_asm_raw = true, .annotate_src = true, .symfs = "", }; -- cgit v1.2.3