From cebf7d51a6c3babc4d0589da7aec0de1af0a5691 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 25 Sep 2019 09:14:46 +0800 Subject: perf diff: Report noisy for cycles diff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch prints the stddev and hist for the cycles diff of program block. It can help us to understand if the cycles is noisy or not. This patch is inspired by Andi Kleen's patch: https://lwn.net/Articles/600471/ We create new option '--cycles-hist'. Example: perf record -b ./div perf record -b ./div perf diff -c cycles # Baseline [Program Block Range] Cycles Diff Shared Object Symbol # ........ .......................................................... .... ................. ............................ # 46.72% [div.c:40 -> div.c:40] 0 div [.] main 46.72% [div.c:42 -> div.c:44] 0 div [.] main 46.72% [div.c:42 -> div.c:39] 0 div [.] main 20.54% [random_r.c:357 -> random_r.c:394] 1 libc-2.27.so [.] __random_r 20.54% [random_r.c:357 -> random_r.c:380] 0 libc-2.27.so [.] __random_r 20.54% [random_r.c:388 -> random_r.c:388] 0 libc-2.27.so [.] __random_r 20.54% [random_r.c:388 -> random_r.c:391] 0 libc-2.27.so [.] __random_r 17.04% [random.c:288 -> random.c:291] 0 libc-2.27.so [.] __random 17.04% [random.c:291 -> random.c:291] 0 libc-2.27.so [.] __random 17.04% [random.c:293 -> random.c:293] 0 libc-2.27.so [.] __random 17.04% [random.c:295 -> random.c:295] 0 libc-2.27.so [.] __random 17.04% [random.c:295 -> random.c:295] 0 libc-2.27.so [.] __random 17.04% [random.c:298 -> random.c:298] 0 libc-2.27.so [.] __random 8.40% [div.c:22 -> div.c:25] 0 div [.] compute_flag 8.40% [div.c:27 -> div.c:28] 0 div [.] compute_flag 5.14% [rand.c:26 -> rand.c:27] 0 libc-2.27.so [.] rand 5.14% [rand.c:28 -> rand.c:28] 0 libc-2.27.so [.] rand 2.15% [rand@plt+0 -> rand@plt+0] 0 div [.] rand@plt 0.00% [kernel.kallsyms] [k] __x86_indirect_thunk_rax 0.00% [do_mmap+714 -> do_mmap+732] -10 [kernel.kallsyms] [k] do_mmap 0.00% [do_mmap+737 -> do_mmap+765] 1 [kernel.kallsyms] [k] do_mmap 0.00% [do_mmap+262 -> do_mmap+299] 0 [kernel.kallsyms] [k] do_mmap 0.00% [__x86_indirect_thunk_r15+0 -> __x86_indirect_thunk_r15+0] 7 [kernel.kallsyms] [k] __x86_indirect_thunk_r15 0.00% [native_sched_clock+0 -> native_sched_clock+119] -1 [kernel.kallsyms] [k] native_sched_clock 0.00% [native_write_msr+0 -> native_write_msr+16] -13 [kernel.kallsyms] [k] native_write_msr When we enable the option '--cycles-hist', the output is perf diff -c cycles --cycles-hist # Baseline [Program Block Range] Cycles Diff stddev/Hist Shared Object Symbol # ........ .......................................................... .... ................. ................. ............................ # 46.72% [div.c:40 -> div.c:40] 0 ± 37.8% ▁█▁▁██▁█ div [.] main 46.72% [div.c:42 -> div.c:44] 0 ± 49.4% ▁▁▂█▂▂▂▂ div [.] main 46.72% [div.c:42 -> div.c:39] 0 ± 24.1% ▃█▂▄▁▃▂▁ div [.] main 20.54% [random_r.c:357 -> random_r.c:394] 1 ± 33.5% ▅▂▁█▃▁▂▁ libc-2.27.so [.] __random_r 20.54% [random_r.c:357 -> random_r.c:380] 0 ± 39.4% ▁▁█▁██▅▁ libc-2.27.so [.] __random_r 20.54% [random_r.c:388 -> random_r.c:388] 0 libc-2.27.so [.] __random_r 20.54% [random_r.c:388 -> random_r.c:391] 0 ± 41.2% ▁▃▁▂█▄▃▁ libc-2.27.so [.] __random_r 17.04% [random.c:288 -> random.c:291] 0 ± 48.8% ▁▁▁▁███▁ libc-2.27.so [.] __random 17.04% [random.c:291 -> random.c:291] 0 ±100.0% ▁█▁▁▁▁▁▁ libc-2.27.so [.] __random 17.04% [random.c:293 -> random.c:293] 0 ±100.0% ▁█▁▁▁▁▁▁ libc-2.27.so [.] __random 17.04% [random.c:295 -> random.c:295] 0 ±100.0% ▁█▁▁▁▁▁▁ libc-2.27.so [.] __random 17.04% [random.c:295 -> random.c:295] 0 libc-2.27.so [.] __random 17.04% [random.c:298 -> random.c:298] 0 ± 75.6% ▃█▁▁▁▁▁▁ libc-2.27.so [.] __random 8.40% [div.c:22 -> div.c:25] 0 ± 42.1% ▁▃▁▁███▁ div [.] compute_flag 8.40% [div.c:27 -> div.c:28] 0 ± 41.8% ██▁▁▄▁▁▄ div [.] compute_flag 5.14% [rand.c:26 -> rand.c:27] 0 ± 37.8% ▁▁▁████▁ libc-2.27.so [.] rand 5.14% [rand.c:28 -> rand.c:28] 0 libc-2.27.so [.] rand 2.15% [rand@plt+0 -> rand@plt+0] 0 div [.] rand@plt 0.00% [kernel.kallsyms] [k] __x86_indirect_thunk_rax 0.00% [do_mmap+714 -> do_mmap+732] -10 [kernel.kallsyms] [k] do_mmap 0.00% [do_mmap+737 -> do_mmap+765] 1 [kernel.kallsyms] [k] do_mmap 0.00% [do_mmap+262 -> do_mmap+299] 0 [kernel.kallsyms] [k] do_mmap 0.00% [__x86_indirect_thunk_r15+0 -> __x86_indirect_thunk_r15+0] 7 [kernel.kallsyms] [k] __x86_indirect_thunk_r15 0.00% [native_sched_clock+0 -> native_sched_clock+119] -1 ± 38.5% ▄█▁ [kernel.kallsyms] [k] native_sched_clock 0.00% [native_write_msr+0 -> native_write_msr+16] -13 ± 47.1% ▁█▇▃▁▁ [kernel.kallsyms] [k] native_write_msr v8: --- Rebase to perf/core branch v7: --- 1. v6 got Jiri's ACK. 2. Rebase to latest perf/core branch. v6: --- 1. Jiri provides better code for using data__hpp_register() in ui_init(). Use this code in v6. v5: --- 1. Refine the use of data__hpp_register() in ui_init() according to Jiri's suggestion. v4: --- 1. Rename the new option from '--noisy' to '--cycles-hist' 2. Remove the option '-n'. 3. Only update the spark value and stats when '--cycles-hist' is enabled. 4. Remove the code of printing '..'. v3: --- 1. Move the histogram to a separate column 2. Move the svals[] out of struct stats v2: --- Jiri got a compile error, CC builtin-diff.o builtin-diff.c: In function ‘compute_cycles_diff’: builtin-diff.c:712:10: error: taking the absolute value of unsigned type ‘u64’ {aka ‘long unsigned int’} has no effect [-Werror=absolute-value] 712 | labs(pair->block_info->cycles_spark[i] - | ^~~~ Because the result of u64 - u64 is still u64. Now we change the type of cycles_spark[] to s64. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190925011446.30678-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) (limited to 'tools/perf/builtin-diff.c') diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index c37a78677955..5281629c27b1 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -23,6 +23,7 @@ #include "util/time-utils.h" #include "util/annotate.h" #include "util/map.h" +#include "util/spark.h" #include #include #include @@ -53,6 +54,7 @@ enum { PERF_HPP_DIFF__FORMULA, PERF_HPP_DIFF__DELTA_ABS, PERF_HPP_DIFF__CYCLES, + PERF_HPP_DIFF__CYCLES_HIST, PERF_HPP_DIFF__MAX_INDEX }; @@ -87,6 +89,7 @@ static bool force; static bool show_period; static bool show_formula; static bool show_baseline_only; +static bool cycles_hist; static unsigned int sort_compute = 1; static s64 compute_wdiff_w1; @@ -164,6 +167,10 @@ static struct header_column { [PERF_HPP_DIFF__CYCLES] = { .name = "[Program Block Range] Cycles Diff", .width = 70, + }, + [PERF_HPP_DIFF__CYCLES_HIST] = { + .name = "stddev/Hist", + .width = NUM_SPARKS + 9, } }; @@ -610,6 +617,9 @@ static void init_block_info(struct block_info *bi, struct symbol *sym, bi->cycles_aggr = ch->cycles_aggr; bi->num = ch->num; bi->num_aggr = ch->num_aggr; + + memcpy(bi->cycles_spark, ch->cycles_spark, + NUM_SPARKS * sizeof(u64)); } static int process_block_per_sym(struct hist_entry *he) @@ -689,6 +699,21 @@ static struct hist_entry *get_block_pair(struct hist_entry *he, return NULL; } +static void init_spark_values(unsigned long *svals, int num) +{ + for (int i = 0; i < num; i++) + svals[i] = 0; +} + +static void update_spark_value(unsigned long *svals, int num, + struct stats *stats, u64 val) +{ + int n = stats->n; + + if (n < num) + svals[n] = val; +} + static void compute_cycles_diff(struct hist_entry *he, struct hist_entry *pair) { @@ -697,6 +722,26 @@ static void compute_cycles_diff(struct hist_entry *he, pair->diff.cycles = pair->block_info->cycles_aggr / pair->block_info->num_aggr - he->block_info->cycles_aggr / he->block_info->num_aggr; + + if (!cycles_hist) + return; + + init_stats(&pair->diff.stats); + init_spark_values(pair->diff.svals, NUM_SPARKS); + + for (int i = 0; i < pair->block_info->num; i++) { + u64 val; + + if (i >= he->block_info->num || i >= NUM_SPARKS) + break; + + val = labs(pair->block_info->cycles_spark[i] - + he->block_info->cycles_spark[i]); + + update_spark_value(pair->diff.svals, NUM_SPARKS, + &pair->diff.stats, val); + update_stats(&pair->diff.stats, val); + } } } @@ -1255,6 +1300,9 @@ static const struct option options[] = { "Show period values."), OPT_BOOLEAN('F', "formula", &show_formula, "Show formula."), + OPT_BOOLEAN(0, "cycles-hist", &cycles_hist, + "Show cycles histogram and standard deviation " + "- WARNING: use only with -c cycles."), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), @@ -1462,6 +1510,90 @@ static int hpp__color_cycles(struct perf_hpp_fmt *fmt, return __hpp__color_compare(fmt, hpp, he, COMPUTE_CYCLES); } +static int all_zero(unsigned long *vals, int len) +{ + int i; + + for (i = 0; i < len; i++) + if (vals[i] != 0) + return 0; + return 1; +} + +static int print_cycles_spark(char *bf, int size, unsigned long *svals, u64 n) +{ + int printed; + + if (n <= 1) + return 0; + + if (n > NUM_SPARKS) + n = NUM_SPARKS; + if (all_zero(svals, n)) + return 0; + + printed = print_spark(bf, size, svals, n); + printed += scnprintf(bf + printed, size - printed, " "); + return printed; +} + +static int hpp__color_cycles_hist(struct perf_hpp_fmt *fmt, + struct perf_hpp *hpp, struct hist_entry *he) +{ + struct diff_hpp_fmt *dfmt = + container_of(fmt, struct diff_hpp_fmt, fmt); + struct hist_entry *pair = get_pair_fmt(he, dfmt); + struct block_hist *bh = container_of(he, struct block_hist, he); + struct block_hist *bh_pair; + struct hist_entry *block_he; + char spark[32], buf[128]; + double r; + int ret, pad; + + if (!pair) { + if (bh->block_idx) + hpp->skip = true; + + goto no_print; + } + + bh_pair = container_of(pair, struct block_hist, he); + + block_he = hists__get_entry(&bh_pair->block_hists, bh->block_idx); + if (!block_he) { + hpp->skip = true; + goto no_print; + } + + ret = print_cycles_spark(spark, sizeof(spark), block_he->diff.svals, + block_he->diff.stats.n); + + r = rel_stddev_stats(stddev_stats(&block_he->diff.stats), + avg_stats(&block_he->diff.stats)); + + if (ret) { + /* + * Padding spaces if number of sparks less than NUM_SPARKS + * otherwise the output is not aligned. + */ + pad = NUM_SPARKS - ((ret - 1) / 3); + scnprintf(buf, sizeof(buf), "%s%5.1f%% %s", "\u00B1", r, spark); + ret = scnprintf(hpp->buf, hpp->size, "%*s", + dfmt->header_width, buf); + + if (pad) { + ret += scnprintf(hpp->buf + ret, hpp->size - ret, + "%-*s", pad, " "); + } + + return ret; + } + +no_print: + return scnprintf(hpp->buf, hpp->size, "%*s", + dfmt->header_width, " "); +} + static void hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size) { @@ -1667,6 +1799,10 @@ static void data__hpp_register(struct data__file *d, int idx) fmt->color = hpp__color_cycles; fmt->sort = hist_entry__cmp_nop; break; + case PERF_HPP_DIFF__CYCLES_HIST: + fmt->color = hpp__color_cycles_hist; + fmt->sort = hist_entry__cmp_nop; + break; default: fmt->sort = hist_entry__cmp_nop; break; @@ -1692,10 +1828,14 @@ static int ui_init(void) * PERF_HPP_DIFF__DELTA * PERF_HPP_DIFF__RATIO * PERF_HPP_DIFF__WEIGHTED_DIFF + * PERF_HPP_DIFF__CYCLES */ data__hpp_register(d, i ? compute_2_hpp[compute] : PERF_HPP_DIFF__BASELINE); + if (cycles_hist && i) + data__hpp_register(d, PERF_HPP_DIFF__CYCLES_HIST); + /* * And the rest: * @@ -1850,6 +1990,9 @@ int cmd_diff(int argc, const char **argv) if (quiet) perf_quiet_option(); + if (cycles_hist && (compute != COMPUTE_CYCLES)) + usage_with_options(diff_usage, options); + symbol__annotation_init(); if (symbol__init(NULL) < 0) -- cgit v1.2.3