From f01642e4912bb80a01d693f4cc6fb0897207a090 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 28 Aug 2019 13:59:32 +0800 Subject: perf metricgroup: Support multiple events for metricgroup Some uncore metrics don't work as expected. For example, on cascadelakex: root@lkp-csl-2sp2:~# perf stat -M UNC_M_PMM_BANDWIDTH.TOTAL -a -- sleep 1 Performance counter stats for 'system wide': 1841092 unc_m_pmm_rpq_inserts 3680816 unc_m_pmm_wpq_inserts 1.001775055 seconds time elapsed root@lkp-csl-2sp2:~# perf stat -M UNC_M_PMM_READ_LATENCY -a -- sleep 1 Performance counter stats for 'system wide': 860649746 unc_m_pmm_rpq_occupancy.all 1840557 unc_m_pmm_rpq_inserts 12790627455 unc_m_clockticks 1.001773348 seconds time elapsed No metrics 'UNC_M_PMM_BANDWIDTH.TOTAL' or 'UNC_M_PMM_READ_LATENCY' are reported. The issue is, the case of an alias expanding to mulitple events is not supported, typically the uncore events. (see comments in find_evsel_group()). For UNC_M_PMM_BANDWIDTH.TOTAL in above example, the expanded event group is '{unc_m_pmm_rpq_inserts,unc_m_pmm_wpq_inserts}:W', but the actual events passed to find_evsel_group are: unc_m_pmm_rpq_inserts unc_m_pmm_rpq_inserts unc_m_pmm_rpq_inserts unc_m_pmm_rpq_inserts unc_m_pmm_rpq_inserts unc_m_pmm_rpq_inserts unc_m_pmm_wpq_inserts unc_m_pmm_wpq_inserts unc_m_pmm_wpq_inserts unc_m_pmm_wpq_inserts unc_m_pmm_wpq_inserts unc_m_pmm_wpq_inserts For this multiple events case, it's not supported well. This patch introduces a new field 'metric_leader' in struct evsel. The first event is considered as a metric leader. For the rest of same events, they point to the first event via it's metric_leader field in struct evsel. This design is for adding the counting results of all same events to the first event in group (the metric_leader). With this patch, root@lkp-csl-2sp2:~# perf stat -M UNC_M_PMM_BANDWIDTH.TOTAL -a -- sleep 1 Performance counter stats for 'system wide': 1842108 unc_m_pmm_rpq_inserts # 337.2 MB/sec UNC_M_PMM_BANDWIDTH.TOTAL 3682209 unc_m_pmm_wpq_inserts 1.001819706 seconds time elapsed root@lkp-csl-2sp2:~# perf stat -M UNC_M_PMM_READ_LATENCY -a -- sleep 1 Performance counter stats for 'system wide': 861970685 unc_m_pmm_rpq_occupancy.all # 219.4 ns UNC_M_PMM_READ_LATENCY 1842772 unc_m_pmm_rpq_inserts 12790196356 unc_m_clockticks 1.001749103 seconds time elapsed Now we can see the correct metrics 'UNC_M_PMM_BANDWIDTH.TOTAL' and 'UNC_M_PMM_READ_LATENCY'. Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190828055932.8269-5-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'tools/perf/util/stat-shadow.c') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 696d263f6eb6..70c87fdb2a43 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -31,6 +31,8 @@ struct saved_value { int cpu; struct runtime_stat *stat; struct stats stats; + u64 metric_total; + int metric_other; }; static int saved_value_cmp(struct rb_node *rb_node, const void *entry) @@ -212,6 +214,7 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, { int ctx = evsel_context(counter); u64 count_ns = count; + struct saved_value *v; count *= counter->scale; @@ -266,9 +269,15 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, update_runtime_stat(st, STAT_APERF, ctx, cpu, count); if (counter->collect_stat) { - struct saved_value *v = saved_value_lookup(counter, cpu, true, - STAT_NONE, 0, st); + v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st); update_stats(&v->stats, count); + if (counter->metric_leader) + v->metric_total += count; + } else if (counter->metric_leader) { + v = saved_value_lookup(counter->metric_leader, + cpu, true, STAT_NONE, 0, st); + v->metric_total += count; + v->metric_other++; } } @@ -729,10 +738,10 @@ static void generic_metric(struct perf_stat_config *config, char *n, *pn; expr__ctx_init(&pctx); - expr__add_id(&pctx, name, avg); for (i = 0; metric_events[i]; i++) { struct saved_value *v; struct stats *stats; + u64 metric_total = 0; if (!strcmp(metric_events[i]->name, "duration_time")) { stats = &walltime_nsecs_stats; @@ -744,6 +753,9 @@ static void generic_metric(struct perf_stat_config *config, break; stats = &v->stats; scale = 1.0; + + if (v->metric_other) + metric_total = v->metric_total; } n = strdup(metric_events[i]->name); @@ -757,8 +769,15 @@ static void generic_metric(struct perf_stat_config *config, pn = strchr(n, ' '); if (pn) *pn = 0; - expr__add_id(&pctx, n, avg_stats(stats)*scale); + + if (metric_total) + expr__add_id(&pctx, n, metric_total); + else + expr__add_id(&pctx, n, avg_stats(stats)*scale); } + + expr__add_id(&pctx, name, avg); + if (!metric_events[i]) { const char *p = metric_expr; -- cgit v1.2.3