diff options
author | Kan Liang <kan.liang@linux.intel.com> | 2019-06-05 01:50:42 +0300 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2019-06-10 22:19:59 +0300 |
commit | db5742b6849ed7f01d764e6755b8ab2b422f29b2 (patch) | |
tree | 7b302de744f0c1c6b6af182e99754c1a34832153 /tools/perf/builtin-stat.c | |
parent | acae8b36cded0ee62038dedd0a44d54d5d673a96 (diff) | |
download | linux-db5742b6849ed7f01d764e6755b8ab2b422f29b2.tar.xz |
perf stat: Support per-die aggregation
It is useful to aggregate counts per die. E.g. Uncore becomes die-scope
on Xeon Cascade Lake-AP.
Introduce a new option "--per-die" to support per-die aggregation.
The global id for each core has been changed to socket + die id + core
id. The global id for each die is socket + die id.
Add die information for per-core aggregation. The output of per-core
aggregation will be changed from "S0-C0" to "S0-D0-C0". Any scripts
which rely on the output format of per-core aggregation will probably
be broken.
For 'perf stat record/report', there is no die information when
processing an old perf.data file, so the per-die result will be the
same as the per-socket result.
Committer notes:
Renamed 'die' variable to 'die_id' to fix the build on some systems:
CC /tmp/build/perf/builtin-script.o
cc1: warnings being treated as errors
builtin-stat.c: In function 'perf_env__get_die':
builtin-stat.c:963: error: declaration of 'die' shadows a global declaration
util/util.h:19: error: shadowed declaration is here
mv: cannot stat `/tmp/build/perf/.builtin-stat.o.tmp': No such file or directory
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lkml.kernel.org/n/tip-bsnhx7vgsuu6ei307mw60mbj@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 87 |
1 files changed, 81 insertions, 6 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 24b8e690fb69..272df8426f0a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -777,6 +777,8 @@ static struct option stat_options[] = { "stop workload and print counts after a timeout period in ms (>= 10ms)"), OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, "aggregate counts per processor socket", AGGR_SOCKET), + OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode, + "aggregate counts per processor die", AGGR_DIE), OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, "aggregate counts per physical processor core", AGGR_CORE), OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, @@ -801,6 +803,12 @@ static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, return cpu_map__get_socket(map, cpu, NULL); } +static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused, + struct cpu_map *map, int cpu) +{ + return cpu_map__get_die(map, cpu, NULL); +} + static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, struct cpu_map *map, int cpu) { @@ -841,6 +849,12 @@ static int perf_stat__get_socket_cached(struct perf_stat_config *config, return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); } +static int perf_stat__get_die_cached(struct perf_stat_config *config, + struct cpu_map *map, int idx) +{ + return perf_stat__get_aggr(config, perf_stat__get_die, map, idx); +} + static int perf_stat__get_core_cached(struct perf_stat_config *config, struct cpu_map *map, int idx) { @@ -871,6 +885,13 @@ static int perf_stat_init_aggr_mode(void) } stat_config.aggr_get_id = perf_stat__get_socket_cached; break; + case AGGR_DIE: + if (cpu_map__build_die_map(evsel_list->cpus, &stat_config.aggr_map)) { + perror("cannot build die map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_die_cached; + break; case AGGR_CORE: if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) { perror("cannot build core 
map"); @@ -936,21 +957,55 @@ static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) return cpu == -1 ? -1 : env->cpu[cpu].socket_id; } +static int perf_env__get_die(struct cpu_map *map, int idx, void *data) +{ + struct perf_env *env = data; + int die_id = -1, cpu = perf_env__get_cpu(env, map, idx); + + if (cpu != -1) { + /* + * Encode socket in bit range 15:8 + * die_id is relative to socket, + * we need a global id. So we combine + * socket + die id + */ + if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) + return -1; + + if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) + return -1; + + die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff); + } + + return die_id; +} + static int perf_env__get_core(struct cpu_map *map, int idx, void *data) { struct perf_env *env = data; int core = -1, cpu = perf_env__get_cpu(env, map, idx); if (cpu != -1) { - int socket_id = env->cpu[cpu].socket_id; - /* - * Encode socket in upper 16 bits - * core_id is relative to socket, and + * Encode socket in bit range 31:24 + * encode die id in bit range 23:16 + * core_id is relative to socket and die, * we need a global id. So we combine - * socket + core id. 
+ * socket + die id + core id */ - core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); + if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) + return -1; + + if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) + return -1; + + if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) + return -1; + + core = (env->cpu[cpu].socket_id << 24) | + (env->cpu[cpu].die_id << 16) | + (env->cpu[cpu].core_id & 0xffff); } return core; @@ -962,6 +1017,12 @@ static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); } +static int perf_env__build_die_map(struct perf_env *env, struct cpu_map *cpus, + struct cpu_map **diep) +{ + return cpu_map__build_map(cpus, diep, perf_env__get_die, env); +} + static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, struct cpu_map **corep) { @@ -973,6 +1034,11 @@ static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_un { return perf_env__get_socket(map, idx, &perf_stat.session->header.env); } +static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, + struct cpu_map *map, int idx) +{ + return perf_env__get_die(map, idx, &perf_stat.session->header.env); +} static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, struct cpu_map *map, int idx) @@ -992,6 +1058,13 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) } stat_config.aggr_get_id = perf_stat__get_socket_file; break; + case AGGR_DIE: + if (perf_env__build_die_map(env, evsel_list->cpus, &stat_config.aggr_map)) { + perror("cannot build die map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_die_file; + break; case AGGR_CORE: if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) { perror("cannot build core map"); @@ -1542,6 +1615,8 @@ static int __cmd_report(int argc, const 
char **argv) OPT_STRING('i', "input", &input_name, "file", "input file name"), OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, "aggregate counts per processor socket", AGGR_SOCKET), + OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode, + "aggregate counts per processor die", AGGR_DIE), OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, "aggregate counts per physical processor core", AGGR_CORE), OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, |