diff options
author | K Prateek Nayak <kprateek.nayak@amd.com> | 2023-05-17 20:27:42 +0300 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2023-05-23 22:08:08 +0300 |
commit | 995ed074b829f293586028560f2f27f47889df64 (patch) | |
tree | b615e840af969d6a8ff5166a2a36915a0d4dbd1b /tools/perf/util | |
parent | 2b72cec9eef19d73c2a4a3e603004fdf2d93d9e6 (diff) | |
download | linux-995ed074b829f293586028560f2f27f47889df64.tar.xz |
perf stat: Setup the foundation to allow aggregation based on cache topology
Processors based on chiplet architecture, such as AMD EPYC and Hygon, do
not expose the chiplet details in the sysfs CPU topology information.
However, this information can be derived from the per-CPU cache level
information in sysfs.
'perf stat' already supports aggregation based on topology information
using core ID, socket ID, etc. It would be useful to also aggregate
based on the cache topology to detect problems like imbalance and
cache-to-cache sharing at various cache levels.
This patch lays the foundation for aggregating data in 'perf stat' based
on the processor's cache topology. The cmdline option to aggregate data
based on the cache topology is added in Patch 4 of the series while this
patch sets up all the necessary functions and variables required to
support the new aggregation option.
The patch also adds support to display per-cache aggregation, or save it
as a JSON or CSV, as splitting it into a separate patch would break
builds when compiling with "-Werror=switch-enum" where the compiler will
complain about the lack of handling for the AGGR_CACHE case in the
output functions.
Committer notes:
Don't use perf_stat_config in tools/perf/util/cpumap.c: that would make
code that lives in util/, and thus is not supposed to be specific to a
single builtin, use a builtin-specific config structure.
Move the functions introduced in this patch out of
tools/perf/util/cpumap.c, since they need access to builtin-specific
configuration and do not strictly need to live in the util/ directory.
With this, 'perf test python' builds again.
Suggested-by: Gautham Shenoy <gautham.shenoy@amd.com>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ananth Narayan <ananth.narayan@amd.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Wen Pu <puwen@hygon.cn>
Link: https://lore.kernel.org/r/20230517172745.5833-3-kprateek.nayak@amd.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util')
-rw-r--r-- | tools/perf/util/cpumap.c | 10 | ||||
-rw-r--r-- | tools/perf/util/cpumap.h | 7 | ||||
-rw-r--r-- | tools/perf/util/stat-display.c | 17 | ||||
-rw-r--r-- | tools/perf/util/stat.h | 2 |
4 files changed, 36 insertions, 0 deletions
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 75d9c73e0184..a0719816a218 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -222,6 +222,10 @@ static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer) return a->socket - b->socket; else if (a->die != b->die) return a->die - b->die; + else if (a->cache_lvl != b->cache_lvl) + return a->cache_lvl - b->cache_lvl; + else if (a->cache != b->cache) + return a->cache - b->cache; else if (a->core != b->core) return a->core - b->core; else @@ -679,6 +683,8 @@ bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b a->node == b->node && a->socket == b->socket && a->die == b->die && + a->cache_lvl == b->cache_lvl && + a->cache == b->cache && a->core == b->core && a->cpu.cpu == b->cpu.cpu; } @@ -689,6 +695,8 @@ bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a) a->node == -1 && a->socket == -1 && a->die == -1 && + a->cache_lvl == -1 && + a->cache == -1 && a->core == -1 && a->cpu.cpu == -1; } @@ -700,6 +708,8 @@ struct aggr_cpu_id aggr_cpu_id__empty(void) .node = -1, .socket = -1, .die = -1, + .cache_lvl = -1, + .cache = -1, .core = -1, .cpu = (struct perf_cpu){ .cpu = -1 }, }; diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index e3426541e0aa..f394ccc0ccfb 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -20,6 +20,13 @@ struct aggr_cpu_id { int socket; /** The die id as read from /sys/devices/system/cpu/cpuX/topology/die_id. */ int die; + /** The cache level as read from /sys/devices/system/cpu/cpuX/cache/indexY/level */ + int cache_lvl; + /** + * The cache instance ID, which is the first CPU in the + * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list + */ + int cache; /** The core id as read from /sys/devices/system/cpu/cpuX/topology/core_id. */ int core; /** CPU aggregation, note there is one CPU for each SMT thread. 
*/ diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bf5a6c14dfcd..319f456f0673 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -36,6 +36,7 @@ static int aggr_header_lens[] = { [AGGR_CORE] = 18, + [AGGR_CACHE] = 22, [AGGR_DIE] = 12, [AGGR_SOCKET] = 6, [AGGR_NODE] = 6, @@ -46,6 +47,7 @@ static int aggr_header_lens[] = { static const char *aggr_header_csv[] = { [AGGR_CORE] = "core,cpus,", + [AGGR_CACHE] = "cache,cpus,", [AGGR_DIE] = "die,cpus,", [AGGR_SOCKET] = "socket,cpus,", [AGGR_NONE] = "cpu,", @@ -56,6 +58,7 @@ static const char *aggr_header_csv[] = { static const char *aggr_header_std[] = { [AGGR_CORE] = "core", + [AGGR_CACHE] = "cache", [AGGR_DIE] = "die", [AGGR_SOCKET] = "socket", [AGGR_NONE] = "cpu", @@ -193,6 +196,10 @@ static void print_aggr_id_std(struct perf_stat_config *config, case AGGR_CORE: snprintf(buf, sizeof(buf), "S%d-D%d-C%d", id.socket, id.die, id.core); break; + case AGGR_CACHE: + snprintf(buf, sizeof(buf), "S%d-D%d-L%d-ID%d", + id.socket, id.die, id.cache_lvl, id.cache); + break; case AGGR_DIE: snprintf(buf, sizeof(buf), "S%d-D%d", id.socket, id.die); break; @@ -239,6 +246,10 @@ static void print_aggr_id_csv(struct perf_stat_config *config, fprintf(output, "S%d-D%d-C%d%s%d%s", id.socket, id.die, id.core, sep, aggr_nr, sep); break; + case AGGR_CACHE: + fprintf(config->output, "S%d-D%d-L%d-ID%d%s%d%s", + id.socket, id.die, id.cache_lvl, id.cache, sep, aggr_nr, sep); + break; case AGGR_DIE: fprintf(output, "S%d-D%d%s%d%s", id.socket, id.die, sep, aggr_nr, sep); @@ -284,6 +295,10 @@ static void print_aggr_id_json(struct perf_stat_config *config, fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ", id.socket, id.die, id.core, aggr_nr); break; + case AGGR_CACHE: + fprintf(output, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"aggregate-number\" : %d, ", + id.socket, id.die, id.cache_lvl, id.cache, aggr_nr); + break; case AGGR_DIE: fprintf(output, "\"die\" : 
\"S%d-D%d\", \"aggregate-number\" : %d, ", id.socket, id.die, aggr_nr); @@ -1125,6 +1140,7 @@ static void print_header_interval_std(struct perf_stat_config *config, case AGGR_NODE: case AGGR_SOCKET: case AGGR_DIE: + case AGGR_CACHE: case AGGR_CORE: fprintf(output, "#%*s %-*s cpus", INTERVAL_LEN - 1, "time", @@ -1425,6 +1441,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf switch (config->aggr_mode) { case AGGR_CORE: + case AGGR_CACHE: case AGGR_DIE: case AGGR_SOCKET: case AGGR_NODE: diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index e35e188237c8..7abff7cbb5a1 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -48,6 +48,7 @@ enum aggr_mode { AGGR_GLOBAL, AGGR_SOCKET, AGGR_DIE, + AGGR_CACHE, AGGR_CORE, AGGR_THREAD, AGGR_UNSET, @@ -64,6 +65,7 @@ typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, str struct perf_stat_config { enum aggr_mode aggr_mode; + u32 aggr_level; bool scale; bool no_inherit; bool identifier; |