diff options
Diffstat (limited to 'tools/power')
-rw-r--r-- | tools/power/acpi/os_specific/service_layers/oslinuxtbl.c | 2 | ||||
-rw-r--r-- | tools/power/acpi/tools/acpidump/apfiles.c | 2 | ||||
-rw-r--r-- | tools/power/cpupower/Makefile | 19 | ||||
-rw-r--r-- | tools/power/cpupower/lib/cpupower.c | 48 | ||||
-rw-r--r-- | tools/power/cpupower/lib/cpupower.h | 3 | ||||
-rw-r--r-- | tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c | 44 | ||||
-rw-r--r-- | tools/power/pm-graph/config/custom-timeline-functions.cfg | 4 | ||||
-rwxr-xr-x | tools/power/pm-graph/sleepgraph.py | 6 | ||||
-rw-r--r-- | tools/power/x86/intel-speed-select/Makefile | 2 | ||||
-rw-r--r-- | tools/power/x86/intel-speed-select/isst-config.c | 22 | ||||
-rw-r--r-- | tools/power/x86/intel-speed-select/isst-display.c | 11 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 14 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 172 |
13 files changed, 265 insertions, 84 deletions
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index 9d70d8c945af..987a5d32f3b6 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -19,7 +19,7 @@ ACPI_MODULE_NAME("oslinuxtbl") typedef struct osl_table_info { struct osl_table_info *next; u32 instance; - char signature[ACPI_NAMESEG_SIZE]; + char signature[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; } osl_table_info; diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index 13817f9112c0..9fc927fcc22a 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -103,7 +103,7 @@ int ap_open_output_file(char *pathname) int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance) { - char filename[ACPI_NAMESEG_SIZE + 16]; + char filename[ACPI_NAMESEG_SIZE + 16] ACPI_NONSTRING; char instance_str[16]; ACPI_FILE file; acpi_size actual; diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 51a95239fe06..835123add0ed 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -52,8 +52,11 @@ DESTDIR ?= # and _should_ modify the PACKAGE_BUGREPORT definition VERSION:= $(shell ./utils/version-gen.sh) -LIB_MAJ= 0.0.1 -LIB_MIN= 1 +LIB_FIX= 1 +LIB_MIN= 0 +LIB_MAJ= 1 +LIB_VER= $(LIB_MAJ).$(LIB_MIN).$(LIB_FIX) + PACKAGE = cpupower PACKAGE_BUGREPORT = linux-pm@vger.kernel.org @@ -200,14 +203,14 @@ $(OUTPUT)lib/%.o: $(LIB_SRC) $(LIB_HEADERS) $(ECHO) " CC " $@ $(QUIET) $(CC) $(CFLAGS) -fPIC -o $@ -c lib/$*.c -$(OUTPUT)libcpupower.so.$(LIB_MAJ): $(LIB_OBJS) +$(OUTPUT)libcpupower.so.$(LIB_VER): $(LIB_OBJS) $(ECHO) " LD " $@ $(QUIET) $(CC) -shared $(CFLAGS) $(LDFLAGS) -o $@ \ - -Wl,-soname,libcpupower.so.$(LIB_MIN) $(LIB_OBJS) + -Wl,-soname,libcpupower.so.$(LIB_MAJ) $(LIB_OBJS) @ln -sf $(@F) $(OUTPUT)libcpupower.so - @ln -sf $(@F) $(OUTPUT)libcpupower.so.$(LIB_MIN) + @ln -sf $(@F) $(OUTPUT)libcpupower.so.$(LIB_MAJ) -libcpupower: $(OUTPUT)libcpupower.so.$(LIB_MAJ) +libcpupower: $(OUTPUT)libcpupower.so.$(LIB_VER) # Let all .o files depend on its .c file and all headers # Might be worth to put this into utils/Makefile at some point of time @@ -217,7 +220,7 @@ $(OUTPUT)%.o: %.c $(ECHO) " CC " $@ $(QUIET) $(CC) $(CFLAGS) -I./lib -I ./utils -o $@ -c $*.c -$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_MAJ) +$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_VER) $(ECHO) " CC " $@ ifeq ($(strip $(STATIC)),true) $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lrt -lpci -L$(OUTPUT) -o $@ @@ -262,7 +265,7 @@ update-po: $(OUTPUT)po/$(PACKAGE).pot done; endif -compile-bench: $(OUTPUT)libcpupower.so.$(LIB_MAJ) +compile-bench: $(OUTPUT)libcpupower.so.$(LIB_VER) @V=$(V) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT) # we compile into subdirectories. if the target directory is not the diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c index 7a2ef691b20e..ce8dfb8e46ab 100644 --- a/tools/power/cpupower/lib/cpupower.c +++ b/tools/power/cpupower/lib/cpupower.c @@ -10,6 +10,7 @@ #include <stdio.h> #include <errno.h> #include <stdlib.h> +#include <string.h> #include "cpupower.h" #include "cpupower_intern.h" @@ -150,15 +151,25 @@ static int __compare(const void *t1, const void *t2) return 0; } +static int __compare_core_cpu_list(const void *t1, const void *t2) +{ + struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1; + struct cpuid_core_info *top2 = (struct cpuid_core_info *)t2; + + return strcmp(top1->core_cpu_list, top2->core_cpu_list); +} + /* * Returns amount of cpus, negative on error, cpu_top must be * passed to cpu_topology_release to free resources * - * Array is sorted after ->pkg, ->core, then ->cpu + * Array is sorted after ->cpu_smt_list ->pkg, ->core */ int get_cpu_topology(struct cpupower_topology *cpu_top) { int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF); + char path[SYSFS_PATH_MAX]; + char *last_cpu_list; cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) * cpus); if (cpu_top->core_info == NULL) @@ -183,6 +194,34 @@ int get_cpu_topology(struct cpupower_topology *cpu_top) cpu_top->core_info[cpu].core = -1; continue; } + if (cpu_top->core_info[cpu].core == -1) { + strncpy(cpu_top->core_info[cpu].core_cpu_list, "-1", CPULIST_BUFFER); + continue; + } + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s", + cpu, "core_cpus_list"); + if (cpupower_read_sysfs( + path, + cpu_top->core_info[cpu].core_cpu_list, + CPULIST_BUFFER) < 1) { + printf("Warning CPU%u has a 0 size core_cpus_list string", cpu); + } + } + + /* Count the number of distinct cpu lists to get the physical core + * count. + */ + qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info), + __compare_core_cpu_list); + + last_cpu_list = cpu_top->core_info[0].core_cpu_list; + cpu_top->cores = 1; + for (cpu = 1; cpu < cpus; cpu++) { + if (strcmp(cpu_top->core_info[cpu].core_cpu_list, last_cpu_list) != 0 && + cpu_top->core_info[cpu].pkg != -1) { + last_cpu_list = cpu_top->core_info[cpu].core_cpu_list; + cpu_top->cores++; + } } qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info), @@ -203,13 +242,6 @@ int get_cpu_topology(struct cpupower_topology *cpu_top) if (!(cpu_top->core_info[0].pkg == -1)) cpu_top->pkgs++; - /* Intel's cores count is not consecutively numbered, there may - * be a core_id of 3, but none of 2. Assume there always is 0 - * Get amount of cores by counting duplicates in a package - for (cpu = 0; cpu_top->core_info[cpu].pkg = 0 && cpu < cpus; cpu++) { - if (cpu_top->core_info[cpu].core == 0) - cpu_top->cores++; - */ return cpus; } diff --git a/tools/power/cpupower/lib/cpupower.h b/tools/power/cpupower/lib/cpupower.h index e4e4292eacec..2e67a080f203 100644 --- a/tools/power/cpupower/lib/cpupower.h +++ b/tools/power/cpupower/lib/cpupower.h @@ -2,6 +2,8 @@ #ifndef __CPUPOWER_CPUPOWER_H__ #define __CPUPOWER_CPUPOWER_H__ +#define CPULIST_BUFFER 5 + struct cpupower_topology { /* Amount of CPU cores, packages and threads per core in the system */ unsigned int cores; @@ -16,6 +18,7 @@ struct cpuid_core_info { int pkg; int core; int cpu; + char core_cpu_list[CPULIST_BUFFER]; /* flags */ unsigned int is_online:1; diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index f746099b5dac..e8b3841d5c0f 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -6,6 +6,7 @@ */ +#include <errno.h> #include <stdio.h> #include <unistd.h> #include <stdlib.h> @@ -91,7 +92,11 @@ int fill_string_with_spaces(char *s, int n) return 0; } -#define MAX_COL_WIDTH 6 +#define MAX_COL_WIDTH 6 +#define TOPOLOGY_DEPTH_PKG 3 +#define TOPOLOGY_DEPTH_CORE 2 +#define TOPOLOGY_DEPTH_CPU 1 + void print_header(int topology_depth) { int unsigned mon; @@ -113,12 +118,17 @@ void print_header(int topology_depth) } printf("\n"); - if (topology_depth > 2) + switch (topology_depth) { + case TOPOLOGY_DEPTH_PKG: printf(" PKG|"); - if (topology_depth > 1) + case TOPOLOGY_DEPTH_CORE: printf("CORE|"); - if (topology_depth > 0) + case TOPOLOGY_DEPTH_CPU: printf(" CPU|"); + break; + default: + return; + } for (mon = 0; mon < avail_monitors; mon++) { if (mon != 0) @@ -152,12 +162,17 @@ void print_results(int topology_depth, int cpu) cpu_top.core_info[cpu].pkg == -1) return; - if (topology_depth > 2) + switch (topology_depth) { + case TOPOLOGY_DEPTH_PKG: printf("%4d|", cpu_top.core_info[cpu].pkg); - if (topology_depth > 1) + case TOPOLOGY_DEPTH_CORE: printf("%4d|", cpu_top.core_info[cpu].core); - if (topology_depth > 0) + case TOPOLOGY_DEPTH_CPU: printf("%4d|", cpu_top.core_info[cpu].cpu); + break; + default: + return; + } for (mon = 0; mon < avail_monitors; mon++) { if (mon != 0) @@ -294,7 +309,10 @@ int fork_it(char **argv) if (!child_pid) { /* child */ - execvp(argv[0], argv); + if (execvp(argv[0], argv) == -1) { + printf("Invalid monitor command %s\n", argv[0]); + exit(errno); + } } else { /* parent */ if (child_pid == -1) { @@ -423,11 +441,13 @@ int cmd_monitor(int argc, char **argv) if (avail_monitors == 0) { printf(_("No HW Cstate monitors found\n")); + cpu_topology_release(cpu_top); return 1; } if (mode == list) { list_monitors(); + cpu_topology_release(cpu_top); exit(EXIT_SUCCESS); } @@ -448,15 +468,15 @@ int cmd_monitor(int argc, char **argv) /* ToDo: Topology parsing needs fixing first to do this more generically */ if (cpu_top.pkgs > 1) - print_header(3); + print_header(TOPOLOGY_DEPTH_PKG); else - print_header(1); + print_header(TOPOLOGY_DEPTH_CPU); for (cpu = 0; cpu < cpu_count; cpu++) { if (cpu_top.pkgs > 1) - print_results(3, cpu); + print_results(TOPOLOGY_DEPTH_PKG, cpu); else - print_results(1, cpu); + print_results(TOPOLOGY_DEPTH_CPU, cpu); } for (num = 0; num < avail_monitors; num++) { diff --git a/tools/power/pm-graph/config/custom-timeline-functions.cfg b/tools/power/pm-graph/config/custom-timeline-functions.cfg index 4f80ad7d7275..0321b59518f3 100644 --- a/tools/power/pm-graph/config/custom-timeline-functions.cfg +++ b/tools/power/pm-graph/config/custom-timeline-functions.cfg @@ -122,13 +122,13 @@ freeze_processes: freeze_kernel_threads: pm_restrict_gfp_mask: acpi_suspend_begin: -suspend_console: +console_suspend_all: acpi_pm_prepare: syscore_suspend: arch_enable_nonboot_cpus_end: syscore_resume: acpi_pm_finish: -resume_console: +console_resume_all: acpi_pm_end: pm_restore_gfp_mask: thaw_processes: diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 918eae58b0b4..e2261f33a082 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -210,13 +210,13 @@ class SystemValues: 'hibernate_preallocate_memory': {}, 'create_basic_memory_bitmaps': {}, 'swsusp_write': {}, - 'suspend_console': {}, + 'console_suspend_all': {}, 'acpi_pm_prepare': {}, 'syscore_suspend': {}, 'arch_enable_nonboot_cpus_end': {}, 'syscore_resume': {}, 'acpi_pm_finish': {}, - 'resume_console': {}, + 'console_resume_all': {}, 'acpi_pm_end': {}, 'pm_restore_gfp_mask': {}, 'thaw_processes': {}, @@ -3459,7 +3459,7 @@ def parseTraceLog(live=False): tracewatch = ['irq_wakeup'] if sysvals.usekprobes: tracewatch += ['sync_filesystems', 'freeze_processes', 'syscore_suspend', - 'syscore_resume', 'resume_console', 'thaw_processes', 'CPU_ON', + 'syscore_resume', 'console_resume_all', 'thaw_processes', 'CPU_ON', 'CPU_OFF', 'acpi_suspend'] # extract the callgraph and traceevent data diff --git a/tools/power/x86/intel-speed-select/Makefile b/tools/power/x86/intel-speed-select/Makefile index 7221f2f55e8b..8d3a02a20f3d 100644 --- a/tools/power/x86/intel-speed-select/Makefile +++ b/tools/power/x86/intel-speed-select/Makefile @@ -13,7 +13,7 @@ endif # Do not use make's built-in rules # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r -override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I/usr/include/libnl3 +override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I$(shell $(CC) -print-sysroot)/usr/include/libnl3 override LDFLAGS += -lnl-genl-3 -lnl-3 ALL_TARGETS := intel-speed-select diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index fadfb02b8611..eaa420ac848d 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -16,7 +16,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.21"; +static const char *version_str = "v1.22"; static const int supported_api_ver = 3; static struct isst_if_platform_info isst_platform_info; @@ -46,8 +46,9 @@ static int force_online_offline; static int auto_mode; static int fact_enable_fail; static int cgroupv2; +static int max_pkg_id; static int max_die_id; -static int max_punit_id; +static int max_die_id_package_0; /* clos related */ static int current_clos = -1; @@ -557,6 +558,8 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void if (id.pkg < 0 || id.die < 0 || id.punit < 0) continue; + id.die = id.die % (max_die_id_package_0 + 1); + valid_mask[id.pkg][id.die] = 1; if (cpus[id.pkg][id.die][id.punit] == -1) @@ -564,11 +567,11 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void } for (i = 0; i < MAX_PACKAGE_COUNT; i++) { - if (max_die_id == max_punit_id) { + if (max_die_id > max_pkg_id) { for (k = 0; k < MAX_PUNIT_PER_DIE && k < MAX_DIE_PER_PACKAGE; k++) { id.cpu = cpus[i][k][k]; id.pkg = i; - id.die = k; + id.die = get_physical_die_id(id.cpu); id.punit = k; if (isst_is_punit_valid(&id)) callback(&id, arg1, arg2, arg3, arg4); @@ -586,7 +589,10 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void for (k = 0; k < MAX_PUNIT_PER_DIE; k++) { id.cpu = cpus[i][j][k]; id.pkg = i; - id.die = j; + if (id.cpu >= 0) + id.die = get_physical_die_id(id.cpu); + else + id.die = id.pkg; id.punit = k; if (isst_is_punit_valid(&id)) callback(&id, arg1, arg2, arg3, arg4); @@ -788,6 +794,8 @@ static void create_cpu_map(void) cpu_map[i].die_id = die_id; cpu_map[i].core_id = core_id; + if (max_pkg_id < pkg_id) + max_pkg_id = pkg_id; punit_id = 0; @@ -812,8 +820,8 @@ static void create_cpu_map(void) if (max_die_id < die_id) max_die_id = die_id; - if (max_punit_id < cpu_map[i].punit_id) - max_punit_id = cpu_map[i].punit_id; + if (!pkg_id && max_die_id_package_0 < die_id) + max_die_id_package_0 = die_id; debug_printf( "map logical_cpu:%d core: %d die:%d pkg:%d punit:%d punit_cpu:%d punit_core:%d\n", diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index 07ebd08f3202..da5a59a4c545 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -173,7 +173,11 @@ static int print_package_info(struct isst_id *id, FILE *outf) if (out_format_is_json()) { if (api_version() > 1) { - if (id->cpu < 0) + if (id->die < 0 && id->cpu < 0) + snprintf(header, sizeof(header), + "package-%d:die-IO:powerdomain-%d:cpu-None", + id->pkg, id->punit); + else if (id->cpu < 0) snprintf(header, sizeof(header), "package-%d:die-%d:powerdomain-%d:cpu-None", id->pkg, id->die, id->punit); @@ -190,7 +194,10 @@ static int print_package_info(struct isst_id *id, FILE *outf) } snprintf(header, sizeof(header), "package-%d", id->pkg); format_and_print(outf, level++, header, NULL); - snprintf(header, sizeof(header), "die-%d", id->die); + if (id->die < 0) + snprintf(header, sizeof(header), "die-IO"); + else + snprintf(header, sizeof(header), "die-%d", id->die); format_and_print(outf, level++, header, NULL); if (api_version() > 1) { snprintf(header, sizeof(header), "powerdomain-%d", id->punit); diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index e4f9f93c123a..b74ed916057e 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -100,7 +100,7 @@ The column name "all" can be used to enable all disabled-by-default built-in cou .PP \fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. .PP -\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "sysfs", "other". +\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "other". "idle" (enabled by default), includes "hwidle" and "idle_pct". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "idle_pct". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle". .PP \fB--Dump\fP displays the raw counter values. .PP @@ -158,11 +158,15 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs. .PP -\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. +\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. +\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters. +\fBC1-, C2-, C3-...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a shallower idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/above file. These counters are in the "cpuidle" group, which is disabled, by default. +.PP +\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. These counters are in the "pct_idle" group, which is enabled by default. +.PP +\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters and are in the "hwidle" group, which is enabled, by default. .PP \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. .PP @@ -201,6 +205,8 @@ The system configuration dump (if --quiet is not used) is followed by statistics \fBUncMHz\fP per-package uncore MHz, instantaneous sample. .PP \fBUMHz1.0\fP per-package uncore MHz for domain=1 and fabric_cluster=0, instantaneous sample. System summary is the average of all packages. +Intel Granite Rapids systems use domains 0-2 for CPUs, and 3-4 for IO, with cluster always 0. +For the "--show" and "--hide" options, use "UncMHz" to operate on all UMHz*.* as a group. .SH TOO MUCH INFORMATION EXAMPLE By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters. This is ideal for remote debugging, use the "--out" option to save everything to a text file, and get that file to the expert helping you debug. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4155d9bfcfc6..444b6bfb4683 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -153,7 +153,7 @@ struct msr_counter bic[] = { { 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 }, - { 0x0, "sysfs", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "cpuidle", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 }, @@ -206,6 +206,7 @@ struct msr_counter bic[] = { { 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -219,7 +220,7 @@ struct msr_counter bic[] = { #define BIC_TSC_MHz (1ULL << 7) #define BIC_IRQ (1ULL << 8) #define BIC_SMI (1ULL << 9) -#define BIC_sysfs (1ULL << 10) +#define BIC_cpuidle (1ULL << 10) #define BIC_CPU_c1 (1ULL << 11) #define BIC_CPU_c3 (1ULL << 12) #define BIC_CPU_c6 (1ULL << 13) @@ -272,17 +273,20 @@ struct msr_counter bic[] = { #define BIC_Sys_J (1ULL << 60) #define BIC_NMI (1ULL << 61) #define BIC_CPU_c1e (1ULL << 62) - -#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) -#define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) -#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_pct_idle (1ULL << 63) + +#define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) +#define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) +#define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) +#define BIC_GROUP_HW_IDLE (BIC_Busy | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_GROUP_SW_IDLE (BIC_Busy | BIC_cpuidle | BIC_pct_idle ) +#define BIC_GROUP_IDLE (BIC_GROUP_HW_IDLE | BIC_pct_idle) #define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_cpuidle) unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); -unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; +unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_cpuidle | BIC_pct_idle | BIC_APIC | BIC_X2APIC; #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) #define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) @@ -1056,7 +1060,7 @@ static const struct platform_data turbostat_pdata[] = { * Missing support for * INTEL_ICELAKE * INTEL_ATOM_SILVERMONT_MID - * INTEL_ATOM_AIRMONT_MID + * INTEL_ATOM_SILVERMONT_MID2 * INTEL_ATOM_AIRMONT_NP */ { 0, NULL }, @@ -1121,7 +1125,7 @@ end: int backwards_count; char *progname; -#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */ +#define CPU_SUBSET_MAXCPUS 8192 /* need to use before probe... */ cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset; size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size; #define MAX_ADDED_THREAD_COUNTERS 24 @@ -2354,16 +2358,25 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) retval |= ~0; break; } else if (!strcmp(name_list, "topology")) { - retval |= BIC_TOPOLOGY; + retval |= BIC_GROUP_TOPOLOGY; break; } else if (!strcmp(name_list, "power")) { - retval |= BIC_THERMAL_PWR; + retval |= BIC_GROUP_THERMAL_PWR; break; } else if (!strcmp(name_list, "idle")) { - retval |= BIC_IDLE; + retval |= BIC_GROUP_IDLE; + break; + } else if (!strcmp(name_list, "swidle")) { + retval |= BIC_GROUP_SW_IDLE; + break; + } else if (!strcmp(name_list, "sysfs")) { /* legacy compatibility */ + retval |= BIC_GROUP_SW_IDLE; + break; + } else if (!strcmp(name_list, "hwidle")) { + retval |= BIC_GROUP_HW_IDLE; break; } else if (!strcmp(name_list, "frequency")) { - retval |= BIC_FREQUENCY; + retval |= BIC_GROUP_FREQUENCY; break; } else if (!strcmp(name_list, "other")) { retval |= BIC_OTHER; @@ -4752,6 +4765,38 @@ unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id) return (value & value_mask) >> value_shift; } + +/* Rapl domain enumeration helpers */ +static inline int get_rapl_num_domains(void) +{ + int num_packages = topo.max_package_id + 1; + int num_cores_per_package; + int num_cores; + + if (!platform->has_per_core_rapl) + return num_packages; + + num_cores_per_package = topo.max_core_id + 1; + num_cores = num_cores_per_package * num_packages; + + return num_cores; +} + +static inline int get_rapl_domain_id(int cpu) +{ + int nr_cores_per_package = topo.max_core_id + 1; + int rapl_core_id; + + if (!platform->has_per_core_rapl) + return cpus[cpu].physical_package_id; + + /* Compute the system-wide unique core-id for @cpu */ + rapl_core_id = cpus[cpu].physical_core_id; + rapl_core_id += cpus[cpu].physical_package_id * nr_cores_per_package; + + return rapl_core_id; +} + /* * get_counters(...) * migrate to cpu @@ -4807,7 +4852,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) goto done; if (platform->has_per_core_rapl) { - status = get_rapl_counters(cpu, c->core_id, c, p); + status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p); if (status != 0) return status; } @@ -4873,7 +4918,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) p->sys_lpi = cpuidle_cur_sys_lpi_us; if (!platform->has_per_core_rapl) { - status = get_rapl_counters(cpu, p->package_id, c, p); + status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p); if (status != 0) return status; } @@ -6713,7 +6758,18 @@ static void probe_intel_uncore_frequency_cluster(void) sprintf(path, "%s/current_freq_khz", path_base); sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id); - add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); + /* + * Once add_couter() is called, that counter is always read + * and reported -- So it is effectively (enabled & present). + * Only call add_counter() here if legacy BIC_UNCORE_MHZ (UncMHz) + * is (enabled). Since we are in this routine, we + * know we will not probe and set (present) the legacy counter. + * + * This allows "--show/--hide UncMHz" to be effective for + * the clustered MHz counters, as a group. + */ + if BIC_IS_ENABLED(BIC_UNCORE_MHZ) + add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); if (quiet) continue; @@ -7838,7 +7894,7 @@ void linux_perf_init(void) void rapl_perf_init(void) { - const unsigned int num_domains = (platform->has_per_core_rapl ? topo.max_core_id : topo.max_package_id) + 1; + const unsigned int num_domains = get_rapl_num_domains(); bool *domain_visited = calloc(num_domains, sizeof(bool)); rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain)); @@ -7879,8 +7935,7 @@ void rapl_perf_init(void) continue; /* Skip already seen and handled RAPL domains */ - next_domain = - platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id; + next_domain = get_rapl_domain_id(cpu); assert(next_domain < num_domains); @@ -9538,6 +9593,7 @@ int fork_it(char **argv) timersub(&tv_odd, &tv_even, &tv_delta); if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) fprintf(outf, "%s: Counter reset detected\n", progname); + delta_platform(&platform_counters_odd, &platform_counters_even); compute_average(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS); @@ -9569,7 +9625,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2025.02.02 - Len Brown <lenb@kernel.org>\n"); + fprintf(outf, "turbostat version 2025.04.06 - Len Brown <lenb@kernel.org>\n"); } #define COMMAND_LINE_SIZE 2048 @@ -9602,7 +9658,7 @@ struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name) for (mp = head; mp; mp = mp->next) { if (debug) fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name); - if (!strncmp(name, mp->name, strlen(mp->name))) + if (!strcmp(name, mp->name)) return mp; } return NULL; @@ -10249,12 +10305,13 @@ int is_deferred_skip(char *name) return 0; } -void probe_sysfs(void) +void probe_cpuidle_residency(void) { char path[64]; char name_buf[16]; FILE *input; int state; + int min_state = 1024, max_state = 0; char *sp; for (state = 10; state >= 0; --state) { @@ -10279,14 +10336,32 @@ void probe_sysfs(void) sprintf(path, "cpuidle/state%d/time", state); - if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + if (!DO_BIC(BIC_pct_idle) && !is_deferred_add(name_buf)) continue; if (is_deferred_skip(name_buf)) continue; add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0); + + if (state > max_state) + max_state = state; + if (state < min_state) + min_state = state; } +} + +void probe_cpuidle_counts(void) +{ + char path[64]; + char name_buf[16]; + FILE *input; + int state; + int min_state = 1024, max_state = 0; + char *sp; + + if (!DO_BIC(BIC_cpuidle)) + return; for (state = 10; state >= 0; --state) { @@ -10296,26 +10371,52 @@ void probe_sysfs(void) continue; if (!fgets(name_buf, sizeof(name_buf), input)) err(1, "%s: failed to read file", path); - /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ - sp = strchr(name_buf, '-'); - if (!sp) - sp = strchrnul(name_buf, '\n'); - *sp = '\0'; fclose(input); remove_underbar(name_buf); - sprintf(path, "cpuidle/state%d/usage", state); - - if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + if (!DO_BIC(BIC_cpuidle) && !is_deferred_add(name_buf)) continue; if (is_deferred_skip(name_buf)) continue; + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + sp = strchr(name_buf, '-'); + if (!sp) + sp = strchrnul(name_buf, '\n'); + + /* + * The 'below' sysfs file always contains 0 for the deepest state (largest index), + * do not add it. + */ + if (state != max_state) { + /* + * Add 'C1+' for C1, and so on. The 'below' sysfs file always contains 0 for + * the last state, so do not add it. + */ + + *sp = '+'; + *(sp + 1) = '\0'; + sprintf(path, "cpuidle/state%d/below", state); + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); + } + + *sp = '\0'; + sprintf(path, "cpuidle/state%d/usage", state); add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); - } + /* + * The 'above' sysfs file always contains 0 for the shallowest state (smallest + * index), do not add it. + */ + if (state != min_state) { + *sp = '-'; + *(sp + 1) = '\0'; + sprintf(path, "cpuidle/state%d/above", state); + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); + } + } } /* @@ -10559,7 +10660,8 @@ skip_cgroup_setting: print_bootcmd(); } - probe_sysfs(); + probe_cpuidle_residency(); + probe_cpuidle_counts(); if (!getuid()) set_rlimit(); |