summaryrefslogtreecommitdiff
path: root/tools/power
diff options
context:
space:
mode:
Diffstat (limited to 'tools/power')
-rw-r--r--tools/power/acpi/common/cmfsize.c2
-rw-r--r--tools/power/acpi/common/getopt.c2
-rw-r--r--tools/power/acpi/os_specific/service_layers/oslinuxtbl.c4
-rw-r--r--tools/power/acpi/os_specific/service_layers/osunixdir.c2
-rw-r--r--tools/power/acpi/os_specific/service_layers/osunixmap.c2
-rw-r--r--tools/power/acpi/os_specific/service_layers/osunixxf.c2
-rw-r--r--tools/power/acpi/tools/acpidump/acpidump.h2
-rw-r--r--tools/power/acpi/tools/acpidump/apdump.c2
-rw-r--r--tools/power/acpi/tools/acpidump/apfiles.c4
-rw-r--r--tools/power/acpi/tools/acpidump/apmain.c2
-rw-r--r--tools/power/cpupower/Makefile33
-rw-r--r--tools/power/cpupower/README28
-rw-r--r--tools/power/cpupower/bench/parse.c4
-rw-r--r--tools/power/cpupower/bindings/python/Makefile18
-rw-r--r--tools/power/cpupower/bindings/python/README38
-rw-r--r--tools/power/cpupower/bindings/python/raw_pylibcpupower.swg5
-rw-r--r--tools/power/cpupower/cpupower-service.conf32
-rw-r--r--tools/power/cpupower/cpupower.service.in16
-rw-r--r--tools/power/cpupower/cpupower.sh26
-rw-r--r--tools/power/cpupower/lib/cpufreq.c18
-rw-r--r--tools/power/cpupower/lib/cpufreq.h8
-rw-r--r--tools/power/cpupower/lib/cpupower.c48
-rw-r--r--tools/power/cpupower/lib/cpupower.h3
-rw-r--r--tools/power/cpupower/utils/cpufreq-info.c36
-rw-r--r--tools/power/cpupower/utils/helpers/amd.c18
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c44
-rw-r--r--tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c4
-rw-r--r--tools/power/cpupower/utils/idle_monitor/mperf_monitor.c6
-rw-r--r--tools/power/cpupower/utils/idle_monitor/nhm_idle.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/snb_idle.c4
-rw-r--r--tools/power/pm-graph/config/custom-timeline-functions.cfg4
-rwxr-xr-xtools/power/pm-graph/sleepgraph.py9
-rw-r--r--tools/power/x86/intel-speed-select/Makefile2
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c35
-rw-r--r--tools/power/x86/intel-speed-select/isst-core-tpmi.c14
-rw-r--r--tools/power/x86/intel-speed-select/isst-display.c31
-rw-r--r--tools/power/x86/intel-speed-select/isst.h3
-rw-r--r--tools/power/x86/turbostat/turbostat.850
-rw-r--r--tools/power/x86/turbostat/turbostat.c1180
39 files changed, 1429 insertions, 314 deletions
diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c
index 68b9ea86b86c..af0e558f231c 100644
--- a/tools/power/acpi/common/cmfsize.c
+++ b/tools/power/acpi/common/cmfsize.c
@@ -3,7 +3,7 @@
*
* Module Name: cmfsize - Common get file size function
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c
index 6a0cdba6fdfd..3d63626d80e7 100644
--- a/tools/power/acpi/common/getopt.c
+++ b/tools/power/acpi/common/getopt.c
@@ -3,7 +3,7 @@
*
* Module Name: getopt
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
index 9d70d8c945af..9741e7503591 100644
--- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
+++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
@@ -3,7 +3,7 @@
*
* Module Name: oslinuxtbl - Linux OSL for obtaining ACPI tables
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -19,7 +19,7 @@ ACPI_MODULE_NAME("oslinuxtbl")
typedef struct osl_table_info {
struct osl_table_info *next;
u32 instance;
- char signature[ACPI_NAMESEG_SIZE];
+ char signature[ACPI_NAMESEG_SIZE] ACPI_NONSTRING;
} osl_table_info;
diff --git a/tools/power/acpi/os_specific/service_layers/osunixdir.c b/tools/power/acpi/os_specific/service_layers/osunixdir.c
index 39f3bffd9355..b9bb83116549 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixdir.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixdir.c
@@ -3,7 +3,7 @@
*
* Module Name: osunixdir - Unix directory access interfaces
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c
index 2b7d56252684..b93ebc9371a5 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixmap.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c
@@ -3,7 +3,7 @@
*
* Module Name: osunixmap - Unix OSL for file mappings
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c
index 46429417c71a..36f27491713c 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixxf.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c
@@ -3,7 +3,7 @@
*
* Module Name: osunixxf - UNIX OSL interfaces
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h
index 643e3e722340..fe0d23c4f2ed 100644
--- a/tools/power/acpi/tools/acpidump/acpidump.h
+++ b/tools/power/acpi/tools/acpidump/acpidump.h
@@ -3,7 +3,7 @@
*
* Module Name: acpidump.h - Include file for acpi_dump utility
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c
index 0742b00b61a1..bf30143efbdc 100644
--- a/tools/power/acpi/tools/acpidump/apdump.c
+++ b/tools/power/acpi/tools/acpidump/apdump.c
@@ -3,7 +3,7 @@
*
* Module Name: apdump - Dump routines for ACPI tables (acpidump)
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c
index 13817f9112c0..75db0091e275 100644
--- a/tools/power/acpi/tools/acpidump/apfiles.c
+++ b/tools/power/acpi/tools/acpidump/apfiles.c
@@ -3,7 +3,7 @@
*
* Module Name: apfiles - File-related functions for acpidump utility
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -103,7 +103,7 @@ int ap_open_output_file(char *pathname)
int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance)
{
- char filename[ACPI_NAMESEG_SIZE + 16];
+ char filename[ACPI_NAMESEG_SIZE + 16] ACPI_NONSTRING;
char instance_str[16];
ACPI_FILE file;
acpi_size actual;
diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c
index 666a9675e743..9f3850e3af5b 100644
--- a/tools/power/acpi/tools/acpidump/apmain.c
+++ b/tools/power/acpi/tools/acpidump/apmain.c
@@ -3,7 +3,7 @@
*
* Module Name: apmain - Main module for the acpidump utility
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index 51a95239fe06..c43db1c41205 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -2,6 +2,7 @@
# Makefile for cpupower
#
# Copyright (C) 2005,2006 Dominik Brodowski <linux@dominikbrodowski.net>
+# Copyright (C) 2025 Francesco Poli <invernomuto@paranoici.org>
#
# Based largely on the Makefile for udev by:
#
@@ -52,8 +53,11 @@ DESTDIR ?=
# and _should_ modify the PACKAGE_BUGREPORT definition
VERSION:= $(shell ./utils/version-gen.sh)
-LIB_MAJ= 0.0.1
-LIB_MIN= 1
+LIB_FIX= 1
+LIB_MIN= 0
+LIB_MAJ= 1
+LIB_VER= $(LIB_MAJ).$(LIB_MIN).$(LIB_FIX)
+
PACKAGE = cpupower
PACKAGE_BUGREPORT = linux-pm@vger.kernel.org
@@ -68,6 +72,8 @@ bindir ?= /usr/bin
sbindir ?= /usr/sbin
mandir ?= /usr/man
libdir ?= /usr/lib
+libexecdir ?= /usr/libexec
+unitdir ?= /usr/lib/systemd/system
includedir ?= /usr/include
localedir ?= /usr/share/locale
docdir ?= /usr/share/doc/packages/cpupower
@@ -80,6 +86,7 @@ CP = cp -fpR
INSTALL = /usr/bin/install -c
INSTALL_PROGRAM = ${INSTALL}
INSTALL_DATA = ${INSTALL} -m 644
+SETPERM_DATA = chmod 644
#bash completion scripts get sourced and so they should be rw only.
INSTALL_SCRIPT = ${INSTALL} -m 644
@@ -200,14 +207,14 @@ $(OUTPUT)lib/%.o: $(LIB_SRC) $(LIB_HEADERS)
$(ECHO) " CC " $@
$(QUIET) $(CC) $(CFLAGS) -fPIC -o $@ -c lib/$*.c
-$(OUTPUT)libcpupower.so.$(LIB_MAJ): $(LIB_OBJS)
+$(OUTPUT)libcpupower.so.$(LIB_VER): $(LIB_OBJS)
$(ECHO) " LD " $@
$(QUIET) $(CC) -shared $(CFLAGS) $(LDFLAGS) -o $@ \
- -Wl,-soname,libcpupower.so.$(LIB_MIN) $(LIB_OBJS)
+ -Wl,-soname,libcpupower.so.$(LIB_MAJ) $(LIB_OBJS)
@ln -sf $(@F) $(OUTPUT)libcpupower.so
- @ln -sf $(@F) $(OUTPUT)libcpupower.so.$(LIB_MIN)
+ @ln -sf $(@F) $(OUTPUT)libcpupower.so.$(LIB_MAJ)
-libcpupower: $(OUTPUT)libcpupower.so.$(LIB_MAJ)
+libcpupower: $(OUTPUT)libcpupower.so.$(LIB_VER)
# Let all .o files depend on its .c file and all headers
# Might be worth to put this into utils/Makefile at some point of time
@@ -217,7 +224,7 @@ $(OUTPUT)%.o: %.c
$(ECHO) " CC " $@
$(QUIET) $(CC) $(CFLAGS) -I./lib -I ./utils -o $@ -c $*.c
-$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_MAJ)
+$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_VER)
$(ECHO) " CC " $@
ifeq ($(strip $(STATIC)),true)
$(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lrt -lpci -L$(OUTPUT) -o $@
@@ -262,7 +269,7 @@ update-po: $(OUTPUT)po/$(PACKAGE).pot
done;
endif
-compile-bench: $(OUTPUT)libcpupower.so.$(LIB_MAJ)
+compile-bench: $(OUTPUT)libcpupower.so.$(LIB_VER)
@V=$(V) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT)
# we compile into subdirectories. if the target directory is not the
@@ -299,6 +306,13 @@ install-tools: $(OUTPUT)cpupower
$(INSTALL_PROGRAM) $(OUTPUT)cpupower $(DESTDIR)${bindir}
$(INSTALL) -d $(DESTDIR)${bash_completion_dir}
$(INSTALL_SCRIPT) cpupower-completion.sh '$(DESTDIR)${bash_completion_dir}/cpupower'
+ $(INSTALL) -d $(DESTDIR)${confdir}
+ $(INSTALL_DATA) cpupower-service.conf '$(DESTDIR)${confdir}'
+ $(INSTALL) -d $(DESTDIR)${libexecdir}
+ $(INSTALL_PROGRAM) cpupower.sh '$(DESTDIR)${libexecdir}/cpupower'
+ $(INSTALL) -d $(DESTDIR)${unitdir}
+ sed 's|___CDIR___|${confdir}|; s|___LDIR___|${libexecdir}|' cpupower.service.in > '$(DESTDIR)${unitdir}/cpupower.service'
+ $(SETPERM_DATA) '$(DESTDIR)${unitdir}/cpupower.service'
install-man:
$(INSTALL_DATA) -D man/cpupower.1 $(DESTDIR)${mandir}/man1/cpupower.1
@@ -333,6 +347,9 @@ uninstall:
- rm -f $(DESTDIR)${includedir}/cpufreq.h
- rm -f $(DESTDIR)${includedir}/cpuidle.h
- rm -f $(DESTDIR)${bindir}/utils/cpupower
+ - rm -f $(DESTDIR)${confdir}cpupower-service.conf
+ - rm -f $(DESTDIR)${libexecdir}/cpupower
+ - rm -f $(DESTDIR)${unitdir}/cpupower.service
- rm -f $(DESTDIR)${mandir}/man1/cpupower.1
- rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1
- rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-info.1
diff --git a/tools/power/cpupower/README b/tools/power/cpupower/README
index 2678ed81d311..9de449469568 100644
--- a/tools/power/cpupower/README
+++ b/tools/power/cpupower/README
@@ -59,6 +59,10 @@ $ sudo make install
-----------------------------------------------------------------------
| man pages | /usr/man |
-----------------------------------------------------------------------
+| systemd service | /usr/lib/systemd/system |
+-----------------------------------------------------------------------
+| systemd support script | /usr/libexec |
+-----------------------------------------------------------------------
To put it in other words it makes build results available system-wide,
enabling any user to simply start using it without any additional steps
@@ -109,6 +113,10 @@ The files will be installed to the following dirs:
-----------------------------------------------------------------------
| man pages | ${DESTDIR}/usr/man |
-----------------------------------------------------------------------
+| systemd service | ${DESTDIR}/usr/lib/systemd/system |
+-----------------------------------------------------------------------
+| systemd support script | ${DESTDIR}/usr/libexec |
+-----------------------------------------------------------------------
If you look at the table for the default 'make' output dirs you will
notice that the only difference with the non-default case is the
@@ -173,6 +181,26 @@ The issue is that binary cannot find the 'libcpupower' library. So, we
shall point to the lib dir:
sudo LD_LIBRARY_PATH=lib64/ ./bin/cpupower
+systemd service
+---------------
+
+A systemd service is also provided to run the cpupower utility at boot with
+settings read from a configuration file.
+
+If you want systemd to find the new service after the installation, the service
+unit must have been installed in one of the system unit search path directories
+(such as '/usr/lib/systemd/system/', which is the default location) and (unless
+you are willing to wait for the next reboot) you need to issue the following
+command:
+
+$ sudo systemctl daemon-reload
+
+If you want to enable this systemd service, edit '/etc/cpupower-service.conf'
+(uncommenting at least one of the options, depending on your preferences)
+and then issue the following command:
+
+$ sudo systemctl enable --now cpupower.service
+
THANKS
------
diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c
index 080678d9d74e..bd67c758b33a 100644
--- a/tools/power/cpupower/bench/parse.c
+++ b/tools/power/cpupower/bench/parse.c
@@ -121,6 +121,10 @@ out_dir:
struct config *prepare_default_config()
{
struct config *config = malloc(sizeof(struct config));
+ if (!config) {
+ perror("malloc");
+ return NULL;
+ }
dprintf("loading defaults\n");
diff --git a/tools/power/cpupower/bindings/python/Makefile b/tools/power/cpupower/bindings/python/Makefile
index e1ebb1d60cd4..81db39a03efb 100644
--- a/tools/power/cpupower/bindings/python/Makefile
+++ b/tools/power/cpupower/bindings/python/Makefile
@@ -1,21 +1,20 @@
# SPDX-License-Identifier: GPL-2.0-only
# Makefile for libcpupower's Python bindings
#
-# This Makefile expects you have already run the makefile for cpupower to build
-# the .o files in the lib directory for the bindings to be created.
+# This Makefile expects you have already run `make install-lib` in the lib
+# directory for the bindings to be created.
CC := gcc
HAVE_SWIG := $(shell if which swig >/dev/null 2>&1; then echo 1; else echo 0; fi)
HAVE_PYCONFIG := $(shell if which python-config >/dev/null 2>&1; then echo 1; else echo 0; fi)
-LIB_DIR := ../../lib
PY_INCLUDE = $(firstword $(shell python-config --includes))
-OBJECTS_LIB = $(wildcard $(LIB_DIR)/*.o)
+INSTALL_DIR = $(shell python3 -c "import site; print(site.getsitepackages()[0])")
all: _raw_pylibcpupower.so
_raw_pylibcpupower.so: raw_pylibcpupower_wrap.o
- $(CC) -shared $(OBJECTS_LIB) raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so
+ $(CC) -shared -lcpupower raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so
raw_pylibcpupower_wrap.o: raw_pylibcpupower_wrap.c
$(CC) -fPIC -c raw_pylibcpupower_wrap.c $(PY_INCLUDE)
@@ -28,6 +27,15 @@ else ifeq ($(HAVE_PYCONFIG),0)
endif
swig -python raw_pylibcpupower.swg
+# Only installs the Python bindings
+install: _raw_pylibcpupower.so
+ install -D _raw_pylibcpupower.so $(INSTALL_DIR)/_raw_pylibcpupower.so
+ install -D raw_pylibcpupower.py $(INSTALL_DIR)/raw_pylibcpupower.py
+
+uninstall:
+ rm -f $(INSTALL_DIR)/_raw_pylibcpupower.so
+ rm -f $(INSTALL_DIR)/raw_pylibcpupower.py
+
# Will only clean the bindings folder; will not clean the actual cpupower folder
clean:
rm -f raw_pylibcpupower.py raw_pylibcpupower_wrap.c raw_pylibcpupower_wrap.o _raw_pylibcpupower.so
diff --git a/tools/power/cpupower/bindings/python/README b/tools/power/cpupower/bindings/python/README
index 0a4bb2581e8a..2a4896b648b7 100644
--- a/tools/power/cpupower/bindings/python/README
+++ b/tools/power/cpupower/bindings/python/README
@@ -5,18 +5,21 @@ libcpupower (aside from the libcpupower object files).
requirements
------------
-* You need the object files in the libcpupower directory compiled by
-cpupower's makefile.
+* If you are building completely from upstream; please install libcpupower by
+running `make install-lib` within the cpupower directory. This installs the
+libcpupower.so file and symlinks needed. Otherwise, please make sure a symlink
+to libcpupower.so exists in your library path from your distribution's
+packages.
* The SWIG program must be installed.
-* The Python's development libraries installed.
+* The Python's development libraries must be installed.
Please check that your version of SWIG is compatible with the version of Python
installed on your machine by checking the SWIG changelog on their website.
https://swig.org/
Note that while SWIG itself is GPL v3+ licensed; the resulting output,
-the bindings code: is permissively licensed + the license of libcpupower's .o
-files. For these bindings that means GPL v2.
+the bindings code: is permissively licensed + the license of libcpupower's
+library files. For these bindings that means GPL v2.
Please see https://swig.org/legal.html and the discussion [1] for more details.
@@ -48,6 +51,31 @@ To run the test script:
$ python test_raw_pylibcpupower.py
+developing/using the bindings directly
+--------------------------------------
+
+You need to add the Python bindings directory to your $PYTHONPATH.
+
+You would set the path in the Bash terminal or in the Bash profile:
+
+PYTHONPATH=~/linux/tools/power/cpupower/bindings/python:$PYTHONPATH
+
+This allows you to set a specific repo of the bindings to use.
+
+
+installing/uninstalling
+-----------------------
+
+Python uses a system specific site-packages folder to look up modules to import
+by default. You do not need to install cpupower to use the SWIG bindings.
+
+You can install and uninstall the bindings to the site-packages with:
+
+sudo make install
+
+sudo make uninstall
+
+
credits
-------
diff --git a/tools/power/cpupower/bindings/python/raw_pylibcpupower.swg b/tools/power/cpupower/bindings/python/raw_pylibcpupower.swg
index 96556d87a745..d82af6fa93c3 100644
--- a/tools/power/cpupower/bindings/python/raw_pylibcpupower.swg
+++ b/tools/power/cpupower/bindings/python/raw_pylibcpupower.swg
@@ -134,6 +134,9 @@ void cpufreq_put_stats(struct cpufreq_stats *stats);
unsigned long cpufreq_get_transitions(unsigned int cpu);
+char *cpufreq_get_energy_performance_preference(unsigned int cpu);
+void cpufreq_put_energy_performance_preference(char *ptr);
+
int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy);
int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq);
@@ -160,6 +163,8 @@ int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate,
unsigned int disable);
unsigned long cpuidle_state_latency(unsigned int cpu,
unsigned int idlestate);
+unsigned long cpuidle_state_residency(unsigned int cpu,
+ unsigned int idlestate);
unsigned long cpuidle_state_usage(unsigned int cpu,
unsigned int idlestate);
unsigned long long cpuidle_state_time(unsigned int cpu,
diff --git a/tools/power/cpupower/cpupower-service.conf b/tools/power/cpupower/cpupower-service.conf
new file mode 100644
index 000000000000..02eabe8e3614
--- /dev/null
+++ b/tools/power/cpupower/cpupower-service.conf
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2012, Sébastien Luttringer
+# Copyright (C) 2024-2025, Francesco Poli <invernomuto@paranoici.org>
+
+# Configuration file for cpupower.service systemd service unit
+#
+# Edit this file (uncommenting at least one of the options, depending on
+# your preferences) and then enable cpupower.service, if you want cpupower
+# to run at boot with these settings.
+
+# --- CPU clock frequency ---
+
+# Define CPU governor
+# Valid governors: ondemand, performance, powersave, conservative, userspace
+#GOVERNOR='ondemand'
+
+# Limit frequency range
+# Valid suffixes: Hz, kHz (default), MHz, GHz, THz
+#MIN_FREQ="2.25GHz"
+#MAX_FREQ="3GHz"
+
+# Set a specific frequency
+# Requires userspace governor to be available.
+# If this option is set, all the previous frequency options are ignored
+#FREQ=
+
+# --- CPU policy ---
+
+# Set a register on supported Intel processore which allows software to convey
+# its policy for the relative importance of performance versus energy savings to
+# the processor. See man CPUPOWER-SET(1) for additional details
+#PERF_BIAS=
diff --git a/tools/power/cpupower/cpupower.service.in b/tools/power/cpupower/cpupower.service.in
new file mode 100644
index 000000000000..fbd5b8c14270
--- /dev/null
+++ b/tools/power/cpupower/cpupower.service.in
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2012-2020, Sébastien Luttringer
+# Copyright (C) 2024-2025, Francesco Poli <invernomuto@paranoici.org>
+
+[Unit]
+Description=Apply cpupower configuration
+ConditionVirtualization=!container
+
+[Service]
+Type=oneshot
+EnvironmentFile=-___CDIR___cpupower-service.conf
+ExecStart=___LDIR___/cpupower
+RemainAfterExit=yes
+
+[Install]
+WantedBy=multi-user.target
diff --git a/tools/power/cpupower/cpupower.sh b/tools/power/cpupower/cpupower.sh
new file mode 100644
index 000000000000..a37dd4cfdb2b
--- /dev/null
+++ b/tools/power/cpupower/cpupower.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2012, Sébastien Luttringer
+# Copyright (C) 2024, Francesco Poli <invernomuto@paranoici.org>
+
+ESTATUS=0
+
+# apply CPU clock frequency options
+if test -n "$FREQ"
+then
+ cpupower frequency-set -f "$FREQ" > /dev/null || ESTATUS=1
+elif test -n "${GOVERNOR}${MIN_FREQ}${MAX_FREQ}"
+then
+ cpupower frequency-set \
+ ${GOVERNOR:+ -g "$GOVERNOR"} \
+ ${MIN_FREQ:+ -d "$MIN_FREQ"} ${MAX_FREQ:+ -u "$MAX_FREQ"} \
+ > /dev/null || ESTATUS=1
+fi
+
+# apply CPU policy options
+if test -n "$PERF_BIAS"
+then
+ cpupower set -b "$PERF_BIAS" > /dev/null || ESTATUS=1
+fi
+
+exit $ESTATUS
diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c
index 1516d23c17c9..8dda3db2dff0 100644
--- a/tools/power/cpupower/lib/cpufreq.c
+++ b/tools/power/cpupower/lib/cpufreq.c
@@ -102,6 +102,10 @@ unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu,
if (len == 0)
return 0;
+ if (!strcmp(linebuf, "enabled\n"))
+ return 1;
+ if (!strcmp(linebuf, "disabled\n"))
+ return 0;
value = strtoul(linebuf, &endp, 0);
if (endp == linebuf || errno == ERANGE)
@@ -123,12 +127,14 @@ static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
enum cpufreq_string {
SCALING_DRIVER,
SCALING_GOVERNOR,
+ ENERGY_PERFORMANCE_PREFERENCE,
MAX_CPUFREQ_STRING_FILES
};
static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = {
[SCALING_DRIVER] = "scaling_driver",
[SCALING_GOVERNOR] = "scaling_governor",
+ [ENERGY_PERFORMANCE_PREFERENCE] = "energy_performance_preference",
};
@@ -203,6 +209,18 @@ unsigned long cpufreq_get_transition_latency(unsigned int cpu)
return sysfs_cpufreq_get_one_value(cpu, CPUINFO_LATENCY);
}
+char *cpufreq_get_energy_performance_preference(unsigned int cpu)
+{
+ return sysfs_cpufreq_get_one_string(cpu, ENERGY_PERFORMANCE_PREFERENCE);
+}
+
+void cpufreq_put_energy_performance_preference(char *ptr)
+{
+ if (!ptr)
+ return;
+ free(ptr);
+}
+
int cpufreq_get_hardware_limits(unsigned int cpu,
unsigned long *min,
unsigned long *max)
diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h
index 2f3c84035806..bfc617311ebd 100644
--- a/tools/power/cpupower/lib/cpufreq.h
+++ b/tools/power/cpupower/lib/cpufreq.h
@@ -68,6 +68,14 @@ unsigned long cpufreq_get_freq_hardware(unsigned int cpu);
unsigned long cpufreq_get_transition_latency(unsigned int cpu);
+/* determine energy performance preference
+ *
+ * returns NULL on failure, else the string that represents the energy performance
+ * preference requested.
+ */
+char *cpufreq_get_energy_performance_preference(unsigned int cpu);
+void cpufreq_put_energy_performance_preference(char *ptr);
+
/* determine hardware CPU frequency limits
*
* These may be limited further by thermal, energy or other
diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c
index 7a2ef691b20e..ce8dfb8e46ab 100644
--- a/tools/power/cpupower/lib/cpupower.c
+++ b/tools/power/cpupower/lib/cpupower.c
@@ -10,6 +10,7 @@
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
+#include <string.h>
#include "cpupower.h"
#include "cpupower_intern.h"
@@ -150,15 +151,25 @@ static int __compare(const void *t1, const void *t2)
return 0;
}
+static int __compare_core_cpu_list(const void *t1, const void *t2)
+{
+ struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1;
+ struct cpuid_core_info *top2 = (struct cpuid_core_info *)t2;
+
+ return strcmp(top1->core_cpu_list, top2->core_cpu_list);
+}
+
/*
* Returns amount of cpus, negative on error, cpu_top must be
* passed to cpu_topology_release to free resources
*
- * Array is sorted after ->pkg, ->core, then ->cpu
+ * Array is sorted after ->cpu_smt_list ->pkg, ->core
*/
int get_cpu_topology(struct cpupower_topology *cpu_top)
{
int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF);
+ char path[SYSFS_PATH_MAX];
+ char *last_cpu_list;
cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) * cpus);
if (cpu_top->core_info == NULL)
@@ -183,6 +194,34 @@ int get_cpu_topology(struct cpupower_topology *cpu_top)
cpu_top->core_info[cpu].core = -1;
continue;
}
+ if (cpu_top->core_info[cpu].core == -1) {
+ strncpy(cpu_top->core_info[cpu].core_cpu_list, "-1", CPULIST_BUFFER);
+ continue;
+ }
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s",
+ cpu, "core_cpus_list");
+ if (cpupower_read_sysfs(
+ path,
+ cpu_top->core_info[cpu].core_cpu_list,
+ CPULIST_BUFFER) < 1) {
+ printf("Warning CPU%u has a 0 size core_cpus_list string", cpu);
+ }
+ }
+
+ /* Count the number of distinct cpu lists to get the physical core
+ * count.
+ */
+ qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info),
+ __compare_core_cpu_list);
+
+ last_cpu_list = cpu_top->core_info[0].core_cpu_list;
+ cpu_top->cores = 1;
+ for (cpu = 1; cpu < cpus; cpu++) {
+ if (strcmp(cpu_top->core_info[cpu].core_cpu_list, last_cpu_list) != 0 &&
+ cpu_top->core_info[cpu].pkg != -1) {
+ last_cpu_list = cpu_top->core_info[cpu].core_cpu_list;
+ cpu_top->cores++;
+ }
}
qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info),
@@ -203,13 +242,6 @@ int get_cpu_topology(struct cpupower_topology *cpu_top)
if (!(cpu_top->core_info[0].pkg == -1))
cpu_top->pkgs++;
- /* Intel's cores count is not consecutively numbered, there may
- * be a core_id of 3, but none of 2. Assume there always is 0
- * Get amount of cores by counting duplicates in a package
- for (cpu = 0; cpu_top->core_info[cpu].pkg = 0 && cpu < cpus; cpu++) {
- if (cpu_top->core_info[cpu].core == 0)
- cpu_top->cores++;
- */
return cpus;
}
diff --git a/tools/power/cpupower/lib/cpupower.h b/tools/power/cpupower/lib/cpupower.h
index e4e4292eacec..2e67a080f203 100644
--- a/tools/power/cpupower/lib/cpupower.h
+++ b/tools/power/cpupower/lib/cpupower.h
@@ -2,6 +2,8 @@
#ifndef __CPUPOWER_CPUPOWER_H__
#define __CPUPOWER_CPUPOWER_H__
+#define CPULIST_BUFFER 5
+
struct cpupower_topology {
/* Amount of CPU cores, packages and threads per core in the system */
unsigned int cores;
@@ -16,6 +18,7 @@ struct cpuid_core_info {
int pkg;
int core;
int cpu;
+ char core_cpu_list[CPULIST_BUFFER];
/* flags */
unsigned int is_online:1;
diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c
index c96b77365c63..fc750e127404 100644
--- a/tools/power/cpupower/utils/cpufreq-info.c
+++ b/tools/power/cpupower/utils/cpufreq-info.c
@@ -120,7 +120,6 @@ static void print_duration(unsigned long duration)
} else
printf("%lu ns", duration);
}
- return;
}
static int get_boost_mode_x86(unsigned int cpu)
@@ -255,7 +254,12 @@ static int get_freq_kernel(unsigned int cpu, unsigned int human)
static int get_freq_hardware(unsigned int cpu, unsigned int human)
{
- unsigned long freq = cpufreq_get_freq_hardware(cpu);
+ unsigned long freq;
+
+ if (cpupower_cpu_info.caps & CPUPOWER_CAP_APERF)
+ return -EINVAL;
+
+ freq = cpufreq_get_freq_hardware(cpu);
printf(_(" current CPU frequency: "));
if (!freq) {
printf("Unable to call hardware\n");
@@ -418,12 +422,32 @@ static int get_freq_stats(unsigned int cpu, unsigned int human)
return 0;
}
+/* --epp / -z */
+
+static int get_epp(unsigned int cpu, bool interactive)
+{
+ char *epp;
+
+ epp = cpufreq_get_energy_performance_preference(cpu);
+ if (!epp)
+ return -EINVAL;
+ if (interactive)
+ printf(_(" energy performance preference: %s\n"), epp);
+
+ cpufreq_put_energy_performance_preference(epp);
+
+ return 0;
+}
+
/* --latency / -y */
static int get_latency(unsigned int cpu, unsigned int human)
{
unsigned long latency = cpufreq_get_transition_latency(cpu);
+ if (!get_epp(cpu, false))
+ return -EINVAL;
+
printf(_(" maximum transition latency: "));
if (!latency || latency == UINT_MAX) {
printf(_(" Cannot determine or is not supported.\n"));
@@ -457,6 +481,7 @@ static void debug_output_one(unsigned int cpu)
get_related_cpus(cpu);
get_affected_cpus(cpu);
get_latency(cpu, 1);
+ get_epp(cpu, true);
get_hardware_limits(cpu, 1);
freqs = cpufreq_get_available_frequencies(cpu);
@@ -497,6 +522,7 @@ static struct option info_opts[] = {
{"human", no_argument, NULL, 'm'},
{"no-rounding", no_argument, NULL, 'n'},
{"performance", no_argument, NULL, 'c'},
+ {"epp", no_argument, NULL, 'z'},
{ },
};
@@ -510,7 +536,7 @@ int cmd_freq_info(int argc, char **argv)
int output_param = 0;
do {
- ret = getopt_long(argc, argv, "oefwldpgrasmybnc", info_opts,
+ ret = getopt_long(argc, argv, "oefwldpgrasmybncz", info_opts,
NULL);
switch (ret) {
case '?':
@@ -534,6 +560,7 @@ int cmd_freq_info(int argc, char **argv)
case 's':
case 'y':
case 'c':
+ case 'z':
if (output_param) {
output_param = -1;
cont = 0;
@@ -643,6 +670,9 @@ int cmd_freq_info(int argc, char **argv)
case 'c':
ret = get_perf_cap(cpu);
break;
+ case 'z':
+ ret = get_epp(cpu, true);
+ break;
}
if (ret)
return ret;
diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c
index 0a56e22240fc..795562e879de 100644
--- a/tools/power/cpupower/utils/helpers/amd.c
+++ b/tools/power/cpupower/utils/helpers/amd.c
@@ -177,6 +177,8 @@ enum amd_pstate_value {
AMD_PSTATE_HIGHEST_PERF,
AMD_PSTATE_MAX_FREQ,
AMD_PSTATE_LOWEST_NONLINEAR_FREQ,
+ AMD_PSTATE_HW_PREFCORE,
+ AMD_PSTATE_PREFCORE_RANKING,
MAX_AMD_PSTATE_VALUE_READ_FILES,
};
@@ -184,6 +186,8 @@ static const char *amd_pstate_value_files[MAX_AMD_PSTATE_VALUE_READ_FILES] = {
[AMD_PSTATE_HIGHEST_PERF] = "amd_pstate_highest_perf",
[AMD_PSTATE_MAX_FREQ] = "amd_pstate_max_freq",
[AMD_PSTATE_LOWEST_NONLINEAR_FREQ] = "amd_pstate_lowest_nonlinear_freq",
+ [AMD_PSTATE_HW_PREFCORE] = "amd_pstate_hw_prefcore",
+ [AMD_PSTATE_PREFCORE_RANKING] = "amd_pstate_prefcore_ranking",
};
static unsigned long amd_pstate_get_data(unsigned int cpu,
@@ -215,7 +219,9 @@ void amd_pstate_boost_init(unsigned int cpu, int *support, int *active)
void amd_pstate_show_perf_and_freq(unsigned int cpu, int no_rounding)
{
- printf(_(" AMD PSTATE Highest Performance: %lu. Maximum Frequency: "),
+
+ printf(_(" amd-pstate limits:\n"));
+ printf(_(" Highest Performance: %lu. Maximum Frequency: "),
amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF));
/*
* If boost isn't active, the cpuinfo_max doesn't indicate real max
@@ -224,22 +230,26 @@ void amd_pstate_show_perf_and_freq(unsigned int cpu, int no_rounding)
print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ), no_rounding);
printf(".\n");
- printf(_(" AMD PSTATE Nominal Performance: %lu. Nominal Frequency: "),
+ printf(_(" Nominal Performance: %lu. Nominal Frequency: "),
acpi_cppc_get_data(cpu, NOMINAL_PERF));
print_speed(acpi_cppc_get_data(cpu, NOMINAL_FREQ) * 1000,
no_rounding);
printf(".\n");
- printf(_(" AMD PSTATE Lowest Non-linear Performance: %lu. Lowest Non-linear Frequency: "),
+ printf(_(" Lowest Non-linear Performance: %lu. Lowest Non-linear Frequency: "),
acpi_cppc_get_data(cpu, LOWEST_NONLINEAR_PERF));
print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_LOWEST_NONLINEAR_FREQ),
no_rounding);
printf(".\n");
- printf(_(" AMD PSTATE Lowest Performance: %lu. Lowest Frequency: "),
+ printf(_(" Lowest Performance: %lu. Lowest Frequency: "),
acpi_cppc_get_data(cpu, LOWEST_PERF));
print_speed(acpi_cppc_get_data(cpu, LOWEST_FREQ) * 1000, no_rounding);
printf(".\n");
+
+ printf(_(" Preferred Core Support: %lu. Preferred Core Ranking: %lu.\n"),
+ amd_pstate_get_data(cpu, AMD_PSTATE_HW_PREFCORE),
+ amd_pstate_get_data(cpu, AMD_PSTATE_PREFCORE_RANKING));
}
/* AMD P-State Helper Functions ************************************/
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
index f746099b5dac..e8b3841d5c0f 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -6,6 +6,7 @@
*/
+#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
@@ -91,7 +92,11 @@ int fill_string_with_spaces(char *s, int n)
return 0;
}
-#define MAX_COL_WIDTH 6
+#define MAX_COL_WIDTH 6
+#define TOPOLOGY_DEPTH_PKG 3
+#define TOPOLOGY_DEPTH_CORE 2
+#define TOPOLOGY_DEPTH_CPU 1
+
void print_header(int topology_depth)
{
int unsigned mon;
@@ -113,12 +118,17 @@ void print_header(int topology_depth)
}
printf("\n");
- if (topology_depth > 2)
+ switch (topology_depth) {
+ case TOPOLOGY_DEPTH_PKG:
printf(" PKG|");
- if (topology_depth > 1)
+ case TOPOLOGY_DEPTH_CORE:
printf("CORE|");
- if (topology_depth > 0)
+ case TOPOLOGY_DEPTH_CPU:
printf(" CPU|");
+ break;
+ default:
+ return;
+ }
for (mon = 0; mon < avail_monitors; mon++) {
if (mon != 0)
@@ -152,12 +162,17 @@ void print_results(int topology_depth, int cpu)
cpu_top.core_info[cpu].pkg == -1)
return;
- if (topology_depth > 2)
+ switch (topology_depth) {
+ case TOPOLOGY_DEPTH_PKG:
printf("%4d|", cpu_top.core_info[cpu].pkg);
- if (topology_depth > 1)
+ case TOPOLOGY_DEPTH_CORE:
printf("%4d|", cpu_top.core_info[cpu].core);
- if (topology_depth > 0)
+ case TOPOLOGY_DEPTH_CPU:
printf("%4d|", cpu_top.core_info[cpu].cpu);
+ break;
+ default:
+ return;
+ }
for (mon = 0; mon < avail_monitors; mon++) {
if (mon != 0)
@@ -294,7 +309,10 @@ int fork_it(char **argv)
if (!child_pid) {
/* child */
- execvp(argv[0], argv);
+ if (execvp(argv[0], argv) == -1) {
+ printf("Invalid monitor command %s\n", argv[0]);
+ exit(errno);
+ }
} else {
/* parent */
if (child_pid == -1) {
@@ -423,11 +441,13 @@ int cmd_monitor(int argc, char **argv)
if (avail_monitors == 0) {
printf(_("No HW Cstate monitors found\n"));
+ cpu_topology_release(cpu_top);
return 1;
}
if (mode == list) {
list_monitors();
+ cpu_topology_release(cpu_top);
exit(EXIT_SUCCESS);
}
@@ -448,15 +468,15 @@ int cmd_monitor(int argc, char **argv)
/* ToDo: Topology parsing needs fixing first to do
this more generically */
if (cpu_top.pkgs > 1)
- print_header(3);
+ print_header(TOPOLOGY_DEPTH_PKG);
else
- print_header(1);
+ print_header(TOPOLOGY_DEPTH_CPU);
for (cpu = 0; cpu < cpu_count; cpu++) {
if (cpu_top.pkgs > 1)
- print_results(3, cpu);
+ print_results(TOPOLOGY_DEPTH_PKG, cpu);
else
- print_results(1, cpu);
+ print_results(TOPOLOGY_DEPTH_CPU, cpu);
}
for (num = 0; num < avail_monitors; num++) {
diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
index 55e55b6b42f9..f5a2a326b1b7 100644
--- a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
@@ -117,7 +117,7 @@ static int hsw_ext_start(void)
for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) {
for (cpu = 0; cpu < cpu_count; cpu++) {
- hsw_ext_get_count(num, &val, cpu);
+ is_valid[cpu] = !hsw_ext_get_count(num, &val, cpu);
previous_count[num][cpu] = val;
}
}
@@ -134,7 +134,7 @@ static int hsw_ext_stop(void)
for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) {
for (cpu = 0; cpu < cpu_count; cpu++) {
- is_valid[cpu] = !hsw_ext_get_count(num, &val, cpu);
+ is_valid[cpu] |= !hsw_ext_get_count(num, &val, cpu);
current_count[num][cpu] = val;
}
}
diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
index 08a399b0be28..5ae02c3d5b64 100644
--- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
@@ -148,7 +148,7 @@ static int mperf_measure_stats(unsigned int cpu)
ret = get_aperf_mperf(cpu, &aval, &mval);
aperf_current_count[cpu] = aval;
mperf_current_count[cpu] = mval;
- is_valid[cpu] = !ret;
+ is_valid[cpu] |= !ret;
return 0;
}
@@ -240,9 +240,9 @@ static int mperf_stop(void)
int cpu;
for (cpu = 0; cpu < cpu_count; cpu++) {
- mperf_measure_stats(cpu);
- mperf_get_tsc(&tsc_at_measure_end[cpu]);
clock_gettime(CLOCK_REALTIME, &time_end[cpu]);
+ mperf_get_tsc(&tsc_at_measure_end[cpu]);
+ mperf_measure_stats(cpu);
}
return 0;
diff --git a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
index 16eaf006f61f..6b1733782ffa 100644
--- a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
@@ -151,7 +151,7 @@ static int nhm_stop(void)
for (num = 0; num < NHM_CSTATE_COUNT; num++) {
for (cpu = 0; cpu < cpu_count; cpu++) {
- is_valid[cpu] = !nhm_get_count(num, &val, cpu);
+ is_valid[cpu] |= !nhm_get_count(num, &val, cpu);
current_count[num][cpu] = val;
}
}
diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
index 811d63ab17a7..5969b88a85b4 100644
--- a/tools/power/cpupower/utils/idle_monitor/snb_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
@@ -115,7 +115,7 @@ static int snb_start(void)
for (num = 0; num < SNB_CSTATE_COUNT; num++) {
for (cpu = 0; cpu < cpu_count; cpu++) {
- snb_get_count(num, &val, cpu);
+ is_valid[cpu] = !snb_get_count(num, &val, cpu);
previous_count[num][cpu] = val;
}
}
@@ -132,7 +132,7 @@ static int snb_stop(void)
for (num = 0; num < SNB_CSTATE_COUNT; num++) {
for (cpu = 0; cpu < cpu_count; cpu++) {
- is_valid[cpu] = !snb_get_count(num, &val, cpu);
+ is_valid[cpu] |= !snb_get_count(num, &val, cpu);
current_count[num][cpu] = val;
}
}
diff --git a/tools/power/pm-graph/config/custom-timeline-functions.cfg b/tools/power/pm-graph/config/custom-timeline-functions.cfg
index 4f80ad7d7275..0321b59518f3 100644
--- a/tools/power/pm-graph/config/custom-timeline-functions.cfg
+++ b/tools/power/pm-graph/config/custom-timeline-functions.cfg
@@ -122,13 +122,13 @@ freeze_processes:
freeze_kernel_threads:
pm_restrict_gfp_mask:
acpi_suspend_begin:
-suspend_console:
+console_suspend_all:
acpi_pm_prepare:
syscore_suspend:
arch_enable_nonboot_cpus_end:
syscore_resume:
acpi_pm_finish:
-resume_console:
+console_resume_all:
acpi_pm_end:
pm_restore_gfp_mask:
thaw_processes:
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 918eae58b0b4..1555b51a7d55 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -210,13 +210,13 @@ class SystemValues:
'hibernate_preallocate_memory': {},
'create_basic_memory_bitmaps': {},
'swsusp_write': {},
- 'suspend_console': {},
+ 'console_suspend_all': {},
'acpi_pm_prepare': {},
'syscore_suspend': {},
'arch_enable_nonboot_cpus_end': {},
'syscore_resume': {},
'acpi_pm_finish': {},
- 'resume_console': {},
+ 'console_resume_all': {},
'acpi_pm_end': {},
'pm_restore_gfp_mask': {},
'thaw_processes': {},
@@ -3459,7 +3459,7 @@ def parseTraceLog(live=False):
tracewatch = ['irq_wakeup']
if sysvals.usekprobes:
tracewatch += ['sync_filesystems', 'freeze_processes', 'syscore_suspend',
- 'syscore_resume', 'resume_console', 'thaw_processes', 'CPU_ON',
+ 'syscore_resume', 'console_resume_all', 'thaw_processes', 'CPU_ON',
'CPU_OFF', 'acpi_suspend']
# extract the callgraph and traceevent data
@@ -4017,7 +4017,8 @@ def parseKernelLog(data):
'PM: early restore of devices complete after.*'],
'resume_complete': ['PM: resume of devices complete after.*',
'PM: restore of devices complete after.*'],
- 'post_resume': [r'.*Restarting tasks \.\.\..*'],
+ 'post_resume': [r'.*Restarting tasks \.\.\..*',
+ 'Done restarting tasks.*'],
}
# action table (expected events that occur and show up in dmesg)
diff --git a/tools/power/x86/intel-speed-select/Makefile b/tools/power/x86/intel-speed-select/Makefile
index 7221f2f55e8b..8d3a02a20f3d 100644
--- a/tools/power/x86/intel-speed-select/Makefile
+++ b/tools/power/x86/intel-speed-select/Makefile
@@ -13,7 +13,7 @@ endif
# Do not use make's built-in rules
# (this improves performance and avoids hard-to-debug behaviour);
MAKEFLAGS += -r
-override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I/usr/include/libnl3
+override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I$(shell $(CC) -print-sysroot)/usr/include/libnl3
override LDFLAGS += -lnl-genl-3 -lnl-3
ALL_TARGETS := intel-speed-select
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index 5127be34869e..0ce251b8d466 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -16,7 +16,7 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.20";
+static const char *version_str = "v1.23";
static const int supported_api_ver = 3;
static struct isst_if_platform_info isst_platform_info;
@@ -26,6 +26,7 @@ static FILE *outf;
static int cpu_model;
static int cpu_stepping;
+static int extended_family;
#define MAX_CPUS_IN_ONE_REQ 512
static short max_target_cpus;
@@ -46,8 +47,9 @@ static int force_online_offline;
static int auto_mode;
static int fact_enable_fail;
static int cgroupv2;
+static int max_pkg_id;
static int max_die_id;
-static int max_punit_id;
+static int max_die_id_package_0;
/* clos related */
static int current_clos = -1;
@@ -142,6 +144,14 @@ int is_icx_platform(void)
return 0;
}
+static int is_dmr_plus_platform(void)
+{
+ if (extended_family == 0x04)
+ return 1;
+
+ return 0;
+}
+
static int update_cpu_model(void)
{
unsigned int ebx, ecx, edx;
@@ -149,6 +159,7 @@ static int update_cpu_model(void)
__cpuid(1, fms, ebx, ecx, edx);
family = (fms >> 8) & 0xf;
+ extended_family = (fms >> 20) & 0x0f;
cpu_model = (fms >> 4) & 0xf;
if (family == 6 || family == 0xf)
cpu_model += ((fms >> 16) & 0xf) << 4;
@@ -557,6 +568,8 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void
if (id.pkg < 0 || id.die < 0 || id.punit < 0)
continue;
+ id.die = id.die % (max_die_id_package_0 + 1);
+
valid_mask[id.pkg][id.die] = 1;
if (cpus[id.pkg][id.die][id.punit] == -1)
@@ -564,11 +577,11 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void
}
for (i = 0; i < MAX_PACKAGE_COUNT; i++) {
- if (max_die_id == max_punit_id) {
+ if (max_die_id > max_pkg_id) {
for (k = 0; k < MAX_PUNIT_PER_DIE && k < MAX_DIE_PER_PACKAGE; k++) {
id.cpu = cpus[i][k][k];
id.pkg = i;
- id.die = k;
+ id.die = get_physical_die_id(id.cpu);
id.punit = k;
if (isst_is_punit_valid(&id))
callback(&id, arg1, arg2, arg3, arg4);
@@ -586,7 +599,10 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void
for (k = 0; k < MAX_PUNIT_PER_DIE; k++) {
id.cpu = cpus[i][j][k];
id.pkg = i;
- id.die = j;
+ if (id.cpu >= 0)
+ id.die = get_physical_die_id(id.cpu);
+ else
+ id.die = id.pkg;
id.punit = k;
if (isst_is_punit_valid(&id))
callback(&id, arg1, arg2, arg3, arg4);
@@ -788,6 +804,8 @@ static void create_cpu_map(void)
cpu_map[i].die_id = die_id;
cpu_map[i].core_id = core_id;
+ if (max_pkg_id < pkg_id)
+ max_pkg_id = pkg_id;
punit_id = 0;
@@ -812,8 +830,8 @@ static void create_cpu_map(void)
if (max_die_id < die_id)
max_die_id = die_id;
- if (max_punit_id < cpu_map[i].punit_id)
- max_punit_id = cpu_map[i].punit_id;
+ if (!pkg_id && max_die_id_package_0 < die_id)
+ max_die_id_package_0 = die_id;
debug_printf(
"map logical_cpu:%d core: %d die:%d pkg:%d punit:%d punit_cpu:%d punit_core:%d\n",
@@ -1509,7 +1527,8 @@ display_result:
usleep(2000);
/* Adjusting uncore freq */
- isst_adjust_uncore_freq(id, tdp_level, &ctdp_level);
+ if (!is_dmr_plus_platform())
+ isst_adjust_uncore_freq(id, tdp_level, &ctdp_level);
fprintf(stderr, "Option is set to online/offline\n");
ctdp_level.core_cpumask_size =
diff --git a/tools/power/x86/intel-speed-select/isst-core-tpmi.c b/tools/power/x86/intel-speed-select/isst-core-tpmi.c
index 32ea70c7dbd8..4f389e1c0525 100644
--- a/tools/power/x86/intel-speed-select/isst-core-tpmi.c
+++ b/tools/power/x86/intel-speed-select/isst-core-tpmi.c
@@ -227,6 +227,7 @@ static int tpmi_get_ctdp_control(struct isst_id *id, int config_index,
static int tpmi_get_tdp_info(struct isst_id *id, int config_index,
struct isst_pkg_ctdp_level_info *ctdp_level)
{
+ struct isst_perf_level_fabric_info fabric_info;
struct isst_perf_level_data_info info;
int ret;
@@ -253,6 +254,17 @@ static int tpmi_get_tdp_info(struct isst_id *id, int config_index,
ctdp_level->uncore_p1 = info.p1_fabric_freq_mhz;
ctdp_level->uncore_pm = info.pm_fabric_freq_mhz;
+ fabric_info.socket_id = id->pkg;
+ fabric_info.power_domain_id = id->punit;
+ fabric_info.level = config_index;
+
+ ret = tpmi_process_ioctl(ISST_IF_GET_PERF_LEVEL_FABRIC_INFO, &fabric_info);
+ if (ret != -1) {
+ ctdp_level->uncore1_p0 = fabric_info.p0_fabric_freq_mhz[1];
+ ctdp_level->uncore1_p1 = fabric_info.p1_fabric_freq_mhz[1];
+ ctdp_level->uncore1_pm = fabric_info.pm_fabric_freq_mhz[1];
+ }
+
debug_printf
("cpu:%d ctdp:%d CONFIG_TDP_GET_TDP_INFO tdp_ratio:%d pkg_tdp:%d ctdp_level->t_proc_hot:%d\n",
id->cpu, config_index, ctdp_level->tdp_ratio, ctdp_level->pkg_tdp,
@@ -329,7 +341,7 @@ static int tpmi_get_get_trls(struct isst_id *id, int config_index,
return 0;
}
-static int tpmi_get_get_trl(struct isst_id *id, int level, int config_index,
+static int tpmi_get_get_trl(struct isst_id *id, int config_index, int level,
int *trl)
{
struct isst_pkg_ctdp_level_info ctdp_level;
diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c
index 07ebd08f3202..e4884eb02837 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -173,7 +173,11 @@ static int print_package_info(struct isst_id *id, FILE *outf)
if (out_format_is_json()) {
if (api_version() > 1) {
- if (id->cpu < 0)
+ if (id->die < 0 && id->cpu < 0)
+ snprintf(header, sizeof(header),
+ "package-%d:die-IO:powerdomain-%d:cpu-None",
+ id->pkg, id->punit);
+ else if (id->cpu < 0)
snprintf(header, sizeof(header),
"package-%d:die-%d:powerdomain-%d:cpu-None",
id->pkg, id->die, id->punit);
@@ -190,7 +194,10 @@ static int print_package_info(struct isst_id *id, FILE *outf)
}
snprintf(header, sizeof(header), "package-%d", id->pkg);
format_and_print(outf, level++, header, NULL);
- snprintf(header, sizeof(header), "die-%d", id->die);
+ if (id->die < 0)
+ snprintf(header, sizeof(header), "die-IO");
+ else
+ snprintf(header, sizeof(header), "die-%d", id->die);
format_and_print(outf, level++, header, NULL);
if (api_version() > 1) {
snprintf(header, sizeof(header), "powerdomain-%d", id->punit);
@@ -453,6 +460,26 @@ void isst_ctdp_display_information(struct isst_id *id, FILE *outf, int tdp_level
format_and_print(outf, level + 2, header, value);
}
+ if (ctdp_level->uncore1_p1) {
+ snprintf(header, sizeof(header), "uncore-1-frequency-base(MHz)");
+ snprintf(value, sizeof(value), "%d",
+ ctdp_level->uncore1_p1 * isst_get_disp_freq_multiplier());
+ format_and_print(outf, level + 2, header, value);
+ }
+ if (ctdp_level->uncore1_pm) {
+ snprintf(header, sizeof(header), "uncore-1-frequency-min(MHz)");
+ snprintf(value, sizeof(value), "%d",
+ ctdp_level->uncore1_pm * isst_get_disp_freq_multiplier());
+ format_and_print(outf, level + 2, header, value);
+ }
+
+ if (ctdp_level->uncore1_p0) {
+ snprintf(header, sizeof(header), "uncore-1-frequency-max(MHz)");
+ snprintf(value, sizeof(value), "%d",
+ ctdp_level->uncore1_p0 * isst_get_disp_freq_multiplier());
+ format_and_print(outf, level + 2, header, value);
+ }
+
if (ctdp_level->mem_freq) {
snprintf(header, sizeof(header), "max-mem-frequency(MHz)");
snprintf(value, sizeof(value), "%d",
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 39ee75677c2c..960f647cfc2d 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -147,6 +147,9 @@ struct isst_pkg_ctdp_level_info {
int uncore_p0;
int uncore_p1;
int uncore_pm;
+ int uncore1_p0;
+ int uncore1_p1;
+ int uncore1_pm;
int sse_p1;
int avx2_p1;
int avx512_p1;
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index a7f7ed01421c..fb11108aaf42 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -100,7 +100,7 @@ The column name "all" can be used to enable all disabled-by-default built-in cou
.PP
\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names.
.PP
-\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "sysfs", "other".
+\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "other". "idle" (enabled by default), includes "hwidle" and "pct_idle". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "pct_idle". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle".
.PP
\fB--Dump\fP displays the raw counter values.
.PP
@@ -136,7 +136,7 @@ displays the statistics gathered since it was forked.
The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depends on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system.
.SH COLUMN DESCRIPTIONS
.PP
-\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any. This counter is disabled by default, and is enabled with "--enable usec", or --debug. On the summary row, usec refers to the total elapsed time to collect the counters on all cpus.
+\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any. This counter is disabled by default, and is enabled with "--enable usec", or --debug. On the summary row, usec refers to the total elapsed time to snapshot the procfs/sysfs and collect the counters on all cpus.
.PP
\fBTime_Of_Day_Seconds\fP For each CPU, the gettimeofday(2) value (seconds.subsec since Epoch) when the counters ending the measurement interval were collected. This column is disabled by default, and can be enabled with "--enable Time_Of_Day_Seconds" or "--debug". On the summary row, Time_Of_Day_Seconds refers to the timestamp following collection of counters on the last CPU.
.PP
@@ -158,16 +158,22 @@ The system configuration dump (if --quiet is not used) is followed by statistics
.PP
\fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs.
.PP
-\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system.
+\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. These counters are in the "cpuidle" group, which is disabled, by default.
.PP
-\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved.
+\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. These counters are in the "cpuidle" group, which is disabled, by default.
.PP
-\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters.
+\fBC1-, C2-, C3-...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a shallower idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/above file. These counters are in the "cpuidle" group, which is disabled, by default.
+.PP
+\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. These counters are in the "pct_idle" group, which is enabled by default.
+.PP
+\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters and are in the "hwidle" group, which is enabled, by default.
.PP
\fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
.PP
\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
.PP
+\fBCoreThr\fP Core Thermal Throttling events during the measurement interval. Note that events since boot can be find in /sys/devices/system/cpu/cpu*/thermal_throttle/*
+.PP
\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms or /sys/class/drm/card0/gt/gt0/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
.PP
\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
@@ -190,7 +196,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics
.PP
\fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors.
.PP
-\fBSysWatt\fP Watts consumed by the whole platform (RAPL PSYS). Disabled by default. Enable with --enable SysWatt.
+\fBSysWatt\fP Watts consumed by the whole platform (RAPL PSYS).
.PP
\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system. Note that the meaning of this field is model specific. For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits. Comparing PkgWatt and PkgTmp to system limits is necessary.
.PP
@@ -198,7 +204,9 @@ The system configuration dump (if --quiet is not used) is followed by statistics
.PP
\fBUncMHz\fP per-package uncore MHz, instantaneous sample.
.PP
-\fBUMHz1.0\fP per-package uncore MHz for domain=1 and fabric_cluster=0, instantaneous sample. System summary is the average of all packages.
+\fBUMHz1.0\fP per-package uncore MHz for pm_domain=1 and fabric_cluster=0, instantaneous sample. System summary is the average of all packages.
+Intel Granite Rapids systems use pm_domains 0-2 for CPUs, and 3-4 for IO, with cluster always 0.
+For the "--show" and "--hide" options, use "UncMHz" to operate on all UMHz*.* as a group.
.SH TOO MUCH INFORMATION EXAMPLE
By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters.
This is ideal for remote debugging, use the "--out" option to save everything to a text file, and get that file to the expert helping you debug.
@@ -516,14 +524,40 @@ that they count at TSC rate, which is true on all processors tested to date.
Volume 3B: System Programming Guide"
https://www.intel.com/products/processor/manuals/
+.SH RUN THE LATEST VERSION
+If turbostat complains that it doesn't recognize your processor,
+please try the latest version.
+
+The latest version of turbostat does not require the latest version of the Linux kernel.
+However, some features, such as perf(1) counters, do require kernel support.
+
+The latest turbostat release is available in the upstream Linux Kernel source tree.
+eg. "git pull https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git"
+and run make in tools/power/x86/turbostat/.
+
+n.b. "make install" will update your system manually, but a distro update may subsequently downgrade your turbostat to an older version.
+For this reason, manually installing to /usr/local/bin may be what you want.
+
+Note that turbostat/Makefile has a "make snapshot" target, which will create a tar file
+that can build without a local kernel source tree.
+
+If the upstream version isn't new enough, the development tree can be found here:
+"git pull https://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat"
+
+If the development tree doesn't work, please contact the author via chat,
+or via email with the word "turbostat" on the Subject line.
+
.SH FILES
.ta
.nf
+/sys/bus/event_source/devices/
/dev/cpu/*/msr
+/sys/class/intel_pmt/
+/sys/devices/system/cpu/
.fi
.SH "SEE ALSO"
-msr(4), vmstat(8)
+perf(1), msr(4), vmstat(8)
.PP
.SH AUTHOR
.nf
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 8ec677c639ec..5c747131f155 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -3,7 +3,7 @@
* turbostat -- show CPU frequency and C-state residency
* on modern Intel and AMD processors.
*
- * Copyright (c) 2024 Intel Corporation.
+ * Copyright (c) 2025 Intel Corporation.
* Len Brown <len.brown@intel.com>
*/
@@ -67,6 +67,7 @@
#include <stdbool.h>
#include <assert.h>
#include <linux/kernel.h>
+#include <limits.h>
#define UNUSED(x) (void)(x)
@@ -153,7 +154,7 @@ struct msr_counter bic[] = {
{ 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 },
- { 0x0, "sysfs", NULL, 0, 0, 0, NULL, 0 },
+ { 0x0, "cpuidle", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 },
@@ -204,6 +205,9 @@ struct msr_counter bic[] = {
{ 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 },
+ { 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 },
+ { 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 },
+ { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 },
};
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -217,7 +221,7 @@ struct msr_counter bic[] = {
#define BIC_TSC_MHz (1ULL << 7)
#define BIC_IRQ (1ULL << 8)
#define BIC_SMI (1ULL << 9)
-#define BIC_sysfs (1ULL << 10)
+#define BIC_cpuidle (1ULL << 10)
#define BIC_CPU_c1 (1ULL << 11)
#define BIC_CPU_c3 (1ULL << 12)
#define BIC_CPU_c6 (1ULL << 13)
@@ -268,17 +272,22 @@ struct msr_counter bic[] = {
#define BIC_Diec6 (1ULL << 58)
#define BIC_SysWatt (1ULL << 59)
#define BIC_Sys_J (1ULL << 60)
+#define BIC_NMI (1ULL << 61)
+#define BIC_CPU_c1e (1ULL << 62)
+#define BIC_pct_idle (1ULL << 63)
-#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
-#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
-#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
-#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
-#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
+#define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die)
+#define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt)
+#define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
+#define BIC_GROUP_HW_IDLE (BIC_Busy | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
+#define BIC_GROUP_SW_IDLE (BIC_Busy | BIC_cpuidle | BIC_pct_idle )
+#define BIC_GROUP_IDLE (BIC_GROUP_HW_IDLE | BIC_pct_idle)
+#define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
-#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_SysWatt | BIC_Sys_J)
+#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_cpuidle)
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
-unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
+unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_cpuidle | BIC_pct_idle | BIC_APIC | BIC_X2APIC;
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
@@ -328,6 +337,7 @@ unsigned int rapl_joules;
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
+unsigned int force_load;
unsigned int has_aperf;
unsigned int has_aperf_access;
unsigned int has_epb;
@@ -355,7 +365,7 @@ unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int tj_max;
unsigned int tj_max_override;
double rapl_power_units, rapl_time_units;
-double rapl_dram_energy_units, rapl_energy_units;
+double rapl_dram_energy_units, rapl_energy_units, rapl_psys_energy_units;
double rapl_joule_counter_range;
unsigned int crystal_hz;
unsigned long long tsc_hz;
@@ -367,6 +377,9 @@ unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
unsigned int first_counter_read = 1;
+
+static struct timeval procsysfs_tv_begin;
+
int ignore_stdin;
bool no_msr;
bool no_perf;
@@ -421,6 +434,7 @@ struct platform_features {
bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */
+ bool has_fixed_rapl_psys_unit; /* Fixed Energy Unit used for PSYS RAPL Domain */
int rapl_quirk_tdp; /* Hardcoded TDP value when cannot be retrieved from hardware */
int tcc_offset_bits; /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */
bool enable_tsc_tweak; /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */
@@ -526,7 +540,7 @@ enum rapl_msrs {
#define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO)
#define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO)
#define RAPL_CORE_ALL (RAPL_CORE | RAPL_CORE_POLICY)
-#define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLIGY)
+#define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLICY)
#define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT)
@@ -821,10 +835,29 @@ static const struct platform_features spr_features = {
.has_msr_core_c1_res = 1,
.has_irtl_msrs = 1,
.has_cst_prewake_bit = 1,
+ .has_fixed_rapl_psys_unit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};
+static const struct platform_features dmr_features = {
+ .has_msr_misc_feature_control = spr_features.has_msr_misc_feature_control,
+ .has_msr_misc_pwr_mgmt = spr_features.has_msr_misc_pwr_mgmt,
+ .has_nhm_msrs = spr_features.has_nhm_msrs,
+ .bclk_freq = spr_features.bclk_freq,
+ .supported_cstates = spr_features.supported_cstates,
+ .cst_limit = spr_features.cst_limit,
+ .has_msr_core_c1_res = spr_features.has_msr_core_c1_res,
+ .has_cst_prewake_bit = spr_features.has_cst_prewake_bit,
+ .has_fixed_rapl_psys_unit = spr_features.has_fixed_rapl_psys_unit,
+ .trl_msrs = spr_features.trl_msrs,
+ .has_msr_module_c6_res_ms = 1, /* DMR has Dual-Core-Module and MC6 MSR */
+ .rapl_msrs = 0, /* DMR does not have RAPL MSRs */
+ .plr_msrs = 0, /* DMR does not have PLR MSRs */
+ .has_irtl_msrs = 0, /* DMR does not have IRTL MSRs */
+ .has_config_tdp = 0, /* DMR does not have CTDP MSRs */
+};
+
static const struct platform_features srf_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
@@ -1014,18 +1047,21 @@ static const struct platform_data turbostat_pdata[] = {
{ INTEL_EMERALDRAPIDS_X, &spr_features },
{ INTEL_GRANITERAPIDS_X, &spr_features },
{ INTEL_GRANITERAPIDS_D, &spr_features },
+ { INTEL_PANTHERCOVE_X, &dmr_features },
{ INTEL_LAKEFIELD, &cnl_features },
{ INTEL_ALDERLAKE, &adl_features },
{ INTEL_ALDERLAKE_L, &adl_features },
{ INTEL_RAPTORLAKE, &adl_features },
{ INTEL_RAPTORLAKE_P, &adl_features },
{ INTEL_RAPTORLAKE_S, &adl_features },
+ { INTEL_BARTLETTLAKE, &adl_features },
{ INTEL_METEORLAKE, &adl_features },
{ INTEL_METEORLAKE_L, &adl_features },
{ INTEL_ARROWLAKE_H, &adl_features },
{ INTEL_ARROWLAKE_U, &adl_features },
{ INTEL_ARROWLAKE, &adl_features },
{ INTEL_LUNARLAKE_M, &lnl_features },
+ { INTEL_PANTHERLAKE_L, &lnl_features },
{ INTEL_ATOM_SILVERMONT, &slv_features },
{ INTEL_ATOM_SILVERMONT_D, &slvd_features },
{ INTEL_ATOM_AIRMONT, &amt_features },
@@ -1038,13 +1074,14 @@ static const struct platform_data turbostat_pdata[] = {
{ INTEL_ATOM_GRACEMONT, &adl_features },
{ INTEL_ATOM_CRESTMONT_X, &srf_features },
{ INTEL_ATOM_CRESTMONT, &grr_features },
+ { INTEL_ATOM_DARKMONT_X, &srf_features },
{ INTEL_XEON_PHI_KNL, &knl_features },
{ INTEL_XEON_PHI_KNM, &knl_features },
/*
* Missing support for
* INTEL_ICELAKE
* INTEL_ATOM_SILVERMONT_MID
- * INTEL_ATOM_AIRMONT_MID
+ * INTEL_ATOM_SILVERMONT_MID2
* INTEL_ATOM_AIRMONT_NP
*/
{ 0, NULL },
@@ -1056,9 +1093,9 @@ void probe_platform_features(unsigned int family, unsigned int model)
{
int i;
- platform = &default_features;
-
if (authentic_amd || hygon_genuine) {
+ /* fallback to default features on unsupported models */
+ force_load++;
if (max_extended_level >= 0x80000007) {
unsigned int eax, ebx, ecx, edx;
@@ -1067,11 +1104,11 @@ void probe_platform_features(unsigned int family, unsigned int model)
if ((edx & (1 << 14)) && family >= 0x17)
platform = &amd_features_with_rapl;
}
- return;
+ goto end;
}
if (!genuine_intel)
- return;
+ goto end;
for (i = 0; turbostat_pdata[i].features; i++) {
if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) {
@@ -1079,6 +1116,18 @@ void probe_platform_features(unsigned int family, unsigned int model)
return;
}
}
+
+end:
+ if (force_load && !platform) {
+ fprintf(outf, "Forced to run on unsupported platform!\n");
+ platform = &default_features;
+ }
+
+ if (platform)
+ return;
+
+ fprintf(stderr, "Unsupported platform detected.\n\tSee RUN THE LATEST VERSION on turbostat(8)\n");
+ exit(1);
}
/* Model specific support End */
@@ -1095,9 +1144,10 @@ void probe_platform_features(unsigned int family, unsigned int model)
int backwards_count;
char *progname;
-#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */
+#define CPU_SUBSET_MAXCPUS 8192 /* need to use before probe... */
cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
-size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
+size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize,
+ cpu_subset_size;
#define MAX_ADDED_THREAD_COUNTERS 24
#define MAX_ADDED_CORE_COUNTERS 8
#define MAX_ADDED_PACKAGE_COUNTERS 16
@@ -1273,7 +1323,7 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
.msr = MSR_PLATFORM_ENERGY_STATUS,
.msr_mask = 0x00000000FFFFFFFF,
.msr_shift = 0,
- .platform_rapl_msr_scale = &rapl_energy_units,
+ .platform_rapl_msr_scale = &rapl_psys_energy_units,
.rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM,
.bic = BIC_SysWatt | BIC_Sys_J,
.compat_scale = 1.0,
@@ -1512,6 +1562,17 @@ static struct msr_counter_arch_info msr_counter_arch_infos[] = {
#define PMT_COUNTER_MTL_DC6_LSB 0
#define PMT_COUNTER_MTL_DC6_MSB 63
#define PMT_MTL_DC6_GUID 0x1a067102
+#define PMT_MTL_DC6_SEQ 0
+
+#define PMT_COUNTER_CWF_MC1E_OFFSET_BASE 20936
+#define PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT 24
+#define PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE 12
+#define PMT_COUNTER_CWF_CPUS_PER_MODULE 4
+#define PMT_COUNTER_CWF_MC1E_LSB 0
+#define PMT_COUNTER_CWF_MC1E_MSB 63
+#define PMT_CWF_MC1E_GUID 0x14421519
+
+unsigned long long tcore_clock_freq_hz = 800000000;
#define PMT_COUNTER_NAME_SIZE_BYTES 16
#define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32
@@ -1535,6 +1596,7 @@ struct pmt_mmio {
enum pmt_datatype {
PMT_TYPE_RAW,
PMT_TYPE_XTAL_TIME,
+ PMT_TYPE_TCORE_CLOCK,
};
struct pmt_domain_info {
@@ -1564,6 +1626,93 @@ struct pmt_counter {
struct pmt_domain_info *domains;
};
+/*
+ * PMT telemetry directory iterator.
+ * Used to iterate telemetry files in sysfs in correct order.
+ */
+struct pmt_diriter_t {
+ DIR *dir;
+ struct dirent **namelist;
+ unsigned int num_names;
+ unsigned int current_name_idx;
+};
+
+int pmt_telemdir_filter(const struct dirent *e)
+{
+ unsigned int dummy;
+
+ return sscanf(e->d_name, "telem%u", &dummy);
+}
+
+int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b)
+{
+ unsigned int aidx = 0, bidx = 0;
+
+ sscanf((*a)->d_name, "telem%u", &aidx);
+ sscanf((*b)->d_name, "telem%u", &bidx);
+
+ return aidx >= bidx;
+}
+
+const struct dirent *pmt_diriter_next(struct pmt_diriter_t *iter)
+{
+ const struct dirent *ret = NULL;
+
+ if (!iter->dir)
+ return NULL;
+
+ if (iter->current_name_idx >= iter->num_names)
+ return NULL;
+
+ ret = iter->namelist[iter->current_name_idx];
+ ++iter->current_name_idx;
+
+ return ret;
+}
+
+const struct dirent *pmt_diriter_begin(struct pmt_diriter_t *iter, const char *pmt_root_path)
+{
+ int num_names = iter->num_names;
+
+ if (!iter->dir) {
+ iter->dir = opendir(pmt_root_path);
+ if (iter->dir == NULL)
+ return NULL;
+
+ num_names = scandir(pmt_root_path, &iter->namelist, pmt_telemdir_filter, pmt_telemdir_sort);
+ if (num_names == -1)
+ return NULL;
+ }
+
+ iter->current_name_idx = 0;
+ iter->num_names = num_names;
+
+ return pmt_diriter_next(iter);
+}
+
+void pmt_diriter_init(struct pmt_diriter_t *iter)
+{
+ memset(iter, 0, sizeof(*iter));
+}
+
+void pmt_diriter_remove(struct pmt_diriter_t *iter)
+{
+ if (iter->namelist) {
+ for (unsigned int i = 0; i < iter->num_names; i++) {
+ free(iter->namelist[i]);
+ iter->namelist[i] = NULL;
+ }
+ }
+
+ free(iter->namelist);
+ iter->namelist = NULL;
+ iter->num_names = 0;
+ iter->current_name_idx = 0;
+
+ closedir(iter->dir);
+ iter->dir = NULL;
+}
+
unsigned int pmt_counter_get_width(const struct pmt_counter *p)
{
return (p->msb - p->lsb) + 1;
@@ -1613,6 +1762,7 @@ struct thread_data {
unsigned long long c1;
unsigned long long instr_count;
unsigned long long irq_count;
+ unsigned long long nmi_count;
unsigned int smi_count;
unsigned int cpu_id;
unsigned int apic_id;
@@ -1919,6 +2069,7 @@ struct timeval tv_even, tv_odd, tv_delta;
int *irq_column_2_cpu; /* /proc/interrupts column numbers */
int *irqs_per_cpu; /* indexed by cpu_num */
+int *nmi_per_cpu; /* indexed by cpu_num */
void setup_all_buffers(bool startup);
@@ -1946,6 +2097,8 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
{
int retval, pkg_no, core_no, thread_no, node_no;
+ retval = 0;
+
for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
@@ -1961,14 +2114,12 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
c = GET_CORE(core_base, core_no, node_no, pkg_no);
p = GET_PKG(pkg_base, pkg_no);
- retval = func(t, c, p);
- if (retval)
- return retval;
+ retval |= func(t, c, p);
}
}
}
}
- return 0;
+ return retval;
}
int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p)
@@ -2009,13 +2160,20 @@ int get_msr_fd(int cpu)
if (fd)
return fd;
-
+#if defined(ANDROID)
+ sprintf(pathname, "/dev/msr%d", cpu);
+#else
sprintf(pathname, "/dev/cpu/%d/msr", cpu);
+#endif
fd = open(pathname, O_RDONLY);
if (fd < 0)
+#if defined(ANDROID)
+ err(-1, "%s open failed, try chown or chmod +r /dev/msr*, "
+ "or run with --no-msr, or run as root", pathname);
+#else
err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, "
"or run with --no-msr, or run as root", pathname);
-
+#endif
fd_percpu[cpu] = fd;
return fd;
@@ -2084,19 +2242,52 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
return 0;
}
-int probe_msr(int cpu, off_t offset)
+int add_msr_counter(int cpu, off_t offset)
{
ssize_t retval;
- unsigned long long dummy;
+ unsigned long long value;
- assert(!no_msr);
+ if (no_msr)
+ return -1;
- retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset);
+ if (!offset)
+ return -1;
- if (retval != sizeof(dummy))
- return 1;
+ retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset);
- return 0;
+ /* if the read failed, the probe fails */
+ if (retval != sizeof(value))
+ return -1;
+
+ if (value == 0)
+ return 0;
+
+ return 1;
+}
+
+int add_rapl_msr_counter(int cpu, const struct rapl_counter_arch_info *cai)
+{
+ int ret;
+
+ if (!(platform->rapl_msrs & cai->feature_mask))
+ return -1;
+
+ ret = add_msr_counter(cpu, cai->msr);
+ if (ret < 0)
+ return -1;
+
+ switch (cai->rci_index) {
+ case RAPL_RCI_INDEX_ENERGY_PKG:
+ case RAPL_RCI_INDEX_ENERGY_CORES:
+ case RAPL_RCI_INDEX_DRAM:
+ case RAPL_RCI_INDEX_GFX:
+ case RAPL_RCI_INDEX_ENERGY_PLATFORM:
+ if (ret == 0)
+ return 1;
+ }
+
+ /* PKG,DRAM_PERF_STATUS MSRs, can return any value */
+ return 1;
}
/* Convert CPU ID to domain ID for given added perf counter. */
@@ -2137,41 +2328,53 @@ void help(void)
"when COMMAND completes.\n"
"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
"to print statistics, until interrupted.\n"
- " -a, --add add a counter\n"
+ " -a, --add counter\n"
+ " add a counter\n"
" eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
" eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n"
" eg. --add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n"
- " -c, --cpu cpu-set limit output to summary plus cpu-set:\n"
+ " -c, --cpu cpu-set\n"
+ " limit output to summary plus cpu-set:\n"
" {core | package | j,k,l..m,n-p }\n"
- " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n"
+ " -d, --debug\n"
+ " displays usec, Time_Of_Day_Seconds and more debugging\n"
" debug messages are printed to stderr\n"
- " -D, --Dump displays the raw counter values\n"
- " -e, --enable [all | column]\n"
+ " -D, --Dump\n"
+ " displays the raw counter values\n"
+ " -e, --enable [all | column]\n"
" shows all or the specified disabled column\n"
- " -H, --hide [column|column,column,...]\n"
+ " -f, --force\n"
+ " force load turbostat with minimum default features on unsupported platforms.\n"
+ " -H, --hide [column | column,column,...]\n"
" hide the specified column(s)\n"
" -i, --interval sec.subsec\n"
- " Override default 5-second measurement interval\n"
- " -J, --Joules displays energy in Joules instead of Watts\n"
- " -l, --list list column headers only\n"
- " -M, --no-msr Disable all uses of the MSR driver\n"
- " -P, --no-perf Disable all uses of the perf API\n"
+ " override default 5-second measurement interval\n"
+ " -J, --Joules\n"
+ " displays energy in Joules instead of Watts\n"
+ " -l, --list\n"
+ " list column headers only\n"
+ " -M, --no-msr\n"
+ " disable all uses of the MSR driver\n"
+ " -P, --no-perf\n"
+ " disable all uses of the perf API\n"
" -n, --num_iterations num\n"
" number of the measurement iterations\n"
" -N, --header_iterations num\n"
" print header every num iterations\n"
" -o, --out file\n"
" create or truncate \"file\" for all output\n"
- " -q, --quiet skip decoding system configuration header\n"
- " -s, --show [column|column,column,...]\n"
+ " -q, --quiet\n"
+ " skip decoding system configuration header\n"
+ " -s, --show [column | column,column,...]\n"
" show only the specified column(s)\n"
" -S, --Summary\n"
" limits output to 1-line system summary per interval\n"
" -T, --TCC temperature\n"
" sets the Thermal Control Circuit temperature in\n"
" degrees Celsius\n"
- " -h, --help print this help message\n"
- " -v, --version print version information\n" "\n" "For more help, run \"man turbostat\"\n");
+ " -h, --help\n"
+ " print this help message\n"
+ " -v, --version\n\t\tprint version information\n\nFor more help, run \"man turbostat\"\n");
}
/*
@@ -2201,16 +2404,25 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
retval |= ~0;
break;
} else if (!strcmp(name_list, "topology")) {
- retval |= BIC_TOPOLOGY;
+ retval |= BIC_GROUP_TOPOLOGY;
break;
} else if (!strcmp(name_list, "power")) {
- retval |= BIC_THERMAL_PWR;
+ retval |= BIC_GROUP_THERMAL_PWR;
break;
} else if (!strcmp(name_list, "idle")) {
- retval |= BIC_IDLE;
+ retval |= BIC_GROUP_IDLE;
+ break;
+ } else if (!strcmp(name_list, "swidle")) {
+ retval |= BIC_GROUP_SW_IDLE;
+ break;
+ } else if (!strcmp(name_list, "sysfs")) { /* legacy compatibility */
+ retval |= BIC_GROUP_SW_IDLE;
+ break;
+ } else if (!strcmp(name_list, "hwidle")) {
+ retval |= BIC_GROUP_HW_IDLE;
break;
} else if (!strcmp(name_list, "frequency")) {
- retval |= BIC_FREQUENCY;
+ retval |= BIC_GROUP_FREQUENCY;
break;
} else if (!strcmp(name_list, "other")) {
retval |= BIC_OTHER;
@@ -2291,6 +2503,12 @@ void print_header(char *delim)
else
outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
}
+ if (DO_BIC(BIC_NMI)) {
+ if (sums_need_wide_columns)
+ outp += sprintf(outp, "%s NMI", (printed++ ? delim : ""));
+ else
+ outp += sprintf(outp, "%sNMI", (printed++ ? delim : ""));
+ }
if (DO_BIC(BIC_SMI))
outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
@@ -2337,6 +2555,7 @@ void print_header(char *delim)
break;
case PMT_TYPE_XTAL_TIME:
+ case PMT_TYPE_TCORE_CLOCK:
outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
break;
}
@@ -2411,6 +2630,7 @@ void print_header(char *delim)
break;
case PMT_TYPE_XTAL_TIME:
+ case PMT_TYPE_TCORE_CLOCK:
outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
break;
}
@@ -2469,7 +2689,7 @@ void print_header(char *delim)
if (DO_BIC(BIC_SYS_LPI))
outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
- if (platform->rapl_msrs && !rapl_joules) {
+ if (!rapl_joules) {
if (DO_BIC(BIC_PkgWatt))
outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
@@ -2482,7 +2702,7 @@ void print_header(char *delim)
outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
if (DO_BIC(BIC_RAM__))
outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
- } else if (platform->rapl_msrs && rapl_joules) {
+ } else {
if (DO_BIC(BIC_Pkg_J))
outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
@@ -2542,6 +2762,7 @@ void print_header(char *delim)
break;
case PMT_TYPE_XTAL_TIME:
+ case PMT_TYPE_TCORE_CLOCK:
outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
break;
}
@@ -2577,6 +2798,8 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
if (DO_BIC(BIC_IRQ))
outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
+ if (DO_BIC(BIC_NMI))
+ outp += sprintf(outp, "IRQ: %lld\n", t->nmi_count);
if (DO_BIC(BIC_SMI))
outp += sprintf(outp, "SMI: %d\n", t->smi_count);
@@ -2796,6 +3019,14 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
}
+ /* NMI */
+ if (DO_BIC(BIC_NMI)) {
+ if (sums_need_wide_columns)
+ outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->nmi_count);
+ else
+ outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->nmi_count);
+ }
+
/* SMI */
if (DO_BIC(BIC_SMI))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
@@ -2850,7 +3081,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) {
const unsigned long value_raw = t->pmt_counter[i];
- const double value_converted = 100.0 * value_raw / crystal_hz / interval_float;
+ double value_converted;
switch (ppmt->type) {
case PMT_TYPE_RAW:
if (pmt_counter_get_width(ppmt) <= 32)
@@ -2862,8 +3093,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
break;
case PMT_TYPE_XTAL_TIME:
+ value_converted = 100.0 * value_raw / crystal_hz / interval_float;
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
break;
+
+ case PMT_TYPE_TCORE_CLOCK:
+ value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
}
}
@@ -2930,7 +3166,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
const unsigned long value_raw = c->pmt_counter[i];
- const double value_converted = 100.0 * value_raw / crystal_hz / interval_float;
+ double value_converted;
switch (ppmt->type) {
case PMT_TYPE_RAW:
if (pmt_counter_get_width(ppmt) <= 32)
@@ -2942,8 +3178,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
break;
case PMT_TYPE_XTAL_TIME:
+ value_converted = 100.0 * value_raw / crystal_hz / interval_float;
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
break;
+
+ case PMT_TYPE_TCORE_CLOCK:
+ value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
}
}
@@ -3128,7 +3369,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
const unsigned long value_raw = p->pmt_counter[i];
- const double value_converted = 100.0 * value_raw / crystal_hz / interval_float;
+ double value_converted;
switch (ppmt->type) {
case PMT_TYPE_RAW:
if (pmt_counter_get_width(ppmt) <= 32)
@@ -3140,8 +3381,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
break;
case PMT_TYPE_XTAL_TIME:
+ value_converted = 100.0 * value_raw / crystal_hz / interval_float;
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
break;
+
+ case PMT_TYPE_TCORE_CLOCK:
+ value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
}
}
@@ -3298,7 +3544,7 @@ void delta_core(struct core_data *new, struct core_data *old)
old->c6 = new->c6 - old->c6;
old->c7 = new->c7 - old->c7;
old->core_temp_c = new->core_temp_c;
- old->core_throt_cnt = new->core_throt_cnt;
+ old->core_throt_cnt = new->core_throt_cnt - old->core_throt_cnt;
old->mc6_us = new->mc6_us - old->mc6_us;
DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value);
@@ -3411,6 +3657,9 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d
if (DO_BIC(BIC_IRQ))
old->irq_count = new->irq_count - old->irq_count;
+ if (DO_BIC(BIC_NMI))
+ old->nmi_count = new->nmi_count - old->nmi_count;
+
if (DO_BIC(BIC_SMI))
old->smi_count = new->smi_count - old->smi_count;
@@ -3449,12 +3698,10 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
/* always calculate thread delta */
retval = delta_thread(t, t2, c2); /* c2 is core delta */
- if (retval)
- return retval;
/* calculate package delta only for 1st core in package */
if (is_cpu_first_core_in_package(t, c, p))
- retval = delta_package(p, p2);
+ retval |= delta_package(p, p2);
return retval;
}
@@ -3491,6 +3738,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
t->instr_count = 0;
t->irq_count = 0;
+ t->nmi_count = 0;
t->smi_count = 0;
c->c3 = 0;
@@ -3582,7 +3830,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
/* remember first tv_begin */
if (average.threads.tv_begin.tv_sec == 0)
- average.threads.tv_begin = t->tv_begin;
+ average.threads.tv_begin = procsysfs_tv_begin;
/* remember last tv_end */
average.threads.tv_end = t->tv_end;
@@ -3595,6 +3843,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
average.threads.instr_count += t->instr_count;
average.threads.irq_count += t->irq_count;
+ average.threads.nmi_count += t->nmi_count;
average.threads.smi_count += t->smi_count;
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
@@ -3736,6 +3985,8 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
if (average.threads.irq_count > 9999999)
sums_need_wide_columns = 1;
+ if (average.threads.nmi_count > 9999999)
+ sums_need_wide_columns = 1;
average.cores.c3 /= topo.allowed_cores;
average.cores.c6 /= topo.allowed_cores;
@@ -4548,7 +4799,8 @@ unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb)
unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id)
{
- assert(domain_id < ppmt->num_domains);
+ if (domain_id >= ppmt->num_domains)
+ return 0;
const unsigned long *pmmio = ppmt->domains[domain_id].pcounter;
const unsigned long value = pmmio ? *pmmio : 0;
@@ -4558,6 +4810,37 @@ unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id)
return (value & value_mask) >> value_shift;
}
+/* Rapl domain enumeration helpers */
+static inline int get_rapl_num_domains(void)
+{
+ int num_packages = topo.max_package_id + 1;
+ int num_cores_per_package;
+ int num_cores;
+
+ if (!platform->has_per_core_rapl)
+ return num_packages;
+
+ num_cores_per_package = topo.max_core_id + 1;
+ num_cores = num_cores_per_package * num_packages;
+
+ return num_cores;
+}
+
+static inline int get_rapl_domain_id(int cpu)
+{
+ int nr_cores_per_package = topo.max_core_id + 1;
+ int rapl_core_id;
+
+ if (!platform->has_per_core_rapl)
+ return cpus[cpu].physical_package_id;
+
+ /* Compute the system-wide unique core-id for @cpu */
+ rapl_core_id = cpus[cpu].physical_core_id;
+ rapl_core_id += cpus[cpu].physical_package_id * nr_cores_per_package;
+
+ return rapl_core_id;
+}
+
/*
* get_counters(...)
* migrate to cpu
@@ -4592,6 +4875,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (DO_BIC(BIC_IRQ))
t->irq_count = irqs_per_cpu[cpu];
+ if (DO_BIC(BIC_NMI))
+ t->nmi_count = nmi_per_cpu[cpu];
get_cstate_counters(cpu, t, c, p);
@@ -4611,7 +4896,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
goto done;
if (platform->has_per_core_rapl) {
- status = get_rapl_counters(cpu, c->core_id, c, p);
+ status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p);
if (status != 0)
return status;
}
@@ -4677,7 +4962,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
p->sys_lpi = cpuidle_cur_sys_lpi_us;
if (!platform->has_per_core_rapl) {
- status = get_rapl_counters(cpu, p->package_id, c, p);
+ status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p);
if (status != 0)
return status;
}
@@ -5337,6 +5622,7 @@ void free_all_buffers(void)
free(irq_column_2_cpu);
free(irqs_per_cpu);
+ free(nmi_per_cpu);
for (i = 0; i <= topo.max_cpu_num; ++i) {
if (cpus[i].put_ids)
@@ -5568,6 +5854,8 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
{
int retval, pkg_no, node_no, core_no, thread_no;
+ retval = 0;
+
for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
@@ -5589,14 +5877,12 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
p = GET_PKG(pkg_base, pkg_no);
p2 = GET_PKG(pkg_base2, pkg_no);
- retval = func(t, c, p, t2, c2, p2);
- if (retval)
- return retval;
+ retval |= func(t, c, p, t2, c2, p2);
}
}
}
}
- return 0;
+ return retval;
}
/*
@@ -5793,31 +6079,37 @@ int snapshot_proc_interrupts(void)
irq_column_2_cpu[column] = cpu_number;
irqs_per_cpu[cpu_number] = 0;
+ nmi_per_cpu[cpu_number] = 0;
}
/* read /proc/interrupt count lines and sum up irqs per cpu */
while (1) {
int column;
char buf[64];
+ int this_row_is_nmi = 0;
- retval = fscanf(fp, " %s:", buf); /* flush irq# "N:" */
+ retval = fscanf(fp, " %s:", buf); /* irq# "N:" */
if (retval != 1)
break;
+ if (strncmp(buf, "NMI", strlen("NMI")) == 0)
+ this_row_is_nmi = 1;
+
/* read the count per cpu */
for (column = 0; column < topo.num_cpus; ++column) {
int cpu_number, irq_count;
retval = fscanf(fp, " %d", &irq_count);
+
if (retval != 1)
break;
cpu_number = irq_column_2_cpu[column];
irqs_per_cpu[cpu_number] += irq_count;
-
+ if (this_row_is_nmi)
+ nmi_per_cpu[cpu_number] += irq_count;
}
-
while (getc(fp) != '\n') ; /* flush interrupt description */
}
@@ -5836,6 +6128,7 @@ int snapshot_graphics(int idx)
int retval;
rewind(gfx_info[idx].fp);
+ fflush(gfx_info[idx].fp);
switch (idx) {
case GFX_rc6:
@@ -5914,7 +6207,9 @@ int snapshot_sys_lpi_us(void)
*/
int snapshot_proc_sysfs_files(void)
{
- if (DO_BIC(BIC_IRQ))
+ gettimeofday(&procsysfs_tv_begin, (struct timezone *)NULL);
+
+ if (DO_BIC(BIC_IRQ) || DO_BIC(BIC_NMI))
if (snapshot_proc_interrupts())
return 1;
@@ -6256,8 +6551,11 @@ void check_dev_msr()
if (no_msr)
return;
-
+#if defined(ANDROID)
+ sprintf(pathname, "/dev/msr%d", base_cpu);
+#else
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+#endif
if (stat(pathname, &sb))
if (system("/sbin/modprobe msr > /dev/null 2>&1"))
no_msr = 1;
@@ -6275,8 +6573,16 @@ int check_for_cap_sys_rawio(void)
int ret = 0;
caps = cap_get_proc();
- if (caps == NULL)
+ if (caps == NULL) {
+ /*
+ * CONFIG_MULTIUSER=n kernels have no cap_get_proc()
+ * Allow them to continue and attempt to access MSRs
+ */
+ if (errno == ENOSYS)
+ return 0;
+
return 1;
+ }
if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) {
ret = 1;
@@ -6307,7 +6613,11 @@ void check_msr_permission(void)
failed += check_for_cap_sys_rawio();
/* test file permissions */
+#if defined(ANDROID)
+ sprintf(pathname, "/dev/msr%d", base_cpu);
+#else
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+#endif
if (euidaccess(pathname, R_OK)) {
failed++;
}
@@ -6439,7 +6749,8 @@ static void probe_intel_uncore_frequency_legacy(void)
sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i,
j);
- if (access(path_base, R_OK))
+ sprintf(path, "%s/current_freq_khz", path_base);
+ if (access(path, R_OK))
continue;
BIC_PRESENT(BIC_UNCORE_MHZ);
@@ -6507,7 +6818,20 @@ static void probe_intel_uncore_frequency_cluster(void)
sprintf(path, "%s/current_freq_khz", path_base);
sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id);
- add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id);
+ /*
+ * Once add_couter() is called, that counter is always read
+ * and reported -- So it is effectively (enabled & present).
+ * Only call add_counter() here if legacy BIC_UNCORE_MHZ (UncMHz)
+ * is (enabled). Since we are in this routine, we
+ * know we will not probe and set (present) the legacy counter.
+ *
+ * This allows "--show/--hide UncMHz" to be effective for
+ * the clustered MHz counters, as a group.
+ */
+ if BIC_IS_ENABLED
+ (BIC_UNCORE_MHZ)
+ add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0,
+ package_id);
if (quiet)
continue;
@@ -6579,17 +6903,21 @@ static void probe_graphics(void)
else
goto next;
- set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", gt0_is_gt ? GFX_rc6 : SAM_mc6);
+ set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms",
+ gt0_is_gt ? GFX_rc6 : SAM_mc6);
set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", gt0_is_gt ? GFX_MHz : SAM_MHz);
- set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz);
+ set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq",
+ gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz);
- set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", gt0_is_gt ? SAM_mc6 : GFX_rc6);
+ set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms",
+ gt0_is_gt ? SAM_mc6 : GFX_rc6);
set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", gt0_is_gt ? SAM_MHz : GFX_MHz);
- set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz);
+ set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq",
+ gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz);
goto end;
}
@@ -7025,6 +7353,9 @@ void rapl_probe_intel(void)
else
bic_enabled &= ~bic_joules_bits;
+ if (!platform->rapl_msrs || no_msr)
+ return;
+
if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS))
bic_enabled &= ~BIC_PKG__;
if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS))
@@ -7045,6 +7376,11 @@ void rapl_probe_intel(void)
else
rapl_dram_energy_units = rapl_energy_units;
+ if (platform->has_fixed_rapl_psys_unit)
+ rapl_psys_energy_units = 1.0;
+ else
+ rapl_psys_energy_units = rapl_energy_units;
+
time_unit = msr >> 16 & 0xF;
if (time_unit == 0)
time_unit = 0xA;
@@ -7070,6 +7406,9 @@ void rapl_probe_amd(void)
else
bic_enabled &= ~bic_joules_bits;
+ if (!platform->rapl_msrs || no_msr)
+ return;
+
if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
return;
@@ -7096,6 +7435,158 @@ void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
return;
}
+static int fread_int(char *path, int *val)
+{
+ FILE *filep;
+ int ret;
+
+ filep = fopen(path, "r");
+ if (!filep)
+ return -1;
+
+ ret = fscanf(filep, "%d", val);
+ fclose(filep);
+ return ret;
+}
+
+static int fread_ull(char *path, unsigned long long *val)
+{
+ FILE *filep;
+ int ret;
+
+ filep = fopen(path, "r");
+ if (!filep)
+ return -1;
+
+ ret = fscanf(filep, "%llu", val);
+ fclose(filep);
+ return ret;
+}
+
+static int fread_str(char *path, char *buf, int size)
+{
+ FILE *filep;
+ int ret;
+ char *cp;
+
+ filep = fopen(path, "r");
+ if (!filep)
+ return -1;
+
+ ret = fread(buf, 1, size, filep);
+ fclose(filep);
+
+ /* replace '\n' with '\0' */
+ cp = strchr(buf, '\n');
+ if (cp != NULL)
+ *cp = '\0';
+
+ return ret;
+}
+
+#define PATH_RAPL_SYSFS "/sys/class/powercap"
+
+static int dump_one_domain(char *domain_path)
+{
+ char path[PATH_MAX];
+ char str[PATH_MAX];
+ unsigned long long val;
+ int constraint;
+ int enable;
+ int ret;
+
+ snprintf(path, PATH_MAX, "%s/name", domain_path);
+ ret = fread_str(path, str, PATH_MAX);
+ if (ret <= 0)
+ return -1;
+
+ fprintf(outf, "%s: %s", domain_path + strlen(PATH_RAPL_SYSFS) + 1, str);
+
+ snprintf(path, PATH_MAX, "%s/enabled", domain_path);
+ ret = fread_int(path, &enable);
+ if (ret <= 0)
+ return -1;
+
+ if (!enable) {
+ fputs(" disabled\n", outf);
+ return 0;
+ }
+
+ for (constraint = 0;; constraint++) {
+ snprintf(path, PATH_MAX, "%s/constraint_%d_time_window_us", domain_path, constraint);
+ ret = fread_ull(path, &val);
+ if (ret <= 0)
+ break;
+
+ if (val > 1000000)
+ fprintf(outf, " %0.1fs", (double)val / 1000000);
+ else if (val > 1000)
+ fprintf(outf, " %0.1fms", (double)val / 1000);
+ else
+ fprintf(outf, " %0.1fus", (double)val);
+
+ snprintf(path, PATH_MAX, "%s/constraint_%d_power_limit_uw", domain_path, constraint);
+ ret = fread_ull(path, &val);
+ if (ret > 0 && val)
+ fprintf(outf, ":%lluW", val / 1000000);
+
+ snprintf(path, PATH_MAX, "%s/constraint_%d_max_power_uw", domain_path, constraint);
+ ret = fread_ull(path, &val);
+ if (ret > 0 && val)
+ fprintf(outf, ",max:%lluW", val / 1000000);
+ }
+ fputc('\n', outf);
+
+ return 0;
+}
+
+static int print_rapl_sysfs(void)
+{
+ DIR *dir, *cdir;
+ struct dirent *entry, *centry;
+ char path[PATH_MAX];
+ char str[PATH_MAX];
+
+ if ((dir = opendir(PATH_RAPL_SYSFS)) == NULL) {
+ warn("open %s failed", PATH_RAPL_SYSFS);
+ return 1;
+ }
+
+ while ((entry = readdir(dir)) != NULL) {
+ if (strlen(entry->d_name) > 100)
+ continue;
+
+ if (strncmp(entry->d_name, "intel-rapl", strlen("intel-rapl")))
+ continue;
+
+ snprintf(path, PATH_MAX, "%s/%s/name", PATH_RAPL_SYSFS, entry->d_name);
+
+ /* Parse top level domains first, including package and psys */
+ fread_str(path, str, PATH_MAX);
+ if (strncmp(str, "package", strlen("package")) && strncmp(str, "psys", strlen("psys")))
+ continue;
+
+ snprintf(path, PATH_MAX, "%s/%s", PATH_RAPL_SYSFS, entry->d_name);
+ if ((cdir = opendir(path)) == NULL) {
+ perror("opendir() error");
+ return 1;
+ }
+
+ dump_one_domain(path);
+
+ while ((centry = readdir(cdir)) != NULL) {
+ if (strncmp(centry->d_name, "intel-rapl", strlen("intel-rapl")))
+ continue;
+ snprintf(path, PATH_MAX, "%s/%s/%s", PATH_RAPL_SYSFS, entry->d_name, centry->d_name);
+ dump_one_domain(path);
+ }
+ closedir(cdir);
+ }
+
+ closedir(dir);
+ return 0;
+}
+
int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
unsigned long long msr;
@@ -7222,9 +7713,6 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
*/
void probe_rapl(void)
{
- if (!platform->rapl_msrs || no_msr)
- return;
-
if (genuine_intel)
rapl_probe_intel();
if (authentic_amd || hygon_genuine)
@@ -7233,6 +7721,11 @@ void probe_rapl(void)
if (quiet)
return;
+ print_rapl_sysfs();
+
+ if (!platform->rapl_msrs || no_msr)
+ return;
+
for_all_cpus(print_rapl, ODD_COUNTERS);
}
@@ -7565,44 +8058,42 @@ static int has_instr_count_access(void)
return has_access;
}
-int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
- double *scale_, enum rapl_unit *unit_)
+int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
+ double *scale_, enum rapl_unit *unit_)
{
+ int ret = -1;
+
if (no_perf)
return -1;
+ if (!cai->perf_name)
+ return -1;
+
const double scale = read_perf_scale(cai->perf_subsys, cai->perf_name);
if (scale == 0.0)
- return -1;
+ goto end;
const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name);
if (unit == RAPL_UNIT_INVALID)
- return -1;
+ goto end;
const unsigned int rapl_type = read_perf_type(cai->perf_subsys);
const unsigned int rapl_energy_pkg_config = read_perf_config(cai->perf_subsys, cai->perf_name);
- const int fd_counter =
- open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP);
- if (fd_counter == -1)
- return -1;
+ ret = open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP);
+ if (ret == -1)
+ goto end;
/* If it's the first counter opened, make it a group descriptor */
if (rci->fd_perf == -1)
- rci->fd_perf = fd_counter;
+ rci->fd_perf = ret;
*scale_ = scale;
*unit_ = unit;
- return fd_counter;
-}
-
-int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
- double *scale, enum rapl_unit *unit)
-{
- int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit);
+end:
if (debug >= 2)
fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);
@@ -7627,7 +8118,7 @@ void linux_perf_init(void)
void rapl_perf_init(void)
{
- const unsigned int num_domains = (platform->has_per_core_rapl ? topo.max_core_id : topo.max_package_id) + 1;
+ const unsigned int num_domains = get_rapl_num_domains();
bool *domain_visited = calloc(num_domains, sizeof(bool));
rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain));
@@ -7660,6 +8151,9 @@ void rapl_perf_init(void)
enum rapl_unit unit;
unsigned int next_domain;
+ if (!BIC_IS_ENABLED(cai->bic))
+ continue;
+
memset(domain_visited, 0, num_domains * sizeof(*domain_visited));
for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {
@@ -7668,8 +8162,7 @@ void rapl_perf_init(void)
continue;
/* Skip already seen and handled RAPL domains */
- next_domain =
- platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id;
+ next_domain = get_rapl_domain_id(cpu);
assert(next_domain < num_domains);
@@ -7683,27 +8176,37 @@ void rapl_perf_init(void)
struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain];
- /* Check if the counter is enabled and accessible */
- if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) {
+ /*
+ * rapl_counter_arch_infos[] can have multiple entries describing the same
+ * counter, due to the difference from different platforms/Vendors.
+ * E.g. rapl_counter_arch_infos[0] and rapl_counter_arch_infos[1] share the
+ * same perf_subsys and perf_name, but with different MSR address.
+ * rapl_counter_arch_infos[0] is for Intel and rapl_counter_arch_infos[1]
+ * is for AMD.
+ * In this case, it is possible that multiple rapl_counter_arch_infos[]
+ * entries are probed just because their perf/msr is duplicate and valid.
+ *
+ * Thus need a check to avoid re-probe the same counters.
+ */
+ if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE)
+ break;
- /* Use perf API for this counter */
- if (!no_perf && cai->perf_name
- && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) {
- rci->source[cai->rci_index] = COUNTER_SOURCE_PERF;
- rci->scale[cai->rci_index] = scale * cai->compat_scale;
- rci->unit[cai->rci_index] = unit;
- rci->flags[cai->rci_index] = cai->flags;
-
- /* Use MSR for this counter */
- } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) {
- rci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
- rci->msr[cai->rci_index] = cai->msr;
- rci->msr_mask[cai->rci_index] = cai->msr_mask;
- rci->msr_shift[cai->rci_index] = cai->msr_shift;
- rci->unit[cai->rci_index] = RAPL_UNIT_JOULES;
- rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale;
- rci->flags[cai->rci_index] = cai->flags;
- }
+ /* Use perf API for this counter */
+ if (add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) {
+ rci->source[cai->rci_index] = COUNTER_SOURCE_PERF;
+ rci->scale[cai->rci_index] = scale * cai->compat_scale;
+ rci->unit[cai->rci_index] = unit;
+ rci->flags[cai->rci_index] = cai->flags;
+
+ /* Use MSR for this counter */
+ } else if (add_rapl_msr_counter(cpu, cai) >= 0) {
+ rci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
+ rci->msr[cai->rci_index] = cai->msr;
+ rci->msr_mask[cai->rci_index] = cai->msr_mask;
+ rci->msr_shift[cai->rci_index] = cai->msr_shift;
+ rci->unit[cai->rci_index] = RAPL_UNIT_JOULES;
+ rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale;
+ rci->flags[cai->rci_index] = cai->flags;
}
if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE)
@@ -7736,65 +8239,63 @@ int *get_cstate_perf_group_fd(struct cstate_counter_info_t *cci, const char *gro
return NULL;
}
-int add_cstate_perf_counter_(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai)
+int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai)
{
+ int ret = -1;
+
if (no_perf)
return -1;
+ if (!cai->perf_name)
+ return -1;
+
int *pfd_group = get_cstate_perf_group_fd(cci, cai->perf_subsys);
if (pfd_group == NULL)
- return -1;
+ goto end;
const unsigned int type = read_perf_type(cai->perf_subsys);
const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name);
- const int fd_counter = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP);
+ ret = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP);
- if (fd_counter == -1)
- return -1;
+ if (ret == -1)
+ goto end;
/* If it's the first counter opened, make it a group descriptor */
if (*pfd_group == -1)
- *pfd_group = fd_counter;
-
- return fd_counter;
-}
-
-int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai)
-{
- int ret = add_cstate_perf_counter_(cpu, cci, cai);
+ *pfd_group = ret;
+end:
if (debug >= 2)
fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);
return ret;
}
-int add_msr_perf_counter_(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai)
+int add_msr_perf_counter(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai)
{
+ int ret = -1;
+
if (no_perf)
return -1;
+ if (!cai->perf_name)
+ return -1;
+
const unsigned int type = read_perf_type(cai->perf_subsys);
const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name);
- const int fd_counter = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP);
+ ret = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP);
- if (fd_counter == -1)
- return -1;
+ if (ret == -1)
+ goto end;
/* If it's the first counter opened, make it a group descriptor */
if (cci->fd_perf == -1)
- cci->fd_perf = fd_counter;
-
- return fd_counter;
-}
-
-int add_msr_perf_counter(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai)
-{
- int ret = add_msr_perf_counter_(cpu, cci, cai);
+ cci->fd_perf = ret;
+end:
if (debug)
fprintf(stderr, "%s: %s/%s: %d (cpu: %d)\n", __func__, cai->perf_subsys, cai->perf_name, ret, cpu);
@@ -7828,12 +8329,12 @@ void msr_perf_init_(void)
if (cai->needed) {
/* Use perf API for this counter */
- if (!no_perf && cai->perf_name && add_msr_perf_counter(cpu, cci, cai) != -1) {
+ if (add_msr_perf_counter(cpu, cci, cai) != -1) {
cci->source[cai->rci_index] = COUNTER_SOURCE_PERF;
cai->present = true;
/* User MSR for this counter */
- } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) {
+ } else if (add_msr_counter(cpu, cai->msr) >= 0) {
cci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
cci->msr[cai->rci_index] = cai->msr;
cci->msr_mask[cai->rci_index] = cai->msr_mask;
@@ -7941,13 +8442,13 @@ void cstate_perf_init_(bool soft_c1)
if (counter_needed && counter_supported) {
/* Use perf API for this counter */
- if (!no_perf && cai->perf_name && add_cstate_perf_counter(cpu, cci, cai) != -1) {
+ if (add_cstate_perf_counter(cpu, cci, cai) != -1) {
cci->source[cai->rci_index] = COUNTER_SOURCE_PERF;
/* User MSR for this counter */
- } else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit
- && probe_msr(cpu, cai->msr) == 0) {
+ } else if (pkg_cstate_limit >= cai->pkg_cstate_limit
+ && add_msr_counter(cpu, cai->msr) >= 0) {
cci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
cci->msr[cai->rci_index] = cai->msr;
}
@@ -8235,6 +8736,7 @@ void process_cpuid()
aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1;
BIC_PRESENT(BIC_IRQ);
+ BIC_PRESENT(BIC_NMI);
BIC_PRESENT(BIC_TSC_MHz);
}
@@ -8622,7 +9124,11 @@ void allocate_irq_buffers(void)
irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
if (irqs_per_cpu == NULL)
- err(-1, "calloc %d", topo.max_cpu_num + 1);
+ err(-1, "calloc %d IRQ", topo.max_cpu_num + 1);
+
+ nmi_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
+ if (nmi_per_cpu == NULL)
+ err(-1, "calloc %d NMI", topo.max_cpu_num + 1);
}
int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p)
@@ -8803,15 +9309,14 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo)
perf_device = "cpu_atom";
break;
- default: /* Don't change, we will probably fail and report a problem soon. */
+ default: /* Don't change, we will probably fail and report a problem soon. */
break;
}
}
perf_type = read_perf_type(perf_device);
if (perf_type == (unsigned int)-1) {
- warnx("%s: perf/%s/%s: failed to read %s",
- __func__, perf_device, pinfo->event, "type");
+ warnx("%s: perf/%s/%s: failed to read %s", __func__, perf_device, pinfo->event, "type");
continue;
}
@@ -8893,49 +9398,36 @@ int parse_telem_info_file(int fd_dir, const char *info_filename, const char *for
struct pmt_mmio *pmt_mmio_open(unsigned int target_guid)
{
- DIR *dirp;
- struct dirent *entry;
+ struct pmt_diriter_t pmt_iter;
+ const struct dirent *entry;
struct stat st;
- unsigned int telem_idx;
int fd_telem_dir, fd_pmt;
unsigned long guid, size, offset;
size_t mmap_size;
void *mmio;
- struct pmt_mmio *ret = NULL;
+ struct pmt_mmio *head = NULL, *last = NULL;
+ struct pmt_mmio *new_pmt = NULL;
if (stat(SYSFS_TELEM_PATH, &st) == -1)
return NULL;
- dirp = opendir(SYSFS_TELEM_PATH);
- if (dirp == NULL)
+ pmt_diriter_init(&pmt_iter);
+ entry = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH);
+ if (!entry) {
+ pmt_diriter_remove(&pmt_iter);
return NULL;
+ }
- for (;;) {
- entry = readdir(dirp);
-
- if (entry == NULL)
- break;
-
- if (strcmp(entry->d_name, ".") == 0)
- continue;
-
- if (strcmp(entry->d_name, "..") == 0)
- continue;
-
- if (sscanf(entry->d_name, "telem%u", &telem_idx) != 1)
- continue;
-
- if (fstatat(dirfd(dirp), entry->d_name, &st, 0) == -1) {
+ for (; entry != NULL; entry = pmt_diriter_next(&pmt_iter)) {
+ if (fstatat(dirfd(pmt_iter.dir), entry->d_name, &st, 0) == -1)
break;
- }
if (!S_ISDIR(st.st_mode))
continue;
- fd_telem_dir = openat(dirfd(dirp), entry->d_name, O_RDONLY);
- if (fd_telem_dir == -1) {
+ fd_telem_dir = openat(dirfd(pmt_iter.dir), entry->d_name, O_RDONLY);
+ if (fd_telem_dir == -1)
break;
- }
if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) {
close(fd_telem_dir);
@@ -8966,35 +9458,51 @@ struct pmt_mmio *pmt_mmio_open(unsigned int target_guid)
mmap_size = ROUND_UP_TO_PAGE_SIZE(size);
mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0);
if (mmio != MAP_FAILED) {
-
if (debug)
fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio);
- ret = calloc(1, sizeof(*ret));
+ new_pmt = calloc(1, sizeof(*new_pmt));
- if (!ret) {
+ if (!new_pmt) {
fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__);
exit(1);
}
- ret->guid = guid;
- ret->mmio_base = mmio;
- ret->pmt_offset = offset;
- ret->size = size;
+ /*
+ * Create linked list of mmaped regions,
+ * but preserve the ordering from sysfs.
+ * Ordering is important for the user to
+ * use the seq=%u parameter when adding a counter.
+ */
+ new_pmt->guid = guid;
+ new_pmt->mmio_base = mmio;
+ new_pmt->pmt_offset = offset;
+ new_pmt->size = size;
+ new_pmt->next = pmt_mmios;
+
+ if (last)
+ last->next = new_pmt;
+ else
+ head = new_pmt;
- ret->next = pmt_mmios;
- pmt_mmios = ret;
+ last = new_pmt;
}
loop_cleanup_and_break:
close(fd_pmt);
close(fd_telem_dir);
- break;
}
- closedir(dirp);
+ pmt_diriter_remove(&pmt_iter);
- return ret;
+ /*
+ * If we found something, stick just
+ * created linked list to the front.
+ */
+ if (head)
+ pmt_mmios = head;
+
+ return head;
}
struct pmt_mmio *pmt_mmio_find(unsigned int guid)
@@ -9031,7 +9539,7 @@ void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offs
return ret;
}
-struct pmt_mmio *pmt_add_guid(unsigned int guid)
+struct pmt_mmio *pmt_add_guid(unsigned int guid, unsigned int seq)
{
struct pmt_mmio *ret;
@@ -9039,6 +9547,11 @@ struct pmt_mmio *pmt_add_guid(unsigned int guid)
if (!ret)
ret = pmt_mmio_open(guid);
+ while (ret && seq) {
+ ret = ret->next;
+ --seq;
+ }
+
return ret;
}
@@ -9085,7 +9598,7 @@ void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio,
pcounter->domains[domain_id].pcounter = pmmio;
}
-int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type,
+int pmt_add_counter(unsigned int guid, unsigned int seq, const char *name, enum pmt_datatype type,
unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope,
enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode)
{
@@ -9105,10 +9618,10 @@ int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type,
exit(1);
}
- mmio = pmt_add_guid(guid);
+ mmio = pmt_add_guid(guid, seq);
if (!mmio) {
if (mode != PMT_OPEN_TRY) {
- fprintf(stderr, "%s: failed to map PMT MMIO for guid %x\n", __func__, guid);
+ fprintf(stderr, "%s: failed to map PMT MMIO for guid %x, seq %u\n", __func__, guid, seq);
exit(1);
}
@@ -9163,10 +9676,68 @@ int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type,
void pmt_init(void)
{
+ int cpu_num;
+ unsigned long seq, offset, mod_num;
+
if (BIC_IS_ENABLED(BIC_Diec6)) {
- pmt_add_counter(PMT_MTL_DC6_GUID, "Die%c6", PMT_TYPE_XTAL_TIME, PMT_COUNTER_MTL_DC6_LSB,
- PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET, SCOPE_PACKAGE, FORMAT_DELTA,
- 0, PMT_OPEN_TRY);
+ pmt_add_counter(PMT_MTL_DC6_GUID, PMT_MTL_DC6_SEQ, "Die%c6", PMT_TYPE_XTAL_TIME,
+ PMT_COUNTER_MTL_DC6_LSB, PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET,
+ SCOPE_PACKAGE, FORMAT_DELTA, 0, PMT_OPEN_TRY);
+ }
+
+ if (BIC_IS_ENABLED(BIC_CPU_c1e)) {
+ seq = 0;
+ offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
+ mod_num = 0; /* Relative module number for current PMT file. */
+
+ /* Open the counter for each CPU. */
+ for (cpu_num = 0; cpu_num < topo.max_cpu_num;) {
+
+ if (cpu_is_not_allowed(cpu_num))
+ goto next_loop_iter;
+
+ /*
+ * Set the scope to CPU, even though CWF report the counter per module.
+ * CPUs inside the same module will read from the same location, instead of reporting zeros.
+ *
+ * CWF with newer firmware might require a PMT_TYPE_XTAL_TIME intead of PMT_TYPE_TCORE_CLOCK.
+ */
+ pmt_add_counter(PMT_CWF_MC1E_GUID, seq, "CPU%c1e", PMT_TYPE_TCORE_CLOCK,
+ PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU,
+ FORMAT_DELTA, cpu_num, PMT_OPEN_TRY);
+
+ /*
+ * Rather complex logic for each time we go to the next loop iteration,
+ * so keep it as a label.
+ */
+next_loop_iter:
+ /*
+ * Advance the cpu number and check if we should also advance offset to
+ * the next counter inside the PMT file.
+ *
+ * On Clearwater Forest platform, the counter is reported per module,
+ * so open the same counter for all of the CPUs inside the module.
+ * That way, reported table show the correct value for all of the CPUs inside the module,
+ * instead of zeros.
+ */
+ ++cpu_num;
+ if (cpu_num % PMT_COUNTER_CWF_CPUS_PER_MODULE == 0) {
+ offset += PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT;
+ ++mod_num;
+ }
+
+ /*
+ * There are PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE in each PMT file.
+ *
+ * If that number is reached, seq must be incremented to advance to the next file in a sequence.
+ * Offset inside that file and a module counter has to be reset.
+ */
+ if (mod_num == PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE) {
+ ++seq;
+ offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
+ mod_num = 0;
+ }
+ }
}
}
@@ -9256,10 +9827,10 @@ int fork_it(char **argv)
timersub(&tv_odd, &tv_even, &tv_delta);
if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
fprintf(outf, "%s: Counter reset detected\n", progname);
- else {
- compute_average(EVEN_COUNTERS);
- format_all_counters(EVEN_COUNTERS);
- }
+ delta_platform(&platform_counters_odd, &platform_counters_even);
+
+ compute_average(EVEN_COUNTERS);
+ format_all_counters(EVEN_COUNTERS);
fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0);
@@ -9288,7 +9859,7 @@ int get_and_dump_counters(void)
void print_version()
{
- fprintf(outf, "turbostat version 2024.11.30 - Len Brown <lenb@kernel.org>\n");
+ fprintf(outf, "turbostat version 2025.06.08 - Len Brown <lenb@kernel.org>\n");
}
#define COMMAND_LINE_SIZE 2048
@@ -9321,7 +9892,7 @@ struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name)
for (mp = head; mp; mp = mp->next) {
if (debug)
fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name);
- if (!strncmp(name, mp->name, strlen(mp->name)))
+ if (!strcmp(name, mp->name))
return mp;
}
return NULL;
@@ -9613,7 +10184,7 @@ next:
}
if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) {
- fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event ) required\n");
+ fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event) required\n");
fail++;
}
@@ -9651,15 +10222,100 @@ bool starts_with(const char *str, const char *prefix)
return strncmp(prefix, str, strlen(prefix)) == 0;
}
+int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigned int *out_seq)
+{
+ struct pmt_diriter_t pmt_iter;
+ const struct dirent *dirname;
+ struct stat stat, target_stat;
+ int fd_telem_dir = -1;
+ int fd_target_dir;
+ unsigned int seq = 0;
+ unsigned long guid, target_guid;
+ int ret = -1;
+
+ fd_target_dir = open(target_path, O_RDONLY | O_DIRECTORY);
+ if (fd_target_dir == -1) {
+ return -1;
+ }
+
+ if (fstat(fd_target_dir, &target_stat) == -1) {
+ fprintf(stderr, "%s: Failed to stat the target: %s", __func__, strerror(errno));
+ exit(1);
+ }
+
+ if (parse_telem_info_file(fd_target_dir, "guid", "%lx", &target_guid)) {
+ fprintf(stderr, "%s: Failed to parse the target guid file: %s", __func__, strerror(errno));
+ exit(1);
+ }
+
+ close(fd_target_dir);
+
+ pmt_diriter_init(&pmt_iter);
+
+ for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL;
+ dirname = pmt_diriter_next(&pmt_iter)) {
+
+ fd_telem_dir = openat(dirfd(pmt_iter.dir), dirname->d_name, O_RDONLY | O_DIRECTORY);
+ if (fd_telem_dir == -1)
+ continue;
+
+ if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) {
+ fprintf(stderr, "%s: Failed to parse the guid file: %s", __func__, strerror(errno));
+ continue;
+ }
+
+ if (fstat(fd_telem_dir, &stat) == -1) {
+ fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__,
+ dirname->d_name, strerror(errno));
+ continue;
+ }
+
+ /*
+ * If reached the same directory as target, exit the loop.
+ * Seq has the correct value now.
+ */
+ if (stat.st_dev == target_stat.st_dev && stat.st_ino == target_stat.st_ino) {
+ ret = 0;
+ break;
+ }
+
+ /*
+ * If reached directory with the same guid,
+ * but it's not the target directory yet,
+ * increment seq and continue the search.
+ */
+ if (guid == target_guid)
+ ++seq;
+
+ close(fd_telem_dir);
+ fd_telem_dir = -1;
+ }
+
+ pmt_diriter_remove(&pmt_iter);
+
+ if (fd_telem_dir != -1)
+ close(fd_telem_dir);
+
+ if (!ret) {
+ *out_guid = target_guid;
+ *out_seq = seq;
+ }
+
+ return ret;
+}
+
void parse_add_command_pmt(char *add_command)
{
char *name = NULL;
char *type_name = NULL;
char *format_name = NULL;
+ char *direct_path = NULL;
+ static const char direct_path_prefix[] = "path=";
unsigned int offset;
unsigned int lsb;
unsigned int msb;
unsigned int guid;
+ unsigned int seq = 0; /* By default, pick first file in a sequence with a given GUID. */
unsigned int domain_id;
enum counter_scope scope = 0;
enum pmt_datatype type = PMT_TYPE_RAW;
@@ -9739,6 +10395,13 @@ void parse_add_command_pmt(char *add_command)
goto next;
}
+ if (sscanf(add_command, "seq=%x", &seq) == 1)
+ goto next;
+
+ if (strncmp(add_command, direct_path_prefix, strlen(direct_path_prefix)) == 0) {
+ direct_path = add_command + strlen(direct_path_prefix);
+ goto next;
+ }
next:
add_command = strchr(add_command, ',');
if (add_command) {
@@ -9789,6 +10452,11 @@ next:
has_type = true;
}
+ if (strcmp("tcore_clock", type_name) == 0) {
+ type = PMT_TYPE_TCORE_CLOCK;
+ has_type = true;
+ }
+
if (!has_type) {
printf("%s: invalid %s: %s\n", __func__, "type", type_name);
exit(1);
@@ -9810,8 +10478,24 @@ next:
exit(1);
}
+ if (direct_path && has_guid) {
+ printf("%s: path and guid+seq parameters are mutually exclusive\n"
+ "notice: passed guid=0x%x and path=%s\n", __func__, guid, direct_path);
+ exit(1);
+ }
+
+ if (direct_path) {
+ if (pmt_parse_from_path(direct_path, &guid, &seq)) {
+ printf("%s: failed to parse PMT file from %s\n", __func__, direct_path);
+ exit(1);
+ }
+
+ /* GUID was just infered from the direct path. */
+ has_guid = true;
+ }
+
if (!has_guid) {
- printf("%s: missing %s\n", __func__, "guid");
+ printf("%s: missing %s\n", __func__, "guid or path");
exit(1);
}
@@ -9825,7 +10509,7 @@ next:
exit(1);
}
- pmt_add_counter(guid, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED);
+ pmt_add_counter(guid, seq, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED);
}
void parse_add_command(char *add_command)
@@ -9855,12 +10539,13 @@ int is_deferred_skip(char *name)
return 0;
}
-void probe_sysfs(void)
+void probe_cpuidle_residency(void)
{
char path[64];
char name_buf[16];
FILE *input;
int state;
+ int min_state = 1024, max_state = 0;
char *sp;
for (state = 10; state >= 0; --state) {
@@ -9885,14 +10570,32 @@ void probe_sysfs(void)
sprintf(path, "cpuidle/state%d/time", state);
- if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
+ if (!DO_BIC(BIC_pct_idle) && !is_deferred_add(name_buf))
continue;
if (is_deferred_skip(name_buf))
continue;
add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0);
+
+ if (state > max_state)
+ max_state = state;
+ if (state < min_state)
+ min_state = state;
}
+}
+
+void probe_cpuidle_counts(void)
+{
+ char path[64];
+ char name_buf[16];
+ FILE *input;
+ int state;
+ int min_state = 1024, max_state = 0;
+ char *sp;
+
+ if (!DO_BIC(BIC_cpuidle))
+ return;
for (state = 10; state >= 0; --state) {
@@ -9902,26 +10605,52 @@ void probe_sysfs(void)
continue;
if (!fgets(name_buf, sizeof(name_buf), input))
err(1, "%s: failed to read file", path);
- /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
- sp = strchr(name_buf, '-');
- if (!sp)
- sp = strchrnul(name_buf, '\n');
- *sp = '\0';
fclose(input);
remove_underbar(name_buf);
- sprintf(path, "cpuidle/state%d/usage", state);
-
- if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
+ if (!DO_BIC(BIC_cpuidle) && !is_deferred_add(name_buf))
continue;
if (is_deferred_skip(name_buf))
continue;
+ /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
+ sp = strchr(name_buf, '-');
+ if (!sp)
+ sp = strchrnul(name_buf, '\n');
+
+ /*
+ * The 'below' sysfs file always contains 0 for the deepest state (largest index),
+ * do not add it.
+ */
+ if (state != max_state) {
+ /*
+ * Add 'C1+' for C1, and so on. The 'below' sysfs file always contains 0 for
+ * the last state, so do not add it.
+ */
+
+ *sp = '+';
+ *(sp + 1) = '\0';
+ sprintf(path, "cpuidle/state%d/below", state);
+ add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
+ }
+
+ *sp = '\0';
+ sprintf(path, "cpuidle/state%d/usage", state);
add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
- }
+ /*
+ * The 'above' sysfs file always contains 0 for the shallowest state (smallest
+ * index), do not add it.
+ */
+ if (state != min_state) {
+ *sp = '-';
+ *(sp + 1) = '\0';
+ sprintf(path, "cpuidle/state%d/above", state);
+ add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
+ }
+ }
}
/*
@@ -9973,6 +10702,7 @@ void cmdline(int argc, char **argv)
{ "Dump", no_argument, 0, 'D' },
{ "debug", no_argument, 0, 'd' }, /* internal, not documented */
{ "enable", required_argument, 0, 'e' },
+ { "force", no_argument, 0, 'f' },
{ "interval", required_argument, 0, 'i' },
{ "IPC", no_argument, 0, 'I' },
{ "num_iterations", required_argument, 0, 'n' },
@@ -10033,6 +10763,9 @@ void cmdline(int argc, char **argv)
/* --enable specified counter */
bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
break;
+ case 'f':
+ force_load++;
+ break;
case 'd':
debug++;
ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
@@ -10161,7 +10894,8 @@ skip_cgroup_setting:
print_bootcmd();
}
- probe_sysfs();
+ probe_cpuidle_residency();
+ probe_cpuidle_counts();
if (!getuid())
set_rlimit();