summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt7
-rw-r--r--Documentation/admin-guide/pm/amd-pstate.rst87
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml1
-rw-r--r--Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml1
-rw-r--r--MAINTAINERS25
-rw-r--r--arch/x86/include/asm/cpufeatures.h2
-rw-r--r--arch/x86/include/asm/msr-index.h5
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/power/hibernate_64.c2
-rw-r--r--drivers/acpi/cppc_acpi.c3
-rw-r--r--drivers/cpufreq/Kconfig5
-rw-r--r--drivers/cpufreq/Kconfig.arm2
-rw-r--r--drivers/cpufreq/Kconfig.x8614
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c31
-rw-r--r--drivers/cpufreq/amd-pstate-trace.h35
-rw-r--r--drivers/cpufreq/amd-pstate-ut.c279
-rw-r--r--drivers/cpufreq/amd-pstate.c627
-rw-r--r--drivers/cpufreq/amd-pstate.h37
-rw-r--r--drivers/cpufreq/cppc_cpufreq.c10
-rw-r--r--drivers/cpufreq/cpufreq-dt-platdev.c1
-rw-r--r--drivers/cpufreq/cpufreq.c85
-rw-r--r--drivers/cpufreq/cpufreq_governor.h5
-rw-r--r--drivers/cpufreq/intel_pstate.c6
-rw-r--r--drivers/cpufreq/tegra194-cpufreq.c4
-rw-r--r--drivers/cpuidle/Kconfig2
-rw-r--r--drivers/cpuidle/Kconfig.mips2
-rw-r--r--drivers/cpuidle/Kconfig.powerpc2
-rw-r--r--drivers/cpuidle/cpuidle.c12
-rw-r--r--drivers/cpuidle/governors/gov.h5
-rw-r--r--drivers/cpuidle/governors/menu.c15
-rw-r--r--drivers/cpuidle/governors/teo.c81
-rw-r--r--drivers/devfreq/devfreq.c108
-rw-r--r--drivers/devfreq/tegra30-devfreq.c17
-rw-r--r--drivers/idle/intel_idle.c42
-rw-r--r--drivers/opp/core.c2
-rw-r--r--drivers/opp/debugfs.c20
-rw-r--r--drivers/powercap/intel_rapl_common.c565
-rw-r--r--drivers/powercap/intel_rapl_msr.c393
-rw-r--r--drivers/powercap/intel_rapl_tpmi.c101
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c81
-rw-r--r--include/acpi/cppc_acpi.h1
-rw-r--r--include/linux/cpufreq.h11
-rw-r--r--include/linux/intel_rapl.h52
-rw-r--r--include/linux/powercap.h4
-rw-r--r--include/linux/units.h3
-rw-r--r--kernel/power/user.c7
-rw-r--r--kernel/sched/cpufreq_schedutil.c5
-rw-r--r--rust/kernel/cpufreq.rs13
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h2
-rw-r--r--tools/power/cpupower/man/cpupower-frequency-info.18
-rw-r--r--tools/power/cpupower/man/cpupower-idle-info.14
-rw-r--r--tools/power/cpupower/man/cpupower-info.19
-rw-r--r--tools/power/cpupower/utils/cpufreq-info.c2
-rw-r--r--tools/power/cpupower/utils/cpufreq-set.c2
-rw-r--r--tools/power/cpupower/utils/cpuidle-info.c2
-rw-r--r--tools/power/cpupower/utils/cpuidle-set.c2
-rw-r--r--tools/power/cpupower/utils/cpupower-info.c2
-rw-r--r--tools/power/cpupower/utils/cpupower-set.c2
58 files changed, 1951 insertions, 903 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index c17ad454dd81..2075e7a9dcde 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -501,6 +501,13 @@ Kernel parameters
disable
Disable amd-pstate preferred core.
+ amd_dynamic_epp=
+ [X86]
+ disable
+ Disable amd-pstate dynamic EPP.
+ enable
+ Enable amd-pstate dynamic EPP.
+
amijoy.map= [HW,JOY] Amiga joystick support
Map of devices attached to JOY0DAT and JOY1DAT
Format: <a>,<b>
diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
index e1771f2225d5..f8e7050fc762 100644
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -239,8 +239,12 @@ control its functionality at the system level. They are located in the
root@hr-test1:/home/ray# ls /sys/devices/system/cpu/cpufreq/policy0/*amd*
/sys/devices/system/cpu/cpufreq/policy0/amd_pstate_highest_perf
+ /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_hw_prefcore
/sys/devices/system/cpu/cpufreq/policy0/amd_pstate_lowest_nonlinear_freq
/sys/devices/system/cpu/cpufreq/policy0/amd_pstate_max_freq
+ /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_floor_freq
+ /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_floor_count
+ /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_prefcore_ranking
``amd_pstate_highest_perf / amd_pstate_max_freq``
@@ -264,14 +268,46 @@ This attribute is read-only.
``amd_pstate_hw_prefcore``
-Whether the platform supports the preferred core feature and it has been
-enabled. This attribute is read-only.
+Whether the platform supports the preferred core feature and whether
+it has been enabled. This attribute is read-only. This file is only
+visible on platforms which support the preferred core feature.
``amd_pstate_prefcore_ranking``
The performance ranking of the core. This number doesn't have any unit, but
larger numbers are preferred at the time of reading. This can change at
-runtime based on platform conditions. This attribute is read-only.
+runtime based on platform conditions. This attribute is read-only. This file
+is only visible on platforms which support the preferred core feature.
+
+``amd_pstate_floor_freq``
+
+The floor frequency associated with each CPU. Userspace can write any
+value between ``cpuinfo_min_freq`` and ``scaling_max_freq`` into this
+file. When the system is under power or thermal constraints, the
+platform firmware will attempt to throttle the CPU frequency to the
+value specified in ``amd_pstate_floor_freq`` before throttling it
+further. This allows userspace to specify different floor frequencies
+to different CPUs. For optimal results, threads of the same core
+should have the same floor frequency value. This file is only visible
+on platforms that support the CPPC Performance Priority feature.
+
+
+``amd_pstate_floor_count``
+
+The number of distinct Floor Performance levels supported by the
+platform. For example, if this value is 2, then the number of unique
+values obtained from the command ``cat
+/sys/devices/system/cpu/cpufreq/policy*/amd_pstate_floor_freq |
+sort -n | uniq`` should be at most this number for the behavior
+described in ``amd_pstate_floor_freq`` to take effect. A zero value
+implies that the platform supports unlimited floor performance levels.
+This file is only visible on platforms that support the CPPC
+Performance Priority feature.
+
+**Note**: When ``amd_pstate_floor_count`` is non-zero, the frequency to
+which the CPU is throttled under power or thermal constraints is
+undefined when the number of unique values of ``amd_pstate_floor_freq``
+across all CPUs in the system exceeds ``amd_pstate_floor_count``.
``energy_performance_available_preferences``
@@ -280,16 +316,22 @@ A list of all the supported EPP preferences that could be used for
These profiles represent different hints that are provided
to the low-level firmware about the user's desired energy vs efficiency
tradeoff. ``default`` represents the epp value is set by platform
-firmware. This attribute is read-only.
+firmware. ``custom`` designates that integer values 0-255 may be written
+as well. This attribute is read-only.
``energy_performance_preference``
The current energy performance preference can be read from this attribute.
and user can change current preference according to energy or performance needs
-Please get all support profiles list from
-``energy_performance_available_preferences`` attribute, all the profiles are
-integer values defined between 0 to 255 when EPP feature is enabled by platform
-firmware, if EPP feature is disabled, driver will ignore the written value
+Coarse named profiles are available in the attribute
+``energy_performance_available_preferences``.
+Users can also write individual integer values between 0 to 255.
+When dynamic EPP is enabled, writes to ``energy_performance_preference``
+are blocked even when the EPP feature is enabled by platform firmware.
+Lower epp values shift the bias towards improved performance while a
+higher epp value shifts the bias towards power-savings. The exact
+impact can change from one platform to the other.
+If a valid integer was last written, then a number will be returned on future reads.
+If a valid string was last written then a string will be returned on future reads.
This attribute is read-write.
``boost``
@@ -311,6 +353,24 @@ boost or `1` to enable it, for the respective CPU using the sysfs path
Other performance and frequency values can be read back from
``/sys/devices/system/cpu/cpuX/acpi_cppc/``, see :ref:`cppc_sysfs`.
+Dynamic energy performance profile
+==================================
+The amd-pstate driver supports dynamically selecting the energy performance
+profile based on whether the machine is running on AC or DC power.
+
+Whether this behavior is enabled by default depends on the kernel
+config option ``CONFIG_X86_AMD_PSTATE_DYNAMIC_EPP``. This behavior can also
+be overridden at runtime by the sysfs file
+``/sys/devices/system/cpu/cpufreq/policyX/dynamic_epp``.
+
+When set to enabled, the driver will select a different energy performance
+profile when the machine is running on battery or AC power. The driver will
+also register with the platform profile handler to receive notifications of
+user desired power state and react to those.
+When set to disabled, the driver will not change the energy performance profile
+based on the power source and will not react to user desired power state.
+
+Attempting to manually write to the ``energy_performance_preference`` sysfs
+file will fail when ``dynamic_epp`` is enabled.
``amd-pstate`` vs ``acpi-cpufreq``
======================================
@@ -422,6 +482,13 @@ For systems that support ``amd-pstate`` preferred core, the core rankings will
always be advertised by the platform. But OS can choose to ignore that via the
kernel parameter ``amd_prefcore=disable``.
+``amd_dynamic_epp``
+
+When AMD pstate is in auto mode, dynamic EPP will control whether the kernel
+autonomously changes the EPP mode. The default is configured by
+``CONFIG_X86_AMD_PSTATE_DYNAMIC_EPP`` but can be explicitly enabled with
+``amd_dynamic_epp=enable`` or disabled with ``amd_dynamic_epp=disable``.
+
User Space Interface in ``sysfs`` - General
===========================================
@@ -790,13 +857,13 @@ Reference
===========
.. [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming,
- https://www.amd.com/system/files/TechDocs/24593.pdf
+ https://docs.amd.com/v/u/en-US/24593_3.44_APM_Vol2
.. [2] Advanced Configuration and Power Interface Specification,
https://uefi.org/sites/default/files/resources/ACPI_Spec_6_4_Jan22.pdf
.. [3] Processor Programming Reference (PPR) for AMD Family 19h Model 51h, Revision A1 Processors
- https://www.amd.com/system/files/TechDocs/56569-A1-PUB.zip
+ https://docs.amd.com/v/u/en-US/56569-A1-PUB_3.03
.. [4] Linux Kernel Selftests,
https://www.kernel.org/doc/html/latest/dev-tools/kselftest.html
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml
index 36dbd0838f2d..fe9c8791f227 100644
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml
@@ -24,6 +24,7 @@ properties:
enum:
- nvidia,tegra186-ccplex-cluster
- nvidia,tegra234-ccplex-cluster
+ - nvidia,tegra238-ccplex-cluster
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
index 22eeaef14f55..98eb36bff172 100644
--- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
@@ -35,6 +35,7 @@ properties:
- description: v2 of CPUFREQ HW (EPSS)
items:
- enum:
+ - qcom,eliza-cpufreq-epss
- qcom,milos-cpufreq-epss
- qcom,qcs8300-cpufreq-epss
- qcom,qdu1000-cpufreq-epss
diff --git a/MAINTAINERS b/MAINTAINERS
index 0ce430d6e2a5..20c2b5b0bb8d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1234,9 +1234,9 @@ F: drivers/gpu/drm/amd/pm/
AMD PSTATE DRIVER
M: Huang Rui <ray.huang@amd.com>
-M: Gautham R. Shenoy <gautham.shenoy@amd.com>
M: Mario Limonciello <mario.limonciello@amd.com>
R: Perry Yuan <perry.yuan@amd.com>
+R: K Prateek Nayak <kprateek.nayak@amd.com>
L: linux-pm@vger.kernel.org
S: Supported
F: Documentation/admin-guide/pm/amd-pstate.rst
@@ -6618,6 +6618,17 @@ M: Bence Csókás <bence98@sch.bme.hu>
S: Maintained
F: drivers/i2c/busses/i2c-cp2615.c
+CPU FREQUENCY DRIVERS - CPPC CPUFREQ
+M: "Rafael J. Wysocki" <rafael@kernel.org>
+M: Viresh Kumar <viresh.kumar@linaro.org>
+R: Jie Zhan <zhanjie9@hisilicon.com>
+R: Lifeng Zheng <zhenglifeng1@huawei.com>
+R: Pierre Gondois <pierre.gondois@arm.com>
+R: Sumit Gupta <sumitg@nvidia.com>
+L: linux-pm@vger.kernel.org
+S: Maintained
+F: drivers/cpufreq/cppc_cpufreq.c
+
CPU FREQUENCY DRIVERS - VEXPRESS SPC ARM BIG LITTLE
M: Viresh Kumar <viresh.kumar@linaro.org>
M: Sudeep Holla <sudeep.holla@kernel.org>
@@ -6626,6 +6637,12 @@ S: Maintained
W: http://www.arm.com/products/processors/technologies/biglittleprocessing.php
F: drivers/cpufreq/vexpress-spc-cpufreq.c
+CPU FREQUENCY DRIVERS - VIRTUAL MACHINE CPUFREQ
+M: Saravana Kannan <saravanak@kernel.org>
+L: linux-pm@vger.kernel.org
+S: Maintained
+F: drivers/cpufreq/virtual-cpufreq.c
+
CPU FREQUENCY SCALING FRAMEWORK
M: "Rafael J. Wysocki" <rafael@kernel.org>
M: Viresh Kumar <viresh.kumar@linaro.org>
@@ -6645,12 +6662,6 @@ F: kernel/sched/cpufreq*.c
F: rust/kernel/cpufreq.rs
F: tools/testing/selftests/cpufreq/
-CPU FREQUENCY DRIVERS - VIRTUAL MACHINE CPUFREQ
-M: Saravana Kannan <saravanak@kernel.org>
-L: linux-pm@vger.kernel.org
-S: Maintained
-F: drivers/cpufreq/virtual-cpufreq.c
-
CPU HOTPLUG
M: Thomas Gleixner <tglx@kernel.org>
M: Peter Zijlstra <peterz@infradead.org>
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index dbe104df339b..86d17b195e79 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -415,7 +415,7 @@
*/
#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */
#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */
-
+#define X86_FEATURE_CPPC_PERF_PRIO (17*32+ 2) /* CPPC Floor Perf support */
#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 6673601246b3..e126c7fb69cf 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -765,12 +765,14 @@
#define MSR_AMD_CPPC_CAP2 0xc00102b2
#define MSR_AMD_CPPC_REQ 0xc00102b3
#define MSR_AMD_CPPC_STATUS 0xc00102b4
+#define MSR_AMD_CPPC_REQ2 0xc00102b5
/* Masks for use with MSR_AMD_CPPC_CAP1 */
#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0)
#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8)
#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16)
#define AMD_CPPC_HIGHEST_PERF_MASK GENMASK(31, 24)
+#define AMD_CPPC_FLOOR_PERF_CNT_MASK GENMASK_ULL(39, 32)
/* Masks for use with MSR_AMD_CPPC_REQ */
#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0)
@@ -778,6 +780,9 @@
#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16)
#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24)
+/* Masks for use with MSR_AMD_CPPC_REQ2 */
+#define AMD_CPPC_FLOOR_PERF_MASK GENMASK(7, 0)
+
/* AMD Performance Counter Global Status and Control MSRs */
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 42c7eac0c387..837d6a4b0c28 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -52,6 +52,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
{ X86_FEATURE_AMD_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 },
+ { X86_FEATURE_CPPC_PERF_PRIO, CPUID_EDX, 16, 0x80000007, 0 },
{ X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
{ X86_FEATURE_X2AVIC_EXT, CPUID_ECX, 6, 0x8000000a, 0 },
{ X86_FEATURE_COHERENCY_SFW_NO, CPUID_EBX, 31, 0x8000001f, 0 },
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index a595953f1d6d..e72d26acae79 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -14,8 +14,6 @@
#include <linux/kdebug.h>
#include <linux/pgtable.h>
-#include <crypto/hash.h>
-
#include <asm/e820/api.h>
#include <asm/init.h>
#include <asm/proto.h>
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 053fc6765a59..2e91c5a97761 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -2145,7 +2145,7 @@ static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private)
}
/* Look up the max frequency in DMI */
-static u64 cppc_get_dmi_max_khz(void)
+u64 cppc_get_dmi_max_khz(void)
{
u16 mhz = 0;
@@ -2159,6 +2159,7 @@ static u64 cppc_get_dmi_max_khz(void)
return KHZ_PER_MHZ * mhz;
}
+EXPORT_SYMBOL_GPL(cppc_get_dmi_max_khz);
/*
* If CPPC lowest_freq and nominal_freq registers are exposed then we can
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 78702a08364f..db83f3365698 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -163,7 +163,6 @@ config CPU_FREQ_GOV_ONDEMAND
config CPU_FREQ_GOV_CONSERVATIVE
tristate "'conservative' cpufreq governor"
- depends on CPU_FREQ
select CPU_FREQ_GOV_COMMON
help
'conservative' - this driver is rather similar to the 'ondemand'
@@ -188,7 +187,7 @@ config CPU_FREQ_GOV_CONSERVATIVE
config CPU_FREQ_GOV_SCHEDUTIL
bool "'schedutil' cpufreq policy governor"
- depends on CPU_FREQ && SMP
+ depends on SMP
select CPU_FREQ_GOV_ATTR_SET
select IRQ_WORK
help
@@ -365,6 +364,6 @@ config ACPI_CPPC_CPUFREQ_FIE
If in doubt, say N.
-endif
+endif # CPU_FREQ
endmenu
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 4014bc9dd73a..47c9b031f1b3 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -248,7 +248,7 @@ config ARM_TEGRA186_CPUFREQ
config ARM_TEGRA194_CPUFREQ
tristate "Tegra194 CPUFreq support"
- depends on ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC || (64BIT && COMPILE_TEST)
+ depends on ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC
depends on TEGRA_BPMP
default ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC
help
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index 2c5c228408bf..027e6ea2e038 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -40,6 +40,8 @@ config X86_AMD_PSTATE
select ACPI_PROCESSOR
select ACPI_CPPC_LIB if X86_64
select CPU_FREQ_GOV_SCHEDUTIL if SMP
+ select ACPI_PLATFORM_PROFILE
+ select POWER_SUPPLY
help
This driver adds a CPUFreq driver which utilizes a fine grain
processor performance frequency control range instead of legacy
@@ -68,6 +70,18 @@ config X86_AMD_PSTATE_DEFAULT_MODE
For details, take a look at:
<file:Documentation/admin-guide/pm/amd-pstate.rst>.
+config X86_AMD_PSTATE_DYNAMIC_EPP
+ bool "AMD Processor P-State dynamic EPP support"
+ depends on X86_AMD_PSTATE
+ default n
+ help
+ Allow the kernel to dynamically change the energy performance
+ value in response to events such as ACPI platform profile
+ changes and AC adapter plug events.
+
+ This feature can also be changed at runtime; this configuration
+ option only sets the kernel's default behavior.
+
config X86_AMD_PSTATE_UT
tristate "selftest for AMD Processor P-State driver"
depends on X86 && ACPI_PROCESSOR
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index e7eff6c2f092..21639d9ac753 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -675,6 +675,29 @@ static inline u64 get_max_boost_ratio(unsigned int cpu, u64 *nominal_freq)
}
#endif
+static void acpi_cpufreq_resolve_max_freq(struct cpufreq_policy *policy,
+ unsigned int pss_max_freq)
+{
+#ifdef CONFIG_ACPI_CPPC_LIB
+ u64 max_speed = cppc_get_dmi_max_khz();
+ /*
+ * Use DMI "Max Speed" if it looks plausible: must be
+ * above _PSS P0 frequency and within 2x of it.
+ */
+ if (max_speed > pss_max_freq && max_speed < pss_max_freq * 2) {
+ policy->cpuinfo.max_freq = max_speed;
+ return;
+ }
+#endif
+ /*
+ * If the maximum "boost" frequency is unknown, ask the arch
+ * scale-invariance code to use the "nominal" performance for
+ * CPU utilization scaling so as to prevent the schedutil
+ * governor from selecting inadequate CPU frequencies.
+ */
+ arch_set_max_freq_ratio(true);
+}
+
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
struct cpufreq_frequency_table *freq_table;
@@ -849,13 +872,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
policy->cpuinfo.max_freq = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT;
} else {
- /*
- * If the maximum "boost" frequency is unknown, ask the arch
- * scale-invariance code to use the "nominal" performance for
- * CPU utilization scaling so as to prevent the schedutil
- * governor from selecting inadequate CPU frequencies.
- */
- arch_set_max_freq_ratio(true);
+ acpi_cpufreq_resolve_max_freq(policy, freq_table[0].frequency);
}
policy->freq_table = freq_table;
diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h
index 32e1bdc588c5..91fa073b2be4 100644
--- a/drivers/cpufreq/amd-pstate-trace.h
+++ b/drivers/cpufreq/amd-pstate-trace.h
@@ -133,6 +133,41 @@ TRACE_EVENT(amd_pstate_epp_perf,
)
);
+TRACE_EVENT(amd_pstate_cppc_req2,
+
+ TP_PROTO(unsigned int cpu_id,
+ u8 floor_perf,
+ bool changed,
+ int err_code
+ ),
+
+ TP_ARGS(cpu_id,
+ floor_perf,
+ changed,
+ err_code),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, cpu_id)
+ __field(u8, floor_perf)
+ __field(bool, changed)
+ __field(int, err_code)
+ ),
+
+ TP_fast_assign(
+ __entry->cpu_id = cpu_id;
+ __entry->floor_perf = floor_perf;
+ __entry->changed = changed;
+ __entry->err_code = err_code;
+ ),
+
+ TP_printk("cpu%u: floor_perf=%u, changed=%u (error = %d)",
+ __entry->cpu_id,
+ __entry->floor_perf,
+ __entry->changed,
+ __entry->err_code
+ )
+);
+
#endif /* _AMD_PSTATE_TRACE_H */
/* This part must be outside protection */
diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c
index 447b9aa5ce40..aa8a464fab47 100644
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -23,9 +23,12 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/bitfield.h>
+#include <linux/cpufeature.h>
+#include <linux/cpufreq.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
+#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/cleanup.h>
@@ -35,6 +38,11 @@
#include "amd-pstate.h"
+static char *test_list;
+module_param(test_list, charp, 0444);
+MODULE_PARM_DESC(test_list,
+ "Comma-delimited list of tests to run (empty means run all tests)");
+DEFINE_FREE(cleanup_page, void *, if (_T) free_page((unsigned long)_T))
struct amd_pstate_ut_struct {
const char *name;
@@ -48,16 +56,39 @@ static int amd_pstate_ut_acpi_cpc_valid(u32 index);
static int amd_pstate_ut_check_enabled(u32 index);
static int amd_pstate_ut_check_perf(u32 index);
static int amd_pstate_ut_check_freq(u32 index);
+static int amd_pstate_ut_epp(u32 index);
static int amd_pstate_ut_check_driver(u32 index);
+static int amd_pstate_ut_check_freq_attrs(u32 index);
static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = {
- {"amd_pstate_ut_acpi_cpc_valid", amd_pstate_ut_acpi_cpc_valid },
- {"amd_pstate_ut_check_enabled", amd_pstate_ut_check_enabled },
- {"amd_pstate_ut_check_perf", amd_pstate_ut_check_perf },
- {"amd_pstate_ut_check_freq", amd_pstate_ut_check_freq },
- {"amd_pstate_ut_check_driver", amd_pstate_ut_check_driver }
+ {"amd_pstate_ut_acpi_cpc_valid", amd_pstate_ut_acpi_cpc_valid },
+ {"amd_pstate_ut_check_enabled", amd_pstate_ut_check_enabled },
+ {"amd_pstate_ut_check_perf", amd_pstate_ut_check_perf },
+ {"amd_pstate_ut_check_freq", amd_pstate_ut_check_freq },
+ {"amd_pstate_ut_epp", amd_pstate_ut_epp },
+ {"amd_pstate_ut_check_driver", amd_pstate_ut_check_driver },
+ {"amd_pstate_ut_check_freq_attrs", amd_pstate_ut_check_freq_attrs },
};
+static bool test_in_list(const char *list, const char *name)
+{
+ size_t name_len = strlen(name);
+ const char *p = list;
+
+ while (*p) {
+ const char *sep = strchr(p, ',');
+ size_t token_len = sep ? sep - p : strlen(p);
+
+ if (token_len == name_len && !strncmp(p, name, token_len))
+ return true;
+ if (!sep)
+ break;
+ p = sep + 1;
+ }
+
+ return false;
+}
+
static bool get_shared_mem(void)
{
bool result = false;
@@ -241,6 +272,111 @@ static int amd_pstate_set_mode(enum amd_pstate_mode mode)
return amd_pstate_update_status(mode_str, strlen(mode_str));
}
+static int amd_pstate_ut_epp(u32 index)
+{
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
+ char *buf __free(cleanup_page) = NULL;
+ static const char * const epp_strings[] = {
+ "performance",
+ "balance_performance",
+ "balance_power",
+ "power",
+ };
+ struct amd_cpudata *cpudata;
+ enum amd_pstate_mode orig_mode;
+ bool orig_dynamic_epp;
+ int ret, cpu = 0;
+ int i;
+ u16 epp;
+
+ policy = cpufreq_cpu_get(cpu);
+ if (!policy)
+ return -ENODEV;
+
+ cpudata = policy->driver_data;
+ orig_mode = amd_pstate_get_status();
+ orig_dynamic_epp = cpudata->dynamic_epp;
+
+ /* disable dynamic EPP before running test */
+ if (cpudata->dynamic_epp) {
+ pr_debug("Dynamic EPP is enabled, disabling it\n");
+ amd_pstate_clear_dynamic_epp(policy);
+ }
+
+ buf = (char *)__get_free_page(GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ ret = amd_pstate_set_mode(AMD_PSTATE_ACTIVE);
+ if (ret)
+ goto out;
+
+ for (epp = 0; epp <= U8_MAX; epp++) {
+ u8 val;
+
+ /* write all EPP values */
+ memset(buf, 0, PAGE_SIZE);
+ snprintf(buf, PAGE_SIZE, "%d", epp);
+ ret = store_energy_performance_preference(policy, buf, strlen(buf));
+ if (ret < 0)
+ goto out;
+
+ /* check if the EPP value reads back correctly for raw numbers */
+ memset(buf, 0, PAGE_SIZE);
+ ret = show_energy_performance_preference(policy, buf);
+ if (ret < 0)
+ goto out;
+ strreplace(buf, '\n', '\0');
+ ret = kstrtou8(buf, 0, &val);
+ if (!ret && epp != val) {
+ pr_err("Raw EPP value mismatch: %d != %d\n", epp, val);
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(epp_strings); i++) {
+ memset(buf, 0, PAGE_SIZE);
+ snprintf(buf, PAGE_SIZE, "%s", epp_strings[i]);
+ ret = store_energy_performance_preference(policy, buf, strlen(buf));
+ if (ret < 0)
+ goto out;
+
+ memset(buf, 0, PAGE_SIZE);
+ ret = show_energy_performance_preference(policy, buf);
+ if (ret < 0)
+ goto out;
+ strreplace(buf, '\n', '\0');
+
+ if (strcmp(buf, epp_strings[i])) {
+ pr_err("String EPP value mismatch: %s != %s\n", buf, epp_strings[i]);
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+out:
+ if (orig_dynamic_epp) {
+ int ret2;
+
+ ret2 = amd_pstate_set_mode(AMD_PSTATE_DISABLE);
+ if (!ret && ret2)
+ ret = ret2;
+ }
+
+ if (orig_mode != amd_pstate_get_status()) {
+ int ret2;
+
+ ret2 = amd_pstate_set_mode(orig_mode);
+ if (!ret && ret2)
+ ret = ret2;
+ }
+
+ return ret;
+}
+
static int amd_pstate_ut_check_driver(u32 index)
{
enum amd_pstate_mode mode1, mode2 = AMD_PSTATE_DISABLE;
@@ -270,12 +406,143 @@ out:
return ret;
}
+enum attr_category {
+ ATTR_ALWAYS,
+ ATTR_PREFCORE,
+ ATTR_EPP,
+ ATTR_FLOOR_FREQ,
+};
+
+static const struct {
+ const char *name;
+ enum attr_category category;
+} expected_freq_attrs[] = {
+ {"amd_pstate_max_freq", ATTR_ALWAYS},
+ {"amd_pstate_lowest_nonlinear_freq", ATTR_ALWAYS},
+ {"amd_pstate_highest_perf", ATTR_ALWAYS},
+ {"amd_pstate_prefcore_ranking", ATTR_PREFCORE},
+ {"amd_pstate_hw_prefcore", ATTR_PREFCORE},
+ {"energy_performance_preference", ATTR_EPP},
+ {"energy_performance_available_preferences", ATTR_EPP},
+ {"amd_pstate_floor_freq", ATTR_FLOOR_FREQ},
+ {"amd_pstate_floor_count", ATTR_FLOOR_FREQ},
+};
+
+static bool attr_in_driver(struct freq_attr **driver_attrs, const char *name)
+{
+ int j;
+
+ for (j = 0; driver_attrs[j]; j++) {
+ if (!strcmp(driver_attrs[j]->attr.name, name))
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Verify that for each mode the driver's live ->attr array contains exactly
+ * the attributes that should be visible. Expected visibility is derived
+ * independently from hw_prefcore, cpu features, and the current mode —
+ * not from the driver's own visibility functions.
+ */
+static int amd_pstate_ut_check_freq_attrs(u32 index)
+{
+ enum amd_pstate_mode orig_mode = amd_pstate_get_status();
+ static const enum amd_pstate_mode modes[] = {
+ AMD_PSTATE_PASSIVE, AMD_PSTATE_ACTIVE, AMD_PSTATE_GUIDED,
+ };
+ bool has_prefcore, has_floor_freq;
+ int m, i, ret;
+
+ has_floor_freq = cpu_feature_enabled(X86_FEATURE_CPPC_PERF_PRIO);
+
+ /*
+ * Determine prefcore support from any online CPU's cpudata.
+ * hw_prefcore reflects the platform-wide decision made at init.
+ */
+ has_prefcore = false;
+ for_each_online_cpu(i) {
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
+ struct amd_cpudata *cpudata;
+
+ policy = cpufreq_cpu_get(i);
+ if (!policy)
+ continue;
+ cpudata = policy->driver_data;
+ has_prefcore = cpudata->hw_prefcore;
+ break;
+ }
+
+ for (m = 0; m < ARRAY_SIZE(modes); m++) {
+ struct freq_attr **driver_attrs;
+
+ ret = amd_pstate_set_mode(modes[m]);
+ if (ret)
+ goto out;
+
+ driver_attrs = amd_pstate_get_current_attrs();
+ if (!driver_attrs) {
+ pr_err("%s: no driver attrs in mode %s\n",
+ __func__, amd_pstate_get_mode_string(modes[m]));
+ ret = -EINVAL;
+ goto out;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(expected_freq_attrs); i++) {
+ bool expected, found;
+
+ switch (expected_freq_attrs[i].category) {
+ case ATTR_ALWAYS:
+ expected = true;
+ break;
+ case ATTR_PREFCORE:
+ expected = has_prefcore;
+ break;
+ case ATTR_EPP:
+ expected = (modes[m] == AMD_PSTATE_ACTIVE);
+ break;
+ case ATTR_FLOOR_FREQ:
+ expected = has_floor_freq;
+ break;
+ default:
+ expected = false;
+ break;
+ }
+
+ found = attr_in_driver(driver_attrs,
+ expected_freq_attrs[i].name);
+
+ if (expected != found) {
+ pr_err("%s: mode %s: attr %s expected %s but is %s\n",
+ __func__,
+ amd_pstate_get_mode_string(modes[m]),
+ expected_freq_attrs[i].name,
+ expected ? "visible" : "hidden",
+ found ? "visible" : "hidden");
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+ }
+
+ ret = 0;
+out:
+ amd_pstate_set_mode(orig_mode);
+ return ret;
+}
+
static int __init amd_pstate_ut_init(void)
{
u32 i = 0, arr_size = ARRAY_SIZE(amd_pstate_ut_cases);
for (i = 0; i < arr_size; i++) {
- int ret = amd_pstate_ut_cases[i].func(i);
+ int ret;
+
+ if (test_list && *test_list &&
+ !test_in_list(test_list, amd_pstate_ut_cases[i].name))
+ continue;
+
+ ret = amd_pstate_ut_cases[i].func(i);
if (ret)
pr_err("%-4d %-20s\t fail: %d!\n", i+1, amd_pstate_ut_cases[i].name, ret);
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 5aa9fcd80cf5..e2ae608e7043 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -36,6 +36,7 @@
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
+#include <linux/power_supply.h>
#include <linux/static_call.h>
#include <linux/topology.h>
@@ -86,6 +87,11 @@ static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
static int cppc_state = AMD_PSTATE_UNDEFINED;
static bool amd_pstate_prefcore = true;
+#ifdef CONFIG_X86_AMD_PSTATE_DYNAMIC_EPP
+static bool dynamic_epp = CONFIG_X86_AMD_PSTATE_DYNAMIC_EPP;
+#else
+static bool dynamic_epp;
+#endif
static struct quirk_entry *quirks;
/*
@@ -103,6 +109,7 @@ static struct quirk_entry *quirks;
* 2 balance_performance
* 3 balance_power
* 4 power
+ * 5 custom (for raw EPP values)
*/
enum energy_perf_value_index {
EPP_INDEX_DEFAULT = 0,
@@ -110,6 +117,7 @@ enum energy_perf_value_index {
EPP_INDEX_BALANCE_PERFORMANCE,
EPP_INDEX_BALANCE_POWERSAVE,
EPP_INDEX_POWERSAVE,
+ EPP_INDEX_CUSTOM,
EPP_INDEX_MAX,
};
@@ -119,6 +127,7 @@ static const char * const energy_perf_strings[] = {
[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
[EPP_INDEX_POWERSAVE] = "power",
+ [EPP_INDEX_CUSTOM] = "custom",
};
static_assert(ARRAY_SIZE(energy_perf_strings) == EPP_INDEX_MAX);
@@ -129,7 +138,7 @@ static unsigned int epp_values[] = {
[EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
[EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
};
-static_assert(ARRAY_SIZE(epp_values) == EPP_INDEX_MAX);
+static_assert(ARRAY_SIZE(epp_values) == EPP_INDEX_MAX - 1);
typedef int (*cppc_mode_transition_fn)(int);
@@ -261,7 +270,6 @@ static int msr_update_perf(struct cpufreq_policy *policy, u8 min_perf,
if (fast_switch) {
wrmsrq(MSR_AMD_CPPC_REQ, value);
- return 0;
} else {
int ret = wrmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
@@ -330,6 +338,75 @@ static inline int amd_pstate_set_epp(struct cpufreq_policy *policy, u8 epp)
return static_call(amd_pstate_set_epp)(policy, epp);
}
+static int amd_pstate_set_floor_perf(struct cpufreq_policy *policy, u8 perf)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+ u64 value, prev;
+ bool changed;
+ int ret;
+
+ if (!cpu_feature_enabled(X86_FEATURE_CPPC_PERF_PRIO))
+ return 0;
+
+ value = prev = READ_ONCE(cpudata->cppc_req2_cached);
+ FIELD_MODIFY(AMD_CPPC_FLOOR_PERF_MASK, &value, perf);
+
+ changed = value != prev;
+ if (!changed) {
+ ret = 0;
+ goto out_trace;
+ }
+
+ ret = wrmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ2, value);
+ if (ret) {
+ changed = false;
+ pr_err("failed to set CPPC REQ2 value. Error (%d)\n", ret);
+ goto out_trace;
+ }
+
+ WRITE_ONCE(cpudata->cppc_req2_cached, value);
+
+out_trace:
+ if (trace_amd_pstate_cppc_req2_enabled())
+ trace_amd_pstate_cppc_req2(cpudata->cpu, perf, changed, ret);
+ return ret;
+}
+
+static int amd_pstate_init_floor_perf(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+ u8 floor_perf;
+ u64 value;
+ int ret;
+
+ if (!cpu_feature_enabled(X86_FEATURE_CPPC_PERF_PRIO))
+ return 0;
+
+ ret = rdmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ2, &value);
+ if (ret) {
+ pr_err("failed to read CPPC REQ2 value. Error (%d)\n", ret);
+ return ret;
+ }
+
+ WRITE_ONCE(cpudata->cppc_req2_cached, value);
+ floor_perf = FIELD_GET(AMD_CPPC_FLOOR_PERF_MASK,
+ cpudata->cppc_req2_cached);
+
+ /* Set a sane value for floor_perf if the default value is invalid */
+ if (floor_perf < cpudata->perf.lowest_perf) {
+ floor_perf = cpudata->perf.nominal_perf;
+ ret = amd_pstate_set_floor_perf(policy, floor_perf);
+ if (ret)
+ return ret;
+ }
+
+
+ cpudata->bios_floor_perf = floor_perf;
+ cpudata->floor_freq = perf_to_freq(cpudata->perf, cpudata->nominal_freq,
+ floor_perf);
+ return 0;
+}
+
static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp)
{
struct amd_cpudata *cpudata = policy->driver_data;
@@ -427,6 +504,7 @@ static int msr_init_perf(struct amd_cpudata *cpudata)
perf.lowest_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
WRITE_ONCE(cpudata->perf, perf);
WRITE_ONCE(cpudata->prefcore_ranking, FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, cap1));
+ WRITE_ONCE(cpudata->floor_perf_cnt, FIELD_GET(AMD_CPPC_FLOOR_PERF_CNT_MASK, cap1));
return 0;
}
@@ -565,15 +643,12 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
return true;
}
-static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf,
+static void amd_pstate_update(struct cpufreq_policy *policy, u8 min_perf,
u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags)
{
- struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu);
+ struct amd_cpudata *cpudata = policy->driver_data;
union perf_cached perf = READ_ONCE(cpudata->perf);
- if (!policy)
- return;
-
/* limit the max perf when core performance boost feature is disabled */
if (!cpudata->boost_supported)
max_perf = min_t(u8, perf.nominal_perf, max_perf);
@@ -688,7 +763,7 @@ static int amd_pstate_update_freq(struct cpufreq_policy *policy,
if (!fast_switch)
cpufreq_freq_transition_begin(policy, &freqs);
- amd_pstate_update(cpudata, perf.min_limit_perf, des_perf,
+ amd_pstate_update(policy, perf.min_limit_perf, des_perf,
perf.max_limit_perf, fast_switch,
policy->governor->flags);
@@ -713,13 +788,12 @@ static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy,
return policy->cur;
}
-static void amd_pstate_adjust_perf(unsigned int cpu,
+static void amd_pstate_adjust_perf(struct cpufreq_policy *policy,
unsigned long _min_perf,
unsigned long target_perf,
unsigned long capacity)
{
u8 max_perf, min_perf, des_perf, cap_perf;
- struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu);
struct amd_cpudata *cpudata;
union perf_cached perf;
@@ -750,27 +824,23 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
if (max_perf < min_perf)
max_perf = min_perf;
- amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
+ amd_pstate_update(policy, min_perf, des_perf, max_perf, true,
policy->governor->flags);
}
static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
{
struct amd_cpudata *cpudata = policy->driver_data;
- union perf_cached perf = READ_ONCE(cpudata->perf);
- u32 nominal_freq, max_freq;
+ u32 nominal_freq;
int ret = 0;
nominal_freq = READ_ONCE(cpudata->nominal_freq);
- max_freq = perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf);
if (on)
- policy->cpuinfo.max_freq = max_freq;
+ policy->cpuinfo.max_freq = cpudata->max_freq;
else if (policy->cpuinfo.max_freq > nominal_freq)
policy->cpuinfo.max_freq = nominal_freq;
- policy->max = policy->cpuinfo.max_freq;
-
if (cppc_state == AMD_PSTATE_PASSIVE) {
ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq);
if (ret < 0)
@@ -952,13 +1022,15 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
+ /* max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf */
max_freq = perf_to_freq(perf, nominal_freq, perf.highest_perf);
+ WRITE_ONCE(cpudata->max_freq, max_freq);
+
lowest_nonlinear_freq = perf_to_freq(perf, nominal_freq, perf.lowest_nonlinear_perf);
WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
/**
* Below values need to be initialized correctly, otherwise driver will fail to load
- * max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf
* lowest_nonlinear_freq is a value between [min_freq, nominal_freq]
* Check _CPC in ACPI table objects if any values are incorrect
*/
@@ -1021,10 +1093,9 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf,
cpudata->nominal_freq,
perf.lowest_perf);
- policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf,
- cpudata->nominal_freq,
- perf.highest_perf);
+ policy->cpuinfo.max_freq = policy->max = cpudata->max_freq;
+ policy->driver_data = cpudata;
ret = amd_pstate_cppc_enable(policy);
if (ret)
goto free_cpudata1;
@@ -1037,6 +1108,12 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
if (cpu_feature_enabled(X86_FEATURE_CPPC))
policy->fast_switch_possible = true;
+ ret = amd_pstate_init_floor_perf(policy);
+ if (ret) {
+ dev_err(dev, "Failed to initialize Floor Perf (%d)\n", ret);
+ goto free_cpudata1;
+ }
+
ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE);
if (ret < 0) {
@@ -1051,7 +1128,6 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
goto free_cpudata2;
}
- policy->driver_data = cpudata;
if (!current_pstate_driver->adjust_perf)
current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
@@ -1063,6 +1139,7 @@ free_cpudata2:
free_cpudata1:
pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret);
kfree(cpudata);
+ policy->driver_data = NULL;
return ret;
}
@@ -1073,6 +1150,7 @@ static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
/* Reset CPPC_REQ MSR to the BIOS value */
amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
+ amd_pstate_set_floor_perf(policy, cpudata->bios_floor_perf);
freq_qos_remove_request(&cpudata->req[1]);
freq_qos_remove_request(&cpudata->req[0]);
@@ -1080,6 +1158,167 @@ static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
kfree(cpudata);
}
+static int amd_pstate_get_balanced_epp(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+ if (power_supply_is_system_supplied())
+ return cpudata->epp_default_ac;
+ else
+ return cpudata->epp_default_dc;
+}
+
+static int amd_pstate_power_supply_notifier(struct notifier_block *nb,
+ unsigned long event, void *data)
+{
+ struct amd_cpudata *cpudata = container_of(nb, struct amd_cpudata, power_nb);
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu);
+ u8 epp;
+ int ret;
+
+ if (event != PSY_EVENT_PROP_CHANGED)
+ return NOTIFY_OK;
+
+ /* dynamic actions are only applied while platform profile is in balanced */
+ if (cpudata->current_profile != PLATFORM_PROFILE_BALANCED)
+ return 0;
+
+ epp = amd_pstate_get_balanced_epp(policy);
+
+ ret = amd_pstate_set_epp(policy, epp);
+ if (ret)
+ pr_warn("Failed to set CPU %d EPP %u: %d\n", cpudata->cpu, epp, ret);
+
+ return NOTIFY_OK;
+}
+
+static int amd_pstate_profile_probe(void *drvdata, unsigned long *choices)
+{
+ set_bit(PLATFORM_PROFILE_LOW_POWER, choices);
+ set_bit(PLATFORM_PROFILE_BALANCED, choices);
+ set_bit(PLATFORM_PROFILE_PERFORMANCE, choices);
+
+ return 0;
+}
+
+static int amd_pstate_profile_get(struct device *dev,
+ enum platform_profile_option *profile)
+{
+ struct amd_cpudata *cpudata = dev_get_drvdata(dev);
+
+ *profile = cpudata->current_profile;
+
+ return 0;
+}
+
+static int amd_pstate_profile_set(struct device *dev,
+ enum platform_profile_option profile)
+{
+ struct amd_cpudata *cpudata = dev_get_drvdata(dev);
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu);
+ int ret;
+
+ switch (profile) {
+ case PLATFORM_PROFILE_LOW_POWER:
+ ret = amd_pstate_set_epp(policy, AMD_CPPC_EPP_POWERSAVE);
+ if (ret)
+ return ret;
+ break;
+ case PLATFORM_PROFILE_BALANCED:
+ ret = amd_pstate_set_epp(policy,
+ amd_pstate_get_balanced_epp(policy));
+ if (ret)
+ return ret;
+ break;
+ case PLATFORM_PROFILE_PERFORMANCE:
+ ret = amd_pstate_set_epp(policy, AMD_CPPC_EPP_PERFORMANCE);
+ if (ret)
+ return ret;
+ break;
+ default:
+ pr_err("Unknown Platform Profile %d\n", profile);
+ return -EOPNOTSUPP;
+ }
+
+ cpudata->current_profile = profile;
+
+ return 0;
+}
+
+static const struct platform_profile_ops amd_pstate_profile_ops = {
+ .probe = amd_pstate_profile_probe,
+ .profile_set = amd_pstate_profile_set,
+ .profile_get = amd_pstate_profile_get,
+};
+
+void amd_pstate_clear_dynamic_epp(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+ if (cpudata->power_nb.notifier_call)
+ power_supply_unreg_notifier(&cpudata->power_nb);
+ if (cpudata->ppdev) {
+ platform_profile_remove(cpudata->ppdev);
+ cpudata->ppdev = NULL;
+ }
+ kfree(cpudata->profile_name);
+ cpudata->dynamic_epp = false;
+}
+EXPORT_SYMBOL_GPL(amd_pstate_clear_dynamic_epp);
+
+static int amd_pstate_set_dynamic_epp(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+ int ret;
+ u8 epp;
+
+ switch (cpudata->current_profile) {
+ case PLATFORM_PROFILE_PERFORMANCE:
+ epp = AMD_CPPC_EPP_PERFORMANCE;
+ break;
+ case PLATFORM_PROFILE_LOW_POWER:
+ epp = AMD_CPPC_EPP_POWERSAVE;
+ break;
+ case PLATFORM_PROFILE_BALANCED:
+ epp = amd_pstate_get_balanced_epp(policy);
+ break;
+ default:
+ pr_err("Unknown Platform Profile %d\n", cpudata->current_profile);
+ return -EOPNOTSUPP;
+ }
+ ret = amd_pstate_set_epp(policy, epp);
+ if (ret)
+ return ret;
+
+ cpudata->profile_name = kasprintf(GFP_KERNEL, "amd-pstate-epp-cpu%d", cpudata->cpu);
+
+ cpudata->ppdev = platform_profile_register(get_cpu_device(policy->cpu),
+ cpudata->profile_name,
+ policy->driver_data,
+ &amd_pstate_profile_ops);
+ if (IS_ERR(cpudata->ppdev)) {
+ ret = PTR_ERR(cpudata->ppdev);
+ goto cleanup;
+ }
+
+ /* only enable notifier if things will actually change */
+ if (cpudata->epp_default_ac != cpudata->epp_default_dc) {
+ cpudata->power_nb.notifier_call = amd_pstate_power_supply_notifier;
+ ret = power_supply_reg_notifier(&cpudata->power_nb);
+ if (ret)
+ goto cleanup;
+ }
+
+ cpudata->dynamic_epp = true;
+
+ return 0;
+
+cleanup:
+ amd_pstate_clear_dynamic_epp(policy);
+
+ return ret;
+}
+
/* Sysfs attributes */
/*
@@ -1090,14 +1329,9 @@ static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
char *buf)
{
- struct amd_cpudata *cpudata;
- union perf_cached perf;
-
- cpudata = policy->driver_data;
- perf = READ_ONCE(cpudata->perf);
+ struct amd_cpudata *cpudata = policy->driver_data;
- return sysfs_emit(buf, "%u\n",
- perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf));
+ return sysfs_emit(buf, "%u\n", cpudata->max_freq);
}
static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
@@ -1167,40 +1401,60 @@ static ssize_t show_energy_performance_available_preferences(
return offset;
}
-static ssize_t store_energy_performance_preference(
- struct cpufreq_policy *policy, const char *buf, size_t count)
+ssize_t store_energy_performance_preference(struct cpufreq_policy *policy,
+ const char *buf, size_t count)
{
struct amd_cpudata *cpudata = policy->driver_data;
ssize_t ret;
+ bool raw_epp = false;
u8 epp;
- ret = sysfs_match_string(energy_perf_strings, buf);
- if (ret < 0)
- return -EINVAL;
+ if (cpudata->dynamic_epp) {
+ pr_debug("EPP cannot be set when dynamic EPP is enabled\n");
+ return -EBUSY;
+ }
- if (!ret)
- epp = cpudata->epp_default;
- else
- epp = epp_values[ret];
+ /*
+ * if the value matches a number, use that, otherwise see if
+ * matches an index in the energy_perf_strings array
+ */
+ ret = kstrtou8(buf, 0, &epp);
+ raw_epp = !ret;
+ if (ret) {
+ ret = sysfs_match_string(energy_perf_strings, buf);
+ if (ret < 0 || ret == EPP_INDEX_CUSTOM)
+ return -EINVAL;
+ if (ret)
+ epp = epp_values[ret];
+ else
+ epp = amd_pstate_get_balanced_epp(policy);
+ }
- if (epp > 0 && policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
+ if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
pr_debug("EPP cannot be set under performance policy\n");
return -EBUSY;
}
ret = amd_pstate_set_epp(policy, epp);
+ if (ret)
+ return ret;
- return ret ? ret : count;
+ cpudata->raw_epp = raw_epp;
+
+ return count;
}
+EXPORT_SYMBOL_GPL(store_energy_performance_preference);
-static ssize_t show_energy_performance_preference(
- struct cpufreq_policy *policy, char *buf)
+ssize_t show_energy_performance_preference(struct cpufreq_policy *policy, char *buf)
{
struct amd_cpudata *cpudata = policy->driver_data;
u8 preference, epp;
epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached);
+ if (cpudata->raw_epp)
+ return sysfs_emit(buf, "%u\n", epp);
+
switch (epp) {
case AMD_CPPC_EPP_PERFORMANCE:
preference = EPP_INDEX_PERFORMANCE;
@@ -1220,6 +1474,138 @@ static ssize_t show_energy_performance_preference(
return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
}
+EXPORT_SYMBOL_GPL(show_energy_performance_preference);
+
+static ssize_t store_amd_pstate_floor_freq(struct cpufreq_policy *policy,
+ const char *buf, size_t count)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
+ unsigned int freq;
+ u8 floor_perf;
+ int ret;
+
+ ret = kstrtouint(buf, 0, &freq);
+ if (ret)
+ return ret;
+
+ if (freq < policy->cpuinfo.min_freq || freq > policy->max)
+ return -EINVAL;
+
+ floor_perf = freq_to_perf(perf, cpudata->nominal_freq, freq);
+ ret = amd_pstate_set_floor_perf(policy, floor_perf);
+
+ if (!ret)
+ cpudata->floor_freq = freq;
+
+ return ret ?: count;
+}
+
+static ssize_t show_amd_pstate_floor_freq(struct cpufreq_policy *policy, char *buf)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+ return sysfs_emit(buf, "%u\n", cpudata->floor_freq);
+}
+
+static ssize_t show_amd_pstate_floor_count(struct cpufreq_policy *policy, char *buf)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+ u8 count = cpudata->floor_perf_cnt;
+
+ return sysfs_emit(buf, "%u\n", count);
+}
+
+cpufreq_freq_attr_ro(amd_pstate_max_freq);
+cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
+
+cpufreq_freq_attr_ro(amd_pstate_highest_perf);
+cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
+cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);
+cpufreq_freq_attr_rw(energy_performance_preference);
+cpufreq_freq_attr_ro(energy_performance_available_preferences);
+cpufreq_freq_attr_rw(amd_pstate_floor_freq);
+cpufreq_freq_attr_ro(amd_pstate_floor_count);
+
+struct freq_attr_visibility {
+ struct freq_attr *attr;
+ bool (*visibility_fn)(void);
+};
+
+/* For attributes which are always visible */
+static bool always_visible(void)
+{
+ return true;
+}
+
+/* Determines whether prefcore related attributes should be visible */
+static bool prefcore_visibility(void)
+{
+ return amd_pstate_prefcore;
+}
+
+/* Determines whether energy performance preference should be visible */
+static bool epp_visibility(void)
+{
+ return cppc_state == AMD_PSTATE_ACTIVE;
+}
+
+/* Determines whether amd_pstate_floor_freq related attributes should be visible */
+static bool floor_freq_visibility(void)
+{
+ return cpu_feature_enabled(X86_FEATURE_CPPC_PERF_PRIO);
+}
+
+static struct freq_attr_visibility amd_pstate_attr_visibility[] = {
+ {&amd_pstate_max_freq, always_visible},
+ {&amd_pstate_lowest_nonlinear_freq, always_visible},
+ {&amd_pstate_highest_perf, always_visible},
+ {&amd_pstate_prefcore_ranking, prefcore_visibility},
+ {&amd_pstate_hw_prefcore, prefcore_visibility},
+ {&energy_performance_preference, epp_visibility},
+ {&energy_performance_available_preferences, epp_visibility},
+ {&amd_pstate_floor_freq, floor_freq_visibility},
+ {&amd_pstate_floor_count, floor_freq_visibility},
+};
+
+struct freq_attr **amd_pstate_get_current_attrs(void)
+{
+ if (!current_pstate_driver)
+ return NULL;
+ return current_pstate_driver->attr;
+}
+EXPORT_SYMBOL_GPL(amd_pstate_get_current_attrs);
+
+static struct freq_attr **get_freq_attrs(void)
+{
+ bool attr_visible[ARRAY_SIZE(amd_pstate_attr_visibility)];
+ struct freq_attr **attrs;
+ int i, j, count;
+
+ for (i = 0, count = 0; i < ARRAY_SIZE(amd_pstate_attr_visibility); i++) {
+ struct freq_attr_visibility *v = &amd_pstate_attr_visibility[i];
+
+ attr_visible[i] = v->visibility_fn();
+ if (attr_visible[i])
+ count++;
+ }
+
+ /* amd_pstate_{max_freq, lowest_nonlinear_freq, highest_perf} should always be visible */
+ BUG_ON(!count);
+
+ attrs = kcalloc(count + 1, sizeof(struct freq_attr *), GFP_KERNEL);
+ if (!attrs)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0, j = 0; i < ARRAY_SIZE(amd_pstate_attr_visibility); i++) {
+ if (!attr_visible[i])
+ continue;
+
+ attrs[j++] = amd_pstate_attr_visibility[i].attr;
+ }
+
+ return attrs;
+}
static void amd_pstate_driver_cleanup(void)
{
@@ -1227,6 +1613,8 @@ static void amd_pstate_driver_cleanup(void)
sched_clear_itmt_support();
cppc_state = AMD_PSTATE_DISABLE;
+ kfree(current_pstate_driver->attr);
+ current_pstate_driver->attr = NULL;
current_pstate_driver = NULL;
}
@@ -1251,6 +1639,7 @@ static int amd_pstate_set_driver(int mode_idx)
static int amd_pstate_register_driver(int mode)
{
+ struct freq_attr **attr = NULL;
int ret;
ret = amd_pstate_set_driver(mode);
@@ -1259,6 +1648,22 @@ static int amd_pstate_register_driver(int mode)
cppc_state = mode;
+ /*
+ * Note: It is important to compute the attrs _after_
+ * re-initializing the cppc_state. Some attributes become
+ * visible only when cppc_state is AMD_PSTATE_ACTIVE.
+ */
+ attr = get_freq_attrs();
+ if (IS_ERR(attr)) {
+ ret = (int) PTR_ERR(attr);
+ pr_err("Couldn't compute freq_attrs for current mode %s [%d]\n",
+ amd_pstate_get_mode_string(cppc_state), ret);
+ amd_pstate_driver_cleanup();
+ return ret;
+ }
+
+ current_pstate_driver->attr = attr;
+
/* at least one CPU supports CPB */
current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB);
@@ -1400,40 +1805,42 @@ static ssize_t prefcore_show(struct device *dev,
return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
}
-cpufreq_freq_attr_ro(amd_pstate_max_freq);
-cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
+static ssize_t dynamic_epp_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%s\n", str_enabled_disabled(dynamic_epp));
+}
-cpufreq_freq_attr_ro(amd_pstate_highest_perf);
-cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
-cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);
-cpufreq_freq_attr_rw(energy_performance_preference);
-cpufreq_freq_attr_ro(energy_performance_available_preferences);
-static DEVICE_ATTR_RW(status);
-static DEVICE_ATTR_RO(prefcore);
+static ssize_t dynamic_epp_store(struct device *a, struct device_attribute *b,
+ const char *buf, size_t count)
+{
+ bool enabled;
+ int ret;
-static struct freq_attr *amd_pstate_attr[] = {
- &amd_pstate_max_freq,
- &amd_pstate_lowest_nonlinear_freq,
- &amd_pstate_highest_perf,
- &amd_pstate_prefcore_ranking,
- &amd_pstate_hw_prefcore,
- NULL,
-};
+ ret = kstrtobool(buf, &enabled);
+ if (ret)
+ return ret;
-static struct freq_attr *amd_pstate_epp_attr[] = {
- &amd_pstate_max_freq,
- &amd_pstate_lowest_nonlinear_freq,
- &amd_pstate_highest_perf,
- &amd_pstate_prefcore_ranking,
- &amd_pstate_hw_prefcore,
- &energy_performance_preference,
- &energy_performance_available_preferences,
- NULL,
-};
+ if (dynamic_epp == enabled)
+ return -EINVAL;
+
+ /* reinitialize with desired dynamic EPP value */
+ dynamic_epp = enabled;
+ ret = amd_pstate_change_driver_mode(cppc_state);
+ if (ret)
+ dynamic_epp = false;
+
+ return ret ? ret : count;
+}
+
+static DEVICE_ATTR_RW(status);
+static DEVICE_ATTR_RO(prefcore);
+static DEVICE_ATTR_RW(dynamic_epp);
static struct attribute *pstate_global_attributes[] = {
&dev_attr_status.attr,
&dev_attr_prefcore.attr,
+ &dev_attr_dynamic_epp.attr,
NULL
};
@@ -1503,9 +1910,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf,
cpudata->nominal_freq,
perf.lowest_perf);
- policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf,
- cpudata->nominal_freq,
- perf.highest_perf);
+ policy->cpuinfo.max_freq = policy->max = cpudata->max_freq;
policy->driver_data = cpudata;
ret = amd_pstate_cppc_enable(policy);
@@ -1525,15 +1930,27 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
if (amd_pstate_acpi_pm_profile_server() ||
amd_pstate_acpi_pm_profile_undefined()) {
policy->policy = CPUFREQ_POLICY_PERFORMANCE;
- cpudata->epp_default = amd_pstate_get_epp(cpudata);
+ cpudata->epp_default_ac = cpudata->epp_default_dc = amd_pstate_get_epp(cpudata);
+ cpudata->current_profile = PLATFORM_PROFILE_PERFORMANCE;
} else {
policy->policy = CPUFREQ_POLICY_POWERSAVE;
- cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
+ cpudata->epp_default_ac = AMD_CPPC_EPP_PERFORMANCE;
+ cpudata->epp_default_dc = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
+ cpudata->current_profile = PLATFORM_PROFILE_BALANCED;
}
- ret = amd_pstate_set_epp(policy, cpudata->epp_default);
+ if (dynamic_epp)
+ ret = amd_pstate_set_dynamic_epp(policy);
+ else
+ ret = amd_pstate_set_epp(policy, amd_pstate_get_balanced_epp(policy));
if (ret)
- return ret;
+ goto free_cpudata1;
+
+ ret = amd_pstate_init_floor_perf(policy);
+ if (ret) {
+ dev_err(dev, "Failed to initialize Floor Perf (%d)\n", ret);
+ goto free_cpudata1;
+ }
current_pstate_driver->adjust_perf = NULL;
@@ -1542,6 +1959,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
free_cpudata1:
pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret);
kfree(cpudata);
+ policy->driver_data = NULL;
return ret;
}
@@ -1554,7 +1972,10 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
/* Reset CPPC_REQ MSR to the BIOS value */
amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
+ amd_pstate_set_floor_perf(policy, cpudata->bios_floor_perf);
+ if (cpudata->dynamic_epp)
+ amd_pstate_clear_dynamic_epp(policy);
kfree(cpudata);
policy->driver_data = NULL;
}
@@ -1609,24 +2030,39 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
static int amd_pstate_cpu_online(struct cpufreq_policy *policy)
{
- return amd_pstate_cppc_enable(policy);
+ struct amd_cpudata *cpudata = policy->driver_data;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
+ u8 cached_floor_perf;
+ int ret;
+
+ ret = amd_pstate_cppc_enable(policy);
+ if (ret)
+ return ret;
+
+ cached_floor_perf = freq_to_perf(perf, cpudata->nominal_freq, cpudata->floor_freq);
+ return amd_pstate_set_floor_perf(policy, cached_floor_perf);
}
static int amd_pstate_cpu_offline(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
union perf_cached perf = READ_ONCE(cpudata->perf);
+ int ret;
/*
* Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
* min_perf value across kexec reboots. If this CPU is just onlined normally after this, the
* limits, epp and desired perf will get reset to the cached values in cpudata struct
*/
- return amd_pstate_update_perf(policy, perf.bios_min_perf,
+ ret = amd_pstate_update_perf(policy, perf.bios_min_perf,
FIELD_GET(AMD_CPPC_DES_PERF_MASK, cpudata->cppc_req_cached),
FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached),
FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached),
false);
+ if (ret)
+ return ret;
+
+ return amd_pstate_set_floor_perf(policy, cpudata->bios_floor_perf);
}
static int amd_pstate_suspend(struct cpufreq_policy *policy)
@@ -1648,6 +2084,10 @@ static int amd_pstate_suspend(struct cpufreq_policy *policy)
if (ret)
return ret;
+ ret = amd_pstate_set_floor_perf(policy, cpudata->bios_floor_perf);
+ if (ret)
+ return ret;
+
/* set this flag to avoid setting core offline */
cpudata->suspended = true;
@@ -1659,15 +2099,24 @@ static int amd_pstate_resume(struct cpufreq_policy *policy)
struct amd_cpudata *cpudata = policy->driver_data;
union perf_cached perf = READ_ONCE(cpudata->perf);
int cur_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->cur);
+ u8 cached_floor_perf;
+ int ret;
/* Set CPPC_REQ to last sane value until the governor updates it */
- return amd_pstate_update_perf(policy, perf.min_limit_perf, cur_perf, perf.max_limit_perf,
- 0U, false);
+ ret = amd_pstate_update_perf(policy, perf.min_limit_perf, cur_perf, perf.max_limit_perf,
+ 0U, false);
+ if (ret)
+ return ret;
+
+ cached_floor_perf = freq_to_perf(perf, cpudata->nominal_freq, cpudata->floor_freq);
+ return amd_pstate_set_floor_perf(policy, cached_floor_perf);
}
static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
+ u8 cached_floor_perf;
if (cpudata->suspended) {
int ret;
@@ -1680,7 +2129,8 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
cpudata->suspended = false;
}
- return 0;
+ cached_floor_perf = freq_to_perf(perf, cpudata->nominal_freq, cpudata->floor_freq);
+ return amd_pstate_set_floor_perf(policy, cached_floor_perf);
}
static struct cpufreq_driver amd_pstate_driver = {
@@ -1697,7 +2147,6 @@ static struct cpufreq_driver amd_pstate_driver = {
.set_boost = amd_pstate_set_boost,
.update_limits = amd_pstate_update_limits,
.name = "amd-pstate",
- .attr = amd_pstate_attr,
};
static struct cpufreq_driver amd_pstate_epp_driver = {
@@ -1713,7 +2162,6 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
.update_limits = amd_pstate_update_limits,
.set_boost = amd_pstate_set_boost,
.name = "amd-pstate-epp",
- .attr = amd_pstate_epp_attr,
};
/*
@@ -1859,7 +2307,7 @@ static int __init amd_pstate_init(void)
return ret;
global_attr_free:
- cpufreq_unregister_driver(current_pstate_driver);
+ amd_pstate_unregister_driver(0);
return ret;
}
device_initcall(amd_pstate_init);
@@ -1886,8 +2334,19 @@ static int __init amd_prefcore_param(char *str)
return 0;
}
+static int __init amd_dynamic_epp_param(char *str)
+{
+ if (!strcmp(str, "disable"))
+ dynamic_epp = false;
+ if (!strcmp(str, "enable"))
+ dynamic_epp = true;
+
+ return 0;
+}
+
early_param("amd_pstate", amd_pstate_param);
early_param("amd_prefcore", amd_prefcore_param);
+early_param("amd_dynamic_epp", amd_dynamic_epp_param);
MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
index cb45fdca27a6..e4722e54387b 100644
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -9,6 +9,7 @@
#define _LINUX_AMD_PSTATE_H
#include <linux/pm_qos.h>
+#include <linux/platform_profile.h>
/*********************************************************************
* AMD P-state INTERFACE *
@@ -62,13 +63,20 @@ struct amd_aperf_mperf {
* @cpu: CPU number
* @req: constraint request to apply
* @cppc_req_cached: cached performance request hints
+ * @cppc_req2_cached: cached value of MSR_AMD_CPPC_REQ2
* @perf: cached performance-related data
* @prefcore_ranking: the preferred core ranking, the higher value indicates a higher
* priority.
+ * @floor_perf_cnt: Cached value of the number of distinct floor
+ * performance levels supported
+ * @bios_floor_perf: Cached value of the boot-time floor performance level from
+ * MSR_AMD_CPPC_REQ2
* @min_limit_freq: Cached value of policy->min (in khz)
* @max_limit_freq: Cached value of policy->max (in khz)
* @nominal_freq: the frequency (in khz) that mapped to nominal_perf
+ * @max_freq: the maximum possible frequency (in khz) under ideal conditions
* @lowest_nonlinear_freq: the frequency (in khz) that mapped to lowest_nonlinear_perf
+ * @floor_freq: Cached value of the user requested floor_freq
* @cur: Difference of Aperf/Mperf/tsc count between last and current sample
* @prev: Last Aperf/Mperf/tsc count value read from register
* @freq: current cpu frequency value (in khz)
@@ -78,6 +86,11 @@ struct amd_aperf_mperf {
* AMD P-State driver supports preferred core feature.
* @epp_cached: Cached CPPC energy-performance preference value
* @policy: Cpufreq policy value
+ * @suspended: If CPU core is offlined
+ * @epp_default_ac: Default EPP value for AC power source
+ * @epp_default_dc: Default EPP value for DC power source
+ * @dynamic_epp: Whether dynamic EPP is enabled
+ * @power_nb: Notifier block for power events
*
* The amd_cpudata is key private data for each CPU thread in AMD P-State, and
* represents all the attributes and goals that AMD P-State requests at runtime.
@@ -87,14 +100,19 @@ struct amd_cpudata {
struct freq_qos_request req[2];
u64 cppc_req_cached;
+ u64 cppc_req2_cached;
union perf_cached perf;
u8 prefcore_ranking;
+ u8 floor_perf_cnt;
+ u8 bios_floor_perf;
u32 min_limit_freq;
u32 max_limit_freq;
u32 nominal_freq;
+ u32 max_freq;
u32 lowest_nonlinear_freq;
+ u32 floor_freq;
struct amd_aperf_mperf cur;
struct amd_aperf_mperf prev;
@@ -106,7 +124,16 @@ struct amd_cpudata {
/* EPP feature related attributes */
u32 policy;
bool suspended;
- u8 epp_default;
+ u8 epp_default_ac;
+ u8 epp_default_dc;
+ bool dynamic_epp;
+ bool raw_epp;
+ struct notifier_block power_nb;
+
+ /* platform profile */
+ enum platform_profile_option current_profile;
+ struct device *ppdev;
+ char *profile_name;
};
/*
@@ -123,5 +150,13 @@ enum amd_pstate_mode {
const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode);
int amd_pstate_get_status(void);
int amd_pstate_update_status(const char *buf, size_t size);
+ssize_t store_energy_performance_preference(struct cpufreq_policy *policy,
+ const char *buf, size_t count);
+ssize_t show_energy_performance_preference(struct cpufreq_policy *policy, char *buf);
+void amd_pstate_clear_dynamic_epp(struct cpufreq_policy *policy);
+
+struct freq_attr;
+
+struct freq_attr **amd_pstate_get_current_attrs(void);
#endif /* _LINUX_AMD_PSTATE_H */
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 5dfb109cf1f4..7e7f9dfb7a24 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -834,17 +834,11 @@ static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
{
struct cppc_cpudata *cpu_data = policy->driver_data;
struct cppc_perf_caps *caps = &cpu_data->perf_caps;
- int ret;
if (state)
- policy->max = cppc_perf_to_khz(caps, caps->highest_perf);
+ policy->cpuinfo.max_freq = cppc_perf_to_khz(caps, caps->highest_perf);
else
- policy->max = cppc_perf_to_khz(caps, caps->nominal_perf);
- policy->cpuinfo.max_freq = policy->max;
-
- ret = freq_qos_update_request(policy->max_freq_req, policy->max);
- if (ret < 0)
- return ret;
+ policy->cpuinfo.max_freq = cppc_perf_to_khz(caps, caps->nominal_perf);
return 0;
}
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 25fd3b191b7e..ff1204c666b1 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -159,6 +159,7 @@ static const struct of_device_id blocklist[] __initconst = {
{ .compatible = "qcom,qcm2290", },
{ .compatible = "qcom,qcm6490", },
{ .compatible = "qcom,qcs404", },
+ { .compatible = "qcom,qcs8300", },
{ .compatible = "qcom,qdu1000", },
{ .compatible = "qcom,sa8155p" },
{ .compatible = "qcom,sa8540p" },
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 1f794524a1d9..3c5b7fe52cd7 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -609,10 +609,19 @@ static int policy_set_boost(struct cpufreq_policy *policy, bool enable)
policy->boost_enabled = enable;
ret = cpufreq_driver->set_boost(policy, enable);
- if (ret)
+ if (ret) {
policy->boost_enabled = !policy->boost_enabled;
+ return ret;
+ }
- return ret;
+ ret = freq_qos_update_request(&policy->boost_freq_req, policy->cpuinfo.max_freq);
+ if (ret < 0) {
+ policy->boost_enabled = !policy->boost_enabled;
+ cpufreq_driver->set_boost(policy, policy->boost_enabled);
+ return ret;
+ }
+
+ return 0;
}
static ssize_t store_local_boost(struct cpufreq_policy *policy,
@@ -760,7 +769,7 @@ static ssize_t store_##file_name \
if (ret) \
return ret; \
\
- ret = freq_qos_update_request(policy->object##_freq_req, val);\
+ ret = freq_qos_update_request(&policy->object##_freq_req, val); \
return ret >= 0 ? count : ret; \
}
@@ -1365,7 +1374,7 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
/* Cancel any pending policy->update work before freeing the policy. */
cancel_work_sync(&policy->update);
- if (policy->max_freq_req) {
+ if (freq_qos_request_active(&policy->max_freq_req)) {
/*
* Remove max_freq_req after sending CPUFREQ_REMOVE_POLICY
* notification, since CPUFREQ_CREATE_POLICY notification was
@@ -1373,11 +1382,13 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
*/
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_REMOVE_POLICY, policy);
- freq_qos_remove_request(policy->max_freq_req);
+ freq_qos_remove_request(&policy->max_freq_req);
}
- freq_qos_remove_request(policy->min_freq_req);
- kfree(policy->min_freq_req);
+ if (freq_qos_request_active(&policy->min_freq_req))
+ freq_qos_remove_request(&policy->min_freq_req);
+ if (freq_qos_request_active(&policy->boost_freq_req))
+ freq_qos_remove_request(&policy->boost_freq_req);
cpufreq_policy_put_kobj(policy);
free_cpumask_var(policy->real_cpus);
@@ -1447,47 +1458,29 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy,
add_cpu_dev_symlink(policy, j, get_cpu_device(j));
}
- policy->min_freq_req = kzalloc(2 * sizeof(*policy->min_freq_req),
- GFP_KERNEL);
- if (!policy->min_freq_req) {
- ret = -ENOMEM;
- goto out_destroy_policy;
+ if (policy->boost_supported) {
+ ret = freq_qos_add_request(&policy->constraints,
+ &policy->boost_freq_req,
+ FREQ_QOS_MAX,
+ policy->cpuinfo.max_freq);
+ if (ret < 0)
+ goto out_destroy_policy;
}
ret = freq_qos_add_request(&policy->constraints,
- policy->min_freq_req, FREQ_QOS_MIN,
+ &policy->min_freq_req, FREQ_QOS_MIN,
FREQ_QOS_MIN_DEFAULT_VALUE);
- if (ret < 0) {
- /*
- * So we don't call freq_qos_remove_request() for an
- * uninitialized request.
- */
- kfree(policy->min_freq_req);
- policy->min_freq_req = NULL;
+ if (ret < 0)
goto out_destroy_policy;
- }
-
- /*
- * This must be initialized right here to avoid calling
- * freq_qos_remove_request() on uninitialized request in case
- * of errors.
- */
- policy->max_freq_req = policy->min_freq_req + 1;
ret = freq_qos_add_request(&policy->constraints,
- policy->max_freq_req, FREQ_QOS_MAX,
+ &policy->max_freq_req, FREQ_QOS_MAX,
FREQ_QOS_MAX_DEFAULT_VALUE);
- if (ret < 0) {
- policy->max_freq_req = NULL;
+ if (ret < 0)
goto out_destroy_policy;
- }
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_CREATE_POLICY, policy);
- } else {
- ret = freq_qos_update_request(policy->max_freq_req, policy->max);
- if (ret < 0)
- goto out_destroy_policy;
}
if (cpufreq_driver->get && has_target()) {
@@ -2228,7 +2221,7 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
/**
* cpufreq_driver_adjust_perf - Adjust CPU performance level in one go.
- * @cpu: Target CPU.
+ * @policy: cpufreq policy object of the target CPU.
* @min_perf: Minimum (required) performance level (units of @capacity).
* @target_perf: Target (desired) performance level (units of @capacity).
* @capacity: Capacity of the target CPU.
@@ -2247,12 +2240,12 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
* parallel with either ->target() or ->target_index() or ->fast_switch() for
* the same CPU.
*/
-void cpufreq_driver_adjust_perf(unsigned int cpu,
+void cpufreq_driver_adjust_perf(struct cpufreq_policy *policy,
unsigned long min_perf,
unsigned long target_perf,
unsigned long capacity)
{
- cpufreq_driver->adjust_perf(cpu, min_perf, target_perf, capacity);
+ cpufreq_driver->adjust_perf(policy, min_perf, target_perf, capacity);
}
/**
@@ -2364,8 +2357,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
target_freq = __resolve_freq(policy, target_freq, policy->min,
policy->max, relation);
- pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
- policy->cpu, target_freq, relation, old_target_freq);
+ pr_debug("CPU %u: cur %u kHz -> target %u kHz (req %u kHz, rel %u)\n",
+ policy->cpu, policy->cur, target_freq, old_target_freq, relation);
/*
* This might look like a redundant call as we are checking it again
@@ -2789,16 +2782,10 @@ int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state)
return -ENXIO;
ret = cpufreq_frequency_table_cpuinfo(policy);
- if (ret) {
+ if (ret)
pr_err("%s: Policy frequency update failed\n", __func__);
- return ret;
- }
-
- ret = freq_qos_update_request(policy->max_freq_req, policy->max);
- if (ret < 0)
- return ret;
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_boost_set_sw);
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 1462d59277bd..73b8ed7cfaae 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -21,6 +21,7 @@
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/sysfs.h>
/* Ondemand Sampling types */
enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
@@ -57,7 +58,7 @@ static ssize_t file_name##_show \
{ \
struct dbs_data *dbs_data = to_dbs_data(attr_set); \
struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \
- return sprintf(buf, "%u\n", tuners->file_name); \
+ return sysfs_emit(buf, "%u\n", tuners->file_name); \
}
#define gov_show_one_common(file_name) \
@@ -65,7 +66,7 @@ static ssize_t file_name##_show \
(struct gov_attr_set *attr_set, char *buf) \
{ \
struct dbs_data *dbs_data = to_dbs_data(attr_set); \
- return sprintf(buf, "%u\n", dbs_data->file_name); \
+ return sysfs_emit(buf, "%u\n", dbs_data->file_name); \
}
#define gov_attr_ro(_name) \
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 11c58af41900..1552b2d32a34 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -3239,12 +3239,12 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
return target_pstate * cpu->pstate.scaling;
}
-static void intel_cpufreq_adjust_perf(unsigned int cpunum,
+static void intel_cpufreq_adjust_perf(struct cpufreq_policy *policy,
unsigned long min_perf,
unsigned long target_perf,
unsigned long capacity)
{
- struct cpudata *cpu = all_cpu_data[cpunum];
+ struct cpudata *cpu = all_cpu_data[policy->cpu];
u64 hwp_cap = READ_ONCE(cpu->hwp_cap_cached);
int old_pstate = cpu->pstate.current_pstate;
int cap_pstate, min_pstate, max_pstate, target_pstate;
@@ -3472,7 +3472,7 @@ static int intel_pstate_update_status(const char *buf, size_t size)
{
if (size == 3 && !strncmp(buf, "off", size)) {
if (!intel_pstate_driver)
- return -EINVAL;
+ return 0;
if (hwp_active)
return -EBUSY;
diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
index 7a41cfc71a46..c6375e14d445 100644
--- a/drivers/cpufreq/tegra194-cpufreq.c
+++ b/drivers/cpufreq/tegra194-cpufreq.c
@@ -196,7 +196,7 @@ static const struct tegra_cpufreq_soc tegra234_cpufreq_soc = {
.refclk_delta_min = 16000,
};
-static const struct tegra_cpufreq_soc tegra239_cpufreq_soc = {
+static const struct tegra_cpufreq_soc tegra238_cpufreq_soc = {
.ops = &tegra234_cpufreq_ops,
.actmon_cntr_base = 0x4000,
.maxcpus_per_cluster = 8,
@@ -807,7 +807,7 @@ static void tegra194_cpufreq_remove(struct platform_device *pdev)
static const struct of_device_id tegra194_cpufreq_of_match[] = {
{ .compatible = "nvidia,tegra194-ccplex", .data = &tegra194_cpufreq_soc },
{ .compatible = "nvidia,tegra234-ccplex-cluster", .data = &tegra234_cpufreq_soc },
- { .compatible = "nvidia,tegra239-ccplex-cluster", .data = &tegra239_cpufreq_soc },
+ { .compatible = "nvidia,tegra238-ccplex-cluster", .data = &tegra238_cpufreq_soc },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, tegra194_cpufreq_of_match);
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index cac5997dca50..d6d8386d3f02 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -81,7 +81,7 @@ config HALTPOLL_CPUIDLE
before halting in the guest (more efficient than polling in the
host via halt_poll_ns for some scenarios).
-endif
+endif # CPU_IDLE
config ARCH_NEEDS_CPU_IDLE_COUPLED
def_bool n
diff --git a/drivers/cpuidle/Kconfig.mips b/drivers/cpuidle/Kconfig.mips
index c3c011af4a35..88728b2b4ea0 100644
--- a/drivers/cpuidle/Kconfig.mips
+++ b/drivers/cpuidle/Kconfig.mips
@@ -4,7 +4,7 @@
#
config MIPS_CPS_CPUIDLE
bool "CPU Idle driver for MIPS CPS platforms"
- depends on CPU_IDLE && MIPS_CPS
+ depends on MIPS_CPS
depends on SYS_SUPPORTS_MIPS_CPS
select ARCH_NEEDS_CPU_IDLE_COUPLED if MIPS_MT || CPU_MIPSR6
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
diff --git a/drivers/cpuidle/Kconfig.powerpc b/drivers/cpuidle/Kconfig.powerpc
index a797a02b7b6f..1931ac8faffb 100644
--- a/drivers/cpuidle/Kconfig.powerpc
+++ b/drivers/cpuidle/Kconfig.powerpc
@@ -4,7 +4,6 @@
#
config PSERIES_CPUIDLE
bool "Cpuidle driver for pSeries platforms"
- depends on CPU_IDLE
depends on PPC_PSERIES
default y
help
@@ -13,7 +12,6 @@ config PSERIES_CPUIDLE
config POWERNV_CPUIDLE
bool "Cpuidle driver for powernv platforms"
- depends on CPU_IDLE
depends on PPC_POWERNV
default y
help
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 1a55542efead..2d2f40a2cb81 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -679,16 +679,16 @@ int cpuidle_register_device(struct cpuidle_device *dev)
if (!dev)
return -EINVAL;
- mutex_lock(&cpuidle_lock);
+ guard(mutex)(&cpuidle_lock);
if (dev->registered)
- goto out_unlock;
+ return ret;
__cpuidle_device_init(dev);
ret = __cpuidle_register_device(dev);
if (ret)
- goto out_unlock;
+ return ret;
ret = cpuidle_add_sysfs(dev);
if (ret)
@@ -700,16 +700,14 @@ int cpuidle_register_device(struct cpuidle_device *dev)
cpuidle_install_idle_handler();
-out_unlock:
- mutex_unlock(&cpuidle_lock);
-
return ret;
out_sysfs:
cpuidle_remove_sysfs(dev);
out_unregister:
__cpuidle_unregister_device(dev);
- goto out_unlock;
+
+ return ret;
}
EXPORT_SYMBOL_GPL(cpuidle_register_device);
diff --git a/drivers/cpuidle/governors/gov.h b/drivers/cpuidle/governors/gov.h
index 99e067d9668c..cd06a2e7b506 100644
--- a/drivers/cpuidle/governors/gov.h
+++ b/drivers/cpuidle/governors/gov.h
@@ -10,5 +10,10 @@
* check the time till the closest expected timer event.
*/
#define RESIDENCY_THRESHOLD_NS (15 * NSEC_PER_USEC)
+/*
+ * If the closest timer is in this range, the governor idle state selection need
+ * not be adjusted after the scheduler tick has been stopped.
+ */
+#define SAFE_TIMER_RANGE_NS (2 * TICK_NSEC)
#endif /* __CPUIDLE_GOVERNOR_H */
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 899ff16ff1fe..544a5d593007 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -261,13 +261,16 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns);
/*
* If the tick is already stopped, the cost of possible short
- * idle duration misprediction is much higher, because the CPU
- * may be stuck in a shallow idle state for a long time as a
- * result of it. In that case, say we might mispredict and use
- * the known time till the closest timer event for the idle
- * state selection.
+ * idle duration misprediction is higher because the CPU may get
+ * stuck in a shallow idle state then. To avoid that, if
+ * predicted_ns is small enough, say it might be mispredicted
+ * and use the known time till the closest timer for idle state
+ * selection unless that timer is going to trigger within
+ * SAFE_TIMER_RANGE_NS in which case it can be regarded as a
+ * sufficient safety net.
*/
- if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC)
+ if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC &&
+ data->next_timer_ns > SAFE_TIMER_RANGE_NS)
predicted_ns = data->next_timer_ns;
} else {
/*
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index bec0142377b8..ac43b9b013b3 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -407,50 +407,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* better choice.
*/
if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) {
- int min_idx = idx0;
-
- if (tick_nohz_tick_stopped()) {
- /*
- * Look for the shallowest idle state below the current
- * candidate one whose target residency is at least
- * equal to the tick period length.
- */
- while (min_idx < idx &&
- drv->states[min_idx].target_residency_ns < TICK_NSEC)
- min_idx++;
-
- /*
- * Avoid selecting a state with a lower index, but with
- * the same target residency as the current candidate
- * one.
- */
- if (drv->states[min_idx].target_residency_ns ==
- drv->states[idx].target_residency_ns)
- goto constraint;
- }
-
- /*
- * If the minimum state index is greater than or equal to the
- * index of the state with the maximum intercepts metric and
- * the corresponding state is enabled, there is no need to look
- * at the deeper states.
- */
- if (min_idx >= intercept_max_idx &&
- !dev->states_usage[min_idx].disable) {
- idx = min_idx;
- goto constraint;
- }
-
/*
* Look for the deepest enabled idle state, at most as deep as
* the one with the maximum intercepts metric, whose target
* residency had not been greater than the idle duration in over
* a half of the relevant cases in the past.
- *
- * Take the possible duration limitation present if the tick
- * has been stopped already into account.
*/
- for (i = idx - 1, intercept_sum = 0; i >= min_idx; i--) {
+ for (i = idx - 1, intercept_sum = 0; i >= idx0; i--) {
intercept_sum += cpu_data->state_bins[i].intercepts;
if (dev->states_usage[i].disable)
@@ -463,7 +426,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
}
}
-constraint:
/*
* If there is a latency constraint, it may be necessary to select an
* idle state shallower than the current candidate one.
@@ -472,13 +434,13 @@ constraint:
idx = constraint_idx;
/*
- * If either the candidate state is state 0 or its target residency is
- * low enough, there is basically nothing more to do, but if the sleep
- * length is not updated, the subsequent wakeup will be counted as an
- * "intercept" which may be problematic in the cases when timer wakeups
- * are dominant. Namely, it may effectively prevent deeper idle states
- * from being selected at one point even if no imminent timers are
- * scheduled.
+ * If the tick has not been stopped and either the candidate state is
+ * state 0 or its target residency is low enough, there is basically
+ * nothing more to do, but if the sleep length is not updated, the
+ * subsequent wakeup will be counted as an "intercept". That may be
+ * problematic in the cases when timer wakeups are dominant because it
+ * may effectively prevent deeper idle states from being selected at one
+ * point even if no imminent timers are scheduled.
*
* However, frequent timers in the RESIDENCY_THRESHOLD_NS range on one
* CPU are unlikely (user space has a default 50 us slack value for
@@ -494,7 +456,8 @@ constraint:
* shallow idle states regardless of the wakeup type, so the sleep
* length need not be known in that case.
*/
- if ((!idx || drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) &&
+ if (!tick_nohz_tick_stopped() && (!idx ||
+ drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) &&
(2 * cpu_data->short_idles >= cpu_data->total ||
latency_req < LATENCY_THRESHOLD_NS))
goto out_tick;
@@ -502,6 +465,30 @@ constraint:
duration_ns = tick_nohz_get_sleep_length(&delta_tick);
cpu_data->sleep_length_ns = duration_ns;
+ /*
+ * If the tick has been stopped and the closest timer is too far away,
+ * update the selection to prevent the CPU from getting stuck in a
+ * shallow idle state for too long.
+ */
+ if (tick_nohz_tick_stopped() && duration_ns > SAFE_TIMER_RANGE_NS &&
+ drv->states[idx].target_residency_ns < TICK_NSEC) {
+ /*
+ * Look for the deepest enabled idle state with exit latency
+ * within the PM QoS limit and with target residency within
+ * duration_ns.
+ */
+ for (i = constraint_idx; i > idx; i--) {
+ if (dev->states_usage[i].disable)
+ continue;
+
+ if (drv->states[i].target_residency_ns <= duration_ns) {
+ idx = i;
+ break;
+ }
+ }
+ return idx;
+ }
+
if (!idx)
goto out_tick;
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index c0a74091b904..82dd9a43dc62 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -38,6 +38,7 @@
static struct class *devfreq_class;
static struct dentry *devfreq_debugfs;
+static const struct attribute_group gov_attr_group;
/*
* devfreq core provides delayed work based load monitoring helper
@@ -146,10 +147,9 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
DEV_PM_QOS_MIN_FREQUENCY);
qos_max_freq = dev_pm_qos_read_value(devfreq->dev.parent,
DEV_PM_QOS_MAX_FREQUENCY);
- *min_freq = max(*min_freq, (unsigned long)HZ_PER_KHZ * qos_min_freq);
+ *min_freq = max(*min_freq, HZ_PER_KHZ * qos_min_freq);
if (qos_max_freq != PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE)
- *max_freq = min(*max_freq,
- (unsigned long)HZ_PER_KHZ * qos_max_freq);
+ *max_freq = min(*max_freq, HZ_PER_KHZ * qos_max_freq);
/* Apply constraints from OPP interface */
*max_freq = clamp(*max_freq, devfreq->scaling_min_freq, devfreq->scaling_max_freq);
@@ -785,11 +785,6 @@ static void devfreq_dev_release(struct device *dev)
kfree(devfreq);
}
-static void create_sysfs_files(struct devfreq *devfreq,
- const struct devfreq_governor *gov);
-static void remove_sysfs_files(struct devfreq *devfreq,
- const struct devfreq_governor *gov);
-
/**
* devfreq_add_device() - Add devfreq feature to the device
* @dev: the device to add devfreq feature.
@@ -956,7 +951,10 @@ struct devfreq *devfreq_add_device(struct device *dev,
__func__);
goto err_init;
}
- create_sysfs_files(devfreq, devfreq->governor);
+
+ err = sysfs_update_group(&devfreq->dev.kobj, &gov_attr_group);
+ if (err)
+ goto err_init;
list_add(&devfreq->node, &devfreq_list);
@@ -995,12 +993,9 @@ int devfreq_remove_device(struct devfreq *devfreq)
devfreq_cooling_unregister(devfreq->cdev);
- if (devfreq->governor) {
+ if (devfreq->governor)
devfreq->governor->event_handler(devfreq,
DEVFREQ_GOV_STOP, NULL);
- remove_sysfs_files(devfreq, devfreq->governor);
- }
-
device_unregister(&devfreq->dev);
return 0;
@@ -1460,7 +1455,6 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr,
__func__, df->governor->name, ret);
goto out;
}
- remove_sysfs_files(df, df->governor);
/*
* Start the new governor and create the specific sysfs files
@@ -1489,7 +1483,7 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr,
* Create the sysfs files for the new governor. But if failed to start
* the new governor, restore the sysfs files of previous governor.
*/
- create_sysfs_files(df, df->governor);
+ ret = sysfs_update_group(&df->dev.kobj, &gov_attr_group);
out:
mutex_unlock(&devfreq_list_lock);
@@ -1807,14 +1801,17 @@ static struct attribute *devfreq_attrs[] = {
&dev_attr_trans_stat.attr,
NULL,
};
-ATTRIBUTE_GROUPS(devfreq);
static ssize_t polling_interval_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct devfreq *df = to_devfreq(dev);
- if (!df->profile)
+ /* Protect against race between sysfs attrs update and read/write */
+ guard(mutex)(&devfreq_list_lock);
+
+ if (!df->profile || !df->governor ||
+ !IS_SUPPORTED_ATTR(df->governor->attrs, POLLING_INTERVAL))
return -EINVAL;
return sprintf(buf, "%d\n", df->profile->polling_ms);
@@ -1828,7 +1825,10 @@ static ssize_t polling_interval_store(struct device *dev,
unsigned int value;
int ret;
- if (!df->governor)
+ guard(mutex)(&devfreq_list_lock);
+
+ if (!df->governor ||
+ !IS_SUPPORTED_ATTR(df->governor->attrs, POLLING_INTERVAL))
return -EINVAL;
ret = sscanf(buf, "%u", &value);
@@ -1847,7 +1847,10 @@ static ssize_t timer_show(struct device *dev,
{
struct devfreq *df = to_devfreq(dev);
- if (!df->profile)
+ guard(mutex)(&devfreq_list_lock);
+
+ if (!df->profile || !df->governor ||
+ !IS_SUPPORTED_ATTR(df->governor->attrs, TIMER))
return -EINVAL;
return sprintf(buf, "%s\n", timer_name[df->profile->timer]);
@@ -1861,7 +1864,10 @@ static ssize_t timer_store(struct device *dev, struct device_attribute *attr,
int timer = -1;
int ret = 0, i;
- if (!df->governor || !df->profile)
+ guard(mutex)(&devfreq_list_lock);
+
+ if (!df->governor || !df->profile ||
+ !IS_SUPPORTED_ATTR(df->governor->attrs, TIMER))
return -EINVAL;
ret = sscanf(buf, "%16s", str_timer);
@@ -1905,37 +1911,47 @@ out:
}
static DEVICE_ATTR_RW(timer);
-#define CREATE_SYSFS_FILE(df, name) \
-{ \
- int ret; \
- ret = sysfs_create_file(&df->dev.kobj, &dev_attr_##name.attr); \
- if (ret < 0) { \
- dev_warn(&df->dev, \
- "Unable to create attr(%s)\n", "##name"); \
- } \
-} \
+static struct attribute *governor_attrs[] = {
+ &dev_attr_polling_interval.attr,
+ &dev_attr_timer.attr,
+ NULL
+};
-/* Create the specific sysfs files which depend on each governor. */
-static void create_sysfs_files(struct devfreq *devfreq,
- const struct devfreq_governor *gov)
+static umode_t gov_attr_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
{
- if (IS_SUPPORTED_ATTR(gov->attrs, POLLING_INTERVAL))
- CREATE_SYSFS_FILE(devfreq, polling_interval);
- if (IS_SUPPORTED_ATTR(gov->attrs, TIMER))
- CREATE_SYSFS_FILE(devfreq, timer);
-}
+ struct device *dev = kobj_to_dev(kobj);
+ struct devfreq *df = to_devfreq(dev);
-/* Remove the specific sysfs files which depend on each governor. */
-static void remove_sysfs_files(struct devfreq *devfreq,
- const struct devfreq_governor *gov)
-{
- if (IS_SUPPORTED_ATTR(gov->attrs, POLLING_INTERVAL))
- sysfs_remove_file(&devfreq->dev.kobj,
- &dev_attr_polling_interval.attr);
- if (IS_SUPPORTED_ATTR(gov->attrs, TIMER))
- sysfs_remove_file(&devfreq->dev.kobj, &dev_attr_timer.attr);
+ if (!df->governor || !df->governor->attrs)
+ return 0;
+
+ if (attr == &dev_attr_polling_interval.attr &&
+ IS_SUPPORTED_ATTR(df->governor->attrs, POLLING_INTERVAL))
+ return attr->mode;
+
+ if (attr == &dev_attr_timer.attr &&
+ IS_SUPPORTED_ATTR(df->governor->attrs, TIMER))
+ return attr->mode;
+
+ return 0;
}
+static const struct attribute_group devfreq_group = {
+ .attrs = devfreq_attrs,
+};
+
+static const struct attribute_group gov_attr_group = {
+ .attrs = governor_attrs,
+ .is_visible = gov_attr_visible,
+};
+
+static const struct attribute_group *devfreq_groups[] = {
+ &devfreq_group,
+ &gov_attr_group,
+ NULL
+};
+
/**
* devfreq_summary_show() - Show the summary of the devfreq devices
* @s: seq_file instance to show the summary of devfreq devices
diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
index 8b57194ac698..401aac6a9f07 100644
--- a/drivers/devfreq/tegra30-devfreq.c
+++ b/drivers/devfreq/tegra30-devfreq.c
@@ -941,16 +941,22 @@ static int tegra_devfreq_probe(struct platform_device *pdev)
return 0;
}
+/*
+ * The activity counter is incremented every 256 memory transactions. However,
+ * the number of clock cycles required for each transaction varies across
+ * different SoC generations. For instance, a single transaction takes 2 EMC
+ * clocks on Tegra30, 1 EMC clock on Tegra114, and 4 EMC clocks on Tegra124.
+ */
static const struct tegra_devfreq_soc_data tegra124_soc = {
.configs = tegra124_device_configs,
-
- /*
- * Activity counter is incremented every 256 memory transactions,
- * and each transaction takes 4 EMC clocks.
- */
.count_weight = 4 * 256,
};
+static const struct tegra_devfreq_soc_data tegra114_soc = {
+ .configs = tegra124_device_configs,
+ .count_weight = 256,
+};
+
static const struct tegra_devfreq_soc_data tegra30_soc = {
.configs = tegra30_device_configs,
.count_weight = 2 * 256,
@@ -958,6 +964,7 @@ static const struct tegra_devfreq_soc_data tegra30_soc = {
static const struct of_device_id tegra_devfreq_of_match[] = {
{ .compatible = "nvidia,tegra30-actmon", .data = &tegra30_soc, },
+ { .compatible = "nvidia,tegra114-actmon", .data = &tegra114_soc, },
{ .compatible = "nvidia,tegra124-actmon", .data = &tegra124_soc, },
{ },
};
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index f49c939d636f..f49354e37777 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -983,6 +983,43 @@ static struct cpuidle_state mtl_l_cstates[] __initdata = {
.enter = NULL }
};
+static struct cpuidle_state ptl_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00),
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C1E",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+ .exit_latency = 10,
+ .target_residency = 10,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C6S",
+ .desc = "MWAIT 0x21",
+ .flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 300,
+ .target_residency = 300,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C10",
+ .desc = "MWAIT 0x60",
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 370,
+ .target_residency = 2500,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .enter = NULL }
+};
+
static struct cpuidle_state gmt_cstates[] __initdata = {
{
.name = "C1",
@@ -1561,6 +1598,10 @@ static const struct idle_cpu idle_cpu_mtl_l __initconst = {
.state_table = mtl_l_cstates,
};
+static const struct idle_cpu idle_cpu_ptl __initconst = {
+ .state_table = ptl_cstates,
+};
+
static const struct idle_cpu idle_cpu_gmt __initconst = {
.state_table = gmt_cstates,
};
@@ -1669,6 +1710,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
X86_MATCH_VFM(INTEL_ALDERLAKE, &idle_cpu_adl),
X86_MATCH_VFM(INTEL_ALDERLAKE_L, &idle_cpu_adl_l),
X86_MATCH_VFM(INTEL_METEORLAKE_L, &idle_cpu_mtl_l),
+ X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &idle_cpu_ptl),
X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &idle_cpu_gmt),
X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &idle_cpu_spr),
X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &idle_cpu_spr),
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 866641666e41..da3f5eba4341 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -2742,8 +2742,8 @@ struct dev_pm_opp *dev_pm_opp_xlate_required_opp(struct opp_table *src_table,
break;
}
}
- break;
}
+ break;
}
if (IS_ERR(dest_opp)) {
diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c
index 8fc6238b1728..61506d30d5ff 100644
--- a/drivers/opp/debugfs.c
+++ b/drivers/opp/debugfs.c
@@ -130,22 +130,24 @@ void opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table)
{
struct dentry *pdentry = opp_table->dentry;
struct dentry *d;
- unsigned long id;
- char name[25]; /* 20 chars for 64 bit value + 5 (opp:\0) */
+ char name[36]; /* "opp:"(4) + u64(20) + "-" (1) + u32(10) + NULL(1) */
/*
* Get directory name for OPP.
*
- * - Normally rate is unique to each OPP, use it to get unique opp-name.
+ * - Normally rate is unique to each OPP, use it to get unique opp-name,
+ * together with performance level if available.
* - For some devices rate isn't available or there are multiple, use
* index instead for them.
*/
- if (likely(opp_table->clk_count == 1 && opp->rates[0]))
- id = opp->rates[0];
- else
- id = _get_opp_count(opp_table);
-
- snprintf(name, sizeof(name), "opp:%lu", id);
+ if (likely(opp_table->clk_count == 1 && opp->rates[0])) {
+ if (opp->level == OPP_LEVEL_UNSET)
+ snprintf(name, sizeof(name), "opp:%lu", opp->rates[0]);
+ else
+ snprintf(name, sizeof(name), "opp:%lu-%u", opp->rates[0], opp->level);
+ } else {
+ snprintf(name, sizeof(name), "opp:%u", _get_opp_count(opp_table));
+ }
/* Create per-opp directory */
d = debugfs_create_dir(name, pdentry);
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 019a65a5283a..a8dd02dff0a0 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -24,89 +24,34 @@
#include <linux/suspend.h>
#include <linux/sysfs.h>
#include <linux/types.h>
+#include <linux/units.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
-#include <asm/iosf_mbi.h>
#include <asm/msr.h>
-/* bitmasks for RAPL MSRs, used by primitive access functions */
-#define ENERGY_STATUS_MASK 0xffffffff
+#define ENERGY_STATUS_MASK GENMASK(31, 0)
-#define POWER_LIMIT1_MASK 0x7FFF
-#define POWER_LIMIT1_ENABLE BIT(15)
-#define POWER_LIMIT1_CLAMP BIT(16)
+#define POWER_UNIT_OFFSET 0x00
+#define POWER_UNIT_MASK GENMASK(3, 0)
-#define POWER_LIMIT2_MASK (0x7FFFULL<<32)
-#define POWER_LIMIT2_ENABLE BIT_ULL(47)
-#define POWER_LIMIT2_CLAMP BIT_ULL(48)
-#define POWER_HIGH_LOCK BIT_ULL(63)
-#define POWER_LOW_LOCK BIT(31)
+#define ENERGY_UNIT_OFFSET 0x08
+#define ENERGY_UNIT_MASK GENMASK(12, 8)
-#define POWER_LIMIT4_MASK 0x1FFF
-
-#define TIME_WINDOW1_MASK (0x7FULL<<17)
-#define TIME_WINDOW2_MASK (0x7FULL<<49)
-
-#define POWER_UNIT_OFFSET 0
-#define POWER_UNIT_MASK 0x0F
-
-#define ENERGY_UNIT_OFFSET 0x08
-#define ENERGY_UNIT_MASK 0x1F00
-
-#define TIME_UNIT_OFFSET 0x10
-#define TIME_UNIT_MASK 0xF0000
-
-#define POWER_INFO_MAX_MASK (0x7fffULL<<32)
-#define POWER_INFO_MIN_MASK (0x7fffULL<<16)
-#define POWER_INFO_MAX_TIME_WIN_MASK (0x3fULL<<48)
-#define POWER_INFO_THERMAL_SPEC_MASK 0x7fff
-
-#define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff
-#define PP_POLICY_MASK 0x1F
-
-/*
- * SPR has different layout for Psys Domain PowerLimit registers.
- * There are 17 bits of PL1 and PL2 instead of 15 bits.
- * The Enable bits and TimeWindow bits are also shifted as a result.
- */
-#define PSYS_POWER_LIMIT1_MASK 0x1FFFF
-#define PSYS_POWER_LIMIT1_ENABLE BIT(17)
-
-#define PSYS_POWER_LIMIT2_MASK (0x1FFFFULL<<32)
-#define PSYS_POWER_LIMIT2_ENABLE BIT_ULL(49)
-
-#define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19)
-#define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51)
-
-/* bitmasks for RAPL TPMI, used by primitive access functions */
-#define TPMI_POWER_LIMIT_MASK 0x3FFFF
-#define TPMI_POWER_LIMIT_ENABLE BIT_ULL(62)
-#define TPMI_TIME_WINDOW_MASK (0x7FULL<<18)
-#define TPMI_INFO_SPEC_MASK 0x3FFFF
-#define TPMI_INFO_MIN_MASK (0x3FFFFULL << 18)
-#define TPMI_INFO_MAX_MASK (0x3FFFFULL << 36)
-#define TPMI_INFO_MAX_TIME_WIN_MASK (0x7FULL << 54)
+#define TIME_UNIT_OFFSET 0x10
+#define TIME_UNIT_MASK GENMASK(19, 16)
/* Non HW constants */
-#define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */
-#define RAPL_PRIMITIVE_DUMMY BIT(2)
-
-#define TIME_WINDOW_MAX_MSEC 40000
-#define TIME_WINDOW_MIN_MSEC 250
-#define ENERGY_UNIT_SCALE 1000 /* scale from driver unit to powercap unit */
-enum unit_type {
- ARBITRARY_UNIT, /* no translation */
- POWER_UNIT,
- ENERGY_UNIT,
- TIME_UNIT,
-};
+#define RAPL_PRIMITIVE_DUMMY BIT(2)
+
+#define ENERGY_UNIT_SCALE 1000 /* scale from driver unit to powercap unit */
/* per domain data, some are optional */
-#define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2)
+#define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2)
+
+#define PACKAGE_PLN_INT_SAVED BIT(0)
-#define DOMAIN_STATE_INACTIVE BIT(0)
-#define DOMAIN_STATE_POWER_LIMIT_SET BIT(1)
+#define RAPL_EVENT_MASK GENMASK(7, 0)
static const char *pl_names[NR_POWER_LIMITS] = {
[POWER_LIMIT1] = "long_term",
@@ -204,52 +149,11 @@ static int get_pl_prim(struct rapl_domain *rd, int pl, enum pl_prims prim)
#define power_zone_to_rapl_domain(_zone) \
container_of(_zone, struct rapl_domain, power_zone)
-struct rapl_defaults {
- u8 floor_freq_reg_addr;
- int (*check_unit)(struct rapl_domain *rd);
- void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
- u64 (*compute_time_window)(struct rapl_domain *rd, u64 val,
- bool to_raw);
- unsigned int dram_domain_energy_unit;
- unsigned int psys_domain_energy_unit;
- bool spr_psys_bits;
-};
-static struct rapl_defaults *defaults_msr;
-static const struct rapl_defaults defaults_tpmi;
-
-static struct rapl_defaults *get_defaults(struct rapl_package *rp)
+static const struct rapl_defaults *get_defaults(struct rapl_package *rp)
{
return rp->priv->defaults;
}
-/* Sideband MBI registers */
-#define IOSF_CPU_POWER_BUDGET_CTL_BYT (0x2)
-#define IOSF_CPU_POWER_BUDGET_CTL_TNG (0xdf)
-
-#define PACKAGE_PLN_INT_SAVED BIT(0)
-#define MAX_PRIM_NAME (32)
-
-/* per domain data. used to describe individual knobs such that access function
- * can be consolidated into one instead of many inline functions.
- */
-struct rapl_primitive_info {
- const char *name;
- u64 mask;
- int shift;
- enum rapl_domain_reg_id id;
- enum unit_type unit;
- u32 flag;
-};
-
-#define PRIMITIVE_INFO_INIT(p, m, s, i, u, f) { \
- .name = #p, \
- .mask = m, \
- .shift = s, \
- .id = i, \
- .unit = u, \
- .flag = f \
- }
-
static void rapl_init_domains(struct rapl_package *rp);
static int rapl_read_data_raw(struct rapl_domain *rd,
enum rapl_primitives prim,
@@ -341,7 +245,7 @@ static int find_nr_power_limit(struct rapl_domain *rd)
static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
{
struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
- struct rapl_defaults *defaults = get_defaults(rd->rp);
+ const struct rapl_defaults *defaults = get_defaults(rd->rp);
u64 val;
int ret;
@@ -630,7 +534,7 @@ static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type,
u64 value, int to_raw)
{
u64 units = 1;
- struct rapl_defaults *defaults = get_defaults(rd->rp);
+ const struct rapl_defaults *defaults = get_defaults(rd->rp);
u64 scale = 1;
switch (type) {
@@ -656,104 +560,6 @@ static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type,
return div64_u64(value, scale);
}
-/* RAPL primitives for MSR and MMIO I/F */
-static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = {
- /* name, mask, shift, msr index, unit divisor */
- [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0,
- RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
- [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32,
- RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
- [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0,
- RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
- [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
- RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
- [FW_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31,
- RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- [FW_HIGH_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_HIGH_LOCK, 63,
- RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15,
- RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- [PL1_CLAMP] = PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16,
- RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47,
- RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- [PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
- RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
- RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
- [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
- RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
- [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK,
- 0, RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
- [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32,
- RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
- [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16,
- RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
- [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48,
- RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
- [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0,
- RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
- [PRIORITY_LEVEL] = PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0,
- RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0),
- [PSYS_POWER_LIMIT1] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0,
- RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
- [PSYS_POWER_LIMIT2] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32,
- RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
- [PSYS_PL1_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17,
- RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- [PSYS_PL2_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49,
- RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- [PSYS_TIME_WINDOW1] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19,
- RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
- [PSYS_TIME_WINDOW2] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51,
- RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
- /* non-hardware */
- [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT,
- RAPL_PRIMITIVE_DERIVED),
-};
-
-/* RAPL primitives for TPMI I/F */
-static struct rapl_primitive_info rpi_tpmi[NR_RAPL_PRIMITIVES] = {
- /* name, mask, shift, msr index, unit divisor */
- [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, TPMI_POWER_LIMIT_MASK, 0,
- RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
- [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, TPMI_POWER_LIMIT_MASK, 0,
- RAPL_DOMAIN_REG_PL2, POWER_UNIT, 0),
- [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, TPMI_POWER_LIMIT_MASK, 0,
- RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
- [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
- RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
- [PL1_LOCK] = PRIMITIVE_INFO_INIT(PL1_LOCK, POWER_HIGH_LOCK, 63,
- RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- [PL2_LOCK] = PRIMITIVE_INFO_INIT(PL2_LOCK, POWER_HIGH_LOCK, 63,
- RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0),
- [PL4_LOCK] = PRIMITIVE_INFO_INIT(PL4_LOCK, POWER_HIGH_LOCK, 63,
- RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
- [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
- RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
- RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0),
- [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
- RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
- [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TPMI_TIME_WINDOW_MASK, 18,
- RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
- [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TPMI_TIME_WINDOW_MASK, 18,
- RAPL_DOMAIN_REG_PL2, TIME_UNIT, 0),
- [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, TPMI_INFO_SPEC_MASK, 0,
- RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
- [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, TPMI_INFO_MAX_MASK, 36,
- RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
- [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, TPMI_INFO_MIN_MASK, 18,
- RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
- [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, TPMI_INFO_MAX_TIME_WIN_MASK, 54,
- RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
- [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0,
- RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
- /* non-hardware */
- [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0,
- POWER_UNIT, RAPL_PRIMITIVE_DERIVED),
-};
-
static struct rapl_primitive_info *get_rpi(struct rapl_package *rp, int prim)
{
struct rapl_primitive_info *rpi = rp->priv->rpi;
@@ -766,21 +572,6 @@ static struct rapl_primitive_info *get_rpi(struct rapl_package *rp, int prim)
static int rapl_config(struct rapl_package *rp)
{
- switch (rp->priv->type) {
- /* MMIO I/F shares the same register layout as MSR registers */
- case RAPL_IF_MMIO:
- case RAPL_IF_MSR:
- rp->priv->defaults = (void *)defaults_msr;
- rp->priv->rpi = (void *)rpi_msr;
- break;
- case RAPL_IF_TPMI:
- rp->priv->defaults = (void *)&defaults_tpmi;
- rp->priv->rpi = (void *)rpi_tpmi;
- break;
- default:
- return -EINVAL;
- }
-
/* defaults_msr can be NULL on unsupported platforms */
if (!rp->priv->defaults || !rp->priv->rpi)
return -ENODEV;
@@ -791,7 +582,7 @@ static int rapl_config(struct rapl_package *rp)
static enum rapl_primitives
prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim)
{
- struct rapl_defaults *defaults = get_defaults(rd->rp);
+ const struct rapl_defaults *defaults = get_defaults(rd->rp);
if (!defaults->spr_psys_bits)
return prim;
@@ -846,12 +637,6 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
if (!ra.reg.val)
return -EINVAL;
- /* non-hardware data are collected by the polling thread */
- if (rpi->flag & RAPL_PRIMITIVE_DERIVED) {
- *data = rd->rdd.primitives[prim];
- return 0;
- }
-
ra.mask = rpi->mask;
if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, pmu_ctx)) {
@@ -936,7 +721,7 @@ static int rapl_write_pl_data(struct rapl_domain *rd, int pl,
* power unit : microWatts : Represented in milliWatts by default
* time unit : microseconds: Represented in seconds by default
*/
-static int rapl_check_unit_core(struct rapl_domain *rd)
+int rapl_default_check_unit(struct rapl_domain *rd)
{
struct reg_action ra;
u32 value;
@@ -950,47 +735,20 @@ static int rapl_check_unit_core(struct rapl_domain *rd)
}
value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
- rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value);
+ rd->energy_unit = (ENERGY_UNIT_SCALE * MICROJOULE_PER_JOULE) >> value;
value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
- rd->power_unit = 1000000 / (1 << value);
+ rd->power_unit = MICROWATT_PER_WATT >> value;
value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
- rd->time_unit = 1000000 / (1 << value);
+ rd->time_unit = USEC_PER_SEC >> value;
pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n",
rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
return 0;
}
-
-static int rapl_check_unit_atom(struct rapl_domain *rd)
-{
- struct reg_action ra;
- u32 value;
-
- ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
- ra.mask = ~0;
- if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, false)) {
- pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
- ra.reg.val, rd->rp->name, rd->name);
- return -ENODEV;
- }
-
- value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
- rd->energy_unit = ENERGY_UNIT_SCALE * 1 << value;
-
- value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
- rd->power_unit = (1 << value) * 1000;
-
- value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
- rd->time_unit = 1000000 / (1 << value);
-
- pr_debug("Atom %s:%s energy=%dpJ, time=%dus, power=%duW\n",
- rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
-
- return 0;
-}
+EXPORT_SYMBOL_NS_GPL(rapl_default_check_unit, "INTEL_RAPL");
static void power_limit_irq_save_cpu(void *info)
{
@@ -1056,7 +814,7 @@ static void package_power_limit_irq_restore(struct rapl_package *rp)
wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
}
-static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
+void rapl_default_set_floor_freq(struct rapl_domain *rd, bool mode)
{
int i;
@@ -1070,33 +828,9 @@ static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
rapl_write_pl_data(rd, i, PL_CLAMP, mode);
}
}
+EXPORT_SYMBOL_NS_GPL(rapl_default_set_floor_freq, "INTEL_RAPL");
-static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
-{
- static u32 power_ctrl_orig_val;
- struct rapl_defaults *defaults = get_defaults(rd->rp);
- u32 mdata;
-
- if (!defaults->floor_freq_reg_addr) {
- pr_err("Invalid floor frequency config register\n");
- return;
- }
-
- if (!power_ctrl_orig_val)
- iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_CR_READ,
- defaults->floor_freq_reg_addr,
- &power_ctrl_orig_val);
- mdata = power_ctrl_orig_val;
- if (enable) {
- mdata &= ~(0x7f << 8);
- mdata |= 1 << 8;
- }
- iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_CR_WRITE,
- defaults->floor_freq_reg_addr, mdata);
-}
-
-static u64 rapl_compute_time_window_core(struct rapl_domain *rd, u64 value,
- bool to_raw)
+u64 rapl_default_compute_time_window(struct rapl_domain *rd, u64 value, bool to_raw)
{
u64 f, y; /* fraction and exp. used for time unit */
@@ -1107,7 +841,7 @@ static u64 rapl_compute_time_window_core(struct rapl_domain *rd, u64 value,
if (!to_raw) {
f = (value & 0x60) >> 5;
y = value & 0x1f;
- value = (1 << y) * (4 + f) * rd->time_unit / 4;
+ value = (1ULL << y) * (4 + f) * rd->time_unit / 4;
} else {
if (value < rd->time_unit)
return 0;
@@ -1122,199 +856,12 @@ static u64 rapl_compute_time_window_core(struct rapl_domain *rd, u64 value,
if (y > 0x1f)
return 0x7f;
- f = div64_u64(4 * (value - (1ULL << y)), 1ULL << y);
+ f = div64_u64(4 * (value - BIT_ULL(y)), BIT_ULL(y));
value = (y & 0x1f) | ((f & 0x3) << 5);
}
return value;
}
-
-static u64 rapl_compute_time_window_atom(struct rapl_domain *rd, u64 value,
- bool to_raw)
-{
- /*
- * Atom time unit encoding is straight forward val * time_unit,
- * where time_unit is default to 1 sec. Never 0.
- */
- if (!to_raw)
- return (value) ? value * rd->time_unit : rd->time_unit;
-
- value = div64_u64(value, rd->time_unit);
-
- return value;
-}
-
-/* TPMI Unit register has different layout */
-#define TPMI_POWER_UNIT_OFFSET POWER_UNIT_OFFSET
-#define TPMI_POWER_UNIT_MASK POWER_UNIT_MASK
-#define TPMI_ENERGY_UNIT_OFFSET 0x06
-#define TPMI_ENERGY_UNIT_MASK 0x7C0
-#define TPMI_TIME_UNIT_OFFSET 0x0C
-#define TPMI_TIME_UNIT_MASK 0xF000
-
-static int rapl_check_unit_tpmi(struct rapl_domain *rd)
-{
- struct reg_action ra;
- u32 value;
-
- ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
- ra.mask = ~0;
- if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, false)) {
- pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
- ra.reg.val, rd->rp->name, rd->name);
- return -ENODEV;
- }
-
- value = (ra.value & TPMI_ENERGY_UNIT_MASK) >> TPMI_ENERGY_UNIT_OFFSET;
- rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value);
-
- value = (ra.value & TPMI_POWER_UNIT_MASK) >> TPMI_POWER_UNIT_OFFSET;
- rd->power_unit = 1000000 / (1 << value);
-
- value = (ra.value & TPMI_TIME_UNIT_MASK) >> TPMI_TIME_UNIT_OFFSET;
- rd->time_unit = 1000000 / (1 << value);
-
- pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n",
- rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
-
- return 0;
-}
-
-static const struct rapl_defaults defaults_tpmi = {
- .check_unit = rapl_check_unit_tpmi,
- /* Reuse existing logic, ignore the PL_CLAMP failures and enable all Power Limits */
- .set_floor_freq = set_floor_freq_default,
- .compute_time_window = rapl_compute_time_window_core,
-};
-
-static const struct rapl_defaults rapl_defaults_core = {
- .floor_freq_reg_addr = 0,
- .check_unit = rapl_check_unit_core,
- .set_floor_freq = set_floor_freq_default,
- .compute_time_window = rapl_compute_time_window_core,
-};
-
-static const struct rapl_defaults rapl_defaults_hsw_server = {
- .check_unit = rapl_check_unit_core,
- .set_floor_freq = set_floor_freq_default,
- .compute_time_window = rapl_compute_time_window_core,
- .dram_domain_energy_unit = 15300,
-};
-
-static const struct rapl_defaults rapl_defaults_spr_server = {
- .check_unit = rapl_check_unit_core,
- .set_floor_freq = set_floor_freq_default,
- .compute_time_window = rapl_compute_time_window_core,
- .psys_domain_energy_unit = 1000000000,
- .spr_psys_bits = true,
-};
-
-static const struct rapl_defaults rapl_defaults_byt = {
- .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_BYT,
- .check_unit = rapl_check_unit_atom,
- .set_floor_freq = set_floor_freq_atom,
- .compute_time_window = rapl_compute_time_window_atom,
-};
-
-static const struct rapl_defaults rapl_defaults_tng = {
- .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_TNG,
- .check_unit = rapl_check_unit_atom,
- .set_floor_freq = set_floor_freq_atom,
- .compute_time_window = rapl_compute_time_window_atom,
-};
-
-static const struct rapl_defaults rapl_defaults_ann = {
- .floor_freq_reg_addr = 0,
- .check_unit = rapl_check_unit_atom,
- .set_floor_freq = NULL,
- .compute_time_window = rapl_compute_time_window_atom,
-};
-
-static const struct rapl_defaults rapl_defaults_cht = {
- .floor_freq_reg_addr = 0,
- .check_unit = rapl_check_unit_atom,
- .set_floor_freq = NULL,
- .compute_time_window = rapl_compute_time_window_atom,
-};
-
-static const struct rapl_defaults rapl_defaults_amd = {
- .check_unit = rapl_check_unit_core,
-};
-
-static const struct x86_cpu_id rapl_ids[] __initconst = {
- X86_MATCH_VFM(INTEL_SANDYBRIDGE, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &rapl_defaults_core),
-
- X86_MATCH_VFM(INTEL_IVYBRIDGE, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &rapl_defaults_core),
-
- X86_MATCH_VFM(INTEL_HASWELL, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_HASWELL_L, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_HASWELL_G, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_HASWELL_X, &rapl_defaults_hsw_server),
-
- X86_MATCH_VFM(INTEL_BROADWELL, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_BROADWELL_G, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_BROADWELL_D, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_BROADWELL_X, &rapl_defaults_hsw_server),
-
- X86_MATCH_VFM(INTEL_SKYLAKE, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_SKYLAKE_L, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_SKYLAKE_X, &rapl_defaults_hsw_server),
- X86_MATCH_VFM(INTEL_KABYLAKE_L, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_KABYLAKE, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_CANNONLAKE_L, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_ICELAKE_L, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_ICELAKE, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_ICELAKE_NNPI, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_ICELAKE_X, &rapl_defaults_hsw_server),
- X86_MATCH_VFM(INTEL_ICELAKE_D, &rapl_defaults_hsw_server),
- X86_MATCH_VFM(INTEL_COMETLAKE_L, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_COMETLAKE, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_TIGERLAKE_L, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_TIGERLAKE, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_ROCKETLAKE, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_ALDERLAKE, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_ALDERLAKE_L, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_RAPTORLAKE, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_BARTLETTLAKE, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_METEORLAKE, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_METEORLAKE_L, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &rapl_defaults_spr_server),
- X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &rapl_defaults_spr_server),
- X86_MATCH_VFM(INTEL_LUNARLAKE_M, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_WILDCATLAKE_L, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_NOVALAKE, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_NOVALAKE_L, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_ARROWLAKE_H, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_ARROWLAKE, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_ARROWLAKE_U, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_LAKEFIELD, &rapl_defaults_core),
-
- X86_MATCH_VFM(INTEL_ATOM_SILVERMONT, &rapl_defaults_byt),
- X86_MATCH_VFM(INTEL_ATOM_AIRMONT, &rapl_defaults_cht),
- X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, &rapl_defaults_tng),
- X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID2,&rapl_defaults_ann),
- X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_ATOM_TREMONT, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &rapl_defaults_core),
- X86_MATCH_VFM(INTEL_ATOM_TREMONT_L, &rapl_defaults_core),
-
- X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &rapl_defaults_hsw_server),
- X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &rapl_defaults_hsw_server),
-
- X86_MATCH_VENDOR_FAM(AMD, 0x17, &rapl_defaults_amd),
- X86_MATCH_VENDOR_FAM(AMD, 0x19, &rapl_defaults_amd),
- X86_MATCH_VENDOR_FAM(AMD, 0x1A, &rapl_defaults_amd),
- X86_MATCH_VENDOR_FAM(HYGON, 0x18, &rapl_defaults_amd),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
+EXPORT_SYMBOL_NS_GPL(rapl_default_compute_time_window, "INTEL_RAPL");
/* Read once for all raw primitive data for domains */
static void rapl_update_domain_data(struct rapl_package *rp)
@@ -1443,7 +990,7 @@ static int rapl_check_domain(int domain, struct rapl_package *rp)
*/
static int rapl_get_domain_unit(struct rapl_domain *rd)
{
- struct rapl_defaults *defaults = get_defaults(rd->rp);
+ const struct rapl_defaults *defaults = get_defaults(rd->rp);
int ret;
if (!rd->regs[RAPL_DOMAIN_REG_UNIT].val) {
@@ -1777,7 +1324,6 @@ enum perf_rapl_events {
PERF_RAPL_PSYS, /* psys */
PERF_RAPL_MAX
};
-#define RAPL_EVENT_MASK GENMASK(7, 0)
static const int event_to_domain[PERF_RAPL_MAX] = {
[PERF_RAPL_PP0] = RAPL_DOMAIN_PP0,
@@ -2083,7 +1629,7 @@ int rapl_package_add_pmu_locked(struct rapl_package *rp)
return rapl_pmu_update(rp);
}
-EXPORT_SYMBOL_GPL(rapl_package_add_pmu_locked);
+EXPORT_SYMBOL_NS_GPL(rapl_package_add_pmu_locked, "INTEL_RAPL");
int rapl_package_add_pmu(struct rapl_package *rp)
{
@@ -2091,7 +1637,7 @@ int rapl_package_add_pmu(struct rapl_package *rp)
return rapl_package_add_pmu_locked(rp);
}
-EXPORT_SYMBOL_GPL(rapl_package_add_pmu);
+EXPORT_SYMBOL_NS_GPL(rapl_package_add_pmu, "INTEL_RAPL");
void rapl_package_remove_pmu_locked(struct rapl_package *rp)
{
@@ -2109,7 +1655,7 @@ void rapl_package_remove_pmu_locked(struct rapl_package *rp)
perf_pmu_unregister(&rapl_pmu.pmu);
memset(&rapl_pmu, 0, sizeof(struct rapl_pmu));
}
-EXPORT_SYMBOL_GPL(rapl_package_remove_pmu_locked);
+EXPORT_SYMBOL_NS_GPL(rapl_package_remove_pmu_locked, "INTEL_RAPL");
void rapl_package_remove_pmu(struct rapl_package *rp)
{
@@ -2117,7 +1663,7 @@ void rapl_package_remove_pmu(struct rapl_package *rp)
rapl_package_remove_pmu_locked(rp);
}
-EXPORT_SYMBOL_GPL(rapl_package_remove_pmu);
+EXPORT_SYMBOL_NS_GPL(rapl_package_remove_pmu, "INTEL_RAPL");
#endif
/* called from CPU hotplug notifier, hotplug lock held */
@@ -2150,14 +1696,14 @@ void rapl_remove_package_cpuslocked(struct rapl_package *rp)
list_del(&rp->plist);
kfree(rp);
}
-EXPORT_SYMBOL_GPL(rapl_remove_package_cpuslocked);
+EXPORT_SYMBOL_NS_GPL(rapl_remove_package_cpuslocked, "INTEL_RAPL");
void rapl_remove_package(struct rapl_package *rp)
{
guard(cpus_read_lock)();
rapl_remove_package_cpuslocked(rp);
}
-EXPORT_SYMBOL_GPL(rapl_remove_package);
+EXPORT_SYMBOL_NS_GPL(rapl_remove_package, "INTEL_RAPL");
/*
* RAPL Package energy counter scope:
@@ -2200,14 +1746,14 @@ struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_
return NULL;
}
-EXPORT_SYMBOL_GPL(rapl_find_package_domain_cpuslocked);
+EXPORT_SYMBOL_NS_GPL(rapl_find_package_domain_cpuslocked, "INTEL_RAPL");
struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu)
{
guard(cpus_read_lock)();
return rapl_find_package_domain_cpuslocked(id, priv, id_is_cpu);
}
-EXPORT_SYMBOL_GPL(rapl_find_package_domain);
+EXPORT_SYMBOL_NS_GPL(rapl_find_package_domain, "INTEL_RAPL");
/* called from CPU hotplug notifier, hotplug lock held */
struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *priv, bool id_is_cpu)
@@ -2261,14 +1807,14 @@ err_free_package:
kfree(rp);
return ERR_PTR(ret);
}
-EXPORT_SYMBOL_GPL(rapl_add_package_cpuslocked);
+EXPORT_SYMBOL_NS_GPL(rapl_add_package_cpuslocked, "INTEL_RAPL");
struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu)
{
guard(cpus_read_lock)();
return rapl_add_package_cpuslocked(id, priv, id_is_cpu);
}
-EXPORT_SYMBOL_GPL(rapl_add_package);
+EXPORT_SYMBOL_NS_GPL(rapl_add_package, "INTEL_RAPL");
static void power_limit_state_save(void)
{
@@ -2328,40 +1874,13 @@ static struct notifier_block rapl_pm_notifier = {
.notifier_call = rapl_pm_callback,
};
-static struct platform_device *rapl_msr_platdev;
-
static int __init rapl_init(void)
{
- const struct x86_cpu_id *id;
- int ret;
-
- id = x86_match_cpu(rapl_ids);
- if (id) {
- defaults_msr = (struct rapl_defaults *)id->driver_data;
-
- rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0);
- if (!rapl_msr_platdev)
- return -ENOMEM;
-
- ret = platform_device_add(rapl_msr_platdev);
- if (ret) {
- platform_device_put(rapl_msr_platdev);
- return ret;
- }
- }
-
- ret = register_pm_notifier(&rapl_pm_notifier);
- if (ret && rapl_msr_platdev) {
- platform_device_del(rapl_msr_platdev);
- platform_device_put(rapl_msr_platdev);
- }
-
- return ret;
+ return register_pm_notifier(&rapl_pm_notifier);
}
static void __exit rapl_exit(void)
{
- platform_device_unregister(rapl_msr_platdev);
unregister_pm_notifier(&rapl_pm_notifier);
}
diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
index 3d5e7f56d68a..a34543e66446 100644
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -21,15 +21,73 @@
#include <linux/intel_rapl.h>
#include <linux/processor.h>
#include <linux/platform_device.h>
+#include <linux/units.h>
+#include <linux/bits.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
+#include <asm/iosf_mbi.h>
#include <asm/msr.h>
/* Local defines */
#define MSR_PLATFORM_POWER_LIMIT 0x0000065C
#define MSR_VR_CURRENT_CONFIG 0x00000601
+#define ENERGY_UNIT_SCALE 1000 /* scale from driver unit to powercap unit */
+
+#define POWER_UNIT_OFFSET 0x00
+#define POWER_UNIT_MASK GENMASK(3, 0)
+
+#define ENERGY_UNIT_OFFSET 0x08
+#define ENERGY_UNIT_MASK GENMASK(12, 8)
+
+#define TIME_UNIT_OFFSET 0x10
+#define TIME_UNIT_MASK GENMASK(19, 16)
+
+/* bitmasks for RAPL MSRs, used by primitive access functions */
+#define ENERGY_STATUS_MASK GENMASK(31, 0)
+
+#define POWER_LIMIT1_MASK GENMASK(14, 0)
+#define POWER_LIMIT1_ENABLE BIT(15)
+#define POWER_LIMIT1_CLAMP BIT(16)
+
+#define POWER_LIMIT2_MASK GENMASK_ULL(46, 32)
+#define POWER_LIMIT2_ENABLE BIT_ULL(47)
+#define POWER_LIMIT2_CLAMP BIT_ULL(48)
+#define POWER_HIGH_LOCK BIT_ULL(63)
+#define POWER_LOW_LOCK BIT(31)
+
+#define POWER_LIMIT4_MASK GENMASK(12, 0)
+
+#define TIME_WINDOW1_MASK GENMASK_ULL(23, 17)
+#define TIME_WINDOW2_MASK GENMASK_ULL(55, 49)
+
+#define POWER_INFO_MAX_MASK GENMASK_ULL(46, 32)
+#define POWER_INFO_MIN_MASK GENMASK_ULL(30, 16)
+#define POWER_INFO_MAX_TIME_WIN_MASK GENMASK_ULL(53, 48)
+#define POWER_INFO_THERMAL_SPEC_MASK GENMASK(14, 0)
+
+#define PERF_STATUS_THROTTLE_TIME_MASK GENMASK(31, 0)
+#define PP_POLICY_MASK GENMASK(4, 0)
+
+/*
+ * SPR has different layout for Psys Domain PowerLimit registers.
+ * There are 17 bits of PL1 and PL2 instead of 15 bits.
+ * The Enable bits and TimeWindow bits are also shifted as a result.
+ */
+#define PSYS_POWER_LIMIT1_MASK GENMASK_ULL(16, 0)
+#define PSYS_POWER_LIMIT1_ENABLE BIT(17)
+
+#define PSYS_POWER_LIMIT2_MASK GENMASK_ULL(48, 32)
+#define PSYS_POWER_LIMIT2_ENABLE BIT_ULL(49)
+
+#define PSYS_TIME_WINDOW1_MASK GENMASK_ULL(25, 19)
+#define PSYS_TIME_WINDOW2_MASK GENMASK_ULL(57, 51)
+
+/* Sideband MBI registers */
+#define IOSF_CPU_POWER_BUDGET_CTL_BYT 0x02
+#define IOSF_CPU_POWER_BUDGET_CTL_TNG 0xDF
+
/* private data for RAPL MSR Interface */
static struct rapl_if_priv *rapl_msr_priv;
@@ -158,36 +216,278 @@ static int rapl_msr_write_raw(int cpu, struct reg_action *ra)
return ra->err;
}
-/* List of verified CPUs. */
-static const struct x86_cpu_id pl4_support_ids[] = {
- X86_MATCH_VFM(INTEL_ICELAKE_L, NULL),
- X86_MATCH_VFM(INTEL_TIGERLAKE_L, NULL),
- X86_MATCH_VFM(INTEL_ALDERLAKE, NULL),
- X86_MATCH_VFM(INTEL_ALDERLAKE_L, NULL),
- X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, NULL),
- X86_MATCH_VFM(INTEL_RAPTORLAKE, NULL),
- X86_MATCH_VFM(INTEL_RAPTORLAKE_P, NULL),
- X86_MATCH_VFM(INTEL_METEORLAKE, NULL),
- X86_MATCH_VFM(INTEL_METEORLAKE_L, NULL),
- X86_MATCH_VFM(INTEL_ARROWLAKE_U, NULL),
- X86_MATCH_VFM(INTEL_ARROWLAKE_H, NULL),
- X86_MATCH_VFM(INTEL_PANTHERLAKE_L, NULL),
- X86_MATCH_VFM(INTEL_WILDCATLAKE_L, NULL),
- X86_MATCH_VFM(INTEL_NOVALAKE, NULL),
- X86_MATCH_VFM(INTEL_NOVALAKE_L, NULL),
- {}
+static int rapl_check_unit_atom(struct rapl_domain *rd)
+{
+ struct reg_action ra;
+ u32 value;
+
+ ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
+ ra.mask = ~0;
+ if (rapl_msr_read_raw(rd->rp->lead_cpu, &ra, false)) {
+ pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
+ ra.reg.val, rd->rp->name, rd->name);
+ return -ENODEV;
+ }
+
+ value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
+ rd->energy_unit = ENERGY_UNIT_SCALE * (1ULL << value);
+
+ value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
+ rd->power_unit = (1ULL << value) * MILLIWATT_PER_WATT;
+
+ value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
+ rd->time_unit = USEC_PER_SEC >> value;
+
+ pr_debug("Atom %s:%s energy=%dpJ, time=%dus, power=%duW\n",
+ rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
+
+ return 0;
+}
+
+static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
+{
+ static u32 power_ctrl_orig_val;
+ const struct rapl_defaults *defaults = rd->rp->priv->defaults;
+ u32 mdata;
+
+ if (!defaults->floor_freq_reg_addr) {
+ pr_err("Invalid floor frequency config register\n");
+ return;
+ }
+
+ if (!power_ctrl_orig_val)
+ iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_CR_READ,
+ defaults->floor_freq_reg_addr,
+ &power_ctrl_orig_val);
+ mdata = power_ctrl_orig_val;
+ if (enable) {
+ mdata &= ~GENMASK(14, 8);
+ mdata |= BIT(8);
+ }
+ iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_CR_WRITE,
+ defaults->floor_freq_reg_addr, mdata);
+}
+
+static u64 rapl_compute_time_window_atom(struct rapl_domain *rd, u64 value,
+ bool to_raw)
+{
+ if (to_raw)
+ return div64_u64(value, rd->time_unit);
+
+ /*
+	 * Atom time unit encoding is straightforward: val * time_unit,
+	 * where time_unit defaults to 1 sec. Never 0.
+ */
+ return value ? value * rd->time_unit : rd->time_unit;
+}
+
+/* RAPL primitives for MSR I/F */
+static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = {
+ /* name, mask, shift, msr index, unit divisor */
+ [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0,
+ RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
+ [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32,
+ RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
+ [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0,
+ RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
+ [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
+ RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
+ [FW_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31,
+ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+	[FW_HIGH_LOCK] = PRIMITIVE_INFO_INIT(FW_HIGH_LOCK, POWER_HIGH_LOCK, 63,
+			    RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15,
+ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ [PL1_CLAMP] = PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16,
+ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47,
+ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ [PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
+ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
+ RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
+ [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
+ RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
+ [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER,
+ POWER_INFO_THERMAL_SPEC_MASK, 0,
+ RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
+ [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32,
+ RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
+ [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16,
+ RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
+ [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW,
+ POWER_INFO_MAX_TIME_WIN_MASK, 48,
+ RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
+ [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME,
+ PERF_STATUS_THROTTLE_TIME_MASK, 0,
+ RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
+ [PRIORITY_LEVEL] = PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0,
+ RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0),
+ [PSYS_POWER_LIMIT1] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0,
+ RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
+ [PSYS_POWER_LIMIT2] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK,
+ 32, RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
+ [PSYS_PL1_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE,
+ 17, RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT,
+ 0),
+ [PSYS_PL2_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE,
+ 49, RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT,
+ 0),
+ [PSYS_TIME_WINDOW1] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK,
+ 19, RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
+ [PSYS_TIME_WINDOW2] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK,
+ 51, RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
+};
+
+static const struct rapl_defaults rapl_defaults_core = {
+ .floor_freq_reg_addr = 0,
+ .check_unit = rapl_default_check_unit,
+ .set_floor_freq = rapl_default_set_floor_freq,
+ .compute_time_window = rapl_default_compute_time_window,
+};
+
+static const struct rapl_defaults rapl_defaults_hsw_server = {
+ .check_unit = rapl_default_check_unit,
+ .set_floor_freq = rapl_default_set_floor_freq,
+ .compute_time_window = rapl_default_compute_time_window,
+ .dram_domain_energy_unit = 15300,
+};
+
+static const struct rapl_defaults rapl_defaults_spr_server = {
+ .check_unit = rapl_default_check_unit,
+ .set_floor_freq = rapl_default_set_floor_freq,
+ .compute_time_window = rapl_default_compute_time_window,
+ .psys_domain_energy_unit = NANOJOULE_PER_JOULE,
+ .spr_psys_bits = true,
+};
+
+static const struct rapl_defaults rapl_defaults_byt = {
+ .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_BYT,
+ .check_unit = rapl_check_unit_atom,
+ .set_floor_freq = set_floor_freq_atom,
+ .compute_time_window = rapl_compute_time_window_atom,
+};
+
+static const struct rapl_defaults rapl_defaults_tng = {
+ .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_TNG,
+ .check_unit = rapl_check_unit_atom,
+ .set_floor_freq = set_floor_freq_atom,
+ .compute_time_window = rapl_compute_time_window_atom,
+};
+
+static const struct rapl_defaults rapl_defaults_ann = {
+ .floor_freq_reg_addr = 0,
+ .check_unit = rapl_check_unit_atom,
+ .set_floor_freq = NULL,
+ .compute_time_window = rapl_compute_time_window_atom,
};
-/* List of MSR-based RAPL PMU support CPUs */
-static const struct x86_cpu_id pmu_support_ids[] = {
- X86_MATCH_VFM(INTEL_PANTHERLAKE_L, NULL),
- X86_MATCH_VFM(INTEL_WILDCATLAKE_L, NULL),
+static const struct rapl_defaults rapl_defaults_cht = {
+ .floor_freq_reg_addr = 0,
+ .check_unit = rapl_check_unit_atom,
+ .set_floor_freq = NULL,
+ .compute_time_window = rapl_compute_time_window_atom,
+};
+
+static const struct rapl_defaults rapl_defaults_amd = {
+ .check_unit = rapl_default_check_unit,
+};
+
+static const struct rapl_defaults rapl_defaults_core_pl4 = {
+ .floor_freq_reg_addr = 0,
+ .check_unit = rapl_default_check_unit,
+ .set_floor_freq = rapl_default_set_floor_freq,
+ .compute_time_window = rapl_default_compute_time_window,
+ .msr_pl4_support = 1,
+};
+
+static const struct rapl_defaults rapl_defaults_core_pl4_pmu = {
+ .floor_freq_reg_addr = 0,
+ .check_unit = rapl_default_check_unit,
+ .set_floor_freq = rapl_default_set_floor_freq,
+ .compute_time_window = rapl_default_compute_time_window,
+ .msr_pl4_support = 1,
+ .msr_pmu_support = 1,
+};
+
+static const struct x86_cpu_id rapl_ids[] = {
+ X86_MATCH_VFM(INTEL_SANDYBRIDGE, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &rapl_defaults_core),
+
+ X86_MATCH_VFM(INTEL_IVYBRIDGE, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &rapl_defaults_core),
+
+ X86_MATCH_VFM(INTEL_HASWELL, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_HASWELL_L, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_HASWELL_G, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_HASWELL_X, &rapl_defaults_hsw_server),
+
+ X86_MATCH_VFM(INTEL_BROADWELL, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_BROADWELL_G, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_BROADWELL_D, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_BROADWELL_X, &rapl_defaults_hsw_server),
+
+ X86_MATCH_VFM(INTEL_SKYLAKE, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_SKYLAKE_L, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_SKYLAKE_X, &rapl_defaults_hsw_server),
+ X86_MATCH_VFM(INTEL_KABYLAKE_L, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_KABYLAKE, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_CANNONLAKE_L, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_ICELAKE_L, &rapl_defaults_core_pl4),
+ X86_MATCH_VFM(INTEL_ICELAKE, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_ICELAKE_NNPI, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_ICELAKE_X, &rapl_defaults_hsw_server),
+ X86_MATCH_VFM(INTEL_ICELAKE_D, &rapl_defaults_hsw_server),
+ X86_MATCH_VFM(INTEL_COMETLAKE_L, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_COMETLAKE, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_TIGERLAKE_L, &rapl_defaults_core_pl4),
+ X86_MATCH_VFM(INTEL_TIGERLAKE, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_ROCKETLAKE, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_ALDERLAKE, &rapl_defaults_core_pl4),
+ X86_MATCH_VFM(INTEL_ALDERLAKE_L, &rapl_defaults_core_pl4),
+ X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &rapl_defaults_core_pl4),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE, &rapl_defaults_core_pl4),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &rapl_defaults_core_pl4),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_BARTLETTLAKE, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_METEORLAKE, &rapl_defaults_core_pl4),
+ X86_MATCH_VFM(INTEL_METEORLAKE_L, &rapl_defaults_core_pl4),
+ X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &rapl_defaults_spr_server),
+ X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &rapl_defaults_spr_server),
+ X86_MATCH_VFM(INTEL_LUNARLAKE_M, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &rapl_defaults_core_pl4_pmu),
+ X86_MATCH_VFM(INTEL_WILDCATLAKE_L, &rapl_defaults_core_pl4_pmu),
+ X86_MATCH_VFM(INTEL_NOVALAKE, &rapl_defaults_core_pl4),
+ X86_MATCH_VFM(INTEL_NOVALAKE_L, &rapl_defaults_core_pl4),
+ X86_MATCH_VFM(INTEL_ARROWLAKE_H, &rapl_defaults_core_pl4),
+ X86_MATCH_VFM(INTEL_ARROWLAKE, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_ARROWLAKE_U, &rapl_defaults_core_pl4),
+ X86_MATCH_VFM(INTEL_LAKEFIELD, &rapl_defaults_core),
+
+ X86_MATCH_VFM(INTEL_ATOM_SILVERMONT, &rapl_defaults_byt),
+ X86_MATCH_VFM(INTEL_ATOM_AIRMONT, &rapl_defaults_cht),
+ X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, &rapl_defaults_tng),
+ X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID2, &rapl_defaults_ann),
+ X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_ATOM_TREMONT, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &rapl_defaults_core),
+ X86_MATCH_VFM(INTEL_ATOM_TREMONT_L, &rapl_defaults_core),
+
+ X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &rapl_defaults_hsw_server),
+ X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &rapl_defaults_hsw_server),
+
+ X86_MATCH_VENDOR_FAM(AMD, 0x17, &rapl_defaults_amd),
+ X86_MATCH_VENDOR_FAM(AMD, 0x19, &rapl_defaults_amd),
+ X86_MATCH_VENDOR_FAM(AMD, 0x1A, &rapl_defaults_amd),
+ X86_MATCH_VENDOR_FAM(HYGON, 0x18, &rapl_defaults_amd),
{}
};
+MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
static int rapl_msr_probe(struct platform_device *pdev)
{
- const struct x86_cpu_id *id = x86_match_cpu(pl4_support_ids);
int ret;
switch (boot_cpu_data.x86_vendor) {
@@ -204,17 +504,19 @@ static int rapl_msr_probe(struct platform_device *pdev)
}
rapl_msr_priv->read_raw = rapl_msr_read_raw;
rapl_msr_priv->write_raw = rapl_msr_write_raw;
+ rapl_msr_priv->defaults = (const struct rapl_defaults *)pdev->dev.platform_data;
+ rapl_msr_priv->rpi = rpi_msr;
- if (id) {
+ if (rapl_msr_priv->defaults->msr_pl4_support) {
rapl_msr_priv->limits[RAPL_DOMAIN_PACKAGE] |= BIT(POWER_LIMIT4);
rapl_msr_priv->regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PL4].msr =
MSR_VR_CURRENT_CONFIG;
- pr_info("PL4 support detected.\n");
+		pr_info("PL4 support detected.\n");
}
- if (x86_match_cpu(pmu_support_ids)) {
+ if (rapl_msr_priv->defaults->msr_pmu_support) {
rapl_msr_pmu = true;
- pr_info("MSR-based RAPL PMU support enabled\n");
+		pr_info("MSR-based RAPL PMU support enabled\n");
}
rapl_msr_priv->control_type = powercap_register_control_type(NULL, "intel-rapl", NULL);
@@ -258,8 +560,43 @@ static struct platform_driver intel_rapl_msr_driver = {
},
};
-module_platform_driver(intel_rapl_msr_driver);
+static struct platform_device *rapl_msr_platdev;
+
+static int intel_rapl_msr_init(void)
+{
+ const struct rapl_defaults *def;
+ const struct x86_cpu_id *id;
+ int ret;
+
+ ret = platform_driver_register(&intel_rapl_msr_driver);
+ if (ret)
+ return ret;
+
+ /* Create the MSR RAPL platform device for supported platforms */
+ id = x86_match_cpu(rapl_ids);
+ if (!id)
+ return 0;
+
+ def = (const struct rapl_defaults *)id->driver_data;
+
+	rapl_msr_platdev = platform_device_register_data(NULL, "intel_rapl_msr", 0, def,
+							 sizeof(*def));
+	if (IS_ERR(rapl_msr_platdev)) {
+		pr_debug("intel_rapl_msr device register failed, ret:%ld\n",
+			 PTR_ERR(rapl_msr_platdev));
+		/* Reset to NULL so module exit does not unregister an ERR_PTR */
+		rapl_msr_platdev = NULL;
+	}
+
+ return 0;
+}
+module_init(intel_rapl_msr_init);
+
+static void intel_rapl_msr_exit(void)
+{
+ platform_device_unregister(rapl_msr_platdev);
+ platform_driver_unregister(&intel_rapl_msr_driver);
+}
+module_exit(intel_rapl_msr_exit);
MODULE_DESCRIPTION("Driver for Intel RAPL (Running Average Power Limit) control via MSR interface");
MODULE_AUTHOR("Zhang Rui <rui.zhang@intel.com>");
MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS("INTEL_RAPL");
diff --git a/drivers/powercap/intel_rapl_tpmi.c b/drivers/powercap/intel_rapl_tpmi.c
index ba956a2571d1..7f41491d9cd1 100644
--- a/drivers/powercap/intel_rapl_tpmi.c
+++ b/drivers/powercap/intel_rapl_tpmi.c
@@ -9,12 +9,14 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/auxiliary_bus.h>
+#include <linux/bits.h>
#include <linux/intel_rapl.h>
#include <linux/intel_tpmi.h>
#include <linux/intel_vsec.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/units.h>
#define TPMI_RAPL_MAJOR_VERSION 0
#define TPMI_RAPL_MINOR_VERSION 1
@@ -60,6 +62,58 @@ static DEFINE_MUTEX(tpmi_rapl_lock);
static struct powercap_control_type *tpmi_control_type;
+/* bitmasks for RAPL TPMI, used by primitive access functions */
+#define TPMI_POWER_LIMIT_MASK GENMASK_ULL(17, 0)
+#define TPMI_POWER_LIMIT_ENABLE BIT_ULL(62)
+#define TPMI_POWER_HIGH_LOCK BIT_ULL(63)
+#define TPMI_TIME_WINDOW_MASK GENMASK_ULL(24, 18)
+#define TPMI_INFO_SPEC_MASK GENMASK_ULL(17, 0)
+#define TPMI_INFO_MIN_MASK GENMASK_ULL(35, 18)
+#define TPMI_INFO_MAX_MASK GENMASK_ULL(53, 36)
+#define TPMI_INFO_MAX_TIME_WIN_MASK GENMASK_ULL(60, 54)
+#define TPMI_ENERGY_STATUS_MASK GENMASK(31, 0)
+#define TPMI_PERF_STATUS_THROTTLE_TIME_MASK GENMASK(31, 0)
+
+/* RAPL primitives for TPMI I/F */
+static struct rapl_primitive_info rpi_tpmi[NR_RAPL_PRIMITIVES] = {
+ /* name, mask, shift, msr index, unit divisor */
+ [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, TPMI_POWER_LIMIT_MASK, 0,
+ RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
+ [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, TPMI_POWER_LIMIT_MASK, 0,
+ RAPL_DOMAIN_REG_PL2, POWER_UNIT, 0),
+ [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, TPMI_POWER_LIMIT_MASK, 0,
+ RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
+ [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, TPMI_ENERGY_STATUS_MASK, 0,
+ RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
+ [PL1_LOCK] = PRIMITIVE_INFO_INIT(PL1_LOCK, TPMI_POWER_HIGH_LOCK, 63,
+ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ [PL2_LOCK] = PRIMITIVE_INFO_INIT(PL2_LOCK, TPMI_POWER_HIGH_LOCK, 63,
+ RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0),
+ [PL4_LOCK] = PRIMITIVE_INFO_INIT(PL4_LOCK, TPMI_POWER_HIGH_LOCK, 63,
+ RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
+ [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
+ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
+ RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0),
+ [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
+ RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
+ [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TPMI_TIME_WINDOW_MASK, 18,
+ RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
+ [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TPMI_TIME_WINDOW_MASK, 18,
+ RAPL_DOMAIN_REG_PL2, TIME_UNIT, 0),
+ [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, TPMI_INFO_SPEC_MASK, 0,
+ RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
+ [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, TPMI_INFO_MAX_MASK, 36,
+ RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
+ [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, TPMI_INFO_MIN_MASK, 18,
+ RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
+ [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, TPMI_INFO_MAX_TIME_WIN_MASK,
+ 54, RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
+ [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME,
+ TPMI_PERF_STATUS_THROTTLE_TIME_MASK,
+ 0, RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
+};
+
static int tpmi_rapl_read_raw(int id, struct reg_action *ra, bool atomic)
{
if (!ra->reg.mmio)
@@ -250,6 +304,50 @@ static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
return 0;
}
+/* TPMI Unit register has different layout */
+#define TPMI_ENERGY_UNIT_SCALE 1000
+#define TPMI_POWER_UNIT_OFFSET 0x00
+#define TPMI_POWER_UNIT_MASK GENMASK(3, 0)
+#define TPMI_ENERGY_UNIT_OFFSET 0x06
+#define TPMI_ENERGY_UNIT_MASK GENMASK_ULL(10, 6)
+#define TPMI_TIME_UNIT_OFFSET 0x0C
+#define TPMI_TIME_UNIT_MASK GENMASK_ULL(15, 12)
+
+static int rapl_check_unit_tpmi(struct rapl_domain *rd)
+{
+ struct reg_action ra;
+ u32 value;
+
+ ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
+ ra.mask = ~0;
+ if (tpmi_rapl_read_raw(rd->rp->id, &ra, false)) {
+ pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
+ ra.reg.val, rd->rp->name, rd->name);
+ return -ENODEV;
+ }
+
+ value = (ra.value & TPMI_ENERGY_UNIT_MASK) >> TPMI_ENERGY_UNIT_OFFSET;
+ rd->energy_unit = (TPMI_ENERGY_UNIT_SCALE * MICROJOULE_PER_JOULE) >> value;
+
+ value = (ra.value & TPMI_POWER_UNIT_MASK) >> TPMI_POWER_UNIT_OFFSET;
+ rd->power_unit = MICROWATT_PER_WATT >> value;
+
+ value = (ra.value & TPMI_TIME_UNIT_MASK) >> TPMI_TIME_UNIT_OFFSET;
+ rd->time_unit = USEC_PER_SEC >> value;
+
+ pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n",
+ rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
+
+ return 0;
+}
+
+static const struct rapl_defaults defaults_tpmi = {
+ .check_unit = rapl_check_unit_tpmi,
+ /* Reuse existing logic, ignore the PL_CLAMP failures and enable all Power Limits */
+ .set_floor_freq = rapl_default_set_floor_freq,
+ .compute_time_window = rapl_default_compute_time_window,
+};
+
static int intel_rapl_tpmi_probe(struct auxiliary_device *auxdev,
const struct auxiliary_device_id *id)
{
@@ -297,6 +395,8 @@ static int intel_rapl_tpmi_probe(struct auxiliary_device *auxdev,
trp->priv.read_raw = tpmi_rapl_read_raw;
trp->priv.write_raw = tpmi_rapl_write_raw;
trp->priv.control_type = tpmi_control_type;
+ trp->priv.defaults = &defaults_tpmi;
+ trp->priv.rpi = rpi_tpmi;
/* RAPL TPMI I/F is per physical package */
trp->rp = rapl_find_package_domain(info->package_id, &trp->priv, false);
@@ -348,6 +448,7 @@ static struct auxiliary_driver intel_rapl_tpmi_driver = {
module_auxiliary_driver(intel_rapl_tpmi_driver)
+MODULE_IMPORT_NS("INTEL_RAPL");
MODULE_IMPORT_NS("INTEL_TPMI");
MODULE_DESCRIPTION("Intel RAPL TPMI Driver");
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
index bf51a17c5be6..f8b9745c1b8a 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
@@ -11,6 +11,77 @@
static struct rapl_if_priv rapl_mmio_priv;
+/* bitmasks for RAPL MSRs, used by primitive access functions */
+#define MMIO_ENERGY_STATUS_MASK GENMASK(31, 0)
+
+#define MMIO_POWER_LIMIT1_MASK GENMASK(14, 0)
+#define MMIO_POWER_LIMIT1_ENABLE BIT(15)
+#define MMIO_POWER_LIMIT1_CLAMP BIT(16)
+
+#define MMIO_POWER_LIMIT2_MASK GENMASK_ULL(46, 32)
+#define MMIO_POWER_LIMIT2_ENABLE BIT_ULL(47)
+#define MMIO_POWER_LIMIT2_CLAMP BIT_ULL(48)
+
+#define MMIO_POWER_LOW_LOCK BIT(31)
+#define MMIO_POWER_HIGH_LOCK BIT_ULL(63)
+
+#define MMIO_POWER_LIMIT4_MASK GENMASK(12, 0)
+
+#define MMIO_TIME_WINDOW1_MASK GENMASK_ULL(23, 17)
+#define MMIO_TIME_WINDOW2_MASK GENMASK_ULL(55, 49)
+
+#define MMIO_POWER_INFO_MAX_MASK GENMASK_ULL(46, 32)
+#define MMIO_POWER_INFO_MIN_MASK GENMASK_ULL(30, 16)
+#define MMIO_POWER_INFO_MAX_TIME_WIN_MASK GENMASK_ULL(53, 48)
+#define MMIO_POWER_INFO_THERMAL_SPEC_MASK GENMASK(14, 0)
+
+#define MMIO_PERF_STATUS_THROTTLE_TIME_MASK GENMASK(31, 0)
+#define MMIO_PP_POLICY_MASK GENMASK(4, 0)
+
+/* RAPL primitives for MMIO I/F */
+static struct rapl_primitive_info rpi_mmio[NR_RAPL_PRIMITIVES] = {
+ /* name, mask, shift, msr index, unit divisor */
+ [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, MMIO_POWER_LIMIT1_MASK, 0,
+ RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
+ [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, MMIO_POWER_LIMIT2_MASK, 32,
+ RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
+ [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, MMIO_POWER_LIMIT4_MASK, 0,
+ RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
+ [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, MMIO_ENERGY_STATUS_MASK, 0,
+ RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
+ [FW_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, MMIO_POWER_LOW_LOCK, 31,
+ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+	[FW_HIGH_LOCK] = PRIMITIVE_INFO_INIT(FW_HIGH_LOCK, MMIO_POWER_HIGH_LOCK, 63,
+			    RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, MMIO_POWER_LIMIT1_ENABLE, 15,
+ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ [PL1_CLAMP] = PRIMITIVE_INFO_INIT(PL1_CLAMP, MMIO_POWER_LIMIT1_CLAMP, 16,
+ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, MMIO_POWER_LIMIT2_ENABLE, 47,
+ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ [PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, MMIO_POWER_LIMIT2_CLAMP, 48,
+ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, MMIO_TIME_WINDOW1_MASK, 17,
+ RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
+ [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, MMIO_TIME_WINDOW2_MASK, 49,
+ RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
+ [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER,
+ MMIO_POWER_INFO_THERMAL_SPEC_MASK, 0,
+ RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
+ [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, MMIO_POWER_INFO_MAX_MASK, 32,
+ RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
+ [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, MMIO_POWER_INFO_MIN_MASK, 16,
+ RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
+ [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW,
+ MMIO_POWER_INFO_MAX_TIME_WIN_MASK, 48,
+ RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
+ [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME,
+ MMIO_PERF_STATUS_THROTTLE_TIME_MASK, 0,
+ RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
+ [PRIORITY_LEVEL] = PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, MMIO_PP_POLICY_MASK, 0,
+ RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0),
+};
+
static const struct rapl_mmio_regs rapl_mmio_default = {
.reg_unit = 0x5938,
.regs[RAPL_DOMAIN_PACKAGE] = { 0x59a0, 0x593c, 0x58f0, 0, 0x5930, 0x59b0},
@@ -19,6 +90,13 @@ static const struct rapl_mmio_regs rapl_mmio_default = {
.limits[RAPL_DOMAIN_DRAM] = BIT(POWER_LIMIT2),
};
+static const struct rapl_defaults rapl_defaults_mmio = {
+ .floor_freq_reg_addr = 0,
+ .check_unit = rapl_default_check_unit,
+ .set_floor_freq = rapl_default_set_floor_freq,
+ .compute_time_window = rapl_default_compute_time_window,
+};
+
static int rapl_mmio_read_raw(int cpu, struct reg_action *ra, bool atomic)
{
if (!ra->reg.mmio)
@@ -67,6 +145,8 @@ int proc_thermal_rapl_add(struct pci_dev *pdev, struct proc_thermal_device *proc
rapl_mmio_priv.read_raw = rapl_mmio_read_raw;
rapl_mmio_priv.write_raw = rapl_mmio_write_raw;
+ rapl_mmio_priv.defaults = &rapl_defaults_mmio;
+ rapl_mmio_priv.rpi = rpi_mmio;
rapl_mmio_priv.control_type = powercap_register_control_type(NULL, "intel-rapl-mmio", NULL);
if (IS_ERR(rapl_mmio_priv.control_type)) {
@@ -111,4 +191,5 @@ void proc_thermal_rapl_remove(void)
EXPORT_SYMBOL_GPL(proc_thermal_rapl_remove);
MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS("INTEL_RAPL");
MODULE_DESCRIPTION("RAPL interface using MMIO");
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index d8e405becdc3..d1f02ceec4f9 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -162,6 +162,7 @@ extern int cppc_set_enable(int cpu, bool enable);
extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps);
extern bool cppc_perf_ctrs_in_pcc_cpu(unsigned int cpu);
extern bool cppc_perf_ctrs_in_pcc(void);
+extern u64 cppc_get_dmi_max_khz(void);
extern unsigned int cppc_perf_to_khz(struct cppc_perf_caps *caps, unsigned int perf);
extern unsigned int cppc_khz_to_perf(struct cppc_perf_caps *caps, unsigned int freq);
extern bool acpi_cpc_valid(void);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index cc894fc38971..2ab691828e48 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -79,8 +79,9 @@ struct cpufreq_policy {
* called, but you're in IRQ context */
struct freq_constraints constraints;
- struct freq_qos_request *min_freq_req;
- struct freq_qos_request *max_freq_req;
+ struct freq_qos_request min_freq_req;
+ struct freq_qos_request max_freq_req;
+ struct freq_qos_request boost_freq_req;
struct cpufreq_frequency_table *freq_table;
enum cpufreq_table_sorting freq_table_sorted;
@@ -232,7 +233,7 @@ static inline bool policy_is_inactive(struct cpufreq_policy *policy)
static inline bool policy_is_shared(struct cpufreq_policy *policy)
{
- return cpumask_weight(policy->cpus) > 1;
+ return cpumask_nth(1, policy->cpus) < nr_cpumask_bits;
}
#ifdef CONFIG_CPU_FREQ
@@ -372,7 +373,7 @@ struct cpufreq_driver {
* conditions) scale invariance can be disabled, which causes the
* schedutil governor to fall back to the latter.
*/
- void (*adjust_perf)(unsigned int cpu,
+ void (*adjust_perf)(struct cpufreq_policy *policy,
unsigned long min_perf,
unsigned long target_perf,
unsigned long capacity);
@@ -617,7 +618,7 @@ struct cpufreq_governor {
/* Pass a target to the cpufreq driver */
unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
unsigned int target_freq);
-void cpufreq_driver_adjust_perf(unsigned int cpu,
+void cpufreq_driver_adjust_perf(struct cpufreq_policy *policy,
unsigned long min_perf,
unsigned long target_perf,
unsigned long capacity);
diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h
index fa1f328d6712..328004f605c3 100644
--- a/include/linux/intel_rapl.h
+++ b/include/linux/intel_rapl.h
@@ -77,7 +77,6 @@ enum rapl_primitives {
PSYS_TIME_WINDOW1,
PSYS_TIME_WINDOW2,
/* below are not raw primitive data */
- AVERAGE_POWER,
NR_RAPL_PRIMITIVES,
};
@@ -128,6 +127,46 @@ struct reg_action {
int err;
};
+struct rapl_defaults {
+ u8 floor_freq_reg_addr;
+ int (*check_unit)(struct rapl_domain *rd);
+ void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
+ u64 (*compute_time_window)(struct rapl_domain *rd, u64 val, bool to_raw);
+ unsigned int dram_domain_energy_unit;
+ unsigned int psys_domain_energy_unit;
+ bool spr_psys_bits;
+ bool msr_pl4_support;
+ bool msr_pmu_support;
+};
+
+#define PRIMITIVE_INFO_INIT(p, m, s, i, u, f) { \
+ .name = #p, \
+ .mask = m, \
+ .shift = s, \
+ .id = i, \
+ .unit = u, \
+ .flag = f \
+ }
+
+enum unit_type {
+ ARBITRARY_UNIT, /* no translation */
+ POWER_UNIT,
+ ENERGY_UNIT,
+ TIME_UNIT,
+};
+
+/* per domain data. used to describe individual knobs such that access function
+ * can be consolidated into one instead of many inline functions.
+ */
+struct rapl_primitive_info {
+ const char *name;
+ u64 mask;
+ int shift;
+ enum rapl_domain_reg_id id;
+ enum unit_type unit;
+ u32 flag;
+};
+
/**
* struct rapl_if_priv: private data for different RAPL interfaces
* @control_type: Each RAPL interface must have its own powercap
@@ -142,8 +181,8 @@ struct reg_action {
* registers.
* @write_raw: Callback for writing RAPL interface specific
* registers.
- * @defaults: internal pointer to interface default settings
- * @rpi: internal pointer to interface primitive info
+ * @defaults: pointer to default settings
+ * @rpi: pointer to interface primitive info
*/
struct rapl_if_priv {
enum rapl_if_type type;
@@ -154,8 +193,8 @@ struct rapl_if_priv {
int limits[RAPL_DOMAIN_MAX];
int (*read_raw)(int id, struct reg_action *ra, bool pmu_ctx);
int (*write_raw)(int id, struct reg_action *ra);
- void *defaults;
- void *rpi;
+ const struct rapl_defaults *defaults;
+ struct rapl_primitive_info *rpi;
};
#ifdef CONFIG_PERF_EVENTS
@@ -211,6 +250,9 @@ void rapl_remove_package_cpuslocked(struct rapl_package *rp);
struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu);
struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu);
void rapl_remove_package(struct rapl_package *rp);
+int rapl_default_check_unit(struct rapl_domain *rd);
+void rapl_default_set_floor_freq(struct rapl_domain *rd, bool mode);
+u64 rapl_default_compute_time_window(struct rapl_domain *rd, u64 value, bool to_raw);
#ifdef CONFIG_PERF_EVENTS
int rapl_package_add_pmu(struct rapl_package *rp);
diff --git a/include/linux/powercap.h b/include/linux/powercap.h
index 3d557bbcd2c7..603419db924c 100644
--- a/include/linux/powercap.h
+++ b/include/linux/powercap.h
@@ -238,7 +238,7 @@ static inline void *powercap_get_zone_data(struct powercap_zone *power_zone)
* Advantage of this parameter is that client can embed
* this data in its data structures and allocate in a
* single call, preventing multiple allocations.
-* @control_type_name: The Name of this control_type, which will be shown
+* @name: The Name of this control_type, which will be shown
* in the sysfs Interface.
* @ops: Callbacks for control type. This parameter is optional.
*
@@ -277,7 +277,7 @@ int powercap_unregister_control_type(struct powercap_control_type *instance);
* @name: A name for this zone.
* @parent: A pointer to the parent power zone instance if any or NULL
* @ops: Pointer to zone operation callback structure.
-* @no_constraints: Number of constraints for this zone
+* @nr_constraints: Number of constraints for this zone
* @const_ops: Pointer to constraint callback structure
*
* Register a power zone under a given control type. A power zone must register
diff --git a/include/linux/units.h b/include/linux/units.h
index 80d57c50b9e3..c6d78988613a 100644
--- a/include/linux/units.h
+++ b/include/linux/units.h
@@ -57,6 +57,9 @@
#define MICROWATT_PER_MILLIWATT 1000UL
#define MICROWATT_PER_WATT 1000000UL
+#define MICROJOULE_PER_JOULE 1000000UL
+#define NANOJOULE_PER_JOULE 1000000000UL
+
#define BYTES_PER_KBIT (KILO / BITS_PER_BYTE)
#define BYTES_PER_MBIT (MEGA / BITS_PER_BYTE)
#define BYTES_PER_GBIT (GIGA / BITS_PER_BYTE)
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 4401cfe26e5c..be77f3556bd7 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -322,11 +322,14 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
error = snapshot_write_finalize(&data->handle);
if (error)
break;
- if (data->mode != O_WRONLY || !data->frozen ||
- !snapshot_image_loaded(&data->handle)) {
+ if (data->mode != O_WRONLY || !data->frozen) {
error = -EPERM;
break;
}
+ if (!snapshot_image_loaded(&data->handle)) {
+ error = -ENODATA;
+ break;
+ }
error = hibernation_restore(data->platform_support);
break;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 153232dd8276..ae9fd211cec1 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -461,6 +461,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
unsigned int flags)
{
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
+ struct sugov_policy *sg_policy = sg_cpu->sg_policy;
unsigned long prev_util = sg_cpu->util;
unsigned long max_cap;
@@ -482,10 +483,10 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
if (sugov_hold_freq(sg_cpu) && sg_cpu->util < prev_util)
sg_cpu->util = prev_util;
- cpufreq_driver_adjust_perf(sg_cpu->cpu, sg_cpu->bw_min,
+ cpufreq_driver_adjust_perf(sg_policy->policy, sg_cpu->bw_min,
sg_cpu->util, max_cap);
- sg_cpu->sg_policy->last_freq_update_time = time;
+ sg_policy->last_freq_update_time = time;
}
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs
index f5adee48d40c..d8d26870bea2 100644
--- a/rust/kernel/cpufreq.rs
+++ b/rust/kernel/cpufreq.rs
@@ -1257,18 +1257,17 @@ impl<T: Driver> Registration<T> {
/// # Safety
///
/// - This function may only be called from the cpufreq C infrastructure.
+ /// - `ptr` must be a valid, non-null pointer to a `cpufreq_policy` for the duration of this call.
unsafe extern "C" fn adjust_perf_callback(
- cpu: c_uint,
+ ptr: *mut bindings::cpufreq_policy,
min_perf: c_ulong,
target_perf: c_ulong,
capacity: c_ulong,
) {
- // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number.
- let cpu_id = unsafe { CpuId::from_u32_unchecked(cpu) };
-
- if let Ok(mut policy) = PolicyCpu::from_cpu(cpu_id) {
- T::adjust_perf(&mut policy, min_perf, target_perf, capacity);
- }
+ // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the
+ // lifetime of `policy`.
+ let policy = unsafe { Policy::from_raw_mut(ptr) };
+ T::adjust_perf(policy, min_perf, target_perf, capacity);
}
/// Driver's `get_intermediate` callback.
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index dbe104df339b..86d17b195e79 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -415,7 +415,7 @@
*/
#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */
#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */
-
+#define X86_FEATURE_CPPC_PERF_PRIO (17*32+ 2) /* CPPC Floor Perf support */
#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1
index 47fdd7218748..b0d69c9adcbd 100644
--- a/tools/power/cpupower/man/cpupower-frequency-info.1
+++ b/tools/power/cpupower/man/cpupower-frequency-info.1
@@ -32,6 +32,12 @@ Gets the currently used cpufreq policy.
\fB\-g\fR \fB\-\-governors\fR
Determines available cpufreq governors.
.TP
+\fB\-b\fR \fB\-\-boost\fR
+Gets the current boost state support.
+.TP
+\fB\-z\fR \fB\-\-epp\fR
+Gets the current EPP (energy performance preference).
+.TP
\fB\-r\fR \fB\-\-related\-cpus\fR
Determines which CPUs run at the same hardware frequency.
.TP
@@ -53,7 +59,7 @@ human\-readable output for the \-f, \-w, \-s and \-y parameters.
\fB\-n\fR \fB\-\-no-rounding\fR
Output frequencies and latencies without rounding off values.
.TP
-\fB\-c\fR \fB\-\-perf\fR
+\fB\-c\fR \fB\-\-performance\fR
Get performances and frequencies capabilities of CPPC, by reading it from hardware (only available on the hardware with CPPC).
.TP
.SH "REMARKS"
diff --git a/tools/power/cpupower/man/cpupower-idle-info.1 b/tools/power/cpupower/man/cpupower-idle-info.1
index 20b6345c53ad..b2f92aba5f5b 100644
--- a/tools/power/cpupower/man/cpupower-idle-info.1
+++ b/tools/power/cpupower/man/cpupower-idle-info.1
@@ -11,10 +11,10 @@ A tool which prints out per cpu idle information helpful to developers and inter
.SH "OPTIONS"
.LP
.TP
-\fB\-f\fR \fB\-\-silent\fR
+\fB\-s\fR \fB\-\-silent\fR
Only print a summary of all available C-states in the system.
.TP
-\fB\-e\fR \fB\-\-proc\fR
+\fB\-o\fR \fB\-\-proc\fR
deprecated.
Prints out idle information in old /proc/acpi/processor/*/power format. This
interface has been removed from the kernel for quite some time, do not let
diff --git a/tools/power/cpupower/man/cpupower-info.1 b/tools/power/cpupower/man/cpupower-info.1
index 340bcd0be7de..1f42d8c388a0 100644
--- a/tools/power/cpupower/man/cpupower-info.1
+++ b/tools/power/cpupower/man/cpupower-info.1
@@ -3,7 +3,7 @@
cpupower\-info \- Shows processor power related kernel or hardware configurations
.SH SYNOPSIS
.ft B
-.B cpupower info [ \-b ]
+.B cpupower info [\fIoptions\fP]
.SH DESCRIPTION
\fBcpupower info \fP shows kernel configurations or processor hardware
@@ -13,6 +13,13 @@ Some options are platform wide, some affect single cores. By default values
of core zero are displayed only. cpupower --cpu all cpuinfo will show the
settings of all cores, see cpupower(1) how to choose specific cores.
+.SH "OPTIONS"
+.LP
+.TP
+\fB\-b\fR \fB\-\-perf-bias\fR
+Gets the current performance bias value.
+
.SH "SEE ALSO"
Options are described in detail in:
diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c
index 5fe01e516817..5a242b491a9d 100644
--- a/tools/power/cpupower/utils/cpufreq-info.c
+++ b/tools/power/cpupower/utils/cpufreq-info.c
@@ -542,8 +542,6 @@ static struct option info_opts[] = {
int cmd_freq_info(int argc, char **argv)
{
- extern char *optarg;
- extern int optind, opterr, optopt;
int ret = 0, cont = 1;
unsigned int cpu = 0;
unsigned int human = 0;
diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c
index c5e60a39cfa6..06cd4b280132 100644
--- a/tools/power/cpupower/utils/cpufreq-set.c
+++ b/tools/power/cpupower/utils/cpufreq-set.c
@@ -195,8 +195,6 @@ static int do_one_cpu(unsigned int cpu, struct cpufreq_policy *new_pol,
int cmd_freq_set(int argc, char **argv)
{
- extern char *optarg;
- extern int optind, opterr, optopt;
int ret = 0, cont = 1;
int double_parm = 0, related = 0, policychange = 0;
unsigned long freq = 0;
diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c
index 81b4763a97d6..ccb37125bd37 100644
--- a/tools/power/cpupower/utils/cpuidle-info.c
+++ b/tools/power/cpupower/utils/cpuidle-info.c
@@ -139,8 +139,6 @@ static inline void cpuidle_exit(int fail)
int cmd_idle_info(int argc, char **argv)
{
- extern char *optarg;
- extern int optind, opterr, optopt;
int ret = 0, cont = 1, output_param = 0, verbose = 1;
unsigned int cpu = 0;
diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c
index a551d1d4ac51..703094f1343c 100644
--- a/tools/power/cpupower/utils/cpuidle-set.c
+++ b/tools/power/cpupower/utils/cpuidle-set.c
@@ -24,8 +24,6 @@ static struct option info_opts[] = {
int cmd_idle_set(int argc, char **argv)
{
- extern char *optarg;
- extern int optind, opterr, optopt;
int ret = 0, cont = 1, param = 0, disabled;
unsigned long long latency = 0, state_latency;
unsigned int cpu = 0, idlestate = 0, idlestates = 0;
diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c
index 18fd7751f509..79154d71e498 100644
--- a/tools/power/cpupower/utils/cpupower-info.c
+++ b/tools/power/cpupower/utils/cpupower-info.c
@@ -28,8 +28,6 @@ static void print_wrong_arg_exit(void)
int cmd_info(int argc, char **argv)
{
- extern char *optarg;
- extern int optind, opterr, optopt;
unsigned int cpu;
struct utsname uts;
diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c
index 550a942e72ce..c2176b9fa57d 100644
--- a/tools/power/cpupower/utils/cpupower-set.c
+++ b/tools/power/cpupower/utils/cpupower-set.c
@@ -33,8 +33,6 @@ static void print_wrong_arg_exit(void)
int cmd_set(int argc, char **argv)
{
- extern char *optarg;
- extern int optind, opterr, optopt;
unsigned int cpu;
struct utsname uts;