summaryrefslogtreecommitdiff
path: root/drivers/cpufreq
diff options
context:
space:
mode:
authorRafael J. Wysocki <rafael.j.wysocki@intel.com>2024-09-05 14:04:20 +0300
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2024-09-05 14:04:20 +0300
commit287f97a1510b951f01124b7c3d783684cee6d11e (patch)
treefcfde23709244b0bddea92a72509aebf6d062c32 /drivers/cpufreq
parentb02d2cf5b220872cd10afe610348b9ec41b9ac05 (diff)
parent929ebc93ccaa8d183a2ba9f1cf769d1bfb847cca (diff)
downloadlinux-287f97a1510b951f01124b7c3d783684cee6d11e.tar.xz
Merge back cpufreq material for 6.12
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r--drivers/cpufreq/cpufreq.c27
-rw-r--r--drivers/cpufreq/intel_pstate.c240
-rw-r--r--drivers/cpufreq/maple-cpufreq.c1
-rw-r--r--drivers/cpufreq/pasemi-cpufreq.c1
-rw-r--r--drivers/cpufreq/pmac64-cpufreq.c1
-rw-r--r--drivers/cpufreq/powernv-cpufreq.c1
-rw-r--r--drivers/cpufreq/ppc_cbe_cpufreq.c1
7 files changed, 245 insertions, 27 deletions
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 04fc786dd2c0..f98c9438760c 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -575,30 +575,11 @@ unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy)
return policy->transition_delay_us;
latency = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
- if (latency) {
- unsigned int max_delay_us = 2 * MSEC_PER_SEC;
+ if (latency)
+ /* Give a 50% breathing room between updates */
+ return latency + (latency >> 1);
- /*
- * If the platform already has high transition_latency, use it
- * as-is.
- */
- if (latency > max_delay_us)
- return latency;
-
- /*
- * For platforms that can change the frequency very fast (< 2
- * us), the above formula gives a decent transition delay. But
- * for platforms where transition_latency is in milliseconds, it
- * ends up giving unrealistic values.
- *
- * Cap the default transition delay to 2 ms, which seems to be
- * a reasonable amount of time after which we should reevaluate
- * the frequency.
- */
- return min(latency * LATENCY_MULTIPLIER, max_delay_us);
- }
-
- return LATENCY_MULTIPLIER;
+ return USEC_PER_MSEC;
}
EXPORT_SYMBOL_GPL(cpufreq_policy_transition_delay_us);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index c0278d023cfc..aaea9a39eced 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -16,6 +16,7 @@
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/sched/cpufreq.h>
+#include <linux/sched/smt.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
@@ -215,6 +216,7 @@ struct global_params {
* @hwp_req_cached: Cached value of the last HWP Request MSR
* @hwp_cap_cached: Cached value of the last HWP Capabilities MSR
* @last_io_update: Last time when IO wake flag was set
+ * @capacity_perf: Highest perf used for scale invariance
* @sched_flags: Store scheduler flags for possible cross CPU update
* @hwp_boost_min: Last HWP boosted min performance
* @suspended: Whether or not the driver has been suspended.
@@ -253,6 +255,7 @@ struct cpudata {
u64 hwp_req_cached;
u64 hwp_cap_cached;
u64 last_io_update;
+ unsigned int capacity_perf;
unsigned int sched_flags;
u32 hwp_boost_min;
bool suspended;
@@ -295,6 +298,7 @@ static int hwp_mode_bdw __ro_after_init;
static bool per_cpu_limits __ro_after_init;
static bool hwp_forced __ro_after_init;
static bool hwp_boost __read_mostly;
+static bool hwp_is_hybrid;
static struct cpufreq_driver *intel_pstate_driver __read_mostly;
@@ -934,6 +938,139 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
NULL,
};
+static struct cpudata *hybrid_max_perf_cpu __read_mostly;
+/*
+ * Protects hybrid_max_perf_cpu, the capacity_perf fields in struct cpudata,
+ * and the x86 arch scale-invariance information from concurrent updates.
+ */
+static DEFINE_MUTEX(hybrid_capacity_lock);
+
+static void hybrid_set_cpu_capacity(struct cpudata *cpu)
+{
+ arch_set_cpu_capacity(cpu->cpu, cpu->capacity_perf,
+ hybrid_max_perf_cpu->capacity_perf,
+ cpu->capacity_perf,
+ cpu->pstate.max_pstate_physical);
+
+ pr_debug("CPU%d: perf = %u, max. perf = %u, base perf = %d\n", cpu->cpu,
+ cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf,
+ cpu->pstate.max_pstate_physical);
+}
+
+static void hybrid_clear_cpu_capacity(unsigned int cpunum)
+{
+ arch_set_cpu_capacity(cpunum, 1, 1, 1, 1);
+}
+
+static void hybrid_get_capacity_perf(struct cpudata *cpu)
+{
+ if (READ_ONCE(global.no_turbo)) {
+ cpu->capacity_perf = cpu->pstate.max_pstate_physical;
+ return;
+ }
+
+ cpu->capacity_perf = HWP_HIGHEST_PERF(READ_ONCE(cpu->hwp_cap_cached));
+}
+
+static void hybrid_set_capacity_of_cpus(void)
+{
+ int cpunum;
+
+ for_each_online_cpu(cpunum) {
+ struct cpudata *cpu = all_cpu_data[cpunum];
+
+ if (cpu)
+ hybrid_set_cpu_capacity(cpu);
+ }
+}
+
+static void hybrid_update_cpu_capacity_scaling(void)
+{
+ struct cpudata *max_perf_cpu = NULL;
+ unsigned int max_cap_perf = 0;
+ int cpunum;
+
+ for_each_online_cpu(cpunum) {
+ struct cpudata *cpu = all_cpu_data[cpunum];
+
+ if (!cpu)
+ continue;
+
+ /*
+ * During initialization, CPU performance at full capacity needs
+ * to be determined.
+ */
+ if (!hybrid_max_perf_cpu)
+ hybrid_get_capacity_perf(cpu);
+
+ /*
+ * If hybrid_max_perf_cpu is not NULL at this point, it is
+ * being replaced, so don't take it into account when looking
+ * for the new one.
+ */
+ if (cpu == hybrid_max_perf_cpu)
+ continue;
+
+ if (cpu->capacity_perf > max_cap_perf) {
+ max_cap_perf = cpu->capacity_perf;
+ max_perf_cpu = cpu;
+ }
+ }
+
+ if (max_perf_cpu) {
+ hybrid_max_perf_cpu = max_perf_cpu;
+ hybrid_set_capacity_of_cpus();
+ } else {
+ pr_info("Found no CPUs with nonzero maximum performance\n");
+ /* Revert to the flat CPU capacity structure. */
+ for_each_online_cpu(cpunum)
+ hybrid_clear_cpu_capacity(cpunum);
+ }
+}
+
+static void __hybrid_init_cpu_capacity_scaling(void)
+{
+ hybrid_max_perf_cpu = NULL;
+ hybrid_update_cpu_capacity_scaling();
+}
+
+static void hybrid_init_cpu_capacity_scaling(void)
+{
+ bool disable_itmt = false;
+
+ mutex_lock(&hybrid_capacity_lock);
+
+ /*
+ * If hybrid_max_perf_cpu is set at this point, the hybrid CPU capacity
+ * scaling has been enabled already and the driver is just changing the
+ * operation mode.
+ */
+ if (hybrid_max_perf_cpu) {
+ __hybrid_init_cpu_capacity_scaling();
+ goto unlock;
+ }
+
+ /*
+ * On hybrid systems, use asym capacity instead of ITMT, but because
+ * the capacity of SMT threads is not deterministic even approximately,
+ * do not do that when SMT is in use.
+ */
+ if (hwp_is_hybrid && !sched_smt_active() && arch_enable_hybrid_capacity_scale()) {
+ __hybrid_init_cpu_capacity_scaling();
+ disable_itmt = true;
+ }
+
+unlock:
+ mutex_unlock(&hybrid_capacity_lock);
+
+ /*
+ * Disabling ITMT causes sched domains to be rebuilt to disable asym
+ * packing and enable asym capacity.
+ */
+ if (disable_itmt)
+ sched_clear_itmt_support();
+}
+
static void __intel_pstate_get_hwp_cap(struct cpudata *cpu)
{
u64 cap;
@@ -962,6 +1099,43 @@ static void intel_pstate_get_hwp_cap(struct cpudata *cpu)
}
}
+static void hybrid_update_capacity(struct cpudata *cpu)
+{
+ unsigned int max_cap_perf;
+
+ mutex_lock(&hybrid_capacity_lock);
+
+ if (!hybrid_max_perf_cpu)
+ goto unlock;
+
+ /*
+ * The maximum performance of the CPU may have changed, but assume
+ * that the performance of the other CPUs has not changed.
+ */
+ max_cap_perf = hybrid_max_perf_cpu->capacity_perf;
+
+ intel_pstate_get_hwp_cap(cpu);
+
+ hybrid_get_capacity_perf(cpu);
+ /* Should hybrid_max_perf_cpu be replaced by this CPU? */
+ if (cpu->capacity_perf > max_cap_perf) {
+ hybrid_max_perf_cpu = cpu;
+ hybrid_set_capacity_of_cpus();
+ goto unlock;
+ }
+
+ /* If this CPU is hybrid_max_perf_cpu, should it be replaced? */
+ if (cpu == hybrid_max_perf_cpu && cpu->capacity_perf < max_cap_perf) {
+ hybrid_update_cpu_capacity_scaling();
+ goto unlock;
+ }
+
+ hybrid_set_cpu_capacity(cpu);
+
+unlock:
+ mutex_unlock(&hybrid_capacity_lock);
+}
+
static void intel_pstate_hwp_set(unsigned int cpu)
{
struct cpudata *cpu_data = all_cpu_data[cpu];
@@ -1070,6 +1244,22 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu)
value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);
wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
+
+ mutex_lock(&hybrid_capacity_lock);
+
+ if (!hybrid_max_perf_cpu) {
+ mutex_unlock(&hybrid_capacity_lock);
+
+ return;
+ }
+
+ if (hybrid_max_perf_cpu == cpu)
+ hybrid_update_cpu_capacity_scaling();
+
+ mutex_unlock(&hybrid_capacity_lock);
+
+ /* Reset the capacity of the CPU going offline to the initial value. */
+ hybrid_clear_cpu_capacity(cpu->cpu);
}
#define POWER_CTL_EE_ENABLE 1
@@ -1165,21 +1355,46 @@ static void __intel_pstate_update_max_freq(struct cpudata *cpudata,
static void intel_pstate_update_limits(unsigned int cpu)
{
struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
+ struct cpudata *cpudata;
if (!policy)
return;
- __intel_pstate_update_max_freq(all_cpu_data[cpu], policy);
+ cpudata = all_cpu_data[cpu];
+
+ __intel_pstate_update_max_freq(cpudata, policy);
+
+ /* Prevent the driver from being unregistered now. */
+ mutex_lock(&intel_pstate_driver_lock);
cpufreq_cpu_release(policy);
+
+ hybrid_update_capacity(cpudata);
+
+ mutex_unlock(&intel_pstate_driver_lock);
}
static void intel_pstate_update_limits_for_all(void)
{
int cpu;
- for_each_possible_cpu(cpu)
- intel_pstate_update_limits(cpu);
+ for_each_possible_cpu(cpu) {
+ struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
+
+ if (!policy)
+ continue;
+
+ __intel_pstate_update_max_freq(all_cpu_data[cpu], policy);
+
+ cpufreq_cpu_release(policy);
+ }
+
+ mutex_lock(&hybrid_capacity_lock);
+
+ if (hybrid_max_perf_cpu)
+ __hybrid_init_cpu_capacity_scaling();
+
+ mutex_unlock(&hybrid_capacity_lock);
}
/************************** sysfs begin ************************/
@@ -1618,6 +1833,13 @@ static void intel_pstate_notify_work(struct work_struct *work)
__intel_pstate_update_max_freq(cpudata, policy);
cpufreq_cpu_release(policy);
+
+ /*
+ * The driver will not be unregistered while this function is
+ * running, so update the capacity without acquiring the driver
+ * lock.
+ */
+ hybrid_update_capacity(cpudata);
}
wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
@@ -2034,8 +2256,10 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
if (pstate_funcs.get_cpu_scaling) {
cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu);
- if (cpu->pstate.scaling != perf_ctl_scaling)
+ if (cpu->pstate.scaling != perf_ctl_scaling) {
intel_pstate_hybrid_hwp_adjust(cpu);
+ hwp_is_hybrid = true;
+ }
} else {
cpu->pstate.scaling = perf_ctl_scaling;
}
@@ -2425,6 +2649,10 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
X86_MATCH(INTEL_ICELAKE_X, core_funcs),
X86_MATCH(INTEL_SAPPHIRERAPIDS_X, core_funcs),
X86_MATCH(INTEL_EMERALDRAPIDS_X, core_funcs),
+ X86_MATCH(INTEL_GRANITERAPIDS_D, core_funcs),
+ X86_MATCH(INTEL_GRANITERAPIDS_X, core_funcs),
+ X86_MATCH(INTEL_ATOM_CRESTMONT, core_funcs),
+ X86_MATCH(INTEL_ATOM_CRESTMONT_X, core_funcs),
{}
};
#endif
@@ -2703,6 +2931,8 @@ static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
*/
intel_pstate_hwp_reenable(cpu);
cpu->suspended = false;
+
+ hybrid_update_capacity(cpu);
}
return 0;
@@ -3143,6 +3373,8 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver)
global.min_perf_pct = min_perf_pct_min();
+ hybrid_init_cpu_capacity_scaling();
+
return 0;
}
diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c
index f9306410a07f..690da85c4865 100644
--- a/drivers/cpufreq/maple-cpufreq.c
+++ b/drivers/cpufreq/maple-cpufreq.c
@@ -238,4 +238,5 @@ bail_noprops:
module_init(maple_cpufreq_init);
+MODULE_DESCRIPTION("cpufreq driver for Maple 970FX/970MP boards");
MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c
index ee925b53b6b9..5fc9cb480516 100644
--- a/drivers/cpufreq/pasemi-cpufreq.c
+++ b/drivers/cpufreq/pasemi-cpufreq.c
@@ -269,5 +269,6 @@ static void __exit pas_cpufreq_exit(void)
module_init(pas_cpufreq_init);
module_exit(pas_cpufreq_exit);
+MODULE_DESCRIPTION("cpufreq driver for PA Semi PWRficient");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Egor Martovetsky <egor@pasemi.com>, Olof Johansson <olof@lixom.net>");
diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c
index 2cd2b06849a2..9d3fe36075f8 100644
--- a/drivers/cpufreq/pmac64-cpufreq.c
+++ b/drivers/cpufreq/pmac64-cpufreq.c
@@ -671,4 +671,5 @@ static int __init g5_cpufreq_init(void)
module_init(g5_cpufreq_init);
+MODULE_DESCRIPTION("cpufreq driver for SMU & 970FX based G5 Macs");
MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 50c62929f7ca..bc55723b4d87 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -1160,5 +1160,6 @@ static void __exit powernv_cpufreq_exit(void)
}
module_exit(powernv_cpufreq_exit);
+MODULE_DESCRIPTION("cpufreq driver for IBM/OpenPOWER powernv systems");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>");
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c
index 5ee4c7bfdcc5..98595b3ea13f 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq.c
@@ -168,5 +168,6 @@ static void __exit cbe_cpufreq_exit(void)
module_init(cbe_cpufreq_init);
module_exit(cbe_cpufreq_exit);
+MODULE_DESCRIPTION("cpufreq driver for Cell BE processors");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");