diff options
author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2024-09-05 14:04:20 +0300 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2024-09-05 14:04:20 +0300 |
commit | 287f97a1510b951f01124b7c3d783684cee6d11e (patch) | |
tree | fcfde23709244b0bddea92a72509aebf6d062c32 /drivers/cpufreq | |
parent | b02d2cf5b220872cd10afe610348b9ec41b9ac05 (diff) | |
parent | 929ebc93ccaa8d183a2ba9f1cf769d1bfb847cca (diff) | |
download | linux-287f97a1510b951f01124b7c3d783684cee6d11e.tar.xz |
Merge back cpufreq material for 6.12
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r-- | drivers/cpufreq/cpufreq.c | 27 | ||||
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 240 | ||||
-rw-r--r-- | drivers/cpufreq/maple-cpufreq.c | 1 | ||||
-rw-r--r-- | drivers/cpufreq/pasemi-cpufreq.c | 1 | ||||
-rw-r--r-- | drivers/cpufreq/pmac64-cpufreq.c | 1 | ||||
-rw-r--r-- | drivers/cpufreq/powernv-cpufreq.c | 1 | ||||
-rw-r--r-- | drivers/cpufreq/ppc_cbe_cpufreq.c | 1 |
7 files changed, 245 insertions, 27 deletions
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 04fc786dd2c0..f98c9438760c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -575,30 +575,11 @@ unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy) return policy->transition_delay_us; latency = policy->cpuinfo.transition_latency / NSEC_PER_USEC; - if (latency) { - unsigned int max_delay_us = 2 * MSEC_PER_SEC; + if (latency) + /* Give a 50% breathing room between updates */ + return latency + (latency >> 1); - /* - * If the platform already has high transition_latency, use it - * as-is. - */ - if (latency > max_delay_us) - return latency; - - /* - * For platforms that can change the frequency very fast (< 2 - * us), the above formula gives a decent transition delay. But - * for platforms where transition_latency is in milliseconds, it - * ends up giving unrealistic values. - * - * Cap the default transition delay to 2 ms, which seems to be - * a reasonable amount of time after which we should reevaluate - * the frequency. - */ - return min(latency * LATENCY_MULTIPLIER, max_delay_us); - } - - return LATENCY_MULTIPLIER; + return USEC_PER_MSEC; } EXPORT_SYMBOL_GPL(cpufreq_policy_transition_delay_us); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index c0278d023cfc..aaea9a39eced 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -16,6 +16,7 @@ #include <linux/tick.h> #include <linux/slab.h> #include <linux/sched/cpufreq.h> +#include <linux/sched/smt.h> #include <linux/list.h> #include <linux/cpu.h> #include <linux/cpufreq.h> @@ -215,6 +216,7 @@ struct global_params { * @hwp_req_cached: Cached value of the last HWP Request MSR * @hwp_cap_cached: Cached value of the last HWP Capabilities MSR * @last_io_update: Last time when IO wake flag was set + * @capacity_perf: Highest perf used for scale invariance * @sched_flags: Store scheduler flags for possible cross CPU update * @hwp_boost_min: Last HWP boosted min performance * @suspended: Whether or not the driver has been suspended. @@ -253,6 +255,7 @@ struct cpudata { u64 hwp_req_cached; u64 hwp_cap_cached; u64 last_io_update; + unsigned int capacity_perf; unsigned int sched_flags; u32 hwp_boost_min; bool suspended; @@ -295,6 +298,7 @@ static int hwp_mode_bdw __ro_after_init; static bool per_cpu_limits __ro_after_init; static bool hwp_forced __ro_after_init; static bool hwp_boost __read_mostly; +static bool hwp_is_hybrid; static struct cpufreq_driver *intel_pstate_driver __read_mostly; @@ -934,6 +938,139 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { NULL, }; +static struct cpudata *hybrid_max_perf_cpu __read_mostly; +/* + * Protects hybrid_max_perf_cpu, the capacity_perf fields in struct cpudata, + * and the x86 arch scale-invariance information from concurrent updates. + */ +static DEFINE_MUTEX(hybrid_capacity_lock); + +static void hybrid_set_cpu_capacity(struct cpudata *cpu) +{ + arch_set_cpu_capacity(cpu->cpu, cpu->capacity_perf, + hybrid_max_perf_cpu->capacity_perf, + cpu->capacity_perf, + cpu->pstate.max_pstate_physical); + + pr_debug("CPU%d: perf = %u, max. perf = %u, base perf = %d\n", cpu->cpu, + cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf, + cpu->pstate.max_pstate_physical); +} + +static void hybrid_clear_cpu_capacity(unsigned int cpunum) +{ + arch_set_cpu_capacity(cpunum, 1, 1, 1, 1); +} + +static void hybrid_get_capacity_perf(struct cpudata *cpu) +{ + if (READ_ONCE(global.no_turbo)) { + cpu->capacity_perf = cpu->pstate.max_pstate_physical; + return; + } + + cpu->capacity_perf = HWP_HIGHEST_PERF(READ_ONCE(cpu->hwp_cap_cached)); +} + +static void hybrid_set_capacity_of_cpus(void) +{ + int cpunum; + + for_each_online_cpu(cpunum) { + struct cpudata *cpu = all_cpu_data[cpunum]; + + if (cpu) + hybrid_set_cpu_capacity(cpu); + } +} + +static void hybrid_update_cpu_capacity_scaling(void) +{ + struct cpudata *max_perf_cpu = NULL; + unsigned int max_cap_perf = 0; + int cpunum; + + for_each_online_cpu(cpunum) { + struct cpudata *cpu = all_cpu_data[cpunum]; + + if (!cpu) + continue; + + /* + * During initialization, CPU performance at full capacity needs + * to be determined. + */ + if (!hybrid_max_perf_cpu) + hybrid_get_capacity_perf(cpu); + + /* + * If hybrid_max_perf_cpu is not NULL at this point, it is + * being replaced, so don't take it into account when looking + * for the new one. + */ + if (cpu == hybrid_max_perf_cpu) + continue; + + if (cpu->capacity_perf > max_cap_perf) { + max_cap_perf = cpu->capacity_perf; + max_perf_cpu = cpu; + } + } + + if (max_perf_cpu) { + hybrid_max_perf_cpu = max_perf_cpu; + hybrid_set_capacity_of_cpus(); + } else { + pr_info("Found no CPUs with nonzero maximum performance\n"); + /* Revert to the flat CPU capacity structure. */ + for_each_online_cpu(cpunum) + hybrid_clear_cpu_capacity(cpunum); + } +} + +static void __hybrid_init_cpu_capacity_scaling(void) +{ + hybrid_max_perf_cpu = NULL; + hybrid_update_cpu_capacity_scaling(); +} + +static void hybrid_init_cpu_capacity_scaling(void) +{ + bool disable_itmt = false; + + mutex_lock(&hybrid_capacity_lock); + + /* + * If hybrid_max_perf_cpu is set at this point, the hybrid CPU capacity + * scaling has been enabled already and the driver is just changing the + * operation mode. + */ + if (hybrid_max_perf_cpu) { + __hybrid_init_cpu_capacity_scaling(); + goto unlock; + } + + /* + * On hybrid systems, use asym capacity instead of ITMT, but because + * the capacity of SMT threads is not deterministic even approximately, + * do not do that when SMT is in use. + */ + if (hwp_is_hybrid && !sched_smt_active() && arch_enable_hybrid_capacity_scale()) { + __hybrid_init_cpu_capacity_scaling(); + disable_itmt = true; + } + +unlock: + mutex_unlock(&hybrid_capacity_lock); + + /* + * Disabling ITMT causes sched domains to be rebuilt to disable asym + * packing and enable asym capacity. + */ + if (disable_itmt) + sched_clear_itmt_support(); +} + static void __intel_pstate_get_hwp_cap(struct cpudata *cpu) { u64 cap; @@ -962,6 +1099,43 @@ static void intel_pstate_get_hwp_cap(struct cpudata *cpu) } } +static void hybrid_update_capacity(struct cpudata *cpu) +{ + unsigned int max_cap_perf; + + mutex_lock(&hybrid_capacity_lock); + + if (!hybrid_max_perf_cpu) + goto unlock; + + /* + * The maximum performance of the CPU may have changed, but assume + * that the performance of the other CPUs has not changed. + */ + max_cap_perf = hybrid_max_perf_cpu->capacity_perf; + + intel_pstate_get_hwp_cap(cpu); + + hybrid_get_capacity_perf(cpu); + /* Should hybrid_max_perf_cpu be replaced by this CPU? */ + if (cpu->capacity_perf > max_cap_perf) { + hybrid_max_perf_cpu = cpu; + hybrid_set_capacity_of_cpus(); + goto unlock; + } + + /* If this CPU is hybrid_max_perf_cpu, should it be replaced? */ + if (cpu == hybrid_max_perf_cpu && cpu->capacity_perf < max_cap_perf) { + hybrid_update_cpu_capacity_scaling(); + goto unlock; + } + + hybrid_set_cpu_capacity(cpu); + +unlock: + mutex_unlock(&hybrid_capacity_lock); +} + static void intel_pstate_hwp_set(unsigned int cpu) { struct cpudata *cpu_data = all_cpu_data[cpu]; @@ -1070,6 +1244,22 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu) value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE); wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); + + mutex_lock(&hybrid_capacity_lock); + + if (!hybrid_max_perf_cpu) { + mutex_unlock(&hybrid_capacity_lock); + + return; + } + + if (hybrid_max_perf_cpu == cpu) + hybrid_update_cpu_capacity_scaling(); + + mutex_unlock(&hybrid_capacity_lock); + + /* Reset the capacity of the CPU going offline to the initial value. */ + hybrid_clear_cpu_capacity(cpu->cpu); } #define POWER_CTL_EE_ENABLE 1 @@ -1165,21 +1355,46 @@ static void __intel_pstate_update_max_freq(struct cpudata *cpudata, static void intel_pstate_update_limits(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); + struct cpudata *cpudata; if (!policy) return; - __intel_pstate_update_max_freq(all_cpu_data[cpu], policy); + cpudata = all_cpu_data[cpu]; + + __intel_pstate_update_max_freq(cpudata, policy); + + /* Prevent the driver from being unregistered now. */ + mutex_lock(&intel_pstate_driver_lock); cpufreq_cpu_release(policy); + + hybrid_update_capacity(cpudata); + + mutex_unlock(&intel_pstate_driver_lock); } static void intel_pstate_update_limits_for_all(void) { int cpu; - for_each_possible_cpu(cpu) - intel_pstate_update_limits(cpu); + for_each_possible_cpu(cpu) { + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); + + if (!policy) + continue; + + __intel_pstate_update_max_freq(all_cpu_data[cpu], policy); + + cpufreq_cpu_release(policy); + } + + mutex_lock(&hybrid_capacity_lock); + + if (hybrid_max_perf_cpu) + __hybrid_init_cpu_capacity_scaling(); + + mutex_unlock(&hybrid_capacity_lock); } /************************** sysfs begin ************************/ @@ -1618,6 +1833,13 @@ static void intel_pstate_notify_work(struct work_struct *work) __intel_pstate_update_max_freq(cpudata, policy); cpufreq_cpu_release(policy); + + /* + * The driver will not be unregistered while this function is + * running, so update the capacity without acquiring the driver + * lock. + */ + hybrid_update_capacity(cpudata); } wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0); @@ -2034,8 +2256,10 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) if (pstate_funcs.get_cpu_scaling) { cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu); - if (cpu->pstate.scaling != perf_ctl_scaling) + if (cpu->pstate.scaling != perf_ctl_scaling) { intel_pstate_hybrid_hwp_adjust(cpu); + hwp_is_hybrid = true; + } } else { cpu->pstate.scaling = perf_ctl_scaling; } @@ -2425,6 +2649,10 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { X86_MATCH(INTEL_ICELAKE_X, core_funcs), X86_MATCH(INTEL_SAPPHIRERAPIDS_X, core_funcs), X86_MATCH(INTEL_EMERALDRAPIDS_X, core_funcs), + X86_MATCH(INTEL_GRANITERAPIDS_D, core_funcs), + X86_MATCH(INTEL_GRANITERAPIDS_X, core_funcs), + X86_MATCH(INTEL_ATOM_CRESTMONT, core_funcs), + X86_MATCH(INTEL_ATOM_CRESTMONT_X, core_funcs), {} }; #endif @@ -2703,6 +2931,8 @@ static int intel_pstate_cpu_online(struct cpufreq_policy *policy) */ intel_pstate_hwp_reenable(cpu); cpu->suspended = false; + + hybrid_update_capacity(cpu); } return 0; @@ -3143,6 +3373,8 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver) global.min_perf_pct = min_perf_pct_min(); + hybrid_init_cpu_capacity_scaling(); + return 0; } diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c index f9306410a07f..690da85c4865 100644 --- a/drivers/cpufreq/maple-cpufreq.c +++ b/drivers/cpufreq/maple-cpufreq.c @@ -238,4 +238,5 @@ bail_noprops: module_init(maple_cpufreq_init); +MODULE_DESCRIPTION("cpufreq driver for Maple 970FX/970MP boards"); MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c index ee925b53b6b9..5fc9cb480516 100644 --- a/drivers/cpufreq/pasemi-cpufreq.c +++ b/drivers/cpufreq/pasemi-cpufreq.c @@ -269,5 +269,6 @@ static void __exit pas_cpufreq_exit(void) module_init(pas_cpufreq_init); module_exit(pas_cpufreq_exit); +MODULE_DESCRIPTION("cpufreq driver for PA Semi PWRficient"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Egor Martovetsky <egor@pasemi.com>, Olof Johansson <olof@lixom.net>"); diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c index 2cd2b06849a2..9d3fe36075f8 100644 --- a/drivers/cpufreq/pmac64-cpufreq.c +++ b/drivers/cpufreq/pmac64-cpufreq.c @@ -671,4 +671,5 @@ static int __init g5_cpufreq_init(void) module_init(g5_cpufreq_init); +MODULE_DESCRIPTION("cpufreq driver for SMU & 970FX based G5 Macs"); MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 50c62929f7ca..bc55723b4d87 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -1160,5 +1160,6 @@ static void __exit powernv_cpufreq_exit(void) } module_exit(powernv_cpufreq_exit); +MODULE_DESCRIPTION("cpufreq driver for IBM/OpenPOWER powernv systems"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>"); diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c index 5ee4c7bfdcc5..98595b3ea13f 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq.c @@ -168,5 +168,6 @@ static void __exit cbe_cpufreq_exit(void) module_init(cbe_cpufreq_init); module_exit(cbe_cpufreq_exit); +MODULE_DESCRIPTION("cpufreq driver for Cell BE processors"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>"); |